Slightly revised string types in dcb-iso9660.

2026-02-06 01:20:11 +00:00 · 2021-01-30 21:13:46 +00:00 · 2021-01-30 21:13:46 +00:00 · edcd8b45e1
commit edcd8b45e1
parent 66f981d63f
5 changed files with 111 additions and 57 deletions
--- a/dcb-iso9660/src/lib.rs
+++ b/dcb-iso9660/src/lib.rs
@ -8,7 +8,8 @@
 	min_const_generics,
 	array_methods,
 	array_value_iter,
-	external_doc
+	external_doc,
+	str_internals
 )]
 // Lints
 #![warn(clippy::restriction, clippy::pedantic, clippy::nursery)]
--- a/dcb-iso9660/src/string.rs
+++ b/dcb-iso9660/src/string.rs
@ -12,54 +12,64 @@ pub use error::{InvalidCharError, ValidateFileAlphabetError};
 pub use owned::StringAlphabet;
 pub use slice::StrAlphabet;

-
-/// An alphabet for a string
+/// A string alphabet
+///
+/// This type serves to create marker types for strings that may only
+/// contain a subset of characters, or must have them in a certain order.
+///
+/// This is accomplished by the [`validate`](Alphabet::validate) method,
+/// which simply checks if a byte slice is valid for this alphabet.
 pub trait Alphabet {
 	/// Error type
 	type Error;

-	/// Returns if `bytes` are valid for this alphabet
-	fn validate(bytes: &[u8]) -> Result<(), Self::Error>;
+	/// Validates `bytes` for a string of this alphabet and returns
+	/// it, possibly without its terminator.
+	fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error>;
 }

+/// Implements the [`Alphabet`] trait from a list of valid characters
+/// and a possible terminator
+pub trait OnlyValidCharsAlphabet {
+	/// All valid characters
+	fn valid_chars() -> &'static [u8];

-/// Implements the [`Alphabet`] trait from an alphabet
-pub trait ImplFromAlphabet {
-	/// The alphabet
-	fn alphabet() -> &'static [u8];
-
-	/// String terminator
+	/// Terminator for the string.
 	fn terminator() -> u8;
 }

-impl<A: ImplFromAlphabet> Alphabet for A {
+impl<A: OnlyValidCharsAlphabet> Alphabet for A {
 	type Error = InvalidCharError;

-	fn validate(bytes: &[u8]) -> Result<(), Self::Error> {
-		// If any are invalid, return Err
+	fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error> {
+		// Go through all bytes and validate them until end of
+		// string or terminator.
 		for (pos, &byte) in bytes.iter().enumerate() {
 			// If we found the terminator, terminate
-			// TODO: Maybe make sure everything after the `;` is valid too
 			if byte == Self::terminator() {
-				break;
+				return Ok(&bytes[..pos]);
 			}

 			// Else make sure it contains this byte
-			if !Self::alphabet().contains(&byte) {
+			if !Self::valid_chars().contains(&byte) {
 				return Err(InvalidCharError { byte, pos });
 			}
 		}

-		Ok(())
+		// If we got here, there was no terminator, which is still a valid string.
+		Ok(bytes)
 	}
 }

-/// A-type alphabet
+/// A-character alphabet
+///
+/// The valid characters are `A..Z`, `0..9`, `_`, `!`, `"`, `%`, `&`, `'`, `(`, `)`, `*`, `+`,
+/// `,`, `-`, `.`, `/`, `:`, `;`, `<`, `=`, `>` and `?`.
 #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
 pub struct AlphabetA;

-impl ImplFromAlphabet for AlphabetA {
-	fn alphabet() -> &'static [u8] {
+impl OnlyValidCharsAlphabet for AlphabetA {
+	fn valid_chars() -> &'static [u8] {
 		&[
 			b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
 			b'X', b'Y', b'Z', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'_', b'!', b'"', b'%', b'&', b'\'', b'(', b')', b'*',
@ -72,12 +82,14 @@ impl ImplFromAlphabet for AlphabetA {
 	}
 }

-/// D-type alphabet
+/// D-character alphabet
+///
+/// The valid characters are `A..Z`, `0..9` and `_`.
 #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
 pub struct AlphabetD;

-impl ImplFromAlphabet for AlphabetD {
-	fn alphabet() -> &'static [u8] {
+impl OnlyValidCharsAlphabet for AlphabetD {
+	fn valid_chars() -> &'static [u8] {
 		&[
 			b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
 			b'X', b'Y', b'Z', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'_',
@ -90,16 +102,27 @@ impl ImplFromAlphabet for AlphabetD {
 }

 /// File alphabet
+///
+/// The file alphabet dictates the format for file names,
+/// which must follow `<name>.<extension>;<version>`, where
+/// `<name>` and `<extension>` are D-character strings,
+/// and `<version>` only contains numeric decimal characters.
+///
+/// There are 3 exceptions to this: the root directory name, the
+/// current directory name and the parent directory name, which
+/// are `"\0"`, `""` and `"\x01"`, respectively.
 #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
 pub struct FileAlphabet;

 impl Alphabet for FileAlphabet {
 	type Error = ValidateFileAlphabetError;

-	fn validate(bytes: &[u8]) -> Result<(), Self::Error> {
+	fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error> {
 		// Special cases for the root, `.` and `..`, respectively
+		// TODO: Remove exceptions from this string and make directories store the
+		//       current and parent separately.
 		if let [b'\0'] | [] | [b'\x01'] = bytes {
-			return Ok(());
+			return Ok(bytes);
 		}

 		// Separate into `<name>.<extension>;<version>`
@ -120,9 +143,9 @@ impl Alphabet for FileAlphabet {
 		// Validate all separately
 		AlphabetD::validate(name).map_err(ValidateFileAlphabetError::InvalidNameChar)?;
 		AlphabetD::validate(extension).map_err(ValidateFileAlphabetError::InvalidExtensionChar)?;
-		match version {
-			[b'0'..=b'9'] => Ok(()),
-			_ => Err(ValidateFileAlphabetError::InvalidVersion),
+		match version.iter().all(|ch| (b'0'..=b'9').contains(ch)) {
+			true => Ok(bytes),
+			false => Err(ValidateFileAlphabetError::InvalidVersion),
 		}
 	}
 }
@ -136,6 +159,7 @@ pub type StringA = StringAlphabet<AlphabetA>;
 /// A-type string slice
 pub type StrA = StrAlphabet<AlphabetA>;

+
 /// D-type string array
 pub type StrArrD<const N: usize> = StrArrAlphabet<AlphabetD, N>;

@ -145,6 +169,7 @@ pub type StringD = StringAlphabet<AlphabetD>;
 /// D-type string slice
 pub type StrD = StrAlphabet<AlphabetD>;

+
 /// File string array
 pub type FileStrArr<const N: usize> = StrArrAlphabet<FileAlphabet, N>;

--- a/dcb-iso9660/src/string/arr.rs
+++ b/dcb-iso9660/src/string/arr.rs
@ -4,15 +4,27 @@
 use super::{Alphabet, StrAlphabet};
 use std::{fmt, marker::PhantomData, ops::Deref};

-
-/// A alphabetic specific string array
+/// An alphabetic string array
 #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
-pub struct StrArrAlphabet<A: Alphabet, const N: usize>(PhantomData<A>, [u8; N]);
+pub struct StrArrAlphabet<A: Alphabet, const N: usize> {
+	/// Phantom
+	phantom: PhantomData<A>,
+
+	/// Bytes
+	bytes: [u8; N],
+
+	/// Length
+	len: usize,
+}

 impl<A: Alphabet, const N: usize> StrArrAlphabet<A, N> {
 	/// Parses a string from bytes
 	pub fn from_bytes(bytes: &[u8; N]) -> Result<Self, A::Error> {
-		A::validate(bytes).map(|()| Self(PhantomData, *bytes))
+		A::validate(bytes).map(|valid_bytes| Self {
+			phantom: PhantomData,
+			bytes:   *bytes,
+			len:     valid_bytes.len(),
+		})
 	}
 }

@ -20,7 +32,7 @@ impl<A: Alphabet, const N: usize> Deref for StrArrAlphabet<A, N> {
 	type Target = StrAlphabet<A>;

 	fn deref(&self) -> &Self::Target {
-		ref_cast::RefCast::ref_cast(self.1.as_slice())
+		ref_cast::RefCast::ref_cast(&self.bytes.as_slice()[..self.len])
 	}
 }

--- a/dcb-iso9660/src/string/owned.rs
+++ b/dcb-iso9660/src/string/owned.rs
@ -4,14 +4,23 @@
 use super::{Alphabet, StrAlphabet};
 use std::{fmt, marker::PhantomData, ops::Deref};

-/// A alphabetic specific string
+/// An alphabetic owned string
 #[derive(PartialEq, Eq, PartialOrd, Ord, Clone)]
-pub struct StringAlphabet<A: Alphabet>(PhantomData<A>, Vec<u8>);
+pub struct StringAlphabet<A: Alphabet> {
+	/// Phantom
+	phantom: PhantomData<A>,
+
+	/// Bytes
+	bytes: Vec<u8>,
+}

 impl<A: Alphabet> StringAlphabet<A> {
 	/// Parses a string from bytes
 	pub fn from_bytes(bytes: &[u8]) -> Result<Self, A::Error> {
-		A::validate(bytes).map(|()| Self(PhantomData, bytes.to_vec()))
+		A::validate(bytes).map(|bytes| Self {
+			phantom: PhantomData,
+			bytes:   bytes.to_vec(),
+		})
 	}
 }

@ -19,7 +28,7 @@ impl<A: Alphabet> Deref for StringAlphabet<A> {
 	type Target = StrAlphabet<A>;

 	fn deref(&self) -> &Self::Target {
-		ref_cast::RefCast::ref_cast(self.1.as_slice())
+		ref_cast::RefCast::ref_cast(self.bytes.as_slice())
 	}
 }

--- a/dcb-iso9660/src/string/slice.rs
+++ b/dcb-iso9660/src/string/slice.rs
@ -2,19 +2,40 @@

 // Imports
 use super::Alphabet;
+use core::str::lossy::Utf8Lossy;
+use ref_cast::RefCast;
 use std::{fmt, marker::PhantomData};

-/// A alphabetic specific string slice
+/// An alphabetic string slice
+// TODO: Do not expose `ref_cast` to the outside, as it breaks
+//       this string's encapsulation.
 #[derive(PartialEq, Eq, PartialOrd, Ord)]
 #[derive(ref_cast::RefCast)]
 #[repr(transparent)]
-pub struct StrAlphabet<A: Alphabet>(PhantomData<A>, [u8]);
+pub struct StrAlphabet<A: Alphabet> {
+	/// Phantom
+	phantom: PhantomData<A>,
+
+	/// Bytes
+	bytes: [u8],
+}

 impl<A: Alphabet> StrAlphabet<A> {
 	/// Returns the bytes from this string
 	#[must_use]
 	pub fn as_bytes(&self) -> &[u8] {
-		&self.1
+		&self.bytes
+	}
+
+	/// Parses a string from bytes
+	pub fn from_bytes(bytes: &[u8]) -> Result<&Self, A::Error> {
+		A::validate(bytes).map(|bytes| Self::ref_cast(bytes))
+	}
+
+	/// Returns this string as a lossy `str`
+	#[must_use]
+	pub fn as_lossy_str(&self) -> &Utf8Lossy {
+		Utf8Lossy::from_bytes(self.as_bytes())
 	}

 	/// Returns the length of this string
@ -32,26 +53,12 @@ impl<A: Alphabet> StrAlphabet<A> {

 impl<A: Alphabet> fmt::Debug for StrAlphabet<A> {
 	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-		// Try to get self as a string to debug it
-		// TODO: Not allocate here
-		let s = String::from_utf8_lossy(self.as_bytes());
-
-		// Then trim any spaces we might have
-		let s = s.trim();
-
-		write!(f, "{s:?}")
+		write!(f, "{:?}", self.as_lossy_str())
 	}
 }

 impl<A: Alphabet> fmt::Display for StrAlphabet<A> {
 	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-		// Try to get self as a string to debug it
-		// TODO: Not allocate here
-		let s = String::from_utf8_lossy(self.as_bytes());
-
-		// Then trim any spaces we might have
-		let s = s.trim();
-
-		write!(f, "{s}")
+		write!(f, "{}", self.as_lossy_str())
 	}
 }