Moved dcb_iso9660::string to dcb_util::alphabet.

2026-02-04 00:21:57 +00:00 · 2021-06-04 21:29:06 +01:00 · 2021-06-04 21:29:06 +01:00 · 93cc39ebd3
commit 93cc39ebd3
parent 91ba3b9406
13 changed files with 133 additions and 92 deletions
--- a/dcb-iso9660/Cargo.toml
+++ b/dcb-iso9660/Cargo.toml
@ -21,3 +21,4 @@ bitflags = "1.2.1"
 # Derives
 thiserror = "1.0.23"
 ref-cast = "1.0.6"
+extend = "1.0.1"
--- a/dcb-iso9660/src/string.rs
+++ b/dcb-iso9660/src/string.rs
@ -1,66 +1,13 @@
 //! Filesystem strings

-/// Modules
-pub mod arr;
+// Modules
 pub mod error;
-pub mod owned;
-pub mod slice;

 // Exports
-pub use arr::StrArrAlphabet;
-pub use error::{InvalidCharError, ValidateFileAlphabetError};
-pub use owned::StringAlphabet;
-pub use slice::StrAlphabet;
+pub use error::ValidateFileAlphabetError;

-/// A string alphabet
-///
-/// This type serves to create marker types for strings that may only
-/// contain a subset of characters, or must have them in a certain order.
-///
-/// This is accomplished by the [`validate`](Alphabet::validate) method,
-/// which simply checks if a byte slice is valid for this alphabet.
-pub trait Alphabet {
-	/// Error type
-	type Error;
-
-	/// Validates `bytes` for a string of this alphabet and returns
-	/// it, possibly without it's terminator.
-	fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error>;
-}
-
-/// Implements the [`Alphabet`] trait from a list of valid characters
-/// and a possible terminator
-pub trait OnlyValidCharsAlphabet {
-	/// All valid characters
-	fn valid_chars() -> &'static [u8];
-
-	/// Terminator for the string.
-	fn terminator() -> u8;
-}
-
-impl<A: OnlyValidCharsAlphabet> Alphabet for A {
-	type Error = InvalidCharError;
-
-	fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error> {
-		// Go through all bytes and validate them until end of
-		// string or terminator.
-		let terminator = Self::terminator();
-		for (pos, &byte) in bytes.iter().enumerate() {
-			// If we found the terminator, terminate
-			if byte == terminator {
-				return Ok(&bytes[..pos]);
-			}
-
-			// Else make sure it contains this byte
-			if !Self::valid_chars().contains(&byte) {
-				return Err(InvalidCharError { byte, pos });
-			}
-		}
-
-		// If we got, there was no terminator, which is still a valid string.
-		Ok(bytes)
-	}
-}
+// Imports
+use dcb_util::{alphabet::OnlyValidCharsAlphabet, Alphabet, StrAlphabet, StrArrAlphabet, StringAlphabet};

 /// A-character alphabet
 ///
@ -185,10 +132,11 @@ pub type FileStrArr<const N: usize> = StrArrAlphabet<FileAlphabet, N>;
 /// File string
 pub type FileString = StringAlphabet<FileAlphabet>;

+#[extend::ext(pub, name = FileStrWithoutVersion)]
 impl FileStr {
 	/// Returns this filename without the version
 	#[must_use]
-	pub fn without_version(&self) -> &str {
+	fn without_version(&self) -> &str {
 		let s = std::str::from_utf8(self.as_bytes()).expect("File string had invalid utf8 characters");

 		match s.split_once(';') {
--- a/dcb-iso9660/src/string/error.rs
+++ b/dcb-iso9660/src/string/error.rs
@ -1,28 +1,18 @@
 //! Errors

-/// Error for [`Alphabet::validate`](super::Alphabet::validate)'s impl of [`AlphabetA`](super::AlphabetA) and
-/// [`AlphabetD`](super::AlphabetD)
-#[derive(Debug, thiserror::Error)]
-#[error("Invalid character '{byte:#x}' at index {pos}")]
-pub struct InvalidCharError {
-	/// Invalid character
-	pub byte: u8,
+// Imports
+use dcb_util::alphabet;

-	/// Position
-	pub pos: usize,
-}
-
-/// Error for [`Alphabet::validate`](super::Alphabet::validate)'s impl of [`AlphabetA`](super::AlphabetA) and
-/// [`AlphabetD`](super::AlphabetD)
+/// Error for [`Alphabet`](dcb_util::Alphabet)'s impl for [`FileAlphabet`](super::FileAlphabet)
 #[derive(Debug, thiserror::Error)]
 pub enum ValidateFileAlphabetError {
 	/// Invalid name character
 	#[error("Invalid name character")]
-	InvalidNameChar(#[source] InvalidCharError),
+	InvalidNameChar(#[source] alphabet::InvalidCharError),

 	/// Invalid extension character
 	#[error("Invalid extension character")]
-	InvalidExtensionChar(#[source] InvalidCharError),
+	InvalidExtensionChar(#[source] alphabet::InvalidCharError),

 	/// Missing file name extension
 	#[error("Missing file name extension")]
--- a/dcb-iso9660/src/volume_descriptor/boot/error.rs
+++ b/dcb-iso9660/src/volume_descriptor/boot/error.rs
@ -1,16 +1,16 @@
 //! Errors

 // Imports
-use crate::string;
+use dcb_util::alphabet;

 /// Error type for [`Bytes::deserialize_bytes`](dcb_bytes::Bytes::deserialize_bytes)
 #[derive(Debug, thiserror::Error)]
 pub enum DeserializeBytesError {
 	/// Unable to parse system id
 	#[error("Unable to parse system id")]
-	SystemId(#[source] string::InvalidCharError),
+	SystemId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),

 	/// Unable to parse boot id
 	#[error("Unable to parse boot id")]
-	BootId(#[source] string::InvalidCharError),
+	BootId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),
 }
--- a/dcb-iso9660/src/volume_descriptor/primary/error.rs
+++ b/dcb-iso9660/src/volume_descriptor/primary/error.rs
@ -1,18 +1,19 @@
 //! Errors

 // Imports
-use crate::{date_time, entry, string};
+use crate::{date_time, entry};
+use dcb_util::alphabet;

 /// Error type for [`Bytes::deserialize_bytes`](dcb_bytes::Bytes::deserialize_bytes)
 #[derive(Debug, thiserror::Error)]
 pub enum DeserializeBytesError {
 	/// Unable to parse system id
 	#[error("Unable to parse system id")]
-	SystemId(#[source] string::InvalidCharError),
+	SystemId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),

 	/// Unable to parse volume id
 	#[error("Unable to parse volume id")]
-	VolumeId(#[source] string::InvalidCharError),
+	VolumeId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),

 	/// Unable to parse the root dir entry
 	#[error("Unable to parse the root dir entry")]
@ -20,31 +21,31 @@ pub enum DeserializeBytesError {

 	/// Unable to parse volume set id
 	#[error("Unable to parse volume set id")]
-	VolumeSetId(#[source] string::InvalidCharError),
+	VolumeSetId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),

 	/// Unable to parse publisher id
 	#[error("Unable to parse publisher id")]
-	PublisherId(#[source] string::InvalidCharError),
+	PublisherId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),

 	/// Unable to parse data preparer id
 	#[error("Unable to parse data preparer id")]
-	DataPreparerId(#[source] string::InvalidCharError),
+	DataPreparerId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),

 	/// Unable to parse application id
 	#[error("Unable to parse application id")]
-	ApplicationId(#[source] string::InvalidCharError),
+	ApplicationId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),

 	/// Unable to parse copyright file id
 	#[error("Unable to parse copyright file id")]
-	CopyrightFileId(#[source] string::InvalidCharError),
+	CopyrightFileId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),

 	/// Unable to parse abstract file id
 	#[error("Unable to parse abstract file id")]
-	AbstractFileId(#[source] string::InvalidCharError),
+	AbstractFileId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),

 	/// Unable to parse bibliographic file id
 	#[error("Unable to parse bibliographic file id")]
-	BibliographicFileId(#[source] string::InvalidCharError),
+	BibliographicFileId(#[source] alphabet::arr::FromBytesError<alphabet::InvalidCharError>),

 	/// Unable to parse volume creation date time
 	#[error("Unable to parse volume creation date time")]
--- a/dcb-tools/dcb-file-editor/Cargo.toml
+++ b/dcb-tools/dcb-file-editor/Cargo.toml
@ -9,6 +9,7 @@ edition = "2018"
 dcb-bytes = { path = "../../dcb-bytes" }
 dcb-util = { path = "../../dcb-util", features = ["alert"] }
 dcb-cdrom-xa = { path = "../../dcb-cdrom-xa" }
+dcb-iso9660 = { path = "../../dcb-iso9660" }
 dcb-drv = { path = "../../dcb-drv" }
 dcb-io = { path = "../../dcb-io" }
 dcb-tim = { path = "../../dcb-tim" }
--- a/dcb-tools/dcb-uniso-bin/src/main.rs
+++ b/dcb-tools/dcb-uniso-bin/src/main.rs
@ -11,7 +11,7 @@ use anyhow::Context;
 use cli::CliData;
 use dcb_bytes::Bytes;
 use dcb_cdrom_xa::CdRomReader;
-use dcb_iso9660::{date_time::DecDateTime, FilesystemReader};
+use dcb_iso9660::{date_time::DecDateTime, string::FileStrWithoutVersion, FilesystemReader};
 use std::{fs, io, path::PathBuf};

 fn main() -> Result<(), anyhow::Error> {
--- a/dcb-util/src/alphabet.rs
+++ b/dcb-util/src/alphabet.rs
@ -0,0 +1,63 @@
+//! Alphabet strings
+
+// Modules
+pub mod arr;
+pub mod error;
+pub mod owned;
+pub mod slice;
+
+// Exports
+pub use arr::StrArrAlphabet;
+pub use error::InvalidCharError;
+pub use owned::StringAlphabet;
+pub use slice::StrAlphabet;
+
+/// A string alphabet
+///
+/// This trait is implemented by marker types that validate bytes as
+/// part of their alphabet.
+///
+/// This is accomplished by the [`validate`](Alphabet::validate) method,
+/// which checks if a byte slice is valid for this alphabet.
+pub trait Alphabet {
+	/// Error type returned when validation fails
+	type Error: std::error::Error + 'static;
+
+	/// Validates `bytes` for a string of this alphabet and returns
+	/// it, possibly without its terminator.
+	fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error>;
+}
+
+/// Alphabet described by a list of valid characters and a terminator;
+/// implementors receive [`Alphabet`] through a blanket impl
+pub trait OnlyValidCharsAlphabet {
+	/// All bytes that may appear in a string of this alphabet
+	fn valid_chars() -> &'static [u8];
+
+	/// Terminator byte: validation stops at its first occurrence and excludes it.
+	fn terminator() -> u8;
+}
+
+impl<A: OnlyValidCharsAlphabet> Alphabet for A {
+	type Error = InvalidCharError;
+
+	fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error> {
+		// Go through all bytes and validate them until end of
+		// string or terminator.
+		let terminator = Self::terminator();
+		for (pos, &byte) in bytes.iter().enumerate() {
+			// The terminator ends the string; return everything before it
+			if byte == terminator {
+				return Ok(&bytes[..pos]);
+			}
+
+			// Else make sure the alphabet contains this byte
+			if !Self::valid_chars().contains(&byte) {
+				return Err(InvalidCharError { byte, pos });
+			}
+		}
+
+		// If we got here, there was no terminator, which is still a valid string.
+		Ok(bytes)
+	}
+}
--- a/dcb-iso9660/src/string/arr.rs
+++ b/dcb-iso9660/src/string/arr.rs
@ -19,11 +19,19 @@ pub struct StrArrAlphabet<A: Alphabet, const N: usize> {

 impl<A: Alphabet, const N: usize> StrArrAlphabet<A, N> {
 	/// Parses a string from bytes
-	pub fn from_bytes(bytes: &[u8; N]) -> Result<Self, A::Error> {
-		A::validate(bytes).map(|valid_bytes| Self {
+	#[allow(clippy::shadow_unrelated)] // They're actually related
+	pub fn from_bytes(bytes: &[u8; N]) -> Result<Self, FromBytesError<A::Error>> {
+		// Validate the bytes with the alphabet (the result may exclude a terminator)
+		let valid_bytes = A::validate(bytes).map_err(FromBytesError::Validate)?;
+
+		// Copy the validated prefix over: it may be shorter than `N`, and is `TooLong` only if it exceeds `N`
+		let mut bytes = [0; N];
+		bytes.get_mut(..valid_bytes.len()).ok_or(FromBytesError::TooLong)?.copy_from_slice(valid_bytes);
+
+		Ok(Self {
 			phantom: PhantomData,
-			bytes:   *bytes,
-			len:     valid_bytes.len(),
+			bytes,
+			len: valid_bytes.len(),
 		})
 	}

@ -55,3 +63,15 @@ impl<A: Alphabet, const N: usize> fmt::Display for StrArrAlphabet<A, N> {
 		write!(f, "{}", s)
 	}
 }
+
+/// Error type for [`StrArrAlphabet::from_bytes`]
+#[derive(Debug, thiserror::Error)]
+pub enum FromBytesError<E: std::error::Error + 'static> {
+	/// The alphabet rejected the bytes; its error is kept as the source
+	#[error("Unable to validate")]
+	Validate(#[source] E),
+
+	/// The validated string does not fit in the array
+	#[error("Validated string was too long")]
+	TooLong,
+}
--- a/dcb-util/src/alphabet/error.rs
+++ b/dcb-util/src/alphabet/error.rs
@ -0,0 +1,13 @@
+//! Errors
+
+/// Error for [`Alphabet::validate`](super::Alphabet::validate)'s blanket impl for implementors of
+/// [`OnlyValidCharsAlphabet`](super::OnlyValidCharsAlphabet)
+#[derive(Debug, thiserror::Error)]
+#[error("Invalid character '{byte:#x}' at index {pos}")]
+pub struct InvalidCharError {
+	/// The byte that is not part of the alphabet
+	pub byte: u8,
+
+	/// Index of the invalid byte within the validated input
+	pub pos: usize,
+}
--- a/dcb-iso9660/src/string/owned.rs
+++ b/dcb-iso9660/src/string/owned.rs
--- a/dcb-iso9660/src/string/slice.rs
+++ b/dcb-iso9660/src/string/slice.rs
--- a/dcb-util/src/lib.rs
+++ b/dcb-util/src/lib.rs
@ -6,7 +6,9 @@
 	seek_stream_len,
 	unboxed_closures,
 	fn_traits,
-	decl_macro
+	decl_macro,
+	array_methods,
+	str_internals
 )]
 // Lints
 #![warn(clippy::restriction, clippy::pedantic, clippy::nursery)]
@ -67,6 +69,7 @@
 // Modules
 #[cfg(feature = "alert")]
 pub mod alert;
+pub mod alphabet;
 pub mod array_split;
 pub mod ascii_str_arr;
 #[cfg(feature = "gui")]
@ -90,6 +93,7 @@ pub mod task;
 pub mod write_take;

 // Exports
+pub use alphabet::{Alphabet, StrAlphabet, StrArrAlphabet, StringAlphabet};
 pub use ascii_str_arr::AsciiStrArr;
 #[cfg(feature = "gui")]
 pub use ascii_text_buffer::AsciiTextBuffer;