mirror of
https://github.com/Zenithsiz/dcb.git
synced 2026-02-06 01:20:11 +00:00
Slightly revised string types in dcb-iso9660.
This commit is contained in:
parent
66f981d63f
commit
edcd8b45e1
@ -8,7 +8,8 @@
|
||||
min_const_generics,
|
||||
array_methods,
|
||||
array_value_iter,
|
||||
external_doc
|
||||
external_doc,
|
||||
str_internals
|
||||
)]
|
||||
// Lints
|
||||
#![warn(clippy::restriction, clippy::pedantic, clippy::nursery)]
|
||||
|
||||
@ -12,54 +12,64 @@ pub use error::{InvalidCharError, ValidateFileAlphabetError};
|
||||
pub use owned::StringAlphabet;
|
||||
pub use slice::StrAlphabet;
|
||||
|
||||
|
||||
/// An alphabet for a string
|
||||
/// A string alphabet
|
||||
///
|
||||
/// This type serves to create marker types for strings that may only
|
||||
/// contain a subset of characters, or must have them in a certain order.
|
||||
///
|
||||
/// This is accomplished by the [`validate`](Alphabet::validate) method,
|
||||
/// which simply checks if a byte slice is valid for this alphabet.
|
||||
pub trait Alphabet {
|
||||
/// Error type
|
||||
type Error;
|
||||
|
||||
/// Returns if `bytes` are valid for this alphabet
|
||||
fn validate(bytes: &[u8]) -> Result<(), Self::Error>;
|
||||
/// Validates `bytes` for a string of this alphabet and returns
|
||||
/// it, possibly without its terminator.
|
||||
fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error>;
|
||||
}
|
||||
|
||||
/// Implements the [`Alphabet`] trait from a list of valid characters
|
||||
/// and a possible terminator
|
||||
pub trait OnlyValidCharsAlphabet {
|
||||
/// All valid characters
|
||||
fn valid_chars() -> &'static [u8];
|
||||
|
||||
/// Implements the [`Alphabet`] trait from an alphabet
|
||||
pub trait ImplFromAlphabet {
|
||||
/// The alphabet
|
||||
fn alphabet() -> &'static [u8];
|
||||
|
||||
/// String terminator
|
||||
/// Terminator for the string.
|
||||
fn terminator() -> u8;
|
||||
}
|
||||
|
||||
impl<A: ImplFromAlphabet> Alphabet for A {
|
||||
impl<A: OnlyValidCharsAlphabet> Alphabet for A {
|
||||
type Error = InvalidCharError;
|
||||
|
||||
fn validate(bytes: &[u8]) -> Result<(), Self::Error> {
|
||||
// If any are invalid, return Err
|
||||
fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error> {
|
||||
// Go through all bytes and validate them until end of
|
||||
// string or terminator.
|
||||
for (pos, &byte) in bytes.iter().enumerate() {
|
||||
// If we found the terminator, terminate
|
||||
// TODO: Maybe make sure everything after the `;` is valid too
|
||||
if byte == Self::terminator() {
|
||||
break;
|
||||
return Ok(&bytes[..pos]);
|
||||
}
|
||||
|
||||
// Else make sure it contains this byte
|
||||
if !Self::alphabet().contains(&byte) {
|
||||
if !Self::valid_chars().contains(&byte) {
|
||||
return Err(InvalidCharError { byte, pos });
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
// If we got here, there was no terminator, which is still a valid string.
|
||||
Ok(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// A-type alphabet
|
||||
/// A-character alphabet
|
||||
///
|
||||
/// The valid characters are `A..Z`, `0..9`, `_`, `!`, `"`, `%`, `&`, `'`, `(`, `)`, `*`,
|
||||
/// `+`, `,`, `-`, `.`, `/`, `:`, `;`, `<`, `=`, `>` and `?`.
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
|
||||
pub struct AlphabetA;
|
||||
|
||||
impl ImplFromAlphabet for AlphabetA {
|
||||
fn alphabet() -> &'static [u8] {
|
||||
impl OnlyValidCharsAlphabet for AlphabetA {
|
||||
fn valid_chars() -> &'static [u8] {
|
||||
&[
|
||||
b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
|
||||
b'X', b'Y', b'Z', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'_', b'!', b'"', b'%', b'&', b'\'', b'(', b')', b'*',
|
||||
@ -72,12 +82,14 @@ impl ImplFromAlphabet for AlphabetA {
|
||||
}
|
||||
}
|
||||
|
||||
/// D-type alphabet
|
||||
/// D-character alphabet
|
||||
///
|
||||
/// The list of valid characters are `A..Z`, `0..9` and `_`
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
|
||||
pub struct AlphabetD;
|
||||
|
||||
impl ImplFromAlphabet for AlphabetD {
|
||||
fn alphabet() -> &'static [u8] {
|
||||
impl OnlyValidCharsAlphabet for AlphabetD {
|
||||
fn valid_chars() -> &'static [u8] {
|
||||
&[
|
||||
b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
|
||||
b'X', b'Y', b'Z', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'_',
|
||||
@ -90,16 +102,27 @@ impl ImplFromAlphabet for AlphabetD {
|
||||
}
|
||||
|
||||
/// File alphabet
|
||||
///
|
||||
/// The file alphabet dictates the format for file names,
|
||||
/// which must follow `<name>.<extension>;<version>`, where
|
||||
/// `<name>` and `<extension>` are D-character strings,
|
||||
/// and `<version>` only contains numeric decimal characters.
|
||||
///
|
||||
/// There are 3 exceptions to this, which are the root directory
|
||||
/// name, current directory name and parent directory name, which
|
||||
/// are "\0", "" and "\x01", respectively.
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
|
||||
pub struct FileAlphabet;
|
||||
|
||||
impl Alphabet for FileAlphabet {
|
||||
type Error = ValidateFileAlphabetError;
|
||||
|
||||
fn validate(bytes: &[u8]) -> Result<(), Self::Error> {
|
||||
fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error> {
|
||||
// Special cases for the root, `.` and `..`, respectively
|
||||
// TODO: Remove exceptions from this string and make directories store the
|
||||
// current and parent separately.
|
||||
if let [b'\0'] | [] | [b'\x01'] = bytes {
|
||||
return Ok(());
|
||||
return Ok(bytes);
|
||||
}
|
||||
|
||||
// Separate into `<name>.<extension>;<version>`
|
||||
@ -120,9 +143,9 @@ impl Alphabet for FileAlphabet {
|
||||
// Validate all separately
|
||||
AlphabetD::validate(name).map_err(ValidateFileAlphabetError::InvalidNameChar)?;
|
||||
AlphabetD::validate(extension).map_err(ValidateFileAlphabetError::InvalidExtensionChar)?;
|
||||
match version {
|
||||
[b'0'..=b'9'] => Ok(()),
|
||||
_ => Err(ValidateFileAlphabetError::InvalidVersion),
|
||||
match version.iter().all(|ch| (b'0'..=b'9').contains(ch)) {
|
||||
true => Ok(bytes),
|
||||
false => Err(ValidateFileAlphabetError::InvalidVersion),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -136,6 +159,7 @@ pub type StringA = StringAlphabet<AlphabetA>;
|
||||
/// A-type string slice
|
||||
pub type StrA = StrAlphabet<AlphabetA>;
|
||||
|
||||
|
||||
/// D-type string array
|
||||
pub type StrArrD<const N: usize> = StrArrAlphabet<AlphabetD, N>;
|
||||
|
||||
@ -145,6 +169,7 @@ pub type StringD = StringAlphabet<AlphabetD>;
|
||||
/// D-type string slice
|
||||
pub type StrD = StrAlphabet<AlphabetD>;
|
||||
|
||||
|
||||
/// File string array
|
||||
pub type FileStrArr<const N: usize> = StrArrAlphabet<FileAlphabet, N>;
|
||||
|
||||
|
||||
@ -4,15 +4,27 @@
|
||||
use super::{Alphabet, StrAlphabet};
|
||||
use std::{fmt, marker::PhantomData, ops::Deref};
|
||||
|
||||
|
||||
/// A alphabetic specific string array
|
||||
/// An alphabetic string array
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
|
||||
pub struct StrArrAlphabet<A: Alphabet, const N: usize>(PhantomData<A>, [u8; N]);
|
||||
pub struct StrArrAlphabet<A: Alphabet, const N: usize> {
|
||||
/// Phantom
|
||||
phantom: PhantomData<A>,
|
||||
|
||||
/// Bytes
|
||||
bytes: [u8; N],
|
||||
|
||||
/// Length
|
||||
len: usize,
|
||||
}
|
||||
|
||||
impl<A: Alphabet, const N: usize> StrArrAlphabet<A, N> {
|
||||
/// Parses a string from bytes
|
||||
pub fn from_bytes(bytes: &[u8; N]) -> Result<Self, A::Error> {
|
||||
A::validate(bytes).map(|()| Self(PhantomData, *bytes))
|
||||
A::validate(bytes).map(|valid_bytes| Self {
|
||||
phantom: PhantomData,
|
||||
bytes: *bytes,
|
||||
len: valid_bytes.len(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -20,7 +32,7 @@ impl<A: Alphabet, const N: usize> Deref for StrArrAlphabet<A, N> {
|
||||
type Target = StrAlphabet<A>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
ref_cast::RefCast::ref_cast(self.1.as_slice())
|
||||
ref_cast::RefCast::ref_cast(&self.bytes.as_slice()[..self.len])
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -4,14 +4,23 @@
|
||||
use super::{Alphabet, StrAlphabet};
|
||||
use std::{fmt, marker::PhantomData, ops::Deref};
|
||||
|
||||
/// A alphabetic specific string
|
||||
/// An alphabetic owned string
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub struct StringAlphabet<A: Alphabet>(PhantomData<A>, Vec<u8>);
|
||||
pub struct StringAlphabet<A: Alphabet> {
|
||||
/// Phantom
|
||||
phantom: PhantomData<A>,
|
||||
|
||||
/// Bytes
|
||||
bytes: Vec<u8>,
|
||||
}
|
||||
|
||||
impl<A: Alphabet> StringAlphabet<A> {
|
||||
/// Parses a string from bytes
|
||||
pub fn from_bytes(bytes: &[u8]) -> Result<Self, A::Error> {
|
||||
A::validate(bytes).map(|()| Self(PhantomData, bytes.to_vec()))
|
||||
A::validate(bytes).map(|bytes| Self {
|
||||
phantom: PhantomData,
|
||||
bytes: bytes.to_vec(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -19,7 +28,7 @@ impl<A: Alphabet> Deref for StringAlphabet<A> {
|
||||
type Target = StrAlphabet<A>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
ref_cast::RefCast::ref_cast(self.1.as_slice())
|
||||
ref_cast::RefCast::ref_cast(self.bytes.as_slice())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -2,19 +2,40 @@
|
||||
|
||||
// Imports
|
||||
use super::Alphabet;
|
||||
use core::str::lossy::Utf8Lossy;
|
||||
use ref_cast::RefCast;
|
||||
use std::{fmt, marker::PhantomData};
|
||||
|
||||
/// A alphabetic specific string slice
|
||||
/// An alphabetic string slice
|
||||
// TODO: Not expose `ref_cast` to the outside, as it breaks
|
||||
// this string's encapsulation.
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[derive(ref_cast::RefCast)]
|
||||
#[repr(transparent)]
|
||||
pub struct StrAlphabet<A: Alphabet>(PhantomData<A>, [u8]);
|
||||
pub struct StrAlphabet<A: Alphabet> {
|
||||
/// Phantom
|
||||
phantom: PhantomData<A>,
|
||||
|
||||
/// Bytes
|
||||
bytes: [u8],
|
||||
}
|
||||
|
||||
impl<A: Alphabet> StrAlphabet<A> {
|
||||
/// Returns the bytes from this string
|
||||
#[must_use]
|
||||
pub fn as_bytes(&self) -> &[u8] {
|
||||
&self.1
|
||||
&self.bytes
|
||||
}
|
||||
|
||||
/// Parses a string from bytes
|
||||
pub fn from_bytes(bytes: &[u8]) -> Result<&Self, A::Error> {
|
||||
A::validate(bytes).map(|bytes| Self::ref_cast(bytes))
|
||||
}
|
||||
|
||||
/// Returns this string as a lossy `str`
|
||||
#[must_use]
|
||||
pub fn as_lossy_str(&self) -> &Utf8Lossy {
|
||||
Utf8Lossy::from_bytes(self.as_bytes())
|
||||
}
|
||||
|
||||
/// Returns the length of this string
|
||||
@ -32,26 +53,12 @@ impl<A: Alphabet> StrAlphabet<A> {
|
||||
|
||||
impl<A: Alphabet> fmt::Debug for StrAlphabet<A> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
// Try to get self as a string to debug it
|
||||
// TODO: Not allocate here
|
||||
let s = String::from_utf8_lossy(self.as_bytes());
|
||||
|
||||
// Then trim any spaces we might have
|
||||
let s = s.trim();
|
||||
|
||||
write!(f, "{s:?}")
|
||||
write!(f, "{:?}", self.as_lossy_str())
|
||||
}
|
||||
}
|
||||
|
||||
impl<A: Alphabet> fmt::Display for StrAlphabet<A> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
// Try to get self as a string to debug it
|
||||
// TODO: Not allocate here
|
||||
let s = String::from_utf8_lossy(self.as_bytes());
|
||||
|
||||
// Then trim any spaces we might have
|
||||
let s = s.trim();
|
||||
|
||||
write!(f, "{s}")
|
||||
write!(f, "{}", self.as_lossy_str())
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user