Slightly revised string types in dcb-iso9660.

This commit is contained in:
Filipe Rodrigues 2021-01-30 21:13:46 +00:00
parent 66f981d63f
commit edcd8b45e1
5 changed files with 111 additions and 57 deletions

View File

@ -8,7 +8,8 @@
min_const_generics,
array_methods,
array_value_iter,
external_doc
external_doc,
str_internals
)]
// Lints
#![warn(clippy::restriction, clippy::pedantic, clippy::nursery)]

View File

@ -12,54 +12,64 @@ pub use error::{InvalidCharError, ValidateFileAlphabetError};
pub use owned::StringAlphabet;
pub use slice::StrAlphabet;
/// An alphabet for a string
/// A string alphabet
///
/// This type serves to create marker types for strings that may only
/// contain a subset of characters, or must have them in a certain order.
///
/// This is accomplished by the [`validate`](Alphabet::validate) method,
/// which simply checks if a byte slice is valid for this alphabet.
pub trait Alphabet {
/// Error type
type Error;
/// Returns if `bytes` are valid for this alphabet
fn validate(bytes: &[u8]) -> Result<(), Self::Error>;
/// Validates `bytes` for a string of this alphabet and returns
/// it, possibly without its terminator.
fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error>;
}
/// Implements the [`Alphabet`] trait from a list of valid characters
/// and a possible terminator
pub trait OnlyValidCharsAlphabet {
/// All valid characters
fn valid_chars() -> &'static [u8];
/// Implements the [`Alphabet`] trait from an alphabet
pub trait ImplFromAlphabet {
/// The alphabet
fn alphabet() -> &'static [u8];
/// String terminator
/// Terminator for the string.
fn terminator() -> u8;
}
impl<A: ImplFromAlphabet> Alphabet for A {
impl<A: OnlyValidCharsAlphabet> Alphabet for A {
type Error = InvalidCharError;
fn validate(bytes: &[u8]) -> Result<(), Self::Error> {
// If any are invalid, return Err
fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error> {
// Go through all bytes and validate them until end of
// string or terminator.
for (pos, &byte) in bytes.iter().enumerate() {
// If we found the terminator, terminate
// TODO: Maybe make sure everything after the `;` is valid too
if byte == Self::terminator() {
break;
return Ok(&bytes[..pos]);
}
// Else make sure it contains this byte
if !Self::alphabet().contains(&byte) {
if !Self::valid_chars().contains(&byte) {
return Err(InvalidCharError { byte, pos });
}
}
Ok(())
// If we got here, there was no terminator, which is still a valid string.
Ok(bytes)
}
}
/// A-type alphabet
/// A-character alphabet
///
/// The valid characters are `A..Z`, `0..9`, `_`, `!`, `"`, `%`, `&`, `'`, `(`, `)`, `*`, `+`,
/// `,`, `-`, `.`, `/`, `:`, `;`, `<`, `=`, `>` and `?`.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct AlphabetA;
impl ImplFromAlphabet for AlphabetA {
fn alphabet() -> &'static [u8] {
impl OnlyValidCharsAlphabet for AlphabetA {
fn valid_chars() -> &'static [u8] {
&[
b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
b'X', b'Y', b'Z', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'_', b'!', b'"', b'%', b'&', b'\'', b'(', b')', b'*',
@ -72,12 +82,14 @@ impl ImplFromAlphabet for AlphabetA {
}
}
/// D-type alphabet
/// D-character alphabet
///
/// The valid characters are `A..Z`, `0..9` and `_`.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct AlphabetD;
impl ImplFromAlphabet for AlphabetD {
fn alphabet() -> &'static [u8] {
impl OnlyValidCharsAlphabet for AlphabetD {
fn valid_chars() -> &'static [u8] {
&[
b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
b'X', b'Y', b'Z', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'_',
@ -90,16 +102,27 @@ impl ImplFromAlphabet for AlphabetD {
}
/// File alphabet
///
/// The file alphabet dictates the format for file names,
/// which must follow `<name>.<extension>;<version>`, where
/// `<name>` and `<extension>` are D-character strings,
/// and `<version>` only contains numeric decimal characters.
///
/// There are 3 exceptions to this: the root directory name, the
/// current directory name and the parent directory name, which
/// are `"\0"`, `""` and `"\x01"`, respectively.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct FileAlphabet;
impl Alphabet for FileAlphabet {
type Error = ValidateFileAlphabetError;
fn validate(bytes: &[u8]) -> Result<(), Self::Error> {
fn validate(bytes: &[u8]) -> Result<&[u8], Self::Error> {
// Special cases for the root, `.` and `..`, respectively
// TODO: Remove exceptions from this string and make directories store the
// current and parent separately.
if let [b'\0'] | [] | [b'\x01'] = bytes {
return Ok(());
return Ok(bytes);
}
// Separate into `<name>.<extension>;<version>`
@ -120,9 +143,9 @@ impl Alphabet for FileAlphabet {
// Validate all separately
AlphabetD::validate(name).map_err(ValidateFileAlphabetError::InvalidNameChar)?;
AlphabetD::validate(extension).map_err(ValidateFileAlphabetError::InvalidExtensionChar)?;
match version {
[b'0'..=b'9'] => Ok(()),
_ => Err(ValidateFileAlphabetError::InvalidVersion),
match version.iter().all(|ch| (b'0'..=b'9').contains(ch)) {
true => Ok(bytes),
false => Err(ValidateFileAlphabetError::InvalidVersion),
}
}
}
@ -136,6 +159,7 @@ pub type StringA = StringAlphabet<AlphabetA>;
/// A-type string slice
pub type StrA = StrAlphabet<AlphabetA>;
/// D-type string array
pub type StrArrD<const N: usize> = StrArrAlphabet<AlphabetD, N>;
@ -145,6 +169,7 @@ pub type StringD = StringAlphabet<AlphabetD>;
/// D-type string slice
pub type StrD = StrAlphabet<AlphabetD>;
/// File string array
pub type FileStrArr<const N: usize> = StrArrAlphabet<FileAlphabet, N>;

View File

@ -4,15 +4,27 @@
use super::{Alphabet, StrAlphabet};
use std::{fmt, marker::PhantomData, ops::Deref};
/// A alphabetic specific string array
/// An alphabetic string array
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct StrArrAlphabet<A: Alphabet, const N: usize>(PhantomData<A>, [u8; N]);
pub struct StrArrAlphabet<A: Alphabet, const N: usize> {
/// Phantom
phantom: PhantomData<A>,
/// Bytes
bytes: [u8; N],
/// Length
len: usize,
}
impl<A: Alphabet, const N: usize> StrArrAlphabet<A, N> {
/// Parses a string from bytes
pub fn from_bytes(bytes: &[u8; N]) -> Result<Self, A::Error> {
A::validate(bytes).map(|()| Self(PhantomData, *bytes))
A::validate(bytes).map(|valid_bytes| Self {
phantom: PhantomData,
bytes: *bytes,
len: valid_bytes.len(),
})
}
}
@ -20,7 +32,7 @@ impl<A: Alphabet, const N: usize> Deref for StrArrAlphabet<A, N> {
type Target = StrAlphabet<A>;
fn deref(&self) -> &Self::Target {
ref_cast::RefCast::ref_cast(self.1.as_slice())
ref_cast::RefCast::ref_cast(&self.bytes.as_slice()[..self.len])
}
}

View File

@ -4,14 +4,23 @@
use super::{Alphabet, StrAlphabet};
use std::{fmt, marker::PhantomData, ops::Deref};
/// A alphabetic specific string
/// An alphabetic owned string
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct StringAlphabet<A: Alphabet>(PhantomData<A>, Vec<u8>);
pub struct StringAlphabet<A: Alphabet> {
/// Phantom
phantom: PhantomData<A>,
/// Bytes
bytes: Vec<u8>,
}
impl<A: Alphabet> StringAlphabet<A> {
/// Parses a string from bytes
pub fn from_bytes(bytes: &[u8]) -> Result<Self, A::Error> {
A::validate(bytes).map(|()| Self(PhantomData, bytes.to_vec()))
A::validate(bytes).map(|bytes| Self {
phantom: PhantomData,
bytes: bytes.to_vec(),
})
}
}
@ -19,7 +28,7 @@ impl<A: Alphabet> Deref for StringAlphabet<A> {
type Target = StrAlphabet<A>;
fn deref(&self) -> &Self::Target {
ref_cast::RefCast::ref_cast(self.1.as_slice())
ref_cast::RefCast::ref_cast(self.bytes.as_slice())
}
}

View File

@ -2,19 +2,40 @@
// Imports
use super::Alphabet;
use core::str::lossy::Utf8Lossy;
use ref_cast::RefCast;
use std::{fmt, marker::PhantomData};
/// A alphabetic specific string slice
/// An alphabetic string slice
// TODO: Not expose `ref_cast` to the outside, as it breaks
// this string's encapsulation.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[derive(ref_cast::RefCast)]
#[repr(transparent)]
pub struct StrAlphabet<A: Alphabet>(PhantomData<A>, [u8]);
pub struct StrAlphabet<A: Alphabet> {
/// Phantom
phantom: PhantomData<A>,
/// Bytes
bytes: [u8],
}
impl<A: Alphabet> StrAlphabet<A> {
/// Returns the bytes from this string
#[must_use]
pub fn as_bytes(&self) -> &[u8] {
&self.1
&self.bytes
}
/// Parses a string from bytes
pub fn from_bytes(bytes: &[u8]) -> Result<&Self, A::Error> {
A::validate(bytes).map(|bytes| Self::ref_cast(bytes))
}
/// Returns this string as a lossy `str`
#[must_use]
pub fn as_lossy_str(&self) -> &Utf8Lossy {
Utf8Lossy::from_bytes(self.as_bytes())
}
/// Returns the length of this string
@ -32,26 +53,12 @@ impl<A: Alphabet> StrAlphabet<A> {
impl<A: Alphabet> fmt::Debug for StrAlphabet<A> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Try to get self as a string to debug it
// TODO: Not allocate here
let s = String::from_utf8_lossy(self.as_bytes());
// Then trim any spaces we might have
let s = s.trim();
write!(f, "{s:?}")
write!(f, "{:?}", self.as_lossy_str())
}
}
impl<A: Alphabet> fmt::Display for StrAlphabet<A> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Try to get self as a string to debug it
// TODO: Not allocate here
let s = String::from_utf8_lossy(self.as_bytes());
// Then trim any spaces we might have
let s = s.trim();
write!(f, "{s}")
write!(f, "{}", self.as_lossy_str())
}
}