From adf85387369ecdf3f5a881bfdb36cb3dddc071e4 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Mon, 11 Jan 2021 14:05:10 +0000 Subject: [PATCH] Added `inst::Inst::decode` and changed `ParseIter` to use it. `Directive::decode` now only returns itself. --- dcb-exe/src/exe/inst.rs | 35 +++++++++++++++ dcb-exe/src/exe/inst/directive.rs | 73 +++++++++++++++++-------------- dcb-exe/src/exe/inst/iter.rs | 60 +++++-------------------- dcb-exe/src/lib.rs | 1 + 4 files changed, 85 insertions(+), 84 deletions(-) diff --git a/dcb-exe/src/exe/inst.rs b/dcb-exe/src/exe/inst.rs index cce987c..9fab715 100644 --- a/dcb-exe/src/exe/inst.rs +++ b/dcb-exe/src/exe/inst.rs @@ -35,6 +35,7 @@ pub use reg::Register; pub use size::InstSize; // Imports +use self::{basic::Decodable as _, pseudo::Decodable as _}; use crate::Pos; /// An assembler instruction. @@ -59,6 +60,40 @@ impl Inst { pub const CODE_START: Pos = Pos(0x80013e4c); } +impl Inst { + /// Decodes an instruction from bytes and its position. + pub fn decode(pos: Pos, bytes: &[u8]) -> Option<Self> { + // If we're outside of code range, decode a directive + if !Self::CODE_RANGE.contains(&pos) { + let directive = Directive::decode(pos, bytes)?; + return Some(Self::Directive(directive)); + } + + // Else make the instruction iterator + // Note: We fuse it to make sure that pseudo instructions don't try to skip + // invalid instructions. 
+ let mut insts = bytes + .array_chunks::<4>() + .copied() + .map(u32::from_ne_bytes) + .map_while(|word| basic::Raw::from_u32(word).and_then(basic::Inst::decode)) + .fuse(); + + // Try to decode a pseudo-instruction + if let Some(inst) = pseudo::Inst::decode(insts.clone()) { + return Some(Self::Pseudo(inst)); + } + + // Else try to decode it as a basic instruction + if let Some(inst) = insts.next() { + return Some(Self::Basic(inst)); + } + + // Else read it as a directive + Directive::decode(pos, bytes).map(Self::Directive) + } +} + impl InstSize for Inst { fn size(&self) -> usize { match self { diff --git a/dcb-exe/src/exe/inst/directive.rs b/dcb-exe/src/exe/inst/directive.rs index 329f2f9..4f7dd20 100644 --- a/dcb-exe/src/exe/inst/directive.rs +++ b/dcb-exe/src/exe/inst/directive.rs @@ -99,30 +99,31 @@ impl Directive { impl Directive { /// Decodes a directive #[must_use] - pub fn decode(pos: Pos, bytes: &[u8]) -> Option<(Self, usize)> { + pub fn decode(pos: Pos, bytes: &[u8]) -> Option<Self> { // Check if we need to force decode it if let Some(ForceDecodeRange { kind, .. 
}) = Self::FORCE_DECODE_RANGES.iter().find(|range| range.contains(&pos)) { #[rustfmt::skip] return match kind { - ForceDecodeKind::Word => bytes.next_u32().map(|value| (Self::Dw(value), 4)), - ForceDecodeKind::HalfWord => bytes.next_u16().map(|value| (Self::Dh(value), 2)), - ForceDecodeKind::Byte => bytes.next_u8 ().map(|value| (Self::Db(value), 1)), + ForceDecodeKind::Word => bytes.next_u32().map(Self::Dw), + ForceDecodeKind::HalfWord => bytes.next_u16().map(Self::Dh), + ForceDecodeKind::Byte => bytes.next_u8 ().map(Self::Db), }; } + // TODO: Respect alignment + // Else try to get a string - if let Some((str_len, with_nulls_len)) = self::read_ascii_until_null(pos, bytes) { - debug_assert!(with_nulls_len % 4 == 0, "Ascii string length wasn't multiple of 4"); - return Some((Self::Ascii { len: str_len }, with_nulls_len)); + if let Some(len) = self::read_ascii_until_null(bytes) { + return Some(Self::Ascii { len }); } // Else try to read a `u32` if let Some(value) = bytes.next_u32() { - return Some((Self::Dw(value), 4)); + return Some(Self::Dw(value)); } // Else read a single byte - bytes.next_u8().map(|value| (Self::Db(value), 1)) + bytes.next_u8().map(Self::Db) } } @@ -151,14 +152,13 @@ impl InstFmt for Directive { fn fmt(&self, pos: Pos, bytes: &[u8], f: &mut std::fmt::Formatter) -> std::fmt::Result { let mnemonic = self.mnemonic(); - #[allow(clippy::as_conversions)] // `len` will always fit into a `usize`. 
match self { Self::Dw(value) => write!(f, "{mnemonic} {value:#x}"), Self::Dh(value) => write!(f, "{mnemonic} {value:#x}"), Self::Db(value) => write!(f, "{mnemonic} {value:#x}"), &Self::Ascii { len } => { let pos = pos.as_mem_idx(); - let string = &bytes[pos..pos + len as usize]; + let string = &bytes[pos..pos + len]; let string = AsciiStr::from_ascii(string).expect("Ascii string was invalid").as_str(); write!(f, "{mnemonic} \"{}\"", string.escape_debug()) }, @@ -166,30 +166,35 @@ impl InstFmt for Directive { } } -/// Reads an ascii string from a byte slice until null. -/// -/// Will always read in multiples of a word (4 bytes), including the null. +/// Reads an ascii string from a byte slice until null, aligned to a word #[allow(clippy::as_conversions, clippy::cast_possible_truncation)] // Our length will always fit into a `u32`. -fn read_ascii_until_null(pos: Pos, bytes: &[u8]) -> Option<(usize, usize)> { - // Get the next null or invalid character - let (idx, null) = bytes.iter().enumerate().find_map(|(idx, &byte)| match AsciiChar::from_ascii(byte) { - Ok(AsciiChar::Null) => Some((idx, true)), - Err(_) => Some((idx, false)), - _ => None, - })?; +fn read_ascii_until_null(bytes: &[u8]) -> Option<usize> { + // For each set of 4 bytes in the input + for (bytes, cur_size) in bytes.array_chunks::<4>().zip((0..).step_by(4)) { + // If the bytes aren't all ascii, return + if !bytes.iter().all(|&ch| AsciiChar::from_ascii(ch).is_ok()) { + return None; + } - // If it wasn't a null or the first character was a null, return None - if !null || idx == 0 { - return None; + // Else check if we got any nulls + // Note: In order to return, after the first null, we must have + // all nulls until the end of the word. + #[allow(clippy::match_same_arms)] // We can't change the order of the arms. 
+ return match bytes { + // If we got all nulls, as long as we aren't empty, return the string + [0, 0, 0, 0] => match cur_size { + 0 => None, + _ => Some(cur_size + 4), + }, + [0, _, _, _] => None, + [_, 0, 0, 0] => Some(cur_size + 4), + [_, 0, _, _] => None, + [_, _, 0, 0] => Some(cur_size + 4), + [_, _, 0, _] => None, + [_, _, _, 0] => Some(cur_size + 4), + + _ => continue, + }; } - - // Else make sure until the end of the word it's all nulls - let nulls_len = 4 - ((pos.0 as usize + idx) % 4); - let nulls = bytes.get(idx..idx + nulls_len)?; - if !nulls.iter().all(|&byte| byte == 0) { - return None; - } - - // Else return both lengths - Some((idx, idx + nulls_len)) + None } diff --git a/dcb-exe/src/exe/inst/iter.rs b/dcb-exe/src/exe/inst/iter.rs index aa7f750..3592cc2 100644 --- a/dcb-exe/src/exe/inst/iter.rs +++ b/dcb-exe/src/exe/inst/iter.rs @@ -1,11 +1,7 @@ //! Parsing iterator // Imports -use super::{ - basic::{self, Decodable as _}, - pseudo::{self, Decodable as _}, - Directive, Inst, InstSize, -}; +use super::{Inst, InstSize}; use crate::Pos; /// Parsing iterator. @@ -37,53 +33,17 @@ impl<'a> ParseIter<'a> { impl<'a> Iterator for ParseIter<'a> { type Item = (Pos, Inst); - #[allow(clippy::as_conversions, clippy::cast_possible_truncation)] // Byte lengths will always fit into a `u32`, as `self.bytes.len()` is always smaller than `u32`. fn next(&mut self) -> Option<Self::Item> { - // If we're outside of code range, decode a directive - if !Inst::CODE_RANGE.contains(&self.cur_pos) { - let (directive, len) = Directive::decode(self.cur_pos, self.bytes)?; - self.bytes = &self.bytes[len..]; - let pos = self.cur_pos; - self.cur_pos += len as u32; - return Some((pos, Inst::Directive(directive))); - } + // Try to read an instruction + let inst = Inst::decode(self.cur_pos, self.bytes)?; + let pos = self.cur_pos; - // Else make the instruction iterator - // Note: We fuse it to make sure that pseudo instructions don't try to skip - // invalid instructions. 
- let mut insts = self - .bytes - .chunks(4) - .map(|word| u32::from_ne_bytes([word[0], word[1], word[2], word[3]])) - .map_while(|word| basic::Raw::from_u32(word).and_then(basic::Inst::decode)) - .fuse(); + // Then skip it in our bytes + let len = inst.size(); + self.cur_pos += len; + self.bytes = &self.bytes[len..]; - // Try to decode a pseudo-instruction - if let Some(inst) = pseudo::Inst::decode(insts.clone()) { - let len = inst.size(); - self.bytes = &self.bytes[len..]; - let pos = self.cur_pos; - self.cur_pos += len; - return Some((pos, Inst::Pseudo(inst))); - } - - // Else try to decode it as an basic instruction - if let Some(inst) = insts.next() { - self.bytes = &self.bytes[4..]; - let pos = self.cur_pos; - self.cur_pos += 4; - return Some((pos, Inst::Basic(inst))); - } - - // Else read it as a directive - match Directive::decode(self.cur_pos, self.bytes) { - Some((directive, len)) => { - self.bytes = &self.bytes[len..]; - let pos = self.cur_pos; - self.cur_pos += len as u32; - Some((pos, Inst::Directive(directive))) - }, - None => None, - } + // And return it + Some((pos, inst)) } } diff --git a/dcb-exe/src/lib.rs b/dcb-exe/src/lib.rs index 3f94ad3..d858234 100644 --- a/dcb-exe/src/lib.rs +++ b/dcb-exe/src/lib.rs @@ -15,6 +15,7 @@ associated_type_bounds, bindings_after_at, iter_map_while, + array_chunks, )] // Lints #![warn(clippy::restriction, clippy::pedantic, clippy::nursery)]