diff --git a/dcb-tools/src/decompiler/main.rs b/dcb-tools/src/decompiler/main.rs
index 800e691..52a8825 100644
--- a/dcb-tools/src/decompiler/main.rs
+++ b/dcb-tools/src/decompiler/main.rs
@@ -75,12 +75,11 @@ mod cli;
 mod logger;
 
 // Imports
-use std::collections::{HashMap, HashSet};
-
 use anyhow::Context;
 use byteorder::{ByteOrder, LittleEndian};
 use dcb::{
 	game::exe::{
+		data::AllData,
 		func::Funcs,
 		instruction::{
 			Directive,
@@ -91,8 +90,6 @@ use dcb::{
 	},
 	GameFile,
 };
-use itertools::Itertools;
-use ref_cast::RefCast;
 
 #[allow(clippy::cognitive_complexity, clippy::too_many_lines)] // TODO: Refactor
 fn main() -> Result<(), anyhow::Error> {
@@ -133,56 +130,12 @@ fn main() -> Result<(), anyhow::Error> {
 		))
 		.collect();
 
-	// All data / string addresses
-	log::debug!("Retrieving all data / strings addresses");
-	let data_string_addresses: HashSet<Pos> = instructions
-		.iter()
-		.filter_map(|(_, instruction)| match instruction {
-			Instruction::Pseudo(
-				PseudoInstruction::La { target: offset, .. } |
-				PseudoInstruction::Li32 { imm: offset, .. } |
-				PseudoInstruction::LbImm { offset, .. } |
-				PseudoInstruction::LbuImm { offset, .. } |
-				PseudoInstruction::LhImm { offset, .. } |
-				PseudoInstruction::LhuImm { offset, .. } |
-				PseudoInstruction::LwlImm { offset, .. } |
-				PseudoInstruction::LwImm { offset, .. } |
-				PseudoInstruction::LwrImm { offset, .. } |
-				PseudoInstruction::SbImm { offset, .. } |
-				PseudoInstruction::ShImm { offset, .. } |
-				PseudoInstruction::SwlImm { offset, .. } |
-				PseudoInstruction::SwImm { offset, .. } |
-				PseudoInstruction::SwrImm { offset, .. },
-			) |
-			Instruction::Directive(Directive::Dw(offset) | Directive::DwRepeated { value: offset, .. }) => Some(Pos(*offset)),
-			_ => None,
-		})
-		.collect();
-
-	// Get all strings
-	log::debug!("Retrieving all strings");
-	let strings_pos: HashMap<Pos, usize> = instructions
-		.iter()
-		.filter_map(|(cur_pos, instruction)| match instruction {
-			Instruction::Directive(Directive::Ascii(_)) => Some(*cur_pos),
-			_ => None,
-		})
-		.filter(|cur_pos| data_string_addresses.contains(cur_pos))
-		.unique()
-		.zip(0..)
-		.collect();
-
 	// Get all data
-	log::debug!("Retrieving all data");
-	let data_pos: HashMap<Pos, usize> = instructions
-		.iter()
-		.filter_map(|(cur_pos, instruction)| match instruction {
-			Instruction::Directive(Directive::Dw(_) | Directive::DwRepeated { .. }) => Some(*cur_pos),
-			_ => None,
-		})
-		.filter(|cur_pos| data_string_addresses.contains(cur_pos))
-		.unique()
-		.zip(0..)
+	let data_pos: AllData<String> = AllData::known()
+		.into_string()
+		.merge(AllData::from_instructions(
+			instructions.iter().map(|(pos, instruction)| (*pos, instruction)),
+		))
 		.collect();
 
 	// Build the full instructions iterator
@@ -226,11 +179,8 @@ fn main() -> Result<(), anyhow::Error> {
 				println!("\t.{label}:");
 			}
 		}
-		if let Some(string_idx) = strings_pos.get(&cur_pos) {
-			println!("\tstring_{string_idx}:");
-		}
-		if let Some(data_idx) = data_pos.get(&cur_pos) {
-			println!("\tdata_{data_idx}:");
+		if let Some(data) = data_pos.get(cur_pos) {
+			println!("\t{}:", data.name());
 		}
 
 		// Print the instruction and it's location.
@@ -277,12 +227,8 @@ fn main() -> Result<(), anyhow::Error> {
 				PseudoInstruction::SwlImm { offset: target, .. } |
 				PseudoInstruction::SwImm { offset: target, .. } |
 				PseudoInstruction::SwrImm { offset: target, .. },
-			) => match strings_pos
-				.get(Pos::ref_cast(target))
-				.map(|idx| (idx, "string_"))
-				.or_else(|| data_pos.get(Pos::ref_cast(target)).map(|idx| (idx, "data_")))
-			{
-				Some((target, prefix)) => print!("{} {prefix}{target}", strip_last_arg(instruction)),
+			) => match data_pos.get(Pos(*target)) {
+				Some(target) => print!("{} {}", strip_last_arg(instruction), target.name()),
 				None => print!("{instruction}"),
 			},
 
@@ -294,11 +240,8 @@ fn main() -> Result<(), anyhow::Error> {
 				if let Some(func) = functions.get(Pos(*target)) {
 					print!(" # {}", func.name);
 				}
-				if let Some(string_idx) = strings_pos.get(Pos::ref_cast(target)) {
-					print!(" # string_{string_idx}");
-				}
-				if let Some(data_idx) = data_pos.get(Pos::ref_cast(target)) {
-					print!(" # data_{data_idx}");
+				if let Some(data) = data_pos.get(Pos(*target)) {
+					print!(" # {}", data.name());
 				}
 			}
 
@@ -308,6 +251,7 @@ fn main() -> Result<(), anyhow::Error> {
 				print!(" # {comment}");
 			}
 		}
+
 		// And finish the line
 		println!();
 
diff --git a/dcb/src/game/exe.rs b/dcb/src/game/exe.rs
index 9537353..70be968 100644
--- a/dcb/src/game/exe.rs
+++ b/dcb/src/game/exe.rs
@@ -4,6 +4,7 @@
 //! as well as tools to decompile and recompile it.
 
 // Modules
+pub mod data;
 pub mod error;
 pub mod func;
 pub mod header;
@@ -11,6 +12,7 @@ pub mod instruction;
 pub mod pos;
 
 // Exports
+pub use data::Data;
 pub use error::DeserializeError;
 pub use func::Func;
 pub use header::Header;
@@ -18,7 +20,7 @@ pub use instruction::Instruction;
 pub use pos::Pos;
 
 // Imports
-use crate::{io::address::Data, GameFile};
+use crate::{io::address::Data as DataAddress, GameFile};
 use dcb_bytes::{ByteArray, Bytes};
 use std::{
 	convert::TryFrom,
@@ -41,7 +43,7 @@ pub struct Exe {
 
 impl Exe {
 	/// Start address of the executable
-	const START_ADDRESS: Data = Data::from_u64(0x58b9000);
+	const START_ADDRESS: DataAddress = DataAddress::from_u64(0x58b9000);
 }
 
 impl Exe {
diff --git a/dcb/src/game/exe/data.rs b/dcb/src/game/exe/data.rs
new file mode 100644
index 0000000..acd67f5
--- /dev/null
+++ b/dcb/src/game/exe/data.rs
@@ -0,0 +1,129 @@
+//! Executable data
+
+// Modules
+pub mod all_data;
+
+// Exports
+pub use all_data::AllData;
+
+// Imports
+use crate::game::exe::Pos;
+
+/// A named region of executable data. Equality, hashing and ordering only consider the start position.
+#[derive(Clone, Debug)]
+#[derive(serde::Serialize, serde::Deserialize)]
+pub enum Data<S: AsRef<str>> {
+	/// An ascii string
+	Ascii {
+		/// Name
+		name: S,
+
+		/// Description
+		desc: S,
+
+		/// Start position
+		start_pos: Pos,
+
+		/// End position (non-inclusive)
+		end_pos: Pos,
+	},
+
+	/// Bytes
+	Bytes {
+		/// Name
+		name: S,
+
+		/// Description
+		desc: S,
+
+		/// Start position
+		start_pos: Pos,
+
+		/// End position (non-inclusive)
+		end_pos: Pos,
+	},
+}
+
+impl<S: AsRef<str>> std::borrow::Borrow<Pos> for Data<S> {
+	fn borrow(&self) -> &Pos {
+		match self {
+			Self::Ascii { start_pos, .. } => start_pos,
+			Self::Bytes { start_pos, .. } => start_pos,
+		}
+	}
+}
+
+impl<S: AsRef<str>> PartialEq for Data<S> {
+	fn eq(&self, other: &Self) -> bool {
+		// Only compare the start position
+		self.start_pos().eq(&other.start_pos())
+	}
+}
+
+impl<S: AsRef<str>> Eq for Data<S> {}
+
+impl<S: AsRef<str>> std::hash::Hash for Data<S> {
+	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+		self.start_pos().hash(state);
+	}
+}
+
+impl<S: AsRef<str>> PartialOrd for Data<S> {
+	fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+		// Delegate to `cmp` since we have a total order.
+		Some(self.cmp(other))
+	}
+}
+impl<S: AsRef<str>> Ord for Data<S> {
+	fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+		// Only compare the start position
+		self.start_pos().cmp(&other.start_pos())
+	}
+}
+
+impl<S: AsRef<str>> Data<S> {
+	/// Accesses the name of this data
+	pub fn name(&self) -> &S {
+		match self {
+			Self::Ascii { name, .. } => name,
+			Self::Bytes { name, .. } => name,
+		}
+	}
+
+	/// Accesses the description of this data
+	pub fn desc(&self) -> &S {
+		match self {
+			Self::Ascii { desc, .. } => desc,
+			Self::Bytes { desc, .. } => desc,
+		}
+	}
+
+	/// Accesses the start position of this data
+	pub fn start_pos(&self) -> Pos {
+		match self {
+			Self::Ascii { start_pos, .. } => *start_pos,
+			Self::Bytes { start_pos, .. } => *start_pos,
+		}
+	}
+
+	/// Accesses the end position (non-inclusive) of this data
+	pub fn end_pos(&self) -> Pos {
+		match self {
+			Self::Ascii { end_pos, .. } => *end_pos,
+			Self::Bytes { end_pos, .. } => *end_pos,
+		}
+	}
+}
+
+
+impl Data<&'static str> {
+	/// Returns an iterator of all known data
+	pub fn known() -> impl Iterator<Item = Self> {
+		std::array::IntoIter::new([Self::Bytes {
+			name: "StackStart",
+			desc: "Stack position",
+			start_pos: Pos(0x8006dd44),
+			end_pos: Pos(0x8006dd48),
+		}])
+	}
+}
diff --git a/dcb/src/game/exe/data/all_data.rs b/dcb/src/game/exe/data/all_data.rs
new file mode 100644
index 0000000..1923d35
--- /dev/null
+++ b/dcb/src/game/exe/data/all_data.rs
@@ -0,0 +1,142 @@
+//! Data list
+
+// Imports
+use super::Data;
+use crate::{
+	game::exe::{
+		instruction::{Directive, PseudoInstruction},
+		Instruction, Pos,
+	},
+	util::merge_iter::MergeSortedIter,
+};
+use std::{
+	collections::{btree_set, BTreeSet},
+	iter::FromIterator,
+};
+
+/// List of data, kept in a set ordered by start position
+pub struct AllData<S: AsRef<str>>(BTreeSet<Data<S>>);
+
+impl<S: AsRef<str>> FromIterator<Data<S>> for AllData<S> {
+	fn from_iter<T: IntoIterator<Item = Data<S>>>(iter: T) -> Self {
+		Self(iter.into_iter().collect())
+	}
+}
+
+impl<S: AsRef<str>> AllData<S> {
+	/// Merges two data lists, discarding any duplicates
+	/// from `other`.
+	#[must_use]
+	pub fn merge(self, other: Self) -> MergeSortedIter<Data<S>, btree_set::IntoIter<Data<S>>, btree_set::IntoIter<Data<S>>> {
+		MergeSortedIter::new(self.0.into_iter(), other.0.into_iter())
+	}
+
+	/// Retrieves the data with start address `pos`, if any
+	#[must_use]
+	pub fn get(&self, pos: Pos) -> Option<&Data<S>> {
+		self.0.get(&pos)
+	}
+}
+
+#[allow(clippy::use_self)] // We're not using `AllData<S>`, but `AllData<String>`
+impl<S: AsRef<str> + Into<String>> AllData<S> {
+	/// Converts all strings to `String`.
+	#[must_use]
+	pub fn into_string(self) -> AllData<String> {
+		AllData(
+			self.0
+				.into_iter()
+				.map(|data| match data {
+					Data::Ascii {
+						name,
+						desc,
+						start_pos,
+						end_pos,
+					} => Data::Ascii {
+						name: name.into(),
+						desc: desc.into(),
+						start_pos,
+						end_pos,
+					},
+					Data::Bytes {
+						name,
+						desc,
+						start_pos,
+						end_pos,
+					} => Data::Bytes {
+						name: name.into(),
+						desc: desc.into(),
+						start_pos,
+						end_pos,
+					},
+				})
+				.collect(),
+		)
+	}
+}
+
+
+impl AllData<&'static str> {
+	/// Returns all known data
+	#[must_use]
+	pub fn known() -> Self {
+		Self(Data::known().collect())
+	}
+}
+
+
+impl AllData<String> {
+	/// Creates a new list of data from an iterator over instructions
+	#[must_use]
+	pub fn from_instructions<'a>(instructions: impl Iterator<Item = (Pos, &'a Instruction)> + Clone) -> Self {
+		// Get every address referenced by a load / store pseudo instruction or a `dw` directive
+		let directive_references: BTreeSet<Pos> = instructions
+			.clone()
+			.filter_map(|(_, instruction)| match instruction {
+				Instruction::Pseudo(
+					PseudoInstruction::La { target: offset, .. } |
+					PseudoInstruction::Li32 { imm: offset, .. } |
+					PseudoInstruction::LbImm { offset, .. } |
+					PseudoInstruction::LbuImm { offset, .. } |
+					PseudoInstruction::LhImm { offset, .. } |
+					PseudoInstruction::LhuImm { offset, .. } |
+					PseudoInstruction::LwlImm { offset, .. } |
+					PseudoInstruction::LwImm { offset, .. } |
+					PseudoInstruction::LwrImm { offset, .. } |
+					PseudoInstruction::SbImm { offset, .. } |
+					PseudoInstruction::ShImm { offset, .. } |
+					PseudoInstruction::SwlImm { offset, .. } |
+					PseudoInstruction::SwImm { offset, .. } |
+					PseudoInstruction::SwrImm { offset, .. },
+				) |
+				Instruction::Directive(Directive::Dw(offset) | Directive::DwRepeated { value: offset, .. }) => Some(Pos(*offset)),
+				_ => None,
+			})
+			.collect();
+
+		Self(
+			instructions
+				.filter_map(|(pos, instruction)| match instruction {
+					Instruction::Directive(directive) if directive_references.contains(&pos) => Some((pos, directive)),
+					_ => None,
+				})
+				.zip(0..)
+				.map(|((pos, directive), idx)| match directive {
+					Directive::Ascii(_) => Data::Ascii {
+						name: format!("string_{idx}"),
+						desc: String::new(),
+						start_pos: pos,
+						end_pos: pos + directive.size(),
+					},
+
+					Directive::Dw(_) | Directive::DwRepeated { .. } => Data::Bytes {
+						name: format!("data_{idx}"),
+						desc: String::new(),
+						start_pos: pos,
+						end_pos: pos + directive.size(),
+					},
+				})
+				.collect(),
+		)
+	}
+}
diff --git a/dcb/src/game/exe/instruction/directive.rs b/dcb/src/game/exe/instruction/directive.rs
index 070e77b..cff92b5 100644
--- a/dcb/src/game/exe/instruction/directive.rs
+++ b/dcb/src/game/exe/instruction/directive.rs
@@ -21,7 +21,7 @@ pub enum Directive {
 		value: u32,
 
 		/// Times the value was repeated
-		len: usize,
+		len: u32,
 	},
 
 	/// Ascii string
@@ -30,6 +30,17 @@ pub enum Directive {
 }
 
 impl Directive {
+	/// Returns the size of this directive
+	#[must_use]
+	pub fn size(&self) -> u32 {
+		#[allow(clippy::as_conversions, clippy::cast_possible_truncation)] // Our length will always fit into a `u32`.
+		match self {
+			Self::Dw(_) => 4,
+			Self::DwRepeated { len, .. } => 4 * len,
+			Self::Ascii(ascii) => 4 * (ascii.len() as u32), // NOTE(review): 4 bytes per character looks off for a string - confirm
+		}
+	}
+
 	/// Decodes a `dw` instruction
 	pub fn decode_dw(first_raw: Raw, iter: &mut (impl Iterator<Item = Raw> + Clone)) -> Self {
 		let mut times_repeated = 0;