From dc3b66167e05719615c8e93105faab2ca9b16a99 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Fri, 30 Oct 2020 09:15:46 +0000 Subject: [PATCH] Revamped `game::exe::{Func, FuncTable}`. Added experimentally getting function arguments heuristically. --- .gitignore | 3 + dcb-tools/src/decompiler/main.rs | 44 +- dcb/src/game/exe/data/table.rs | 15 +- dcb/src/game/exe/func.rs | 237 ++--------- dcb/src/game/exe/func/funcs.rs | 164 -------- dcb/src/game/exe/func/known.rs | 196 +++++++++ dcb/src/game/exe/func/table.rs | 489 ++++++++++++++++++++++ dcb/src/game/exe/func/{ => table}/iter.rs | 8 +- dcb/src/game/exe/instruction/reg.rs | 22 + 9 files changed, 788 insertions(+), 390 deletions(-) delete mode 100644 dcb/src/game/exe/func/funcs.rs create mode 100644 dcb/src/game/exe/func/known.rs create mode 100644 dcb/src/game/exe/func/table.rs rename dcb/src/game/exe/func/{ => table}/iter.rs (88%) diff --git a/.gitignore b/.gitignore index 37b19a4..6a40089 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ # Ides /.vscode + +# Logs +/latest.log diff --git a/dcb-tools/src/decompiler/main.rs b/dcb-tools/src/decompiler/main.rs index 6b6f4bf..dd1cf41 100644 --- a/dcb-tools/src/decompiler/main.rs +++ b/dcb-tools/src/decompiler/main.rs @@ -80,13 +80,13 @@ use byteorder::{ByteOrder, LittleEndian}; use dcb::{ game::exe::{ data::DataTable, - func::Funcs, + func::FuncTable, instruction::{ Directive, PseudoInstruction::{self, Nop}, Raw, SimpleInstruction, }, - Instruction, Pos, + Func, Instruction, Pos, }, GameFile, }; @@ -123,19 +123,14 @@ fn main() -> Result<(), anyhow::Error> { // Get all functions log::debug!("Retrieving all functions"); - let functions: Funcs = Funcs::known() - .into_string() - .merge(Funcs::from_instructions( - instructions.iter().map(|(pos, instruction)| (*pos, instruction)), - )) - .collect(); + let functions: FuncTable = FuncTable::known().into_string().merge(FuncTable::from_instructions( + &instructions.iter().map(|(pos, instruction)| (*pos, 
instruction)), + )); // Get all data - let data_pos: DataTable = DataTable::known() - .into_string() - .merge(DataTable::search_instructions( - instructions.iter().map(|(pos, instruction)| (*pos, instruction)), - )); + let data_pos: DataTable = DataTable::known().into_string().merge(DataTable::search_instructions( + instructions.iter().map(|(pos, instruction)| (*pos, instruction)), + )); // Build the full instructions iterator // TODO: Revamp this, iterate over an enum of `Func | Data | Other` @@ -145,16 +140,20 @@ fn main() -> Result<(), anyhow::Error> { Some((output, last_instruction.replace(cur_instruction))) }) .map(|((cur_pos, instruction, cur_func), last_instruction)| (cur_pos, instruction, last_instruction, cur_func)) - .scan(None, |last_func, output @ (_, _, cur_func, _)| { - Some((output, last_func.replace(cur_func))) + .scan(None, |last_func, output @ (_, _, _, cur_func)| { + Some((output, match cur_func { + Some(cur_func) => last_func.replace(cur_func), + None => *last_func, + })) }) .map(|((cur_pos, instruction, last_instruction, cur_func), last_func)| (cur_pos, instruction, last_instruction, cur_func, last_func)); // Read all instructions let mut skipped_nops = 0; - for (cur_pos, instruction, last_instruction, cur_func, _last_func) in full_iter { + for (cur_pos, instruction, last_instruction, cur_func, last_func) in full_iter { // Note: Required by `rust-analyzer` currently, it can't determine the type of `cur_func`. - let cur_func: Option<&dcb::game::exe::Func> = cur_func; + let cur_func: Option<&Func> = cur_func; + let last_func: Option<&Func> = last_func; // If both last and current instructions are nops, skip if let (Some(Instruction::Pseudo(Nop)), Instruction::Pseudo(Nop)) = (last_instruction, instruction) { @@ -170,12 +169,12 @@ fn main() -> Result<(), anyhow::Error> { } // If we just exited a function, space it out. 
- /* - if last_func.is_some() && cur_func.is_none() { - println!("####################"); - println!(); + if let Some(last_func) = last_func { + if last_func.end_pos == cur_pos { + println!("####################"); + println!(); + } } - */ // Space out data if it had a name if let Some(data) = data_pos.get(cur_pos) { @@ -204,7 +203,6 @@ fn main() -> Result<(), anyhow::Error> { if let Some(data) = data_pos.get(cur_pos) { if data.pos == cur_pos { println!("{}:", data.name); - println!("# {}", data.kind); for description in data.desc.lines() { println!("# {}", description); } diff --git a/dcb/src/game/exe/data/table.rs b/dcb/src/game/exe/data/table.rs index c1543fa..d1d62fc 100644 --- a/dcb/src/game/exe/data/table.rs +++ b/dcb/src/game/exe/data/table.rs @@ -1,7 +1,7 @@ //! Data table //! //! This module defines the [`DataTable`] type, which -//! stores all data within the executable. +//! stores all data locations within the executable. //! //! Typically this data will be a mix of the known data, //! available through [`DataTable::known`] and heuristically @@ -25,6 +25,12 @@ use std::{collections::BTreeSet, convert::TryInto, iter::FromIterator}; /// Also guarantees all data locations are unique and non-overlapping. pub struct DataTable>(BTreeSet>); +impl> FromIterator> for DataTable { + fn from_iter>>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + impl> DataTable { /// Merges two data tables, discarding duplicates from `other`. /// @@ -57,12 +63,6 @@ impl + Into> DataTable { } } -impl> FromIterator> for DataTable { - fn from_iter>>(iter: T) -> Self { - Self(iter.into_iter().collect()) - } -} - impl DataTable<&'static str> { /// Returns all known functions /// @@ -73,7 +73,6 @@ impl DataTable<&'static str> { } } - impl DataTable { /// Searches all instructions for references to /// executable data using certain heuristics. 
diff --git a/dcb/src/game/exe/func.rs b/dcb/src/game/exe/func.rs index 9e22700..51453a5 100644 --- a/dcb/src/game/exe/func.rs +++ b/dcb/src/game/exe/func.rs @@ -1,18 +1,23 @@ //! Executable functions +//! +//! This module stores known functions +//! within the executable, as well as +//! info on them, represented by the [`Func`] +//! type. +//! +//! The full list of known function may +//! be found at [`Func::known`]. // Modules -pub mod funcs; -pub mod iter; +pub mod known; +pub mod table; // Exports -pub use funcs::Funcs; -pub use iter::WithInstructionsIter; -use maplit::hashmap; +pub use table::FuncTable; // Imports use crate::game::exe::Pos; -use indoc::indoc; -use std::collections::HashMap; +use std::{borrow::Borrow, collections::HashMap}; /// A function within the executable #[derive(Clone, Debug)] @@ -40,208 +45,56 @@ pub struct Func> { pub end_pos: Pos, } +#[allow(clippy::use_self)] // False positive +impl + Into> Func { + /// Returns this function with owned `String`s. + pub fn into_string(self) -> Func { + Func { + name: self.name.into(), + signature: self.signature.into(), + desc: self.desc.into(), + comments: self.comments.into_iter().map(|(pos, comment)| (pos, comment.into())).collect(), + labels: self.labels.into_iter().map(|(pos, label)| (pos, label.into())).collect(), + start_pos: self.start_pos, + end_pos: self.end_pos, + } + } +} + +impl> Borrow for Func { + fn borrow(&self) -> &Pos { + &self.start_pos + } +} + +/// Two functions are equal if their start position is the same. impl> PartialEq for Func { fn eq(&self, other: &Self) -> bool { - // Only compare the start position self.start_pos.eq(&other.start_pos) } } impl> Eq for Func {} +/// Only the start position is hashed, just as in the [`PartialEq`] impl. 
+impl> std::hash::Hash for Func { + fn hash(&self, state: &mut H) { + self.start_pos.hash(state); + } +} + +/// Only the start position matters for the order impl> PartialOrd for Func { fn partial_cmp(&self, other: &Self) -> Option { // Delegate to `eq` since we have a total order. Some(self.cmp(other)) } } + +/// Only the start position matters for the order impl> Ord for Func { fn cmp(&self, other: &Self) -> std::cmp::Ordering { // Only compare the start position self.start_pos.cmp(&other.start_pos) } } - -impl Func<&'static str> { - /// Returns an iterator of all known functions - #[allow(clippy::too_many_lines)] // This will be big, as it's the list of ALL known functions - pub fn known() -> impl Iterator { - std::array::IntoIter::new([ - Self { - name: "InitHeap", - signature: "fn(addr: *u32, size: u32)", - desc: "Calls A(0x39)", - comments: hashmap! { - Pos(0x8006a738) => "Register tailcall. Likely to prevent calling in KSEG0 and do it in KUSEG", - Pos(0x8006a73c) => "arg: 0x39", - }, - labels: hashmap! {}, - start_pos: Pos(0x8006a734), - end_pos: Pos(0x8006a744), - }, - Self { - name: "start", - signature: "fn()", - desc: "Executable start", - comments: hashmap! { - Pos(0x80056280) => "Zero out ZeroStart .. HeapStart word by word.", - Pos(0x80056284) => "^", - Pos(0x80056288) => "^", - Pos(0x8005628c) => "^", - Pos(0x800562a8) => "Initialize stack to (*StackTop - 0x10) | 0x80000000", - Pos(0x800562f8) => "args: (HeapStart, (*StackTop - 0x10) - *StackSize - (HeapStart & 0x1fff_ffff))", - Pos(0x8005630c) => "args: (HeapStart + 0x4, ...?)", - Pos(0x80056324) => "args: (something1_data2, something1_data2)", - }, - labels: hashmap! { - Pos(0x80056280) => "zero_loop", - }, - start_pos: Pos(0x80056270), - end_pos: Pos(0x80056330), - }, - Self { - name: "something1", - signature: "fn(arg: u32)", - desc: indoc! {" - This function checks if *something1_data1 is positive, if so decreases - it by 1 and calls call_func_arr with (something1_data2, something1_data2). 
- "}, - comments: hashmap! { - Pos(0x80056348) => "If *something1_data1 == 0, skip", - Pos(0x8005634c) => "Else decrease it by 1 and save it.", - Pos(0x80056368) => "Then call call_func_arr with args (something1_data2, something1_data2)", - }, - labels: hashmap! { - Pos(0x80056370) => "skip", - }, - start_pos: Pos(0x80056330), - end_pos: Pos(0x80056388), - }, - Self { - name: "call_func_arr", - signature: "fn(start: fn(), end: fn())", - desc: "", - comments: hashmap! { - Pos(0x800563a0) => "if `start >= end`, skip", - Pos(0x800563b0) => "If *start == 0, skip call", - Pos(0x800563b8) => "Else call *start", - Pos(0x800563c0) => "start++", - Pos(0x800563c8) => "If `start < end`, restart", - }, - labels: hashmap! { - Pos(0x800563a8) => "loop", - Pos(0x800563c0) => "skip_call", - Pos(0x800563d0) => "exit", - }, - start_pos: Pos(0x80056388), - end_pos: Pos(0x800563e4), - }, - Self { - name: "something2", - signature: "fn(start: *u32)", - desc: "", - comments: hashmap! { - Pos(0x80013e54) => "args: (start)", - Pos(0x80013e6c) => "args: (0)", - }, - labels: hashmap! { - Pos(0x80013ef4) => "0", - Pos(0x80013f48) => "1", - Pos(0x80013f54) => "2", - Pos(0x80013f6c) => "3", - Pos(0x80013f8c) => "4", - }, - start_pos: Pos(0x80013e4c), - end_pos: Pos(0x80013fa4), - }, - Self { - name: "something3", - signature: "fn()", - desc: "", - comments: hashmap! { - Pos(0x80056604) => "Loads FuncList1[3]", - Pos(0x8005660c) => "Calls FuncList1[3] (i.e. something5)", - }, - labels: hashmap! {}, - start_pos: Pos(0x800565f4), - end_pos: Pos(0x80056624), - }, - Self { - name: "something4", - signature: "fn()", - desc: "", - comments: hashmap! {}, - labels: hashmap! 
{ - Pos(0x80056ac0) => "0", - Pos(0x80056ae0) => "1", - Pos(0x80056b04) => "2", - Pos(0x80056b1c) => "3", - Pos(0x80056b34) => "4", - Pos(0x80056b44) => "5", - Pos(0x80056b54) => "6", - Pos(0x80056b58) => "7", - }, - start_pos: Pos(0x80056a30), - end_pos: Pos(0x80056b78), - }, - Self { - name: "something5", - signature: "fn()", - desc: "", - comments: hashmap! { - Pos(0x8005679c) => "Loads *(short*)something5_data1", - Pos(0x800567a4) => "If the loaded value is not zero, exit", - Pos(0x800567c0) => "Zero out the top half of `I_MASK_PTR`, which seems to be garbage", - Pos(0x800567c4) => "Then read the top half of `I_MASK_PTR` and zero-extend it, which is still garbage?", - - Pos(0x800567dc) => "Set the DMA control registers to 0x3333_3333", - Pos(0x800567e0) => "args: (something5_data1, 0x3333_3333)", - - Pos(0x800567e8) => "Save all registers with `save_registers` and check return value", - Pos(0x800567f0) => "If the return value isn't 0, call `func_831`. This shouldn't happen, as `save_registers` always returns 0", - }, - labels: hashmap! { - Pos(0x80056800) => "skip_call", - Pos(0x80056850) => "exit", - }, - start_pos: Pos(0x80056788), - end_pos: Pos(0x80056860), - }, - Self { - name: "save_registers", - signature: "fn(u32* pos)", - desc: indoc! {" - Saves the following registers in `pos[0x0 .. 0x30]`. - $ra, $gp, $sp, $fp, - $s0, $s1, $s2, $s3, - $s4, $s5, $s6, $s7, - "}, - comments: hashmap! {}, - labels: hashmap! {}, - start_pos: Pos(0x8006a674), - end_pos: Pos(0x8006a6b0), - }, - Self { - name: "memset_zero", - signature: "fn(u32* ptr, u32 size)", - desc: indoc! {" - Zeroes out the memory at `ptr` for `size` words. - "}, - comments: hashmap! { - Pos(0x80056c90) => "If size == 0, return", - Pos(0x80056c94) => "size--", - Pos(0x80056c9c) => "*ptr = 0", - Pos(0x80056ca0) => "size--", - Pos(0x80056ca4) => "While size != -1, continue", - Pos(0x80056ca8) => "ptr++" - }, - labels: hashmap! 
{ - Pos(0x80056c9c) => "loop", - Pos(0x80056cac) => "exit", - }, - start_pos: Pos(0x80056c90), - end_pos: Pos(0x80056cb4), - }, - ]) - } -} diff --git a/dcb/src/game/exe/func/funcs.rs b/dcb/src/game/exe/func/funcs.rs deleted file mode 100644 index 49de9e6..0000000 --- a/dcb/src/game/exe/func/funcs.rs +++ /dev/null @@ -1,164 +0,0 @@ -//! Function lists - -// Imports -use super::{Func, WithInstructionsIter}; -use crate::{ - game::exe::{ - instruction::{Directive, PseudoInstruction, Register, SimpleInstruction}, - Instruction, Pos, - }, - util::discarding_sorted_merge_iter::DiscardingSortedMergeIter, -}; -use maplit::hashmap; -use std::{collections::BTreeSet, iter::FromIterator, vec}; - -/// A sorted list of functions by their start address. -pub struct Funcs>(Vec>); - -impl> FromIterator> for Funcs { - fn from_iter>>(iter: T) -> Self { - Self(iter.into_iter().collect()) - } -} - -impl> Funcs { - /// Merges two function lists, discarding any duplicates - /// from `other`. - #[must_use] - pub fn merge(self, other: Self) -> DiscardingSortedMergeIter, vec::IntoIter>, vec::IntoIter>> { - DiscardingSortedMergeIter::new(self.0.into_iter(), other.0.into_iter()) - } - - /// Adapts an instruction iterator to extract the current function - pub fn with_instructions<'a, I: Iterator>(&'a self, instructions: I) -> WithInstructionsIter<'a, S, I> { - WithInstructionsIter::new(instructions, self) - } - - /// Retrieves a function with start address `pos` - #[must_use] - pub fn get(&self, pos: Pos) -> Option<&Func> { - // Note: As we're sorted, we can binary search - self.0 - .binary_search_by(|func| func.start_pos.cmp(&pos)) - .ok() - .and_then(|idx| self.0.get(idx)) - } -} - -#[allow(clippy::use_self)] // We're not using `Funcs`, but `Funcs` -impl + Into> Funcs { - /// Converts all strings to `String`. 
- #[must_use] - pub fn into_string(self) -> Funcs { - Funcs( - self.0 - .into_iter() - .map(|func| Func { - name: func.name.into(), - signature: func.signature.into(), - desc: func.desc.into(), - comments: func.comments.into_iter().map(|(pos, comment)| (pos, comment.into())).collect(), - labels: func.labels.into_iter().map(|(pos, label)| (pos, label.into())).collect(), - start_pos: func.start_pos, - end_pos: func.end_pos, - }) - .collect(), - ) - } -} - - -impl Funcs<&'static str> { - /// Returns all known functions - #[must_use] - pub fn known() -> Self { - let mut functions: Vec<_> = Func::known().collect(); - - functions.sort_by(|lhs, rhs| lhs.start_pos.cmp(&rhs.start_pos)); - Self(functions) - } -} - -impl Funcs { - /// Creates a new list of functions from an iterator over instructions - #[must_use] - pub fn from_instructions<'a>(instructions: impl Iterator + Clone) -> Self { - // Get all instruction offsets present, ignoring directives. - let offsets: BTreeSet = instructions - .clone() - .filter_map(|(pos, instruction)| match instruction { - Instruction::Directive(_) => None, - _ => Some(pos), - }) - .collect(); - - // Get all returns - let returns: BTreeSet = instructions - .clone() - .filter_map(|(pos, instruction)| match instruction { - Instruction::Simple(SimpleInstruction::Jr { rs: Register::Ra }) => Some(pos), - _ => None, - }) - .collect(); - - // Get all labels - let labels: BTreeSet = instructions - .clone() - .filter_map(|(_, instruction)| match instruction { - Instruction::Simple( - SimpleInstruction::J { target } | - SimpleInstruction::Beq { target, .. } | - SimpleInstruction::Bne { target, .. } | - SimpleInstruction::Bltz { target, .. } | - SimpleInstruction::Bgez { target, .. } | - SimpleInstruction::Bgtz { target, .. } | - SimpleInstruction::Blez { target, .. } | - SimpleInstruction::Bltzal { target, .. } | - SimpleInstruction::Bgezal { target, .. }, - ) | - Instruction::Pseudo( - PseudoInstruction::Beqz { target, .. 
} | PseudoInstruction::Bnez { target, .. } | PseudoInstruction::B { target }, - ) => Some(*target), - _ => None, - }) - .filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target) && offsets.contains(target)) - .collect(); - - // Now get every function entrance from jumps and `dw`s. - let function_entrances: BTreeSet = instructions - .filter_map(|(_, instruction)| match instruction { - Instruction::Simple(SimpleInstruction::Jal { target }) => Some(*target), - Instruction::Directive(Directive::Dw(target)) => Some(Pos(*target)), - _ => None, - }) - .filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target) && offsets.contains(target)) - .collect(); - - // Now combine the function entrances and exits. - // Note: functions will be sorted, as - let functions = function_entrances - .iter() - .zip(0..) - .map(|(&target, idx)| { - // Note: +8 for return + instruction after. - let end_pos = returns.range(target..).next().copied().unwrap_or(target) + 8; - let labels = labels - .range(target..end_pos) - .zip(0..) - .map(|(&pos, idx)| (pos, format!("{idx}"))) - .collect(); - Func { - name: format!("func_{idx}"), - signature: String::new(), - desc: String::new(), - comments: hashmap! {}, - labels, - start_pos: target, - end_pos, - } - }) - .collect(); - - Self(functions) - } -} diff --git a/dcb/src/game/exe/func/known.rs b/dcb/src/game/exe/func/known.rs new file mode 100644 index 0000000..f22c4a0 --- /dev/null +++ b/dcb/src/game/exe/func/known.rs @@ -0,0 +1,196 @@ +//! Known functions +//! +//! This module stores the [`Func::known`] function +//! that returns all known functions. +//! +//! It is a separate module, as the known functions +//! occupy a large amount of space. 
+ +// Imports +use super::{Func, Pos}; +use indoc::indoc; +use maplit::hashmap; + +impl Func<&'static str> { + /// Returns an iterator of all known functions + #[allow(clippy::too_many_lines)] // This will be big, as it's the list of ALL known functions + pub fn known() -> impl Iterator { + std::array::IntoIter::new([ + Self { + name: "InitHeap", + signature: "fn(addr: *u32, size: u32)", + desc: "Calls A(0x39)", + comments: hashmap! { + Pos(0x8006a738) => "Register tailcall. Likely to prevent calling in KSEG0 and do it in KUSEG", + Pos(0x8006a73c) => "arg: 0x39", + }, + labels: hashmap! {}, + start_pos: Pos(0x8006a734), + end_pos: Pos(0x8006a744), + }, + Self { + name: "start", + signature: "fn()", + desc: "Executable start", + comments: hashmap! { + Pos(0x80056280) => "Zero out ZeroStart .. HeapStart word by word.", + Pos(0x80056284) => "^", + Pos(0x80056288) => "^", + Pos(0x8005628c) => "^", + Pos(0x800562a8) => "Initialize stack to (*StackTop - 0x10) | 0x80000000", + Pos(0x800562f8) => "args: (HeapStart, (*StackTop - 0x10) - *StackSize - (HeapStart & 0x1fff_ffff))", + Pos(0x8005630c) => "args: (HeapStart + 0x4, ...?)", + Pos(0x80056324) => "args: (something1_data2, something1_data2)", + }, + labels: hashmap! { + Pos(0x80056280) => "zero_loop", + }, + start_pos: Pos(0x80056270), + end_pos: Pos(0x80056330), + }, + Self { + name: "something1", + signature: "fn(arg: u32)", + desc: indoc! {" + This function checks if *something1_data1 is positive, if so decreases + it by 1 and calls call_func_arr with (something1_data2, something1_data2). + "}, + comments: hashmap! { + Pos(0x80056348) => "If *something1_data1 == 0, skip", + Pos(0x8005634c) => "Else decrease it by 1 and save it.", + Pos(0x80056368) => "Then call call_func_arr with args (something1_data2, something1_data2)", + }, + labels: hashmap! 
{ + Pos(0x80056370) => "skip", + }, + start_pos: Pos(0x80056330), + end_pos: Pos(0x80056388), + }, + Self { + name: "call_func_arr", + signature: "fn(start: fn(), end: fn())", + desc: "", + comments: hashmap! { + Pos(0x800563a0) => "if `start >= end`, skip", + Pos(0x800563b0) => "If *start == 0, skip call", + Pos(0x800563b8) => "Else call *start", + Pos(0x800563c0) => "start++", + Pos(0x800563c8) => "If `start < end`, restart", + }, + labels: hashmap! { + Pos(0x800563a8) => "loop", + Pos(0x800563c0) => "skip_call", + Pos(0x800563d0) => "exit", + }, + start_pos: Pos(0x80056388), + end_pos: Pos(0x800563e4), + }, + Self { + name: "something2", + signature: "fn(start: *u32)", + desc: "", + comments: hashmap! { + Pos(0x80013e54) => "args: (start)", + Pos(0x80013e6c) => "args: (0)", + }, + labels: hashmap! { + Pos(0x80013ef4) => "0", + Pos(0x80013f48) => "1", + Pos(0x80013f54) => "2", + Pos(0x80013f6c) => "3", + Pos(0x80013f8c) => "4", + }, + start_pos: Pos(0x80013e4c), + end_pos: Pos(0x80013fa4), + }, + Self { + name: "something3", + signature: "fn()", + desc: "", + comments: hashmap! { + Pos(0x80056604) => "Loads FuncList1[3]", + Pos(0x8005660c) => "Calls FuncList1[3] (i.e. something5)", + }, + labels: hashmap! {}, + start_pos: Pos(0x800565f4), + end_pos: Pos(0x80056624), + }, + Self { + name: "something4", + signature: "fn()", + desc: "", + comments: hashmap! {}, + labels: hashmap! { + Pos(0x80056ac0) => "0", + Pos(0x80056ae0) => "1", + Pos(0x80056b04) => "2", + Pos(0x80056b1c) => "3", + Pos(0x80056b34) => "4", + Pos(0x80056b44) => "5", + Pos(0x80056b54) => "6", + Pos(0x80056b58) => "7", + }, + start_pos: Pos(0x80056a30), + end_pos: Pos(0x80056b78), + }, + Self { + name: "something5", + signature: "fn()", + desc: "", + comments: hashmap! 
{ + Pos(0x8005679c) => "Loads *(short*)something5_data1", + Pos(0x800567a4) => "If the loaded value is not zero, exit", + Pos(0x800567c0) => "Zero out the top half of `I_MASK_PTR`, which seems to be garbage", + Pos(0x800567c4) => "Then read the top half of `I_MASK_PTR` and zero-extend it, which is still garbage?", + + Pos(0x800567dc) => "Set the DMA control registers to 0x3333_3333", + Pos(0x800567e0) => "args: (something5_data1, 0x3333_3333)", + + Pos(0x800567e8) => "Save all registers with `save_registers` and check return value", + Pos(0x800567f0) => "If the return value isn't 0, call `func_831`. This shouldn't happen, as `save_registers` always returns 0", + }, + labels: hashmap! { + Pos(0x80056800) => "skip_call", + Pos(0x80056850) => "exit", + }, + start_pos: Pos(0x80056788), + end_pos: Pos(0x80056860), + }, + Self { + name: "save_registers", + signature: "fn(u32* pos)", + desc: indoc! {" + Saves the following registers in `pos[0x0 .. 0x30]`. + $ra, $gp, $sp, $fp, + $s0, $s1, $s2, $s3, + $s4, $s5, $s6, $s7, + "}, + comments: hashmap! {}, + labels: hashmap! {}, + start_pos: Pos(0x8006a674), + end_pos: Pos(0x8006a6b0), + }, + Self { + name: "memset_zero", + signature: "fn(u32* ptr, u32 size)", + desc: indoc! {" + Zeroes out the memory at `ptr` for `size` words. + "}, + comments: hashmap! { + Pos(0x80056c90) => "If size == 0, return", + Pos(0x80056c94) => "size--", + Pos(0x80056c9c) => "*ptr = 0", + Pos(0x80056ca0) => "size--", + Pos(0x80056ca4) => "While size != -1, continue", + Pos(0x80056ca8) => "ptr++" + }, + labels: hashmap! { + Pos(0x80056c9c) => "loop", + Pos(0x80056cac) => "exit", + }, + start_pos: Pos(0x80056c90), + end_pos: Pos(0x80056cb4), + }, + ]) + } +} diff --git a/dcb/src/game/exe/func/table.rs b/dcb/src/game/exe/func/table.rs new file mode 100644 index 0000000..e8314af --- /dev/null +++ b/dcb/src/game/exe/func/table.rs @@ -0,0 +1,489 @@ +//! Function table +//! +//! This module defines the [`FuncTable`] type, which +//! 
stores all function within the executable. +//! +//! Typically these functions will be a mix of the known function, +//! available through [`FuncTable::known`] and heuristically +//! discovered functions through instruction references, available +//! through [`FuncTable::search_instructions`]. + +// Modules +pub mod iter; + +// Exports +pub use iter::WithInstructionsIter; + +// Imports +use super::Func; +use crate::{ + game::exe::{ + instruction::{Directive, PseudoInstruction, Register, SimpleInstruction}, + Instruction, Pos, + }, + util::discarding_sorted_merge_iter::DiscardingSortedMergeIter, +}; +use maplit::hashmap; +use std::{collections::BTreeSet, iter::FromIterator}; + +/// Function table +/// +/// Stores all functions sorted by their address. +/// Also guarantees all functions are unique and non-overlapping. +pub struct FuncTable>(BTreeSet>); + +impl> FromIterator> for FuncTable { + fn from_iter>>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +impl> FuncTable { + /// Merges two data tables, discarding duplicates from `other`. + /// + /// This can be useful when combining known functions and heuristically + /// discovered function, as the known functions are always kept, and the + /// duplicate discovered ones are discarded. + #[must_use] + pub fn merge(self, other: Self) -> Self { + // Note: We don't return the iterator, as we want the user to + // keep the guarantees supplied by this type. 
+ DiscardingSortedMergeIter::new(self.0.into_iter(), other.0.into_iter()).collect() + } + + /// Retrieves a function with start address `pos` + #[must_use] + pub fn get(&self, pos: Pos) -> Option<&Func> { + // Note: As we're sorted, we can binary search + self.0.range(..=pos).filter(|func| func.start_pos == pos).next_back() + } + + /// Adapts an instruction iterator to extract the current function + pub fn with_instructions<'a, I: Iterator>(&'a self, instructions: I) -> WithInstructionsIter<'a, S, I> { + WithInstructionsIter::new(instructions, self) + } +} + +#[allow(clippy::use_self)] // We're not using `Funcs`, but `Funcs` +impl + Into> FuncTable { + /// Converts all strings to `String`. + #[must_use] + pub fn into_string(self) -> FuncTable { + FuncTable(self.0.into_iter().map(Func::into_string).collect()) + } +} + + +impl FuncTable<&'static str> { + /// Returns all known functions + #[must_use] + pub fn known() -> Self { + Func::known().collect() + } +} + +impl FuncTable { + /// Creates a new list of functions from an iterator over instructions + #[must_use] + #[allow(clippy::too_many_lines)] // TODO: Refactor? + #[allow(clippy::enum_glob_use)] // It's only for this function + pub fn from_instructions<'a>(instructions: &(impl Iterator + Clone)) -> Self { + use Instruction::{Pseudo, Simple}; + use PseudoInstruction::*; + use SimpleInstruction::*; + + // Get all returns + let returns: BTreeSet = instructions + .clone() + .filter_map(|(pos, instruction)| match instruction { + Simple(Jr { rs: Register::Ra }) => Some(pos), + _ => None, + }) + .collect(); + + // Get all possible tailcalls + let tailcalls: BTreeSet = instructions + .clone() + .filter_map(|(pos, instruction)| match instruction { + Simple(J { .. } | Jr { .. }) => Some(pos), + _ => None, + }) + .collect(); + + // Get all labels + let labels: BTreeSet = instructions + .clone() + .filter_map(|(_, instruction)| match instruction { + Simple( + J { target } | + Beq { target, .. } | + Bne { target, .. 
} | + Bltz { target, .. } | + Bgez { target, .. } | + Bgtz { target, .. } | + Blez { target, .. } | + Bltzal { target, .. } | + Bgezal { target, .. }, + ) | + Pseudo(Beqz { target, .. } | Bnez { target, .. } | B { target }) => Some(*target), + _ => None, + }) + .filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target)) + .collect(); + + // Now check every `Jal` and `Dw` for possible function entrances + let function_entries: BTreeSet = instructions + .clone() + .filter_map(|(_, instruction)| match instruction { + Simple(Jal { target }) => Some(*target), + Instruction::Directive(Directive::Dw(target)) => Some(Pos(*target)), + _ => None, + }) + .filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target)) + .collect(); + + #[allow(clippy::cognitive_complexity)] // TODO: Fix + function_entries + .iter() + .zip(0..) + .map(|(&func_pos, idx)| { + // Try to get the end position from the returns + // Note: +8 for return + instruction after. + let mut end_pos: Pos = returns.range(func_pos..).next().copied().unwrap_or(func_pos) + 8; + + // If there's a function in between us and the return, use the last tailcall instead + if let Some(next_func_pos) = function_entries.range(func_pos + 4..end_pos).next() { + end_pos = tailcalls.range(..next_func_pos).next_back().copied().unwrap_or(func_pos) + 8; + + // If we got a tailcall before this function, just end it 2 instructions + if end_pos <= func_pos { + end_pos = func_pos + 8; + } + } + + // Get all labels within this function + let labels = labels + .range(func_pos..end_pos) + .zip(0..) + .map(|(&pos, idx)| (pos, format!("{idx}"))) + .collect(); + + // Check if any instructions use `$aX` and for what to try and piece + // together arguments. + // Arguments `$a0` through `$a3` + // TODO: Maybe save the instruction iterator for this function in `function_entries` somehow? + // TODO: Maybe check for return values too. 
+ let mut arguments: [Option<&'static str>; 4] = [None; 4]; + #[allow(clippy::indexing_slicing)] // The returned indexes will always be < 4. + for (_, instruction) in instructions + .clone() + .skip_while(|(pos, _)| *pos < func_pos) + .take_while(|(pos, _)| *pos < end_pos) + { + // TODO: Generalize this in `Instruction` as a method that + // returns all registers used maybe. + match instruction { + Simple(Sb { rt, rs, .. } | Lb { rt, rs, .. } | Lbu { rt, rs, .. }) => { + if let Some(idx) = rt.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u8"); + } + } + if let Some(idx) = rs.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("*u8"); + } + } + }, + Pseudo(SbImm { rx, .. } | LbImm { rx, .. } | LbuImm { rx, .. }) => { + if let Some(idx) = rx.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("*u8"); + } + } + }, + + Simple(Sh { rt, rs, .. } | Lh { rt, rs, .. } | Lhu { rt, rs, .. }) => { + if let Some(idx) = rt.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u16"); + } + } + if let Some(idx) = rs.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("*u16"); + } + } + }, + Pseudo(ShImm { rx, .. } | LhImm { rx, .. } | LhuImm { rx, .. }) => { + if let Some(idx) = rx.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("*u16"); + } + } + }, + + Simple( + Swl { rt, rs, .. } | Sw { rt, rs, .. } | Swr { rt, rs, .. } | Lwl { rt, rs, .. } | Lw { rt, rs, .. } | Lwr { rt, rs, .. }, + ) => { + if let Some(idx) = rt.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + if let Some(idx) = rs.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("*u32"); + } + } + }, + + Pseudo( + LwlImm { rx, .. } | LwImm { rx, .. } | LwrImm { rx, .. } | SwlImm { rx, .. } | SwImm { rx, .. } | SwrImm { rx, .. 
}, + ) => { + if let Some(idx) = rx.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("*u32"); + } + } + }, + + Simple( + Addi { rt, rs, .. } | + Addiu { rt, rs, .. } | + Slti { rt, rs, .. } | + Sltiu { rt, rs, .. } | + Andi { rt, rs, .. } | + Ori { rt, rs, .. } | + Xori { rt, rs, .. } | + Mult { rs, rt } | + Multu { rs, rt } | + Div { rs, rt } | + Divu { rs, rt } | + Beq { rs, rt, .. } | + Bne { rs, rt, .. } | + LwcN { rs, rt, .. } | + SwcN { rs, rt, .. }, + ) | + Pseudo(Subi { rt, rs, .. } | Subiu { rt, rs, .. }) => { + if let Some(idx) = rt.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + if let Some(idx) = rs.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + }, + + Simple( + Add { rd, rs, rt } | + Addu { rd, rs, rt } | + Sub { rd, rs, rt } | + Subu { rd, rs, rt } | + Slt { rd, rs, rt } | + Sltu { rd, rs, rt } | + And { rd, rs, rt } | + Or { rd, rs, rt } | + Xor { rd, rs, rt } | + Nor { rd, rs, rt } | + Sllv { rd, rt, rs } | + Srlv { rd, rt, rs } | + Srav { rd, rt, rs }, + ) => { + if let Some(idx) = rd.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + if let Some(idx) = rs.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + if let Some(idx) = rt.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + }, + + Simple( + Sll { rd, rt, .. } | + Srl { rd, rt, .. } | + Sra { rd, rt, .. } | + MfcN { rt, rd, .. } | + CfcN { rt, rd, .. } | + MtcN { rt, rd, .. } | + CtcN { rt, rd, .. 
}, + ) => { + if let Some(idx) = rd.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + if let Some(idx) = rt.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + }, + + Simple(Jalr { rd, rs }) => { + if let Some(idx) = rd.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + if let Some(idx) = rs.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("*fn()"); + } + } + }, + + Simple(Lui { rt, .. }) => { + if let Some(idx) = rt.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + }, + + Simple(Mfhi { rd } | Mflo { rd }) => { + if let Some(idx) = rd.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + }, + + Simple( + Bltz { rs, .. } | + Bgez { rs, .. } | + Bgtz { rs, .. } | + Blez { rs, .. } | + Bltzal { rs, .. } | + Bgezal { rs, .. } | + Jr { rs } | + Mthi { rs } | + Mtlo { rs }, + ) => { + if let Some(idx) = rs.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + }, + + Pseudo(MovReg { rx, ry }) => { + if let Some(idx) = rx.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + if let Some(idx) = ry.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + }, + + Pseudo(La { rx, .. } | Li32 { rx, .. } | LiU16 { rx, .. } | LiI16 { rx, .. } | LiUpper16 { rx, .. 
}) => { + if let Some(idx) = rx.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("*u32"); + } + } + }, + + Pseudo( + AddAssign { rx, rt } | + AdduAssign { rx, rt } | + SubAssign { rx, rt } | + SubuAssign { rx, rt } | + AndAssign { rx, rt } | + OrAssign { rx, rt } | + XorAssign { rx, rt } | + NorAssign { rx, rt }, + ) => { + if let Some(idx) = rx.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + if let Some(idx) = rt.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + }, + + Pseudo( + AddiAssign { rx, .. } | + AddiuAssign { rx, .. } | + AndiAssign { rx, .. } | + OriAssign { rx, .. } | + XoriAssign { rx, .. } | + SllAssign { rx, .. } | + SrlAssign { rx, .. } | + SraAssign { rx, .. } | + SubiAssign { rx, .. } | + SubiuAssign { rx, .. }, + ) => { + if let Some(idx) = rx.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + }, + + Pseudo(SllvAssign { rx, rs } | SrlvAssign { rx, rs } | SravAssign { rx, rs }) => { + if let Some(idx) = rx.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + if let Some(idx) = rs.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("u32"); + } + } + }, + + Pseudo(JalrRa { rx } | Beqz { rx, .. } | Bnez { rx, .. 
}) => { + if let Some(idx) = rx.arg_idx() { + if arguments[idx].is_none() { + arguments[idx] = Some("fn()"); + } + } + }, + + _ => (), + } + } + + #[rustfmt::skip] + let signature = match arguments { + [None , None , None , None ] => String::new(), + [Some(a), None , None , None ] => format!("fn(a: {a})"), + [a , Some(b), None , None ] => format!("fn(a: { }, b: {b})" , a.unwrap_or("???")), + [a , b , Some(c), None ] => format!("fn(a: { }, b: { }, c: {c})" , a.unwrap_or("???"), b.unwrap_or("???")), + [a , b , c , Some(d)] => format!("fn(a: { }, b: { }, c: { } d: {d})", a.unwrap_or("???"), b.unwrap_or("???"), c.unwrap_or("???")), + }; + + Func { + name: format!("func_{idx}"), + signature, + desc: String::new(), + comments: hashmap! {}, + labels, + start_pos: func_pos, + end_pos, + } + }) + .collect() + } +} diff --git a/dcb/src/game/exe/func/iter.rs b/dcb/src/game/exe/func/table/iter.rs similarity index 88% rename from dcb/src/game/exe/func/iter.rs rename to dcb/src/game/exe/func/table/iter.rs index 67c3641..4d55751 100644 --- a/dcb/src/game/exe/func/iter.rs +++ b/dcb/src/game/exe/func/table/iter.rs @@ -1,7 +1,9 @@ //! Iterators +// TODO: Deprecate in favor of a function + data iterator. 
+ // Imports -use super::{Func, Funcs}; +use super::{Func, FuncTable}; use crate::game::exe::{Instruction, Pos}; /// Iterator of instructions along with the current function @@ -10,7 +12,7 @@ pub struct WithInstructionsIter<'a, S: AsRef, I: Iterator, + funcs: &'a FuncTable, /// Current function cur_func: Option<&'a Func>, @@ -18,7 +20,7 @@ pub struct WithInstructionsIter<'a, S: AsRef, I: Iterator, I: Iterator> WithInstructionsIter<'a, S, I> { /// Creates a new instructions iterator - pub(super) fn new(instructions: I, funcs: &'a Funcs) -> Self { + pub(super) fn new(instructions: I, funcs: &'a FuncTable) -> Self { Self { instructions, funcs, diff --git a/dcb/src/game/exe/instruction/reg.rs b/dcb/src/game/exe/instruction/reg.rs index 80e260b..fbf6788 100644 --- a/dcb/src/game/exe/instruction/reg.rs +++ b/dcb/src/game/exe/instruction/reg.rs @@ -68,6 +68,28 @@ macro_rules! generate_register { } } +impl Register { + /// Returns the argument index (`0..=3` for `A0`..`A3`) of this register, or `None` if it is not an argument register + #[must_use] + pub const fn arg_idx(self) -> Option<usize> { + let idx = match self { + Self::A0 => 0, + Self::A1 => 1, + Self::A2 => 2, + Self::A3 => 3, + _ => return None, + }; + + Some(idx) + } + + /// Checks if this register is an argument register, i.e. whether `arg_idx` returns `Some` + #[must_use] + pub const fn is_arg(self) -> bool { + self.arg_idx().is_some() + } +} + generate_register! { pub enum Register { /// Zero register