Revamped game::exe::{Func, FuncTable}.

Added experimental, heuristic detection of function arguments.
This commit is contained in:
2020-10-30 09:15:46 +00:00
parent 0e5ea3967f
commit dc3b66167e
9 changed files with 788 additions and 390 deletions

3
.gitignore vendored
View File

@@ -11,3 +11,6 @@
# Ides
/.vscode
# Logs
/latest.log

View File

@@ -80,13 +80,13 @@ use byteorder::{ByteOrder, LittleEndian};
use dcb::{
game::exe::{
data::DataTable,
func::Funcs,
func::FuncTable,
instruction::{
Directive,
PseudoInstruction::{self, Nop},
Raw, SimpleInstruction,
},
Instruction, Pos,
Func, Instruction, Pos,
},
GameFile,
};
@@ -123,19 +123,14 @@ fn main() -> Result<(), anyhow::Error> {
// Get all functions
log::debug!("Retrieving all functions");
let functions: Funcs<String> = Funcs::known()
.into_string()
.merge(Funcs::from_instructions(
instructions.iter().map(|(pos, instruction)| (*pos, instruction)),
))
.collect();
let functions: FuncTable<String> = FuncTable::known().into_string().merge(FuncTable::from_instructions(
&instructions.iter().map(|(pos, instruction)| (*pos, instruction)),
));
// Get all data
let data_pos: DataTable<String> = DataTable::known()
.into_string()
.merge(DataTable::search_instructions(
instructions.iter().map(|(pos, instruction)| (*pos, instruction)),
));
let data_pos: DataTable<String> = DataTable::known().into_string().merge(DataTable::search_instructions(
instructions.iter().map(|(pos, instruction)| (*pos, instruction)),
));
// Build the full instructions iterator
// TODO: Revamp this, iterate over an enum of `Func | Data | Other`
@@ -145,16 +140,20 @@ fn main() -> Result<(), anyhow::Error> {
Some((output, last_instruction.replace(cur_instruction)))
})
.map(|((cur_pos, instruction, cur_func), last_instruction)| (cur_pos, instruction, last_instruction, cur_func))
.scan(None, |last_func, output @ (_, _, cur_func, _)| {
Some((output, last_func.replace(cur_func)))
.scan(None, |last_func, output @ (_, _, _, cur_func)| {
Some((output, match cur_func {
Some(cur_func) => last_func.replace(cur_func),
None => *last_func,
}))
})
.map(|((cur_pos, instruction, last_instruction, cur_func), last_func)| (cur_pos, instruction, last_instruction, cur_func, last_func));
// Read all instructions
let mut skipped_nops = 0;
for (cur_pos, instruction, last_instruction, cur_func, _last_func) in full_iter {
for (cur_pos, instruction, last_instruction, cur_func, last_func) in full_iter {
// Note: Required by `rust-analyzer` currently, it can't determine the type of `cur_func`.
let cur_func: Option<&dcb::game::exe::Func<String>> = cur_func;
let cur_func: Option<&Func<String>> = cur_func;
let last_func: Option<&Func<String>> = last_func;
// If both last and current instructions are nops, skip
if let (Some(Instruction::Pseudo(Nop)), Instruction::Pseudo(Nop)) = (last_instruction, instruction) {
@@ -170,12 +169,12 @@ fn main() -> Result<(), anyhow::Error> {
}
// If we just exited a function, space it out.
/*
if last_func.is_some() && cur_func.is_none() {
println!("####################");
println!();
if let Some(last_func) = last_func {
if last_func.end_pos == cur_pos {
println!("####################");
println!();
}
}
*/
// Space out data if it had a name
if let Some(data) = data_pos.get(cur_pos) {
@@ -204,7 +203,6 @@ fn main() -> Result<(), anyhow::Error> {
if let Some(data) = data_pos.get(cur_pos) {
if data.pos == cur_pos {
println!("{}:", data.name);
println!("# {}", data.kind);
for description in data.desc.lines() {
println!("# {}", description);
}

View File

@@ -1,7 +1,7 @@
//! Data table
//!
//! This module defines the [`DataTable`] type, which
//! stores all data within the executable.
//! stores all data locations within the executable.
//!
//! Typically this data will be a mix of the known data,
//! available through [`DataTable::known`] and heuristically
@@ -25,6 +25,12 @@ use std::{collections::BTreeSet, convert::TryInto, iter::FromIterator};
/// Also guarantees all data locations are unique and non-overlapping.
pub struct DataTable<S: AsRef<str>>(BTreeSet<Data<S>>);
impl<S: AsRef<str>> FromIterator<Data<S>> for DataTable<S> {
fn from_iter<T: IntoIterator<Item = Data<S>>>(iter: T) -> Self {
Self(iter.into_iter().collect())
}
}
impl<S: AsRef<str>> DataTable<S> {
/// Merges two data tables, discarding duplicates from `other`.
///
@@ -57,12 +63,6 @@ impl<S: AsRef<str> + Into<String>> DataTable<S> {
}
}
impl<S: AsRef<str>> FromIterator<Data<S>> for DataTable<S> {
fn from_iter<T: IntoIterator<Item = Data<S>>>(iter: T) -> Self {
Self(iter.into_iter().collect())
}
}
impl DataTable<&'static str> {
/// Returns all known data
///
@@ -73,7 +73,6 @@ impl DataTable<&'static str> {
}
}
impl DataTable<String> {
/// Searches all instructions for references to
/// executable data using certain heuristics.

View File

@@ -1,18 +1,23 @@
//! Executable functions
//!
//! This module stores known functions
//! within the executable, as well as
//! info on them, represented by the [`Func`]
//! type.
//!
//! The full list of known functions may
//! be found at [`Func::known`].
// Modules
pub mod funcs;
pub mod iter;
pub mod known;
pub mod table;
// Exports
pub use funcs::Funcs;
pub use iter::WithInstructionsIter;
use maplit::hashmap;
pub use table::FuncTable;
// Imports
use crate::game::exe::Pos;
use indoc::indoc;
use std::collections::HashMap;
use std::{borrow::Borrow, collections::HashMap};
/// A function within the executable
#[derive(Clone, Debug)]
@@ -40,208 +45,56 @@ pub struct Func<S: AsRef<str>> {
pub end_pos: Pos,
}
#[allow(clippy::use_self)] // False positive
impl<S: AsRef<str> + Into<String>> Func<S> {
    /// Converts this function into one that owns all of its strings.
    ///
    /// The name, signature, description and every comment / label value
    /// are converted with [`Into<String>`]; positions are carried over unchanged.
    pub fn into_string(self) -> Func<String> {
        // Take the function apart first so each piece can be converted by value.
        let Self {
            name,
            signature,
            desc,
            comments,
            labels,
            start_pos,
            end_pos,
        } = self;

        Func {
            name: name.into(),
            signature: signature.into(),
            desc: desc.into(),
            comments: comments.into_iter().map(|(at, text)| (at, text.into())).collect(),
            labels: labels.into_iter().map(|(at, text)| (at, text.into())).collect(),
            start_pos,
            end_pos,
        }
    }
}
/// A function can be borrowed as its start position.
///
/// This lets ordered collections of functions be queried directly by [`Pos`].
/// It relies on equality, hashing and ordering of `Func` all being defined
/// purely in terms of `start_pos`, matching those of `Pos` itself.
impl<S: AsRef<str>> Borrow<Pos> for Func<S> {
fn borrow(&self) -> &Pos {
&self.start_pos
}
}
/// Equality between functions looks only at where they start.
impl<S: AsRef<str>> PartialEq for Func<S> {
    fn eq(&self, other: &Self) -> bool {
        // Every other field is ignored on purpose.
        self.start_pos == other.start_pos
    }
}

impl<S: AsRef<str>> Eq for Func<S> {}

/// Hashing mirrors the [`PartialEq`] impl: only the start position is fed to the hasher.
impl<S: AsRef<str>> std::hash::Hash for Func<S> {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::hash::Hash::hash(&self.start_pos, state);
    }
}

/// Functions are ordered by their start position alone.
impl<S: AsRef<str>> PartialOrd for Func<S> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        // The order is total, so delegate to `cmp`.
        Some(Ord::cmp(self, other))
    }
}

/// Functions are ordered by their start position alone.
impl<S: AsRef<str>> Ord for Func<S> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Every other field is ignored on purpose.
        Ord::cmp(&self.start_pos, &other.start_pos)
    }
}
impl Func<&'static str> {
/// Returns an iterator of all known functions
#[allow(clippy::too_many_lines)] // This will be big, as it's the list of ALL known functions
pub fn known() -> impl Iterator<Item = Self> {
std::array::IntoIter::new([
Self {
name: "InitHeap",
signature: "fn(addr: *u32, size: u32)",
desc: "Calls A(0x39)",
comments: hashmap! {
Pos(0x8006a738) => "Register tailcall. Likely to prevent calling in KSEG0 and do it in KUSEG",
Pos(0x8006a73c) => "arg: 0x39",
},
labels: hashmap! {},
start_pos: Pos(0x8006a734),
end_pos: Pos(0x8006a744),
},
Self {
name: "start",
signature: "fn()",
desc: "Executable start",
comments: hashmap! {
Pos(0x80056280) => "Zero out ZeroStart .. HeapStart word by word.",
Pos(0x80056284) => "^",
Pos(0x80056288) => "^",
Pos(0x8005628c) => "^",
Pos(0x800562a8) => "Initialize stack to (*StackTop - 0x10) | 0x80000000",
Pos(0x800562f8) => "args: (HeapStart, (*StackTop - 0x10) - *StackSize - (HeapStart & 0x1fff_ffff))",
Pos(0x8005630c) => "args: (HeapStart + 0x4, ...?)",
Pos(0x80056324) => "args: (something1_data2, something1_data2)",
},
labels: hashmap! {
Pos(0x80056280) => "zero_loop",
},
start_pos: Pos(0x80056270),
end_pos: Pos(0x80056330),
},
Self {
name: "something1",
signature: "fn(arg: u32)",
desc: indoc! {"
This function checks if *something1_data1 is positive, if so decreases
it by 1 and calls call_func_arr with (something1_data2, something1_data2).
"},
comments: hashmap! {
Pos(0x80056348) => "If *something1_data1 == 0, skip",
Pos(0x8005634c) => "Else decrease it by 1 and save it.",
Pos(0x80056368) => "Then call call_func_arr with args (something1_data2, something1_data2)",
},
labels: hashmap! {
Pos(0x80056370) => "skip",
},
start_pos: Pos(0x80056330),
end_pos: Pos(0x80056388),
},
Self {
name: "call_func_arr",
signature: "fn(start: fn(), end: fn())",
desc: "",
comments: hashmap! {
Pos(0x800563a0) => "if `start >= end`, skip",
Pos(0x800563b0) => "If *start == 0, skip call",
Pos(0x800563b8) => "Else call *start",
Pos(0x800563c0) => "start++",
Pos(0x800563c8) => "If `start < end`, restart",
},
labels: hashmap! {
Pos(0x800563a8) => "loop",
Pos(0x800563c0) => "skip_call",
Pos(0x800563d0) => "exit",
},
start_pos: Pos(0x80056388),
end_pos: Pos(0x800563e4),
},
Self {
name: "something2",
signature: "fn(start: *u32)",
desc: "",
comments: hashmap! {
Pos(0x80013e54) => "args: (start)",
Pos(0x80013e6c) => "args: (0)",
},
labels: hashmap! {
Pos(0x80013ef4) => "0",
Pos(0x80013f48) => "1",
Pos(0x80013f54) => "2",
Pos(0x80013f6c) => "3",
Pos(0x80013f8c) => "4",
},
start_pos: Pos(0x80013e4c),
end_pos: Pos(0x80013fa4),
},
Self {
name: "something3",
signature: "fn()",
desc: "",
comments: hashmap! {
Pos(0x80056604) => "Loads FuncList1[3]",
Pos(0x8005660c) => "Calls FuncList1[3] (i.e. something5)",
},
labels: hashmap! {},
start_pos: Pos(0x800565f4),
end_pos: Pos(0x80056624),
},
Self {
name: "something4",
signature: "fn()",
desc: "",
comments: hashmap! {},
labels: hashmap! {
Pos(0x80056ac0) => "0",
Pos(0x80056ae0) => "1",
Pos(0x80056b04) => "2",
Pos(0x80056b1c) => "3",
Pos(0x80056b34) => "4",
Pos(0x80056b44) => "5",
Pos(0x80056b54) => "6",
Pos(0x80056b58) => "7",
},
start_pos: Pos(0x80056a30),
end_pos: Pos(0x80056b78),
},
Self {
name: "something5",
signature: "fn()",
desc: "",
comments: hashmap! {
Pos(0x8005679c) => "Loads *(short*)something5_data1",
Pos(0x800567a4) => "If the loaded value is not zero, exit",
Pos(0x800567c0) => "Zero out the top half of `I_MASK_PTR`, which seems to be garbage",
Pos(0x800567c4) => "Then read the top half of `I_MASK_PTR` and zero-extend it, which is still garbage?",
Pos(0x800567dc) => "Set the DMA control registers to 0x3333_3333",
Pos(0x800567e0) => "args: (something5_data1, 0x3333_3333)",
Pos(0x800567e8) => "Save all registers with `save_registers` and check return value",
Pos(0x800567f0) => "If the return value isn't 0, call `func_831`. This shouldn't happen, as `save_registers` always returns 0",
},
labels: hashmap! {
Pos(0x80056800) => "skip_call",
Pos(0x80056850) => "exit",
},
start_pos: Pos(0x80056788),
end_pos: Pos(0x80056860),
},
Self {
name: "save_registers",
signature: "fn(u32* pos)",
desc: indoc! {"
Saves the following registers in `pos[0x0 .. 0x30]`.
$ra, $gp, $sp, $fp,
$s0, $s1, $s2, $s3,
$s4, $s5, $s6, $s7,
"},
comments: hashmap! {},
labels: hashmap! {},
start_pos: Pos(0x8006a674),
end_pos: Pos(0x8006a6b0),
},
Self {
name: "memset_zero",
signature: "fn(u32* ptr, u32 size)",
desc: indoc! {"
Zeroes out the memory at `ptr` for `size` words.
"},
comments: hashmap! {
Pos(0x80056c90) => "If size == 0, return",
Pos(0x80056c94) => "size--",
Pos(0x80056c9c) => "*ptr = 0",
Pos(0x80056ca0) => "size--",
Pos(0x80056ca4) => "While size != -1, continue",
Pos(0x80056ca8) => "ptr++"
},
labels: hashmap! {
Pos(0x80056c9c) => "loop",
Pos(0x80056cac) => "exit",
},
start_pos: Pos(0x80056c90),
end_pos: Pos(0x80056cb4),
},
])
}
}

View File

@@ -1,164 +0,0 @@
//! Function lists
// Imports
use super::{Func, WithInstructionsIter};
use crate::{
game::exe::{
instruction::{Directive, PseudoInstruction, Register, SimpleInstruction},
Instruction, Pos,
},
util::discarding_sorted_merge_iter::DiscardingSortedMergeIter,
};
use maplit::hashmap;
use std::{collections::BTreeSet, iter::FromIterator, vec};
/// A sorted list of functions by their start address.
pub struct Funcs<S: AsRef<str>>(Vec<Func<S>>);
impl<S: AsRef<str>> FromIterator<Func<S>> for Funcs<S> {
fn from_iter<T: IntoIterator<Item = Func<S>>>(iter: T) -> Self {
Self(iter.into_iter().collect())
}
}
impl<S: AsRef<str>> Funcs<S> {
/// Merges two function lists, discarding any duplicates
/// from `other`.
#[must_use]
pub fn merge(self, other: Self) -> DiscardingSortedMergeIter<Func<S>, vec::IntoIter<Func<S>>, vec::IntoIter<Func<S>>> {
DiscardingSortedMergeIter::new(self.0.into_iter(), other.0.into_iter())
}
/// Adapts an instruction iterator to extract the current function
pub fn with_instructions<'a, I: Iterator<Item = (Pos, &'a Instruction)>>(&'a self, instructions: I) -> WithInstructionsIter<'a, S, I> {
WithInstructionsIter::new(instructions, self)
}
/// Retrieves a function with start address `pos`
#[must_use]
pub fn get(&self, pos: Pos) -> Option<&Func<S>> {
// Note: As we're sorted, we can binary search
self.0
.binary_search_by(|func| func.start_pos.cmp(&pos))
.ok()
.and_then(|idx| self.0.get(idx))
}
}
#[allow(clippy::use_self)] // We're not using `Funcs<S>`, but `Funcs<String>`
impl<S: AsRef<str> + Into<String>> Funcs<S> {
/// Converts all strings to `String`.
#[must_use]
pub fn into_string(self) -> Funcs<String> {
Funcs(
self.0
.into_iter()
.map(|func| Func {
name: func.name.into(),
signature: func.signature.into(),
desc: func.desc.into(),
comments: func.comments.into_iter().map(|(pos, comment)| (pos, comment.into())).collect(),
labels: func.labels.into_iter().map(|(pos, label)| (pos, label.into())).collect(),
start_pos: func.start_pos,
end_pos: func.end_pos,
})
.collect(),
)
}
}
impl Funcs<&'static str> {
/// Returns all known functions
#[must_use]
pub fn known() -> Self {
let mut functions: Vec<_> = Func::known().collect();
functions.sort_by(|lhs, rhs| lhs.start_pos.cmp(&rhs.start_pos));
Self(functions)
}
}
impl Funcs<String> {
/// Creates a new list of functions from an iterator over instructions
#[must_use]
pub fn from_instructions<'a>(instructions: impl Iterator<Item = (Pos, &'a Instruction)> + Clone) -> Self {
// Get all instruction offsets present, ignoring directives.
let offsets: BTreeSet<Pos> = instructions
.clone()
.filter_map(|(pos, instruction)| match instruction {
Instruction::Directive(_) => None,
_ => Some(pos),
})
.collect();
// Get all returns
let returns: BTreeSet<Pos> = instructions
.clone()
.filter_map(|(pos, instruction)| match instruction {
Instruction::Simple(SimpleInstruction::Jr { rs: Register::Ra }) => Some(pos),
_ => None,
})
.collect();
// Get all labels
let labels: BTreeSet<Pos> = instructions
.clone()
.filter_map(|(_, instruction)| match instruction {
Instruction::Simple(
SimpleInstruction::J { target } |
SimpleInstruction::Beq { target, .. } |
SimpleInstruction::Bne { target, .. } |
SimpleInstruction::Bltz { target, .. } |
SimpleInstruction::Bgez { target, .. } |
SimpleInstruction::Bgtz { target, .. } |
SimpleInstruction::Blez { target, .. } |
SimpleInstruction::Bltzal { target, .. } |
SimpleInstruction::Bgezal { target, .. },
) |
Instruction::Pseudo(
PseudoInstruction::Beqz { target, .. } | PseudoInstruction::Bnez { target, .. } | PseudoInstruction::B { target },
) => Some(*target),
_ => None,
})
.filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target) && offsets.contains(target))
.collect();
// Now get every function entrance from jumps and `dw`s.
let function_entrances: BTreeSet<Pos> = instructions
.filter_map(|(_, instruction)| match instruction {
Instruction::Simple(SimpleInstruction::Jal { target }) => Some(*target),
Instruction::Directive(Directive::Dw(target)) => Some(Pos(*target)),
_ => None,
})
.filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target) && offsets.contains(target))
.collect();
// Now combine the function entrances and exits.
// Note: functions will be sorted, as
let functions = function_entrances
.iter()
.zip(0..)
.map(|(&target, idx)| {
// Note: +8 for return + instruction after.
let end_pos = returns.range(target..).next().copied().unwrap_or(target) + 8;
let labels = labels
.range(target..end_pos)
.zip(0..)
.map(|(&pos, idx)| (pos, format!("{idx}")))
.collect();
Func {
name: format!("func_{idx}"),
signature: String::new(),
desc: String::new(),
comments: hashmap! {},
labels,
start_pos: target,
end_pos,
}
})
.collect();
Self(functions)
}
}

View File

@@ -0,0 +1,196 @@
//! Known functions
//!
//! This module stores the [`Func::known`] function
//! that returns all known functions.
//!
//! It is a separate module, as the known functions
//! occupy a large amount of space.
// Imports
use super::{Func, Pos};
use indoc::indoc;
use maplit::hashmap;
impl Func<&'static str> {
/// Returns an iterator of all known functions
///
/// Each entry is a hand-written description of one function: its name,
/// signature, description, per-position comments, per-position labels
/// and its start / end positions.
///
/// The entries are yielded in the order they are written below, which
/// is *not* sorted by position; callers that need ordering must sort
/// or collect into an ordered container themselves.
#[allow(clippy::too_many_lines)] // This will be big, as it's the list of ALL known functions
pub fn known() -> impl Iterator<Item = Self> {
std::array::IntoIter::new([
Self {
name: "InitHeap",
signature: "fn(addr: *u32, size: u32)",
desc: "Calls A(0x39)",
comments: hashmap! {
Pos(0x8006a738) => "Register tailcall. Likely to prevent calling in KSEG0 and do it in KUSEG",
Pos(0x8006a73c) => "arg: 0x39",
},
labels: hashmap! {},
start_pos: Pos(0x8006a734),
end_pos: Pos(0x8006a744),
},
Self {
name: "start",
signature: "fn()",
desc: "Executable start",
comments: hashmap! {
Pos(0x80056280) => "Zero out ZeroStart .. HeapStart word by word.",
Pos(0x80056284) => "^",
Pos(0x80056288) => "^",
Pos(0x8005628c) => "^",
Pos(0x800562a8) => "Initialize stack to (*StackTop - 0x10) | 0x80000000",
Pos(0x800562f8) => "args: (HeapStart, (*StackTop - 0x10) - *StackSize - (HeapStart & 0x1fff_ffff))",
Pos(0x8005630c) => "args: (HeapStart + 0x4, ...?)",
Pos(0x80056324) => "args: (something1_data2, something1_data2)",
},
labels: hashmap! {
Pos(0x80056280) => "zero_loop",
},
start_pos: Pos(0x80056270),
end_pos: Pos(0x80056330),
},
Self {
name: "something1",
signature: "fn(arg: u32)",
desc: indoc! {"
This function checks if *something1_data1 is positive, if so decreases
it by 1 and calls call_func_arr with (something1_data2, something1_data2).
"},
comments: hashmap! {
Pos(0x80056348) => "If *something1_data1 == 0, skip",
Pos(0x8005634c) => "Else decrease it by 1 and save it.",
Pos(0x80056368) => "Then call call_func_arr with args (something1_data2, something1_data2)",
},
labels: hashmap! {
Pos(0x80056370) => "skip",
},
start_pos: Pos(0x80056330),
end_pos: Pos(0x80056388),
},
Self {
name: "call_func_arr",
signature: "fn(start: fn(), end: fn())",
desc: "",
comments: hashmap! {
Pos(0x800563a0) => "if `start >= end`, skip",
Pos(0x800563b0) => "If *start == 0, skip call",
Pos(0x800563b8) => "Else call *start",
Pos(0x800563c0) => "start++",
Pos(0x800563c8) => "If `start < end`, restart",
},
labels: hashmap! {
Pos(0x800563a8) => "loop",
Pos(0x800563c0) => "skip_call",
Pos(0x800563d0) => "exit",
},
start_pos: Pos(0x80056388),
end_pos: Pos(0x800563e4),
},
Self {
name: "something2",
signature: "fn(start: *u32)",
desc: "",
comments: hashmap! {
Pos(0x80013e54) => "args: (start)",
Pos(0x80013e6c) => "args: (0)",
},
labels: hashmap! {
Pos(0x80013ef4) => "0",
Pos(0x80013f48) => "1",
Pos(0x80013f54) => "2",
Pos(0x80013f6c) => "3",
Pos(0x80013f8c) => "4",
},
start_pos: Pos(0x80013e4c),
end_pos: Pos(0x80013fa4),
},
Self {
name: "something3",
signature: "fn()",
desc: "",
comments: hashmap! {
Pos(0x80056604) => "Loads FuncList1[3]",
Pos(0x8005660c) => "Calls FuncList1[3] (i.e. something5)",
},
labels: hashmap! {},
start_pos: Pos(0x800565f4),
end_pos: Pos(0x80056624),
},
Self {
name: "something4",
signature: "fn()",
desc: "",
comments: hashmap! {},
labels: hashmap! {
Pos(0x80056ac0) => "0",
Pos(0x80056ae0) => "1",
Pos(0x80056b04) => "2",
Pos(0x80056b1c) => "3",
Pos(0x80056b34) => "4",
Pos(0x80056b44) => "5",
Pos(0x80056b54) => "6",
Pos(0x80056b58) => "7",
},
start_pos: Pos(0x80056a30),
end_pos: Pos(0x80056b78),
},
Self {
name: "something5",
signature: "fn()",
desc: "",
comments: hashmap! {
Pos(0x8005679c) => "Loads *(short*)something5_data1",
Pos(0x800567a4) => "If the loaded value is not zero, exit",
Pos(0x800567c0) => "Zero out the top half of `I_MASK_PTR`, which seems to be garbage",
Pos(0x800567c4) => "Then read the top half of `I_MASK_PTR` and zero-extend it, which is still garbage?",
Pos(0x800567dc) => "Set the DMA control registers to 0x3333_3333",
Pos(0x800567e0) => "args: (something5_data1, 0x3333_3333)",
Pos(0x800567e8) => "Save all registers with `save_registers` and check return value",
Pos(0x800567f0) => "If the return value isn't 0, call `func_831`. This shouldn't happen, as `save_registers` always returns 0",
},
labels: hashmap! {
Pos(0x80056800) => "skip_call",
Pos(0x80056850) => "exit",
},
start_pos: Pos(0x80056788),
end_pos: Pos(0x80056860),
},
Self {
name: "save_registers",
signature: "fn(u32* pos)",
desc: indoc! {"
Saves the following registers in `pos[0x0 .. 0x30]`.
$ra, $gp, $sp, $fp,
$s0, $s1, $s2, $s3,
$s4, $s5, $s6, $s7,
"},
comments: hashmap! {},
labels: hashmap! {},
start_pos: Pos(0x8006a674),
end_pos: Pos(0x8006a6b0),
},
Self {
name: "memset_zero",
signature: "fn(u32* ptr, u32 size)",
desc: indoc! {"
Zeroes out the memory at `ptr` for `size` words.
"},
comments: hashmap! {
Pos(0x80056c90) => "If size == 0, return",
Pos(0x80056c94) => "size--",
Pos(0x80056c9c) => "*ptr = 0",
Pos(0x80056ca0) => "size--",
Pos(0x80056ca4) => "While size != -1, continue",
Pos(0x80056ca8) => "ptr++"
},
labels: hashmap! {
Pos(0x80056c9c) => "loop",
Pos(0x80056cac) => "exit",
},
start_pos: Pos(0x80056c90),
end_pos: Pos(0x80056cb4),
},
])
}
}

View File

@@ -0,0 +1,489 @@
//! Function table
//!
//! This module defines the [`FuncTable`] type, which
//! stores all functions within the executable.
//!
//! Typically these functions will be a mix of the known functions,
//! available through [`FuncTable::known`], and heuristically
//! discovered functions found through instruction references, available
//! through [`FuncTable::from_instructions`].
// Modules
pub mod iter;
// Exports
pub use iter::WithInstructionsIter;
// Imports
use super::Func;
use crate::{
game::exe::{
instruction::{Directive, PseudoInstruction, Register, SimpleInstruction},
Instruction, Pos,
},
util::discarding_sorted_merge_iter::DiscardingSortedMergeIter,
};
use maplit::hashmap;
use std::{collections::BTreeSet, iter::FromIterator};
/// Function table
///
/// Stores all functions in an ordered set, so iteration yields them
/// in the order defined by `Func`'s `Ord` impl.
///
/// Being a set, it holds at most one function per distinct key.
// NOTE(review): `Func` appears to compare by `start_pos` only, so uniqueness
// is per start address. Nothing visible in this type checks that the
// `start_pos..end_pos` ranges of different functions do not overlap —
// confirm before relying on a non-overlap guarantee.
pub struct FuncTable<S: AsRef<str>>(BTreeSet<Func<S>>);
impl<S: AsRef<str>> FromIterator<Func<S>> for FuncTable<S> {
fn from_iter<T: IntoIterator<Item = Func<S>>>(iter: T) -> Self {
Self(iter.into_iter().collect())
}
}
impl<S: AsRef<str>> FuncTable<S> {
    /// Merges two function tables, discarding duplicates from `other`.
    ///
    /// This can be useful when combining known functions and heuristically
    /// discovered functions, as the known functions are always kept, and the
    /// duplicate discovered ones are discarded.
    #[must_use]
    pub fn merge(self, other: Self) -> Self {
        // Note: We don't return the iterator, as we want the user to
        //       keep the guarantees supplied by this type.
        DiscardingSortedMergeIter::new(self.0.into_iter(), other.0.into_iter()).collect()
    }

    /// Retrieves a function with start address `pos`
    ///
    /// Returns `None` if no function in the table starts exactly at `pos`.
    #[must_use]
    pub fn get(&self, pos: Pos) -> Option<&Func<S>> {
        // Note: The set is keyed by something borrowable as `Pos` (the same
        //       bound the previous `range(..=pos)` lookup needed), so we can
        //       ask the tree for the entry directly instead of scanning
        //       backwards through every function that starts before `pos`.
        self.0.get(&pos)
    }

    /// Adapts an instruction iterator to extract the current function
    pub fn with_instructions<'a, I: Iterator<Item = (Pos, &'a Instruction)>>(&'a self, instructions: I) -> WithInstructionsIter<'a, S, I> {
        WithInstructionsIter::new(instructions, self)
    }
}
#[allow(clippy::use_self)] // We're not using `Funcs<S>`, but `Funcs<String>`
impl<S: AsRef<str> + Into<String>> FuncTable<S> {
/// Converts all strings to `String`.
#[must_use]
pub fn into_string(self) -> FuncTable<String> {
FuncTable(self.0.into_iter().map(Func::into_string).collect())
}
}
impl FuncTable<&'static str> {
    /// Builds a table holding every function listed by [`Func::known`].
    #[must_use]
    pub fn known() -> Self {
        Func::known().collect::<Self>()
    }
}
impl FuncTable<String> {
    /// Creates a new table of functions from an iterator over instructions
    ///
    /// The discovery is heuristic:
    ///
    /// 1. Every `jal` target and every `dw` value that lies inside
    ///    `Instruction::CODE_START..Instruction::CODE_END` is taken as a
    ///    function entry.
    /// 2. A function ends 8 bytes after the first `jr $ra` at or after its
    ///    entry (the return plus the instruction after it). If another
    ///    entry lies before that point, the last `j` / `jr` before that
    ///    entry is used instead, with a minimum length of 8 bytes past the entry.
    /// 3. Every in-range jump / branch target inside the function becomes a
    ///    numbered label.
    /// 4. The first instruction mentioning each of `$a0`..`$a3` decides the
    ///    guessed type of that argument, from which the signature is built.
    ///
    /// Functions are named `func_{idx}`, where `idx` is the index of the
    /// entry in ascending position order.
    ///
    /// `instructions` is cloned and re-scanned once per discovered function,
    /// so cloning it should be cheap.
    #[must_use]
    #[allow(clippy::too_many_lines)] // TODO: Refactor?
    #[allow(clippy::enum_glob_use)] // It's only for this function
    pub fn from_instructions<'a>(instructions: &(impl Iterator<Item = (Pos, &'a Instruction)> + Clone)) -> Self {
        use Instruction::{Pseudo, Simple};
        use PseudoInstruction::*;
        use SimpleInstruction::*;

        /// Records `ty` as the guessed type of the argument held in `reg`.
        ///
        /// Does nothing if `reg` is not an argument register, or if a guess
        /// was already recorded for it: the first use always wins.
        fn note_arg(arguments: &mut [Option<&'static str>; 4], reg: &Register, ty: &'static str) {
            if let Some(slot) = reg.arg_idx().and_then(|idx| arguments.get_mut(idx)) {
                if slot.is_none() {
                    *slot = Some(ty);
                }
            }
        }

        // Get all returns
        let returns: BTreeSet<Pos> = instructions
            .clone()
            .filter_map(|(pos, instruction)| match instruction {
                Simple(Jr { rs: Register::Ra }) => Some(pos),
                _ => None,
            })
            .collect();

        // Get all possible tailcalls
        let tailcalls: BTreeSet<Pos> = instructions
            .clone()
            .filter_map(|(pos, instruction)| match instruction {
                Simple(J { .. } | Jr { .. }) => Some(pos),
                _ => None,
            })
            .collect();

        // Get all labels
        let labels: BTreeSet<Pos> = instructions
            .clone()
            .filter_map(|(_, instruction)| match instruction {
                Simple(
                    J { target } |
                    Beq { target, .. } |
                    Bne { target, .. } |
                    Bltz { target, .. } |
                    Bgez { target, .. } |
                    Bgtz { target, .. } |
                    Blez { target, .. } |
                    Bltzal { target, .. } |
                    Bgezal { target, .. },
                ) |
                Pseudo(Beqz { target, .. } | Bnez { target, .. } | B { target }) => Some(*target),
                _ => None,
            })
            .filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target))
            .collect();

        // Now check every `Jal` and `Dw` for possible function entrances
        let function_entries: BTreeSet<Pos> = instructions
            .clone()
            .filter_map(|(_, instruction)| match instruction {
                Simple(Jal { target }) => Some(*target),
                Instruction::Directive(Directive::Dw(target)) => Some(Pos(*target)),
                _ => None,
            })
            .filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target))
            .collect();

        #[allow(clippy::cognitive_complexity)] // TODO: Fix
        function_entries
            .iter()
            .zip(0..)
            .map(|(&func_pos, idx)| {
                // Try to get the end position from the returns
                // Note: +8 for return + instruction after.
                let mut end_pos: Pos = returns.range(func_pos..).next().copied().unwrap_or(func_pos) + 8;

                // If there's a function in between us and the return, use the last tailcall instead
                if let Some(next_func_pos) = function_entries.range(func_pos + 4..end_pos).next() {
                    end_pos = tailcalls.range(..next_func_pos).next_back().copied().unwrap_or(func_pos) + 8;

                    // If we got a tailcall before this function, just end it 2 instructions
                    if end_pos <= func_pos {
                        end_pos = func_pos + 8;
                    }
                }

                // Get all labels within this function
                let labels = labels
                    .range(func_pos..end_pos)
                    .zip(0..)
                    .map(|(&pos, idx)| (pos, format!("{idx}")))
                    .collect();

                // Check if any instructions use `$aX` and for what to try and piece
                // together arguments.
                // Arguments `$a0` through `$a3`
                // TODO: Maybe save the instruction iterator for this function in `function_entries` somehow?
                // TODO: Maybe check for return values too.
                let mut arguments: [Option<&'static str>; 4] = [None; 4];
                for (_, instruction) in instructions
                    .clone()
                    .skip_while(|(pos, _)| *pos < func_pos)
                    .take_while(|(pos, _)| *pos < end_pos)
                {
                    // TODO: Generalize this in `Instruction` as a method that
                    //       returns all registers used maybe.
                    // Note: Within one instruction the registers are noted in a fixed
                    //       order, which matters when the same register shows up twice.
                    match instruction {
                        // Byte loads / stores
                        Simple(Sb { rt, rs, .. } | Lb { rt, rs, .. } | Lbu { rt, rs, .. }) => {
                            note_arg(&mut arguments, rt, "u8");
                            note_arg(&mut arguments, rs, "*u8");
                        },
                        Pseudo(SbImm { rx, .. } | LbImm { rx, .. } | LbuImm { rx, .. }) => {
                            note_arg(&mut arguments, rx, "*u8");
                        },

                        // Half-word loads / stores
                        Simple(Sh { rt, rs, .. } | Lh { rt, rs, .. } | Lhu { rt, rs, .. }) => {
                            note_arg(&mut arguments, rt, "u16");
                            note_arg(&mut arguments, rs, "*u16");
                        },
                        Pseudo(ShImm { rx, .. } | LhImm { rx, .. } | LhuImm { rx, .. }) => {
                            note_arg(&mut arguments, rx, "*u16");
                        },

                        // Word loads / stores
                        Simple(
                            Swl { rt, rs, .. } | Sw { rt, rs, .. } | Swr { rt, rs, .. } | Lwl { rt, rs, .. } | Lw { rt, rs, .. } | Lwr { rt, rs, .. },
                        ) => {
                            note_arg(&mut arguments, rt, "u32");
                            note_arg(&mut arguments, rs, "*u32");
                        },
                        Pseudo(
                            LwlImm { rx, .. } | LwImm { rx, .. } | LwrImm { rx, .. } | SwlImm { rx, .. } | SwImm { rx, .. } | SwrImm { rx, .. },
                        ) => {
                            note_arg(&mut arguments, rx, "*u32");
                        },

                        // Two-register instructions
                        Simple(
                            Addi { rt, rs, .. } |
                            Addiu { rt, rs, .. } |
                            Slti { rt, rs, .. } |
                            Sltiu { rt, rs, .. } |
                            Andi { rt, rs, .. } |
                            Ori { rt, rs, .. } |
                            Xori { rt, rs, .. } |
                            Mult { rs, rt } |
                            Multu { rs, rt } |
                            Div { rs, rt } |
                            Divu { rs, rt } |
                            Beq { rs, rt, .. } |
                            Bne { rs, rt, .. } |
                            LwcN { rs, rt, .. } |
                            SwcN { rs, rt, .. },
                        ) |
                        Pseudo(Subi { rt, rs, .. } | Subiu { rt, rs, .. }) => {
                            note_arg(&mut arguments, rt, "u32");
                            note_arg(&mut arguments, rs, "u32");
                        },

                        // Three-register instructions
                        Simple(
                            Add { rd, rs, rt } |
                            Addu { rd, rs, rt } |
                            Sub { rd, rs, rt } |
                            Subu { rd, rs, rt } |
                            Slt { rd, rs, rt } |
                            Sltu { rd, rs, rt } |
                            And { rd, rs, rt } |
                            Or { rd, rs, rt } |
                            Xor { rd, rs, rt } |
                            Nor { rd, rs, rt } |
                            Sllv { rd, rt, rs } |
                            Srlv { rd, rt, rs } |
                            Srav { rd, rt, rs },
                        ) => {
                            note_arg(&mut arguments, rd, "u32");
                            note_arg(&mut arguments, rs, "u32");
                            note_arg(&mut arguments, rt, "u32");
                        },

                        // Shifts by immediate and co-processor moves
                        Simple(
                            Sll { rd, rt, .. } |
                            Srl { rd, rt, .. } |
                            Sra { rd, rt, .. } |
                            MfcN { rt, rd, .. } |
                            CfcN { rt, rd, .. } |
                            MtcN { rt, rd, .. } |
                            CtcN { rt, rd, .. },
                        ) => {
                            note_arg(&mut arguments, rd, "u32");
                            note_arg(&mut arguments, rt, "u32");
                        },

                        // Register call
                        Simple(Jalr { rd, rs }) => {
                            note_arg(&mut arguments, rd, "u32");
                            note_arg(&mut arguments, rs, "*fn()");
                        },

                        Simple(Lui { rt, .. }) => {
                            note_arg(&mut arguments, rt, "u32");
                        },
                        Simple(Mfhi { rd } | Mflo { rd }) => {
                            note_arg(&mut arguments, rd, "u32");
                        },

                        // Single source register instructions
                        Simple(
                            Bltz { rs, .. } |
                            Bgez { rs, .. } |
                            Bgtz { rs, .. } |
                            Blez { rs, .. } |
                            Bltzal { rs, .. } |
                            Bgezal { rs, .. } |
                            Jr { rs } |
                            Mthi { rs } |
                            Mtlo { rs },
                        ) => {
                            note_arg(&mut arguments, rs, "u32");
                        },

                        Pseudo(MovReg { rx, ry }) => {
                            note_arg(&mut arguments, rx, "u32");
                            note_arg(&mut arguments, ry, "u32");
                        },
                        Pseudo(La { rx, .. } | Li32 { rx, .. } | LiU16 { rx, .. } | LiI16 { rx, .. } | LiUpper16 { rx, .. }) => {
                            note_arg(&mut arguments, rx, "*u32");
                        },

                        // Compound assignments
                        Pseudo(
                            AddAssign { rx, rt } |
                            AdduAssign { rx, rt } |
                            SubAssign { rx, rt } |
                            SubuAssign { rx, rt } |
                            AndAssign { rx, rt } |
                            OrAssign { rx, rt } |
                            XorAssign { rx, rt } |
                            NorAssign { rx, rt },
                        ) => {
                            note_arg(&mut arguments, rx, "u32");
                            note_arg(&mut arguments, rt, "u32");
                        },
                        Pseudo(
                            AddiAssign { rx, .. } |
                            AddiuAssign { rx, .. } |
                            AndiAssign { rx, .. } |
                            OriAssign { rx, .. } |
                            XoriAssign { rx, .. } |
                            SllAssign { rx, .. } |
                            SrlAssign { rx, .. } |
                            SraAssign { rx, .. } |
                            SubiAssign { rx, .. } |
                            SubiuAssign { rx, .. },
                        ) => {
                            note_arg(&mut arguments, rx, "u32");
                        },
                        Pseudo(SllvAssign { rx, rs } | SrlvAssign { rx, rs } | SravAssign { rx, rs }) => {
                            note_arg(&mut arguments, rx, "u32");
                            note_arg(&mut arguments, rs, "u32");
                        },

                        Pseudo(JalrRa { rx } | Beqz { rx, .. } | Bnez { rx, .. }) => {
                            note_arg(&mut arguments, rx, "fn()");
                        },

                        _ => (),
                    }
                }

                // Build the signature up to the last argument we found a use of,
                // filling any earlier argument we know nothing about with `???`.
                #[rustfmt::skip]
                let signature = match arguments {
                    [None   , None   , None   , None   ] => String::new(),
                    [Some(a), None   , None   , None   ] => format!("fn(a: {a})"),
                    [a      , Some(b), None   , None   ] => format!("fn(a: {}, b: {b})"              , a.unwrap_or("???")),
                    [a      , b      , Some(c), None   ] => format!("fn(a: {}, b: {}, c: {c})"        , a.unwrap_or("???"), b.unwrap_or("???")),
                    [a      , b      , c      , Some(d)] => format!("fn(a: {}, b: {}, c: {}, d: {d})" , a.unwrap_or("???"), b.unwrap_or("???"), c.unwrap_or("???")),
                };

                Func {
                    name: format!("func_{idx}"),
                    signature,
                    desc: String::new(),
                    comments: hashmap! {},
                    labels,
                    start_pos: func_pos,
                    end_pos,
                }
            })
            .collect()
    }
}

View File

@@ -1,7 +1,9 @@
//! Iterators
// TODO: Deprecate in favor of a function + data iterator.
// Imports
use super::{Func, Funcs};
use super::{Func, FuncTable};
use crate::game::exe::{Instruction, Pos};
/// Iterator of instructions along with the current function
@@ -10,7 +12,7 @@ pub struct WithInstructionsIter<'a, S: AsRef<str>, I: Iterator<Item = (Pos, &'a
instructions: I,
/// All functions
funcs: &'a Funcs<S>,
funcs: &'a FuncTable<S>,
/// Current function
cur_func: Option<&'a Func<S>>,
@@ -18,7 +20,7 @@ pub struct WithInstructionsIter<'a, S: AsRef<str>, I: Iterator<Item = (Pos, &'a
impl<'a, S: AsRef<str>, I: Iterator<Item = (Pos, &'a Instruction)>> WithInstructionsIter<'a, S, I> {
/// Creates a new instructions iterator
pub(super) fn new(instructions: I, funcs: &'a Funcs<S>) -> Self {
pub(super) fn new(instructions: I, funcs: &'a FuncTable<S>) -> Self {
Self {
instructions,
funcs,

View File

@@ -68,6 +68,28 @@ macro_rules! generate_register {
}
}
impl Register {
    /// Returns the zero-based argument index of this register.
    ///
    /// `A0` through `A3` map to `0` through `3`; every other register
    /// yields `None`.
    #[must_use]
    pub const fn arg_idx(self) -> Option<usize> {
        match self {
            Self::A0 => Some(0),
            Self::A1 => Some(1),
            Self::A2 => Some(2),
            Self::A3 => Some(3),
            _ => None,
        }
    }

    /// Returns whether this register is one of the argument registers,
    /// i.e. whether [`Self::arg_idx`] yields an index for it.
    #[must_use]
    pub fn is_arg(self) -> bool {
        let maybe_idx = self.arg_idx();
        maybe_idx.is_some()
    }
}
generate_register! {
pub enum Register {
/// Zero register