Added dcb::util::merge_iter.

Improved `dcb::game::exe::Func` yet again.
This commit is contained in:
Filipe Rodrigues 2020-10-28 17:27:25 +00:00
parent 829b8852fe
commit 17b3aa0e4f
8 changed files with 420 additions and 98 deletions

View File

@ -30,6 +30,7 @@ float-ord = "0.2"
itertools = "0.9"
rand = "0.7"
ref-cast = "1.0"
maplit = "1.0"
# Cmd
clap = "2.33"

View File

@ -8,7 +8,8 @@
array_value_iter,
array_chunks,
format_args_capture,
or_patterns
or_patterns,
bindings_after_at
)]
// Lints
#![warn(clippy::restriction, clippy::pedantic, clippy::nursery)]
@ -73,27 +74,19 @@ mod cli;
#[path = "../logger.rs"]
mod logger;
// Exports
use std::collections::{HashMap, HashSet};
// Imports
use anyhow::Context;
use byteorder::{ByteOrder, LittleEndian};
use dcb::{
game::exe::{
instruction::{
Directive,
PseudoInstruction::{self, Nop},
Raw, Register, SimpleInstruction,
},
func::Funcs,
instruction::{Directive, PseudoInstruction::Nop, Raw, Register, SimpleInstruction},
Instruction, Pos,
},
GameFile,
};
use itertools::Itertools;
use ref_cast::RefCast;
#[allow(clippy::too_many_lines)] // TODO: Refactor
#[allow(clippy::cognitive_complexity, clippy::too_many_lines)] // TODO: Refactor
fn main() -> Result<(), anyhow::Error> {
// Initialize the logger and set the panic handler
logger::init();
@ -106,9 +99,11 @@ fn main() -> Result<(), anyhow::Error> {
let mut game_file = GameFile::from_reader(input_file).context("Unable to parse input file as dcb")?;
// Read the executable
log::debug!("Deserializing executable");
let exe = dcb::game::Exe::deserialize(&mut game_file).context("Unable to parse game executable")?;
// Get all instructions
log::debug!("Retrieving all instructions");
let instructions: Vec<(Pos, Instruction)> = Instruction::new_iter(
exe.data
.array_chunks::<4>()
@ -121,10 +116,22 @@ fn main() -> Result<(), anyhow::Error> {
)
.collect();
// Get all functions
log::debug!("Retrieving all functions");
let functions: Funcs<String> = Funcs::known()
.into_string()
.merge(Funcs::from_instructions(
instructions.iter().map(|(pos, instruction)| (*pos, instruction)),
))
.collect();
/*
// All instruction offsets
log::debug!("Retrieving all offsets");
let offsets: HashSet<Pos> = instructions.iter().map(|(offset, _)| offset).copied().collect();
// All data / string addresses
log::debug!("Retrieving all data / strings addresses");
let data_string_addresses: HashSet<Pos> = instructions
.iter()
.filter_map(|(_, instruction)| match instruction {
@ -149,20 +156,8 @@ fn main() -> Result<(), anyhow::Error> {
})
.collect();
// Get all function jumps
let funcs_pos: HashMap<Pos, usize> = instructions
.iter()
.filter_map(|(_, instruction)| match *instruction {
Instruction::Simple(SimpleInstruction::Jal { target }) => Some(target),
Instruction::Directive(Directive::Dw(target) | Directive::DwRepeated { value: target, .. }) => Some(Pos(target)),
_ => None,
})
.filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target) && offsets.contains(target))
.unique()
.zip(0..)
.collect();
// Get all local jumps
log::debug!("Retrieving all local jumps");
let locals_pos: HashMap<Pos, usize> = instructions
.iter()
.filter_map(|(_, instruction)| match *instruction {
@ -187,16 +182,8 @@ fn main() -> Result<(), anyhow::Error> {
.zip(0..)
.collect();
// Get all returns
let return_pos: HashSet<Pos> = instructions
.iter()
.filter_map(|(cur_pos, instruction)| match instruction {
Instruction::Simple(SimpleInstruction::Jr { rs: Register::Ra }) => Some(*cur_pos),
_ => None,
})
.collect();
// Get all strings
log::debug!("Retrieving all strings");
let strings_pos: HashMap<Pos, usize> = instructions
.iter()
.filter_map(|(cur_pos, instruction)| match instruction {
@ -209,6 +196,7 @@ fn main() -> Result<(), anyhow::Error> {
.collect();
// Get all data
log::debug!("Retrieving all data");
let data_pos: HashMap<Pos, usize> = instructions
.iter()
.filter_map(|(cur_pos, instruction)| match instruction {
@ -219,14 +207,23 @@ fn main() -> Result<(), anyhow::Error> {
.unique()
.zip(0..)
.collect();
*/
// Build the full instructions iterator
let full_iter = functions
.with_instructions(instructions.iter().map(|(pos, instruction)| (*pos, instruction)))
.scan(None, |last_instruction, output @ (_, cur_instruction, _)| {
Some((output, last_instruction.replace(cur_instruction)))
});
// Read all instructions
let mut last_instruction = None;
let mut skipped_nops = 0;
for (offset, instruction) in &instructions {
for ((cur_pos, instruction, cur_func), last_instruction) in full_iter {
// Note: Required by `rust-analyzer` currently, it can't determine the type of `cur_func`.
let cur_func: Option<&dcb::game::exe::Func<String>> = cur_func;
// If both last and current instructions are nops, skip
if let (Some(&Instruction::Pseudo(Nop)), Instruction::Pseudo(Nop)) = (last_instruction, instruction) {
if let (Some(Instruction::Pseudo(Nop)), Instruction::Pseudo(Nop)) = (last_instruction, instruction) {
skipped_nops += 1;
continue;
}
@ -239,9 +236,18 @@ fn main() -> Result<(), anyhow::Error> {
}
// Check if we need to prefix
if let Some(func_idx) = funcs_pos.get(offset) {
println!("\n\tfunc_{func_idx}:");
match cur_func {
Some(cur_func) if cur_func.start_pos == cur_pos => {
println!("####################");
println!("{}:", cur_func.name);
println!("# {}\n#", cur_func.signature);
for description in cur_func.desc.lines() {
println!("# {}", description);
}
},
_ => (),
}
/*
if let Some(local_idx) = locals_pos.get(offset) {
println!("\t.{local_idx}:");
}
@ -251,9 +257,10 @@ fn main() -> Result<(), anyhow::Error> {
if let Some(data_idx) = data_pos.get(offset) {
println!("\tdata_{data_idx}:");
}
*/
// Print the instruction
print!("{offset:#010x}: {instruction}");
print!("{cur_pos:#010x}: {instruction}");
// Check if we should have any comments with this instruction
// TODO: Add Pseudo jumps too
@ -272,12 +279,14 @@ fn main() -> Result<(), anyhow::Error> {
SimpleInstruction::Bgezal { target, .. },
) => {
print!(" #");
if let Some(func_idx) = funcs_pos.get(target) {
print!(" func_{func_idx}");
if let Some(func) = functions.get(*target) {
print!(" {}", func.name);
}
/*
if let Some(local_idx) = locals_pos.get(target) {
print!(" .{local_idx}");
}
*/
},
// Comment returns
@ -285,6 +294,7 @@ fn main() -> Result<(), anyhow::Error> {
print!(" # Return");
},
/*
// Comment loading address, loading and writing values of string and data
// TODO: Maybe check loads / writes to halfway between
// the strings / data.
@ -305,20 +315,23 @@ fn main() -> Result<(), anyhow::Error> {
PseudoInstruction::SwrImm { offset, .. },
) => {
print!(" #");
/*
if let Some(string_idx) = strings_pos.get(Pos::ref_cast(offset)) {
print!(" string_{string_idx}");
}
if let Some(data_idx) = data_pos.get(Pos::ref_cast(offset)) {
print!(" data_{data_idx}");
}
*/
},
*/
// Comment `dw`s with both function and data
Instruction::Directive(Directive::Dw(offset) | Directive::DwRepeated { value: offset, .. }) => {
print!(" #");
if let Some(func_idx) = funcs_pos.get(Pos::ref_cast(offset)) {
print!(" func_{func_idx}");
if let Some(func) = functions.get(Pos(*offset)) {
print!(" {}", func.name);
}
/*
if let Some(local_idx) = locals_pos.get(Pos::ref_cast(offset)) {
print!(" .{local_idx}");
}
@ -328,20 +341,26 @@ fn main() -> Result<(), anyhow::Error> {
if let Some(data_idx) = data_pos.get(Pos::ref_cast(offset)) {
print!(" data_{data_idx}");
}
*/
},
_ => (),
}
// Append any comments in this line
if let Some(cur_func) = cur_func {
if let Some(comment) = cur_func.comments.get(&cur_pos) {
print!(" {comment}");
}
}
// And finish the line
println!();
// If the _last_ instruction was a return, print a newline after this one
if return_pos.contains(&(offset - 4)) {
// If the last instruction was a `return` and we have a function, space it out
if let (Some(Instruction::Simple(SimpleInstruction::Jr { rs: Register::Ra })), Some(_cur_func)) = (last_instruction, cur_func) {
println!();
println!("####################");
}
last_instruction = Some(instruction);
}
Ok(())

View File

@ -19,6 +19,8 @@ arrayref = "0.3"
int-conv = "0.1"
indoc = "1.0"
bitmatch = "0.1"
maplit = "1.0"
either = "1.6"
# Serde
serde = { version = "1.0", features = ["derive"] }

View File

@ -1,66 +1,228 @@
//! Executable functions
// Modules
pub mod iter;
// Exports
pub use iter::WithInstructionsIter;
// Imports
use crate::game::exe::Pos;
use crate::{
game::exe::{
instruction::{Directive, Register, SimpleInstruction},
Instruction, Pos,
},
util::merge_iter::MergeSortedIter,
};
use maplit::hashmap;
use std::{
collections::{BTreeSet, HashMap},
iter::FromIterator,
vec,
};
/// A function within the executable
#[derive(PartialEq, Eq, Clone, Hash, Debug)]
#[derive(Clone, Debug)]
#[derive(serde::Serialize, serde::Deserialize)]
pub struct Func<S: AsRef<str>, C: AsRef<[(Pos, S)]>> {
pub struct Func<S: AsRef<str>> {
/// Function name
pub name: S,
/// Function signature
signature: S,
pub signature: S,
/// Description
desc: S,
pub desc: S,
/// Comments
comments: C,
pub comments: HashMap<Pos, S>,
/// Start position
start_pos: Pos,
pub start_pos: Pos,
/// End position (non-inclusive)
end_pos: Pos,
pub end_pos: Pos,
}
impl Func<&'static str, &'static [(Pos, &'static str)]> {
/// List of all known functions
pub const ALL: &'static [Self] = &[
Self {
signature: "void InitHeap(int* addr, unsigned int size)",
desc: "Calls A(0x39)",
comments: &[],
start_pos: Pos(0x8006a734),
end_pos: Pos(0x8006a744),
},
Self {
signature: "void start(void)",
desc: "Executable start",
comments: &[
(Pos(0x80056280), "Zero out 0x80077a08 .. 0x801ddf38 word by word."),
(Pos(0x80056284), "^"),
(Pos(0x80056288), "^"),
(Pos(0x8005628c), "^"),
(Pos(0x800562f8), "InitHeap(0x8007f988, ???)"),
(Pos(0x8005630c), "func_1025(0x8007f98c)"),
(Pos(0x80056324), "func_1026(string_0, string_0)"),
],
start_pos: Pos(0x80056270),
end_pos: Pos(0x80056330),
},
Self {
signature: "void func_1025(int*)",
desc: "",
comments: &[(Pos(0x80013ef4), "Called indefinitely?"), (Pos(0x80013efc), "^ Due to this loop")],
start_pos: Pos(0x80013e4c),
end_pos: Pos(0x80013f04),
},
Self {
signature: "int func_446(int)",
desc: "",
comments: &[],
start_pos: Pos(0x80069124),
end_pos: Pos(0x80069150),
},
];
impl<S: AsRef<str>> PartialEq for Func<S> {
    /// Two functions are considered equal when they start at the same position;
    /// every other field is ignored.
    fn eq(&self, other: &Self) -> bool {
        self.start_pos == other.start_pos
    }
}
// Note: Equality only looks at `start_pos` (see the `PartialEq` impl), so this
//       marker impl holds as long as positions themselves are `Eq`.
impl<S: AsRef<str>> Eq for Func<S> {}
impl<S: AsRef<str>> PartialOrd for Func<S> {
/// Partial ordering of two functions; always returns `Some`.
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
// Delegate to `cmp` since we have a total order.
Some(self.cmp(other))
}
}
impl<S: AsRef<str>> Ord for Func<S> {
    /// Orders functions by their start position alone; every other
    /// field is ignored.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        let (lhs, rhs) = (&self.start_pos, &other.start_pos);
        lhs.cmp(rhs)
    }
}
/// A sorted list of functions by their start address.
///
/// `Funcs::get` binary searches the inner vector on `start_pos`, so the
/// vector is expected to stay ordered by start position.
pub struct Funcs<S: AsRef<str>>(Vec<Func<S>>);
impl<S: AsRef<str>> FromIterator<Func<S>> for Funcs<S> {
    /// Collects functions into a list ordered by start position.
    ///
    /// Lookups on `Funcs` binary search by `start_pos`, so the items are
    /// sorted here instead of trusting the caller to supply them in order.
    /// The sort is stable: input that is already ordered is left untouched,
    /// and functions sharing a start position keep their relative order.
    fn from_iter<T: IntoIterator<Item = Func<S>>>(iter: T) -> Self {
        let mut funcs: Vec<Func<S>> = iter.into_iter().collect();

        // Uphold the "sorted by start address" invariant of `Funcs`.
        funcs.sort_by(|lhs, rhs| lhs.start_pos.cmp(&rhs.start_pos));

        Self(funcs)
    }
}
impl<S: AsRef<str>> Funcs<S> {
/// Merges two function lists, discarding any duplicates
/// from `other`.
#[must_use]
pub fn merge(self, other: Self) -> MergeSortedIter<Func<S>, vec::IntoIter<Func<S>>, vec::IntoIter<Func<S>>> {
MergeSortedIter::new(self.0.into_iter(), other.0.into_iter())
}
/// Adapts an instruction iterator to extract the current function
pub fn with_instructions<'a, I: Iterator<Item = (Pos, &'a Instruction)>>(&'a self, instructions: I) -> WithInstructionsIter<'a, S, I> {
WithInstructionsIter::new(instructions, self)
}
/// Retrieves a function with start address `pos`
#[must_use]
pub fn get(&self, pos: Pos) -> Option<&Func<S>> {
// Note: As we're sorted, we can binary search
self.0
.binary_search_by(|func| func.start_pos.cmp(&pos))
.ok()
.and_then(|idx| self.0.get(idx))
}
}
#[allow(clippy::use_self)] // We're not using `Funcs<S>`, but `Funcs<String>`
impl<S: AsRef<str> + Into<String>> Funcs<S> {
    /// Converts every string held by the functions (name, signature,
    /// description and comments) into an owned `String`.
    ///
    /// Positions and the order of the functions are left untouched.
    #[must_use]
    pub fn into_string(self) -> Funcs<String> {
        let owned = self
            .0
            .into_iter()
            .map(
                |Func {
                     name,
                     signature,
                     desc,
                     comments,
                     start_pos,
                     end_pos,
                 }| Func {
                    name: name.into(),
                    signature: signature.into(),
                    desc: desc.into(),
                    comments: comments.into_iter().map(|(pos, comment)| (pos, comment.into())).collect(),
                    start_pos,
                    end_pos,
                },
            )
            .collect();

        Funcs(owned)
    }
}
impl Funcs<&'static str> {
/// Returns all known functions
#[must_use]
pub fn known() -> Self {
let mut functions = vec![
Func {
name: "InitHeap",
signature: "void(int* addr, unsigned int size)",
desc: "Calls A(0x39)",
comments: hashmap! {},
start_pos: Pos(0x8006a734),
end_pos: Pos(0x8006a744),
},
Func {
name: "start",
signature: "void(void)",
desc: "Executable start",
comments: hashmap! {
Pos(0x80056280) => "Zero out 0x80077a08 .. 0x801ddf38 word by word.",
Pos(0x80056284) => "^",
Pos(0x80056288) => "^",
Pos(0x8005628c) => "^",
Pos(0x800562f8) => "InitHeap(0x8007f988, ???)",
Pos(0x8005630c) => "func_1025(0x8007f98c)",
Pos(0x80056324) => "func_1026(string_0, string_0)",
},
start_pos: Pos(0x80056270),
end_pos: Pos(0x80056330),
},
Func {
name: "func_1025",
signature: "void(int*)",
desc: "",
comments: hashmap! {
Pos(0x80013ef4) => "Called indefinitely?",
Pos(0x80013efc) => "^ Due to this loop"
},
start_pos: Pos(0x80013e4c),
end_pos: Pos(0x80013f04),
},
Func {
name: "func_446",
signature: "int(int)",
desc: "",
comments: hashmap! {},
start_pos: Pos(0x80069124),
end_pos: Pos(0x80069150),
},
];
functions.sort_by(|lhs, rhs| lhs.start_pos.cmp(&rhs.start_pos));
Self(functions)
}
}
impl Funcs<String> {
/// Creates a new list of functions from an iterator over instructions
#[must_use]
pub fn from_instructions<'a>(instructions: impl Iterator<Item = (Pos, &'a Instruction)> + Clone) -> Self {
// Get all instruction offsets present, ignoring directives.
let offsets: BTreeSet<Pos> = instructions
.clone()
.filter_map(|(pos, instruction)| match instruction {
Instruction::Directive(_) => None,
_ => Some(pos),
})
.collect();
// Get all returns
let returns: BTreeSet<Pos> = instructions
.clone()
.filter_map(|(pos, instruction)| match instruction {
Instruction::Simple(SimpleInstruction::Jr { rs: Register::Ra }) => Some(pos),
_ => None,
})
.collect();
// Now get every function entrance from jumps and `dw`s.
let function_entrances: BTreeSet<Pos> = instructions
.filter_map(|(_, instruction)| match instruction {
Instruction::Simple(SimpleInstruction::Jal { target }) => Some(*target),
Instruction::Directive(Directive::Dw(target) | Directive::DwRepeated { value: target, .. }) => Some(Pos(*target)),
_ => None,
})
.filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target) && offsets.contains(target))
.collect();
// Now combine the function entrances and exits.
// Note: functions will be sorted, as
let functions = function_entrances
.iter()
.zip(0..)
.map(|(&target, idx)| Func {
name: format!("func_{idx}"),
signature: "".to_string(),
desc: "".to_string(),
comments: hashmap![],
start_pos: target,
end_pos: returns.range(target..).next().copied().unwrap_or(Pos(0xFFFFFFFF)),
})
.collect();
Self(functions)
}
}

View File

@ -0,0 +1,69 @@
//! Iterators
// Imports
use super::{Func, Funcs};
use crate::game::exe::{
instruction::{Register, SimpleInstruction},
Instruction, Pos,
};
/// Iterator adaptor that pairs every instruction with the function
/// it currently belongs to, if any.
pub struct WithInstructionsIter<'a, S: AsRef<str>, I: Iterator<Item = (Pos, &'a Instruction)>> {
    /// The underlying instructions
    instructions: I,

    /// Functions to look entrances up in
    funcs: &'a Funcs<S>,

    /// Instruction yielded on the previous call, if any
    last_instruction: Option<&'a Instruction>,

    /// Function we're currently inside of, if any
    cur_func: Option<&'a Func<S>>,
}

impl<'a, S: AsRef<str>, I: Iterator<Item = (Pos, &'a Instruction)>> WithInstructionsIter<'a, S, I> {
    /// Creates the adaptor, starting outside of any function.
    pub(super) fn new(instructions: I, funcs: &'a Funcs<S>) -> Self {
        Self {
            instructions,
            funcs,
            last_instruction: None,
            cur_func: None,
        }
    }
}

impl<'a, S: AsRef<str>, I: Iterator<Item = (Pos, &'a Instruction)>> Iterator for WithInstructionsIter<'a, S, I> {
    type Item = (Pos, &'a Instruction, Option<&'a Func<S>>);

    fn next(&mut self) -> Option<Self::Item> {
        let (pos, instruction) = self.instructions.next()?;

        // Remember this instruction and find out whether the previous one was a `jr $ra`.
        let previous = self.last_instruction.replace(instruction);
        let after_return = matches!(previous, Some(Instruction::Simple(SimpleInstruction::Jr { rs: Register::Ra })));

        let cur_func = if after_return {
            // The instruction right after a return is still reported with the
            // function we were in, but it is the last one: leave the function.
            self.cur_func.take()
        } else {
            // If we do have a function, keep it. Otherwise, check whether
            // a new one starts at this exact position.
            if self.cur_func.is_none() {
                self.cur_func = self.funcs.get(pos);
            }
            self.cur_func
        };

        Some((pos, instruction, cur_func))
    }
}

View File

@ -49,7 +49,8 @@
core_intrinsics,
const_assume,
bindings_after_at,
array_value_iter
array_value_iter,
or_patterns
)]
// Lints
#![warn(clippy::restriction, clippy::pedantic, clippy::nursery)]

View File

@ -11,6 +11,7 @@ pub mod array_split;
pub mod null_ascii_string;
#[macro_use]
pub mod impl_bytes;
pub mod merge_iter;
pub mod signed_hex;
// Exports

View File

@ -0,0 +1,67 @@
//! Merging iterator
// Imports
use either::Either;
use std::cmp::Ordering;
/// Merging sorted iterator
///
/// Interleaves two iterators, always yielding the smaller of their
/// current items. When both current items compare equal, the left one
/// is yielded and the right one is discarded. Duplicates within a
/// single input are not removed.
pub struct MergeSortedIter<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T>> {
    /// Left iterator
    lhs: Li,

    /// Right iterator
    rhs: Ri,

    /// Item already pulled from one side that lost the last comparison,
    /// tagged with the side it came from
    last: Option<Either<T, T>>,
}

impl<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T>> MergeSortedIter<T, Li, Ri> {
    /// Creates a new merging iterator
    pub fn new(lhs: Li, rhs: Ri) -> Self {
        Self { lhs, rhs, last: None }
    }

    /// Returns the smaller of the two values and stashes the larger
    /// one in `self.last`. If they're equal, returns `lhs` and drops `rhs`.
    ///
    /// `self.last` must not be populated.
    fn cmp_next(&mut self, lhs: T, rhs: T) -> T {
        let (smaller, larger) = match lhs.cmp(&rhs) {
            // Note: Discard rhs
            Ordering::Equal => return lhs,
            Ordering::Less => (lhs, Either::Right(rhs)),
            Ordering::Greater => (rhs, Either::Left(lhs)),
        };

        self.last = Some(larger);
        smaller
    }
}

impl<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T>> Iterator for MergeSortedIter<T, Li, Ri> {
    type Item = T;

    fn next(&mut self) -> Option<Self::Item> {
        // Get a candidate from each side, reusing the stashed item
        // instead of advancing the side it came from.
        let (left, right) = match self.last.take() {
            Some(Either::Left(left)) => (Some(left), self.rhs.next()),
            Some(Either::Right(right)) => (self.lhs.next(), Some(right)),
            None => (self.lhs.next(), self.rhs.next()),
        };

        match (left, right) {
            (Some(left), Some(right)) => Some(self.cmp_next(left, right)),
            // At most one side has an item left, yield it as-is.
            (left, right) => left.or(right),
        }
    }
}