From d73a9ff4156732d7084a8bb4f1a67ea66eb95435 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Wed, 28 Oct 2020 19:18:22 +0000 Subject: [PATCH] Added local labels to functions. Jump instructions are now labeled with their label / function. --- dcb-tools/src/decompiler/main.rs | 98 ++++++++++++-------------------- dcb/src/game/exe/func.rs | 39 +++++-------- dcb/src/game/exe/func/funcs.rs | 49 +++++++++++++--- dcb/src/game/exe/pos.rs | 10 +++- 4 files changed, 102 insertions(+), 94 deletions(-) diff --git a/dcb-tools/src/decompiler/main.rs b/dcb-tools/src/decompiler/main.rs index eba792e..2a31dd5 100644 --- a/dcb-tools/src/decompiler/main.rs +++ b/dcb-tools/src/decompiler/main.rs @@ -85,7 +85,7 @@ use dcb::{ instruction::{ Directive, PseudoInstruction::{self, Nop}, - Raw, Register, SimpleInstruction, + Raw, SimpleInstruction, }, Instruction, Pos, }, @@ -133,10 +133,6 @@ fn main() -> Result<(), anyhow::Error> { )) .collect(); - // All instruction offsets - log::debug!("Retrieving all offsets"); - let offsets: HashSet = instructions.iter().map(|(offset, _)| offset).copied().collect(); - // All data / string addresses log::debug!("Retrieving all data / strings addresses"); let data_string_addresses: HashSet = instructions @@ -163,32 +159,6 @@ fn main() -> Result<(), anyhow::Error> { }) .collect(); - // Get all local jumps - log::debug!("Retrieving all local jumps"); - let locals_pos: HashMap = instructions - .iter() - .filter_map(|(_, instruction)| match *instruction { - Instruction::Simple( - SimpleInstruction::J { target } | - SimpleInstruction::Beq { target, .. } | - SimpleInstruction::Bne { target, .. } | - SimpleInstruction::Bltz { target, .. } | - SimpleInstruction::Bgez { target, .. } | - SimpleInstruction::Bgtz { target, .. } | - SimpleInstruction::Blez { target, .. } | - SimpleInstruction::Bltzal { target, .. } | - SimpleInstruction::Bgezal { target, .. }, - ) | - Instruction::Pseudo( - PseudoInstruction::Beqz { target, .. 
} | PseudoInstruction::Bnez { target, .. } | PseudoInstruction::B { target }, - ) => Some(target), - _ => None, - }) - .filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target) && offsets.contains(target)) - .unique() - .zip(0..) - .collect(); - // Get all strings log::debug!("Retrieving all strings"); let strings_pos: HashMap = instructions @@ -242,19 +212,19 @@ fn main() -> Result<(), anyhow::Error> { } // Check if we need to prefix - match cur_func { - Some(cur_func) if cur_func.start_pos == cur_pos => { + if let Some(cur_func) = cur_func { + if cur_func.start_pos == cur_pos { + println!(); println!("####################"); println!("{}:", cur_func.name); println!("# {}\n#", cur_func.signature); for description in cur_func.desc.lines() { println!("# {}", description); } - }, - _ => (), - } - if let Some(local_idx) = locals_pos.get(&cur_pos) { - println!("\t.{local_idx}:"); + } + if let Some(label) = cur_func.labels.get(&cur_pos) { + println!("\t.{label}:"); + } } if let Some(string_idx) = strings_pos.get(&cur_pos) { println!("\tstring_{string_idx}:"); @@ -263,13 +233,9 @@ fn main() -> Result<(), anyhow::Error> { println!("\tdata_{data_idx}:"); } - // Print the instruction - print!("{cur_pos:#010x}: {instruction}"); - - // Check if we should have any comments with this instruction - // TODO: Add Pseudo jumps too + // Print the instruction and its location. + print!("{cur_pos:#010x}: "); match instruction { - // If we have a jump, make a comment with it's target Instruction::Simple( SimpleInstruction::J { target } | SimpleInstruction::Jal { target } | @@ -281,20 +247,31 @@ fn main() -> Result<(), anyhow::Error> { SimpleInstruction::Blez { target, .. } | SimpleInstruction::Bltzal { target, .. } | SimpleInstruction::Bgezal { target, .. }, + ) | + Instruction::Pseudo( + PseudoInstruction::B { target } | PseudoInstruction::Beqz { target, .. } | PseudoInstruction::Bnez { target, .. 
}, ) => { - if let Some(func) = functions.get(*target) { - print!(" # {}", func.name); - } - if let Some(local_idx) = locals_pos.get(target) { - print!(" # .{local_idx}"); + if let Some((target, prefix)) = functions + .get(*target) + .map(|func| (&func.name, "")) + .or_else(|| cur_func.and_then(|func| func.labels.get(target).map(|label| (label, ".")))) + { + // TODO: Improve solution, removing the target like this isn't + // a good way of going about it. + let instruction = instruction.to_string(); + #[allow(clippy::indexing_slicing)] // This can't panic, its index is `..{0..len}`. + let instruction = &instruction[..instruction.rfind(' ').unwrap_or_else(|| instruction.len())]; + print!("{instruction} {prefix}{target}"); + } else { + print!("{instruction}"); } }, + _ => print!("{instruction}"), + } - // Comment returns - Instruction::Simple(SimpleInstruction::Jr { rs: Register::Ra }) => { - print!(" # Return"); - }, + // Check if we should have any comments with this instruction + match instruction { // Comment loading address, loading and writing values of string and data // TODO: Maybe check loads / writes to halfway between // the strings / data. @@ -324,13 +301,9 @@ fn main() -> Result<(), anyhow::Error> { // Comment `dw`s with both function and data Instruction::Directive(Directive::Dw(offset) | Directive::DwRepeated { value: offset, .. 
}) => { - print!(" #"); if let Some(func) = functions.get(Pos(*offset)) { print!(" # {}", func.name); } - if let Some(local_idx) = locals_pos.get(Pos::ref_cast(offset)) { - print!(" # .{local_idx}"); - } if let Some(string_idx) = strings_pos.get(Pos::ref_cast(offset)) { print!(" # string_{string_idx}"); } @@ -351,10 +324,13 @@ fn main() -> Result<(), anyhow::Error> { // And finish the line println!(); - // If the last instruction was a `return` and we have a function, space it out - if let (Some(Instruction::Simple(SimpleInstruction::Jr { rs: Register::Ra })), Some(_cur_func)) = (last_instruction, cur_func) { - println!(); - println!("####################"); + // If this is the last instruction in this function, space it out + // TODO: This can fail when the last instruction is more than 4 bytes + if let Some(cur_func) = cur_func { + if cur_func.end_pos == cur_pos + 4 { + println!("####################"); + println!(); + } } } diff --git a/dcb/src/game/exe/func.rs b/dcb/src/game/exe/func.rs index 0c464c6..263fafa 100644 --- a/dcb/src/game/exe/func.rs +++ b/dcb/src/game/exe/func.rs @@ -29,6 +29,9 @@ pub struct Func> { /// Comments pub comments: HashMap, + /// Labels + pub labels: HashMap, + /// Start position pub start_pos: Pos, @@ -66,7 +69,11 @@ impl Func<&'static str> { name: "InitHeap", signature: "void(int* addr, unsigned int size)", desc: "Calls A(0x39)", - comments: hashmap! {}, + comments: hashmap! { + Pos(0x8006a738) => "Register tailcall. Likely to prevent calling in KSEG0 and do it in KUSEG", + Pos(0x8006a73c) => "arg: 0x39", + }, + labels: hashmap! 
{}, start_pos: Pos(0x8006a734), end_pos: Pos(0x8006a744), }, @@ -79,31 +86,15 @@ impl Func<&'static str> { Pos(0x80056284) => "^", Pos(0x80056288) => "^", Pos(0x8005628c) => "^", - Pos(0x800562f8) => "InitHeap(0x8007f988, ???)", - Pos(0x8005630c) => "func_1025(0x8007f98c)", - Pos(0x80056324) => "func_1026(string_0, string_0)", + Pos(0x800562f8) => "args: (0x8007f988, ???)", + Pos(0x8005630c) => "args: (0x8007f98c)", + Pos(0x80056324) => "args: (string_0, string_0)", + }, + labels: hashmap! { + Pos(0x80056280) => "zero_loop", }, start_pos: Pos(0x80056270), - end_pos: Pos(0x80056330), - }, - Self { - name: "func_1025", - signature: "void(int*)", - desc: "", - comments: hashmap! { - Pos(0x80013ef4) => "Called indefinitely?", - Pos(0x80013efc) => "^ Due to this loop" - }, - start_pos: Pos(0x80013e4c), - end_pos: Pos(0x80013f04), - }, - Self { - name: "func_446", - signature: "int(int)", - desc: "", - comments: hashmap! {}, - start_pos: Pos(0x80069124), - end_pos: Pos(0x80069150), + end_pos: Pos(0x80056384), }, ]) } diff --git a/dcb/src/game/exe/func/funcs.rs b/dcb/src/game/exe/func/funcs.rs index 3dfcc49..9914243 100644 --- a/dcb/src/game/exe/func/funcs.rs +++ b/dcb/src/game/exe/func/funcs.rs @@ -4,7 +4,7 @@ use super::{Func, WithInstructionsIter}; use crate::{ game::exe::{ - instruction::{Directive, Register, SimpleInstruction}, + instruction::{Directive, PseudoInstruction, Register, SimpleInstruction}, Instruction, Pos, }, util::merge_iter::MergeSortedIter, @@ -58,6 +58,7 @@ impl + Into> Funcs { signature: func.signature.into(), desc: func.desc.into(), comments: func.comments.into_iter().map(|(pos, comment)| (pos, comment.into())).collect(), + labels: func.labels.into_iter().map(|(pos, label)| (pos, label.into())).collect(), start_pos: func.start_pos, end_pos: func.end_pos, }) @@ -100,6 +101,29 @@ impl Funcs { }) .collect(); + // Get all labels + let labels: BTreeSet = instructions + .clone() + .filter_map(|(_, instruction)| match instruction { + Instruction::Simple( 
+ SimpleInstruction::J { target } | + SimpleInstruction::Beq { target, .. } | + SimpleInstruction::Bne { target, .. } | + SimpleInstruction::Bltz { target, .. } | + SimpleInstruction::Bgez { target, .. } | + SimpleInstruction::Bgtz { target, .. } | + SimpleInstruction::Blez { target, .. } | + SimpleInstruction::Bltzal { target, .. } | + SimpleInstruction::Bgezal { target, .. }, + ) | + Instruction::Pseudo( + PseudoInstruction::Beqz { target, .. } | PseudoInstruction::Bnez { target, .. } | PseudoInstruction::B { target }, + ) => Some(*target), + _ => None, + }) + .filter(|target| (Instruction::CODE_START..Instruction::CODE_END).contains(target) && offsets.contains(target)) + .collect(); + // Now get every function entrance from jumps and `dw`s. let function_entrances: BTreeSet = instructions .filter_map(|(_, instruction)| match instruction { @@ -115,13 +139,22 @@ impl Funcs { let functions = function_entrances .iter() .zip(0..) - .map(|(&target, idx)| Func { - name: format!("func_{idx}"), - signature: "".to_string(), - desc: "".to_string(), - comments: hashmap![], - start_pos: target, - end_pos: returns.range(target..).next().copied().unwrap_or(Pos(0xFFFFFFFF)), + .map(|(&target, idx)| { + let end_pos = returns.range(target..).next().copied().unwrap_or(target); + let labels = labels + .range(target..end_pos) + .zip(0..) + .map(|(&pos, idx)| (pos, format!("{idx}"))) + .collect(); + Func { + name: format!("func_{idx}"), + signature: "".to_string(), + desc: "".to_string(), + comments: hashmap! 
{}, + labels, + start_pos: target, + end_pos, + } }) .collect(); diff --git a/dcb/src/game/exe/pos.rs b/dcb/src/game/exe/pos.rs index 8e5fd64..e2d4df8 100644 --- a/dcb/src/game/exe/pos.rs +++ b/dcb/src/game/exe/pos.rs @@ -2,7 +2,7 @@ // TODO: More implementations for `Pos` // Imports -use int_conv::Signed; +use int_conv::{SignExtended, Signed}; use std::{fmt, ops}; /// An instruction position @@ -27,6 +27,14 @@ impl ops::Sub for Pos { } } +impl ops::Sub for Pos { + type Output = i64; + + fn sub(self, rhs: Self) -> Self::Output { + self.0.as_signed().sign_extended::() - rhs.0.as_signed().sign_extended::() + } +} + impl ops::Add for Pos { type Output = Self;