From 220d0068a680179e49cd2146b9541c89ab192fd2 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Tue, 12 Jan 2021 01:17:02 +0000 Subject: [PATCH] Improved `inst::iter`. The decompiler now successfully goes through functions, data and others. --- dcb-exe/src/exe.rs | 18 ++-- dcb-exe/src/exe/data/table.rs | 36 +++---- dcb-exe/src/exe/func/table.rs | 14 ++- dcb-exe/src/exe/inst/basic/cond.rs | 35 ++++--- dcb-exe/src/exe/inst/pseudo/load_imm.rs | 11 +- dcb-exe/src/exe/iter.rs | 84 +++++++++++---- dcb-tools/src/decompiler/main.rs | 132 +++++++++++++++--------- 7 files changed, 212 insertions(+), 118 deletions(-) diff --git a/dcb-exe/src/exe.rs b/dcb-exe/src/exe.rs index cf661fa..97957a8 100644 --- a/dcb-exe/src/exe.rs +++ b/dcb-exe/src/exe.rs @@ -70,6 +70,18 @@ impl Exe { &*self.bytes } + /// Returns this executable's data table + #[must_use] + pub const fn data_table(&self) -> &DataTable { + &self.data_table + } + + /// Returns this executable's func table + #[must_use] + pub const fn func_table(&self) -> &FuncTable { + &self.func_table + } + /// Creates an iterator over this executable #[must_use] pub const fn iter(&self) -> iter::Iter { @@ -81,12 +93,6 @@ impl Exe { pub const fn parse_iter(&self) -> inst::ParseIter { inst::ParseIter::new(&*self.bytes, Self::MEM_START_ADDRESS) } - - /// Returns the instruction at `pos` - #[must_use] - pub fn get(&self, pos: Pos) -> Option { - inst::ParseIter::new(&*self.bytes, pos).next().map(|(_, inst)| inst) - } } impl Exe { diff --git a/dcb-exe/src/exe/data/table.rs b/dcb-exe/src/exe/data/table.rs index daba93e..4172c49 100644 --- a/dcb-exe/src/exe/data/table.rs +++ b/dcb-exe/src/exe/data/table.rs @@ -19,11 +19,11 @@ use int_conv::SignExtended; // Imports use super::{Data, DataType}; use crate::exe::{ - inst::{self, basic, Inst}, + inst::{self, basic, pseudo, Inst}, Pos, }; use dcb_util::DiscardingSortedMergeIter; -use std::{collections::BTreeSet, fs::File, iter::FromIterator}; +use std::{collections::BTreeSet, fs::File, 
iter::FromIterator, ops::RangeBounds}; /// Data table /// @@ -68,7 +68,13 @@ impl DataTable { // Find the closest one and check if it contains `pos` // Note: We search from the end to make sure we grab the // smaller locations first. - self.0.range(..=pos).next_back().filter(|data| pos < data.end_pos()) + self.range(..=pos).next_back().filter(|data| pos < data.end_pos()) + } + + /// Returns a range of data + #[must_use] + pub fn range(&self, range: impl RangeBounds) -> impl DoubleEndedIterator + Clone { + self.0.range(range) } } @@ -91,24 +97,14 @@ impl DataTable { Inst::Basic(basic::Inst::Load(basic::load::Inst { offset, .. }) | basic::Inst::Store(basic::store::Inst { offset, .. })) => { Some(pos + offset.sign_extended::()) }, - /* - Instruction::Pseudo( - PseudoInst::La { target: offset, .. } | - PseudoInst::Li32 { imm: offset, .. } | - PseudoInst::LbImm { offset, .. } | - PseudoInst::LbuImm { offset, .. } | - PseudoInst::LhImm { offset, .. } | - PseudoInst::LhuImm { offset, .. } | - PseudoInst::LwlImm { offset, .. } | - PseudoInst::LwImm { offset, .. } | - PseudoInst::LwrImm { offset, .. } | - PseudoInst::SbImm { offset, .. } | - PseudoInst::ShImm { offset, .. } | - PseudoInst::SwlImm { offset, .. } | - PseudoInst::SwImm { offset, .. } | - PseudoInst::SwrImm { offset, .. }, + Inst::Pseudo( + pseudo::Inst::LoadImm(pseudo::load_imm::Inst { + kind: pseudo::load_imm::Kind::Address(Pos(address)) | pseudo::load_imm::Kind::Word(address), + .. + }) | + pseudo::Inst::Load(pseudo::load::Inst { target: Pos(address), .. }) | + pseudo::Inst::Store(pseudo::store::Inst { target: Pos(address), .. 
}), ) | - */ Inst::Directive(Directive::Dw(address)) => Some(Pos(address)), _ => None, }) diff --git a/dcb-exe/src/exe/func/table.rs b/dcb-exe/src/exe/func/table.rs index b477051..31efba8 100644 --- a/dcb-exe/src/exe/func/table.rs +++ b/dcb-exe/src/exe/func/table.rs @@ -14,7 +14,6 @@ pub mod error; // Exports pub use error::GetKnownError; -use int_conv::SignExtended; //pub use iter::WithInstructionsIter; // Imports @@ -28,6 +27,7 @@ use std::{ collections::{BTreeMap, BTreeSet}, fs::File, iter::FromIterator, + ops::RangeBounds, }; /// Function table @@ -69,7 +69,13 @@ impl FuncTable { #[must_use] pub fn get(&self, pos: Pos) -> Option<&Func> { // Note: As we're sorted, we can binary search - self.0.range(..=pos).next_back().filter(|func| pos < func.end_pos) + self.range(..=pos).next_back().filter(|func| pos < func.end_pos) + } + + /// Returns a range of functions + #[must_use] + pub fn range(&self, range: impl RangeBounds) -> impl DoubleEndedIterator + Clone { + self.0.range(range) } /* @@ -137,7 +143,7 @@ impl FuncTable { }, ))) => Some(inst.target(pos)), // Conditional jumps - Inst::Basic(basic::Inst::Cond(basic::cond::Inst { offset, .. 
})) => Some(pos + offset.sign_extended::()), + Inst::Basic(basic::Inst::Cond(inst)) => Some(inst.target(pos)), _ => None, }) .filter(|target| Inst::CODE_RANGE.contains(target)) @@ -146,7 +152,7 @@ impl FuncTable { // Now check every `Jal` and `Dw` for possible function entrances let function_entries: BTreeSet = insts .filter_map(|(pos, inst)| match inst { - // `jr` + // `jal` Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Imm( inst @ basic::jmp::imm::Inst { kind: basic::jmp::imm::Kind::JumpLink, diff --git a/dcb-exe/src/exe/inst/basic/cond.rs b/dcb-exe/src/exe/inst/basic/cond.rs index fe04f20..2b8f71f 100644 --- a/dcb-exe/src/exe/inst/basic/cond.rs +++ b/dcb-exe/src/exe/inst/basic/cond.rs @@ -95,7 +95,7 @@ impl Inst { /// Returns the target using an offset #[must_use] pub fn target_of(offset: i16, pos: Pos) -> Pos { - pos + 4 * offset.sign_extended::() + pos + 4 * (offset.sign_extended::() + 1) } } @@ -149,18 +149,25 @@ impl Encodable for Inst { impl InstFmt for Inst { fn fmt(&self, pos: Pos, f: &mut fmt::Formatter) -> fmt::Result { - let Self { kind, arg, .. } = self; - let mnemonic = kind.mnemonic(); - let target = self.target(pos); - - match kind { - Kind::Equal(reg) | Kind::NotEqual(reg) => write!(f, "{mnemonic} {arg}, {reg}, {target}"), - Kind::LessOrEqualZero | - Kind::GreaterThanZero | - Kind::LessThanZero | - Kind::GreaterOrEqualZero | - Kind::LessThanZeroLink | - Kind::GreaterOrEqualZeroLink => write!(f, "{mnemonic} {arg}, {target}"), - } + write!(f, "{}", self.fmt_target(self.target(pos))) + } +} + +impl Inst { + /// Returns a formattable for this instruction using `target` as its target. + pub fn fmt_target<'a>(self, target: impl fmt::Display + 'a) -> impl fmt::Display + 'a { + dcb_util::DisplayWrapper::new(move |f| { + let Self { kind, arg, .. 
} = self; + let mnemonic = kind.mnemonic(); + match kind { + Kind::Equal(reg) | Kind::NotEqual(reg) => write!(f, "{mnemonic} {arg}, {reg}, {target}"), + Kind::LessOrEqualZero | + Kind::GreaterThanZero | + Kind::LessThanZero | + Kind::GreaterOrEqualZero | + Kind::LessThanZeroLink | + Kind::GreaterOrEqualZeroLink => write!(f, "{mnemonic} {arg}, {target}"), + } + }) } } diff --git a/dcb-exe/src/exe/inst/pseudo/load_imm.rs b/dcb-exe/src/exe/inst/pseudo/load_imm.rs index b8d15be..93a5e11 100644 --- a/dcb-exe/src/exe/inst/pseudo/load_imm.rs +++ b/dcb-exe/src/exe/inst/pseudo/load_imm.rs @@ -2,7 +2,10 @@ // Imports use super::Decodable; -use crate::exe::inst::{basic, InstFmt, InstSize, Register}; +use crate::{ + exe::inst::{basic, InstFmt, InstSize, Register}, + Pos, +}; use dcb_util::SignedHex; use int_conv::{Join, SignExtended, Signed}; use std::convert::TryInto; @@ -14,7 +17,7 @@ pub enum Kind { /// Address /// /// Alias for `lui $dst, {hi} / addiu $dst, $dst, {lo}` - Address(u32), + Address(Pos), /// Word /// @@ -47,7 +50,7 @@ impl Kind { #[must_use] pub fn value_fmt(self) -> impl std::fmt::Display { dcb_util::DisplayWrapper::new(move |f| match self { - Self::Address(address) => write!(f, "{address:#x}"), + Self::Address(address) => write!(f, "{address}"), Self::Word(value) => write!(f, "{value:#x}"), Self::HalfWordUnsigned(value) => write!(f, "{value:#x}"), Self::HalfWordSigned(value) => write!(f, "{:#}", SignedHex(value)), @@ -76,7 +79,7 @@ impl Decodable for Inst { dst: lui.dst, kind: match alu.kind { // lui << 16 + rhs - AddUnsigned(rhs) => Kind::Address((u32::join(0, lui.value).as_signed() + rhs.sign_extended::()).as_unsigned()), + AddUnsigned(rhs) => Kind::Address(Pos((u32::join(0, lui.value).as_signed() + rhs.sign_extended::()).as_unsigned())), Or(rhs) => Kind::Word(u32::join(rhs, lui.value)), _ => return None, }, diff --git a/dcb-exe/src/exe/iter.rs b/dcb-exe/src/exe/iter.rs index ea41b48..9685ee8 100644 --- a/dcb-exe/src/exe/iter.rs +++ 
b/dcb-exe/src/exe/iter.rs @@ -1,13 +1,11 @@ //! Executable iterator // Imports -use super::{ - inst::{Inst, ParseIter}, - Data, Func, -}; +use super::{inst::ParseIter, Data, Func}; use crate::{Exe, Pos}; /// Iterator over executable parts +#[derive(PartialEq, Eq, Clone, Debug)] pub struct Iter<'a> { /// Executable exe: &'a Exe, @@ -27,15 +25,31 @@ impl<'a> Iter<'a> { } /// An executable item +#[derive(PartialEq, Eq, Clone, Debug)] pub enum ExeItem<'a> { /// A function - Func(&'a Func), + Func { + /// The function metadata + func: &'a Func, + + /// The instructions for this function + insts: ParseIter<'a>, + }, /// A data - Data(&'a Data), + Data { + /// The data metadata + data: &'a Data, - /// Instruction - Inst(Pos, Inst<'a>), + /// The instructions for this data + insts: ParseIter<'a>, + }, + + /// Unknown + Unknown { + /// Instruction in this unknown section + insts: ParseIter<'a>, + }, } impl<'a> Iterator for Iter<'a> { @@ -43,27 +57,59 @@ impl<'a> Iterator for Iter<'a> { fn next(&mut self) -> Option { // If we're at the end, return `None` - if self.cur_pos == Exe::MEM_END_ADDRESS { + let cur_pos = self.cur_pos; + if cur_pos >= Exe::MEM_END_ADDRESS { return None; } // Try to get data - // TODO: Not skip over small data somehow? 
Maybe just remove the ability to have overlapping data sections - if let Some(data) = self.exe.data_table.get(self.cur_pos) { - self.cur_pos = data.end_pos(); - return Some(ExeItem::Data(data)); + if let Some(data) = self.exe.data_table.get(cur_pos) { + // Check the next data for our next position that isn't equal to our current one + let end_pos = match self.exe.data_table.range(cur_pos..).find(|next_data| next_data.pos != data.pos) { + // If it ends before or at the end of this data, use it + Some(next_data) if next_data.pos <= data.end_pos() => next_data.pos, + + // Else end at the end of this data + _ => data.end_pos(), + }; + self.cur_pos = end_pos; + + return Some(ExeItem::Data { + data, + insts: ParseIter::new(&self.exe.bytes[cur_pos.as_mem_idx()..end_pos.as_mem_idx()], cur_pos), + }); } // Else try to get a function if let Some(func) = self.exe.func_table.get(self.cur_pos) { self.cur_pos = func.end_pos; - return Some(ExeItem::Func(func)); + return Some(ExeItem::Func { + func, + insts: ParseIter::new(&self.exe.bytes[cur_pos.as_mem_idx()..func.end_pos.as_mem_idx()], cur_pos), + }); } - // Else simply get an instruction - let mut iter = ParseIter::new(&self.exe.bytes[self.cur_pos.as_mem_idx()..], self.cur_pos); - let (pos, inst) = iter.next().expect("Iterator was empty before code ending"); - self.cur_pos = iter.cur_pos(); - Some(ExeItem::Inst(pos, inst)) + // Else return an iterator until the next data / function, or until end, if none or past the end. 
+ let next_data = self.exe.data_table.range(cur_pos..).next(); + let next_func = self.exe.func_table.range(cur_pos..).next(); + + let end_pos = match (next_data, next_func) { + (Some(next_data), Some(next_func)) => match next_data.pos < next_func.start_pos { + true => next_data.pos, + false => next_func.start_pos, + }, + (Some(next_data), None) => next_data.pos, + (None, Some(next_func)) => next_func.start_pos, + (None, None) => Exe::MEM_END_ADDRESS, + }; + + // Make sure to limit the end position + let end_pos = end_pos.min(Exe::MEM_END_ADDRESS); + self.cur_pos = end_pos; + + + Some(ExeItem::Unknown { + insts: ParseIter::new(&self.exe.bytes[cur_pos.as_mem_idx()..end_pos.as_mem_idx()], cur_pos), + }) } } diff --git a/dcb-tools/src/decompiler/main.rs b/dcb-tools/src/decompiler/main.rs index 72bb3f2..546fba4 100644 --- a/dcb-tools/src/decompiler/main.rs +++ b/dcb-tools/src/decompiler/main.rs @@ -68,6 +68,8 @@ #![allow(clippy::else_if_without_else)] // We're usually fine with missing future variants #![allow(clippy::wildcard_enum_match_arm)] +// We're fine with it +#![allow(clippy::match_bool)] // Modules mod cli; @@ -75,8 +77,17 @@ mod cli; mod logger; // Imports +use std::fmt; + use anyhow::Context; -use dcb_exe::exe::inst::InstFmt; +use dcb_exe::{ + exe::{ + inst::{basic, pseudo, Inst, InstFmt}, + iter::ExeItem, + Func, + }, + Pos, +}; use dcb_io::GameFile; #[allow(clippy::cognitive_complexity, clippy::too_many_lines)] // TODO: Refactor @@ -97,78 +108,97 @@ fn main() -> Result<(), anyhow::Error> { println!("Header:\n{}", exe.header()); - for (pos, inst) in exe.parse_iter() { - println!("{}: {}", pos, inst.fmt_value(pos)); - } - - /* for item in exe.iter() { match item { // For each function or header, print a header and all it's instructions - ExeItem::Func(func) => { - println!("####################"); + ExeItem::Func { func, insts } => { + println!(); println!("{}:", func.name); if !func.signature.is_empty() { println!("# {}", func.signature); } for description 
in func.desc.lines() { - println!("# {}", description); + println!("# {description}"); } - /* - #[allow( - clippy::as_conversions, - clippy::cast_precision_loss, - clippy::cast_possible_truncation, - clippy::cast_sign_loss - )] // TODO: Check if this is fine - let pos_width = ((func.end_pos - func.start_pos) as f64).log10() as usize; - for pos in (func.start_pos.0..func.end_pos.0).map(Pos) { - let inst = exe.get(pos).expect("Unable to get function instruction"); - println!( - "{:0width$}: {}", - SignedHex(pos - func.start_pos), - inst.fmt_value(pos, &*exe.bytes), - width = pos_width, - ); + for (pos, label) in &func.labels { + println!("# {pos}: .{label}"); + } + for (pos, inst) in insts { + // If there's a label, print it + if let Some(label) = func.labels.get(&pos) { + println!("\t.{label}:"); + } + + // Write the position + print!("{pos}: "); + + /// Looks up a function, data or label, if possible, else returns the position. + fn inst_target<'a>(exe: &'a dcb_exe::Exe, func: &'a Func, pos: Pos) -> impl fmt::Display + 'a { + dcb_util::DisplayWrapper::new(move |f| { + if let Some(label) = func.labels.get(&pos) { + return write!(f, "{}", label); + } + + if let Some(func) = exe.func_table().get(pos) { + return match func.start_pos == pos { + true => write!(f, "{}", func.name), + false => write!(f, "{}{:+#x}", func.name, pos - func.start_pos), + }; + } + + if let Some(data) = exe.data_table().get(pos) { + return match data.pos == pos { + true => write!(f, "{}", data.name), + false => write!(f, "{}{:+#x}", data.name, pos - data.pos), + }; + } + + write!(f, "{}", pos) + }) + } + + // If it's a jump, check if we can replace it with a label + #[rustfmt::skip] + match inst { + Inst::Basic (basic ::Inst::Cond (inst)) => print!("{}", inst.fmt_target(inst_target(&exe, func, inst.target(pos)))), + Inst::Basic (basic ::Inst::Jmp (inst)) => print!("{}", inst.fmt_value(pos)), + Inst::Basic (basic ::Inst::Load (inst)) => print!("{}", inst.fmt_value(pos)), + Inst::Basic (basic 
::Inst::Store (inst)) => print!("{}", inst.fmt_value(pos)), + Inst::Pseudo(pseudo::Inst::LoadImm(inst)) => print!("{}", inst.fmt_value(pos)), + Inst::Pseudo(pseudo::Inst::Jmp (inst)) => print!("{}", inst.fmt_value(pos)), + Inst::Pseudo(pseudo::Inst::Load (inst)) => print!("{}", inst.fmt_value(pos)), + Inst::Pseudo(pseudo::Inst::Store (inst)) => print!("{}", inst.fmt_value(pos)), + inst => print!("{}", inst.fmt_value(pos)), + }; + + // If there's a comment, print it + if let Some(comment) = func.comments.get(&pos) { + print!(" # {comment}"); + } + + println!(); } - */ - println!("####################"); }, - ExeItem::Data(data) => { - println!("####################"); + ExeItem::Data { data, insts } => { + println!(); println!("{}:", data.name); for description in data.desc.lines() { - println!("# {}", description); + println!("# {description}"); } - /* - #[allow( - clippy::as_conversions, - clippy::cast_precision_loss, - clippy::cast_possible_truncation, - clippy::cast_sign_loss - )] // TODO: Check if this is fine - let pos_width = f64::from(data.size()).log10() as usize; - for pos in (data.pos.0..data.end_pos().0).map(Pos) { - let inst = exe.get(pos).expect("Unable to get data instruction"); - println!( - "{:0width$}: {}", - SignedHex(pos - data.pos), - inst.fmt_value(pos, &*exe.bytes), - width = pos_width, - ); + for (pos, inst) in insts { + println!("{}: {}", pos, inst.fmt_value(pos)); } - */ - println!("####################"); }, // If it's standalone, print it by it's own - ExeItem::Inst(pos, inst) => { - println!("{}: {}", pos, inst.fmt_value(pos)); + ExeItem::Unknown { insts } => { + for (pos, inst) in insts { + println!("{pos}: {}", inst.fmt_value(pos)); + } }, } } - */ /* // Build the full instructions iterator