Improved inst::iter.

The decompiler now successfully goes through functions, data and others.
This commit is contained in:
2021-01-12 01:17:02 +00:00
parent 267cfd1cd7
commit 220d0068a6
7 changed files with 212 additions and 118 deletions

View File

@@ -70,6 +70,18 @@ impl Exe {
&*self.bytes
}
/// Returns this executable's data table
///
/// The table is handed out by shared reference; nothing is copied.
#[must_use]
pub const fn data_table(&self) -> &DataTable {
&self.data_table
}
/// Returns this executable's func table
///
/// The table is handed out by shared reference; nothing is copied.
#[must_use]
pub const fn func_table(&self) -> &FuncTable {
&self.func_table
}
/// Creates an iterator over this executable
#[must_use]
pub const fn iter(&self) -> iter::Iter {
@@ -81,12 +93,6 @@ impl Exe {
pub const fn parse_iter(&self) -> inst::ParseIter {
inst::ParseIter::new(&*self.bytes, Self::MEM_START_ADDRESS)
}
/// Returns the instruction at `pos`
///
/// Builds a fresh [`inst::ParseIter`] from this executable's bytes and `pos`,
/// takes the first item it yields and drops the position paired with it.
/// Returns `None` when the iterator yields nothing.
// NOTE(review): the whole byte buffer is passed together with `pos`, while
// `parse_iter` pairs that same buffer with `Self::MEM_START_ADDRESS`.
// Presumably the bytes need to be sliced to start at `pos` before parsing;
// confirm against `ParseIter::new`.
#[must_use]
pub fn get(&self, pos: Pos) -> Option<inst::Inst> {
inst::ParseIter::new(&*self.bytes, pos).next().map(|(_, inst)| inst)
}
}
impl Exe {

View File

@@ -19,11 +19,11 @@ use int_conv::SignExtended;
// Imports
use super::{Data, DataType};
use crate::exe::{
inst::{self, basic, Inst},
inst::{self, basic, pseudo, Inst},
Pos,
};
use dcb_util::DiscardingSortedMergeIter;
use std::{collections::BTreeSet, fs::File, iter::FromIterator};
use std::{collections::BTreeSet, fs::File, iter::FromIterator, ops::RangeBounds};
/// Data table
///
@@ -68,7 +68,13 @@ impl DataTable {
// Find the closest one and check if it contains `pos`
// Note: We search from the end to make sure we grab the
// smaller locations first.
self.0.range(..=pos).next_back().filter(|data| pos < data.end_pos())
self.range(..=pos).next_back().filter(|data| pos < data.end_pos())
}
/// Returns a range of data
///
/// `range` is forwarded unchanged to the `range` method of the wrapped
/// collection, so its bounds are expressed in [`Pos`].
/// The returned iterator can be walked from either end and cloned.
// NOTE(review): presumably the wrapped collection is a `BTreeSet`, which would
// make the items come out in ascending order; confirm against the field type.
#[must_use]
pub fn range(&self, range: impl RangeBounds<Pos>) -> impl DoubleEndedIterator<Item = &Data> + Clone {
self.0.range(range)
}
}
@@ -91,24 +97,14 @@ impl DataTable {
Inst::Basic(basic::Inst::Load(basic::load::Inst { offset, .. }) | basic::Inst::Store(basic::store::Inst { offset, .. })) => {
Some(pos + offset.sign_extended::<i32>())
},
/*
Instruction::Pseudo(
PseudoInst::La { target: offset, .. } |
PseudoInst::Li32 { imm: offset, .. } |
PseudoInst::LbImm { offset, .. } |
PseudoInst::LbuImm { offset, .. } |
PseudoInst::LhImm { offset, .. } |
PseudoInst::LhuImm { offset, .. } |
PseudoInst::LwlImm { offset, .. } |
PseudoInst::LwImm { offset, .. } |
PseudoInst::LwrImm { offset, .. } |
PseudoInst::SbImm { offset, .. } |
PseudoInst::ShImm { offset, .. } |
PseudoInst::SwlImm { offset, .. } |
PseudoInst::SwImm { offset, .. } |
PseudoInst::SwrImm { offset, .. },
Inst::Pseudo(
pseudo::Inst::LoadImm(pseudo::load_imm::Inst {
kind: pseudo::load_imm::Kind::Address(Pos(address)) | pseudo::load_imm::Kind::Word(address),
..
}) |
pseudo::Inst::Load(pseudo::load::Inst { target: Pos(address), .. }) |
pseudo::Inst::Store(pseudo::store::Inst { target: Pos(address), .. }),
) |
*/
Inst::Directive(Directive::Dw(address)) => Some(Pos(address)),
_ => None,
})

View File

@@ -14,7 +14,6 @@ pub mod error;
// Exports
pub use error::GetKnownError;
use int_conv::SignExtended;
//pub use iter::WithInstructionsIter;
// Imports
@@ -28,6 +27,7 @@ use std::{
collections::{BTreeMap, BTreeSet},
fs::File,
iter::FromIterator,
ops::RangeBounds,
};
/// Function table
@@ -69,7 +69,13 @@ impl FuncTable {
#[must_use]
pub fn get(&self, pos: Pos) -> Option<&Func> {
// Note: As we're sorted, we can binary search
self.0.range(..=pos).next_back().filter(|func| pos < func.end_pos)
self.range(..=pos).next_back().filter(|func| pos < func.end_pos)
}
/// Returns a range of functions
///
/// `range` is forwarded unchanged to the `range` method of the wrapped
/// collection, so its bounds are expressed in [`Pos`].
/// The returned iterator can be walked from either end and cloned.
// NOTE(review): presumably the wrapped collection is an ordered set, which
// would make the items come out in ascending order; confirm against the field type.
#[must_use]
pub fn range(&self, range: impl RangeBounds<Pos>) -> impl DoubleEndedIterator<Item = &Func> + Clone {
self.0.range(range)
}
/*
@@ -137,7 +143,7 @@ impl FuncTable {
},
))) => Some(inst.target(pos)),
// Conditional jumps
Inst::Basic(basic::Inst::Cond(basic::cond::Inst { offset, .. })) => Some(pos + offset.sign_extended::<i32>()),
Inst::Basic(basic::Inst::Cond(inst)) => Some(inst.target(pos)),
_ => None,
})
.filter(|target| Inst::CODE_RANGE.contains(target))
@@ -146,7 +152,7 @@ impl FuncTable {
// Now check every `Jal` and `Dw` for possible function entrances
let function_entries: BTreeSet<Pos> = insts
.filter_map(|(pos, inst)| match inst {
// `jr`
// `jal`
Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Imm(
inst @ basic::jmp::imm::Inst {
kind: basic::jmp::imm::Kind::JumpLink,

View File

@@ -95,7 +95,7 @@ impl Inst {
/// Returns the target using an offset
#[must_use]
pub fn target_of(offset: i16, pos: Pos) -> Pos {
pos + 4 * offset.sign_extended::<i32>()
pos + 4 * (offset.sign_extended::<i32>() + 1)
}
}
@@ -149,18 +149,25 @@ impl Encodable for Inst {
impl InstFmt for Inst {
fn fmt(&self, pos: Pos, f: &mut fmt::Formatter) -> fmt::Result {
let Self { kind, arg, .. } = self;
let mnemonic = kind.mnemonic();
let target = self.target(pos);
match kind {
Kind::Equal(reg) | Kind::NotEqual(reg) => write!(f, "{mnemonic} {arg}, {reg}, {target}"),
Kind::LessOrEqualZero |
Kind::GreaterThanZero |
Kind::LessThanZero |
Kind::GreaterOrEqualZero |
Kind::LessThanZeroLink |
Kind::GreaterOrEqualZeroLink => write!(f, "{mnemonic} {arg}, {target}"),
}
write!(f, "{}", self.fmt_target(self.target(pos)))
}
}
impl Inst {
/// Returns a formattable for this instruction using `target` as its target.
///
/// The returned value writes the mnemonic of `kind`, then `arg`, then (for the
/// kinds that carry a second register) that register, and finally `target`.
/// `target` only needs to implement [`fmt::Display`], so callers may pass
/// anything displayable in place of a raw position.
pub fn fmt_target<'a>(self, target: impl fmt::Display + 'a) -> impl fmt::Display + 'a {
// `move` so the wrapper owns both the instruction and `target`
dcb_util::DisplayWrapper::new(move |f| {
let Self { kind, arg, .. } = self;
let mnemonic = kind.mnemonic();
match kind {
// These two kinds carry a second register, printed between `arg` and the target
Kind::Equal(reg) | Kind::NotEqual(reg) => write!(f, "{mnemonic} {arg}, {reg}, {target}"),
// The remaining kinds carry no extra operand, so only `arg` and the target are printed
Kind::LessOrEqualZero |
Kind::GreaterThanZero |
Kind::LessThanZero |
Kind::GreaterOrEqualZero |
Kind::LessThanZeroLink |
Kind::GreaterOrEqualZeroLink => write!(f, "{mnemonic} {arg}, {target}"),
}
})
}
}

View File

@@ -2,7 +2,10 @@
// Imports
use super::Decodable;
use crate::exe::inst::{basic, InstFmt, InstSize, Register};
use crate::{
exe::inst::{basic, InstFmt, InstSize, Register},
Pos,
};
use dcb_util::SignedHex;
use int_conv::{Join, SignExtended, Signed};
use std::convert::TryInto;
@@ -14,7 +17,7 @@ pub enum Kind {
/// Address
///
/// Alias for `lui $dst, {hi} / addiu $dst, $dst, {lo}`
Address(u32),
Address(Pos),
/// Word
///
@@ -47,7 +50,7 @@ impl Kind {
#[must_use]
pub fn value_fmt(self) -> impl std::fmt::Display {
dcb_util::DisplayWrapper::new(move |f| match self {
Self::Address(address) => write!(f, "{address:#x}"),
Self::Address(address) => write!(f, "{address}"),
Self::Word(value) => write!(f, "{value:#x}"),
Self::HalfWordUnsigned(value) => write!(f, "{value:#x}"),
Self::HalfWordSigned(value) => write!(f, "{:#}", SignedHex(value)),
@@ -76,7 +79,7 @@ impl Decodable for Inst {
dst: lui.dst,
kind: match alu.kind {
// lui << 16 + rhs
AddUnsigned(rhs) => Kind::Address((u32::join(0, lui.value).as_signed() + rhs.sign_extended::<i32>()).as_unsigned()),
AddUnsigned(rhs) => Kind::Address(Pos((u32::join(0, lui.value).as_signed() + rhs.sign_extended::<i32>()).as_unsigned())),
Or(rhs) => Kind::Word(u32::join(rhs, lui.value)),
_ => return None,
},

View File

@@ -1,13 +1,11 @@
//! Executable iterator
// Imports
use super::{
inst::{Inst, ParseIter},
Data, Func,
};
use super::{inst::ParseIter, Data, Func};
use crate::{Exe, Pos};
/// Iterator over executable parts
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct Iter<'a> {
/// Executable
exe: &'a Exe,
@@ -27,15 +25,31 @@ impl<'a> Iter<'a> {
}
/// An executable item
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum ExeItem<'a> {
/// A function
Func(&'a Func),
Func {
/// The function metadata
func: &'a Func,
/// The instructions for this function
insts: ParseIter<'a>,
},
/// A data
Data(&'a Data),
Data {
/// The data metadata
data: &'a Data,
/// Instruction
Inst(Pos, Inst<'a>),
/// The instructions for this data
insts: ParseIter<'a>,
},
/// Unknown
Unknown {
/// Instruction in this unknown section
insts: ParseIter<'a>,
},
}
impl<'a> Iterator for Iter<'a> {
@@ -43,27 +57,59 @@ impl<'a> Iterator for Iter<'a> {
fn next(&mut self) -> Option<Self::Item> {
// If we're at the end, return `None`
if self.cur_pos == Exe::MEM_END_ADDRESS {
let cur_pos = self.cur_pos;
if cur_pos >= Exe::MEM_END_ADDRESS {
return None;
}
// Try to get data
// TODO: Not skip over small data somehow? Maybe just remove the ability to have overlapping data sections
if let Some(data) = self.exe.data_table.get(self.cur_pos) {
self.cur_pos = data.end_pos();
return Some(ExeItem::Data(data));
if let Some(data) = self.exe.data_table.get(cur_pos) {
// Check the next data for our next position that isn't equal to our current one
let end_pos = match self.exe.data_table.range(cur_pos..).find(|next_data| next_data.pos != data.pos) {
// If it ends before or at the end of this data, use it
Some(next_data) if next_data.pos <= data.end_pos() => next_data.pos,
// Else end at the end of this data
_ => data.end_pos(),
};
self.cur_pos = end_pos;
return Some(ExeItem::Data {
data,
insts: ParseIter::new(&self.exe.bytes[cur_pos.as_mem_idx()..end_pos.as_mem_idx()], cur_pos),
});
}
// Else try to get a function
if let Some(func) = self.exe.func_table.get(self.cur_pos) {
self.cur_pos = func.end_pos;
return Some(ExeItem::Func(func));
return Some(ExeItem::Func {
func,
insts: ParseIter::new(&self.exe.bytes[cur_pos.as_mem_idx()..func.end_pos.as_mem_idx()], cur_pos),
});
}
// Else simply get an instruction
let mut iter = ParseIter::new(&self.exe.bytes[self.cur_pos.as_mem_idx()..], self.cur_pos);
let (pos, inst) = iter.next().expect("Iterator was empty before code ending");
self.cur_pos = iter.cur_pos();
Some(ExeItem::Inst(pos, inst))
// Else return an iterator until the next data / function, or until end, if none or past the end.
let next_data = self.exe.data_table.range(cur_pos..).next();
let next_func = self.exe.func_table.range(cur_pos..).next();
let end_pos = match (next_data, next_func) {
(Some(next_data), Some(next_func)) => match next_data.pos < next_func.start_pos {
true => next_data.pos,
false => next_func.start_pos,
},
(Some(next_data), None) => next_data.pos,
(None, Some(next_func)) => next_func.start_pos,
(None, None) => Exe::MEM_END_ADDRESS,
};
// Make sure to limit the end position
let end_pos = end_pos.min(Exe::MEM_END_ADDRESS);
self.cur_pos = end_pos;
Some(ExeItem::Unknown {
insts: ParseIter::new(&self.exe.bytes[cur_pos.as_mem_idx()..end_pos.as_mem_idx()], cur_pos),
})
}
}

View File

@@ -68,6 +68,8 @@
#![allow(clippy::else_if_without_else)]
// We're usually fine with missing future variants
#![allow(clippy::wildcard_enum_match_arm)]
// We're fine with it
#![allow(clippy::match_bool)]
// Modules
mod cli;
@@ -75,8 +77,17 @@ mod cli;
mod logger;
// Imports
use std::fmt;
use anyhow::Context;
use dcb_exe::exe::inst::InstFmt;
use dcb_exe::{
exe::{
inst::{basic, pseudo, Inst, InstFmt},
iter::ExeItem,
Func,
},
Pos,
};
use dcb_io::GameFile;
#[allow(clippy::cognitive_complexity, clippy::too_many_lines)] // TODO: Refactor
@@ -97,78 +108,97 @@ fn main() -> Result<(), anyhow::Error> {
println!("Header:\n{}", exe.header());
for (pos, inst) in exe.parse_iter() {
println!("{}: {}", pos, inst.fmt_value(pos));
}
/*
for item in exe.iter() {
match item {
// For each function or header, print a header and all its instructions
ExeItem::Func(func) => {
println!("####################");
ExeItem::Func { func, insts } => {
println!();
println!("{}:", func.name);
if !func.signature.is_empty() {
println!("# {}", func.signature);
}
for description in func.desc.lines() {
println!("# {}", description);
println!("# {description}");
}
/*
#[allow(
clippy::as_conversions,
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)] // TODO: Check if this is fine
let pos_width = ((func.end_pos - func.start_pos) as f64).log10() as usize;
for pos in (func.start_pos.0..func.end_pos.0).map(Pos) {
let inst = exe.get(pos).expect("Unable to get function instruction");
println!(
"{:0width$}: {}",
SignedHex(pos - func.start_pos),
inst.fmt_value(pos, &*exe.bytes),
width = pos_width,
);
for (pos, label) in &func.labels {
println!("# {pos}: .{label}");
}
for (pos, inst) in insts {
// If there's a label, print it
if let Some(label) = func.labels.get(&pos) {
println!("\t.{label}:");
}
// Write the position
print!("{pos}: ");
/// Looks up a function, data or label, if possible, else returns the position.
///
/// The lookups are tried in this order, and the first hit wins:
/// 1. a label of `func` registered exactly at `pos`,
/// 2. a function from `exe`'s function table that contains `pos`,
/// 3. a data from `exe`'s data table that contains `pos`.
///
/// For functions and data, the bare name is written when `pos` is the start
/// of the item; otherwise the name is followed by the distance from the start,
/// formatted as explicitly-signed, `0x`-prefixed hex.
fn inst_target<'a>(exe: &'a dcb_exe::Exe, func: &'a Func, pos: Pos) -> impl fmt::Display + 'a {
// The lookups run each time the returned value is formatted, not when it is created
dcb_util::DisplayWrapper::new(move |f| {
// Labels of the current function take priority over any table entry
if let Some(label) = func.labels.get(&pos) {
return write!(f, "{}", label);
}
// Note: this `func` shadows the parameter with the function found in the table
if let Some(func) = exe.func_table().get(pos) {
return match func.start_pos == pos {
true => write!(f, "{}", func.name),
false => write!(f, "{}{:+#x}", func.name, pos - func.start_pos),
};
}
if let Some(data) = exe.data_table().get(pos) {
return match data.pos == pos {
true => write!(f, "{}", data.name),
false => write!(f, "{}{:+#x}", data.name, pos - data.pos),
};
}
// Nothing known lives at `pos`, fall back to the raw position
write!(f, "{}", pos)
})
}
// If it's a jump, check if we can replace it with a label
#[rustfmt::skip]
match inst {
Inst::Basic (basic ::Inst::Cond (inst)) => print!("{}", inst.fmt_target(inst_target(&exe, func, inst.target(pos)))),
Inst::Basic (basic ::Inst::Jmp (inst)) => print!("{}", inst.fmt_value(pos)),
Inst::Basic (basic ::Inst::Load (inst)) => print!("{}", inst.fmt_value(pos)),
Inst::Basic (basic ::Inst::Store (inst)) => print!("{}", inst.fmt_value(pos)),
Inst::Pseudo(pseudo::Inst::LoadImm(inst)) => print!("{}", inst.fmt_value(pos)),
Inst::Pseudo(pseudo::Inst::Jmp (inst)) => print!("{}", inst.fmt_value(pos)),
Inst::Pseudo(pseudo::Inst::Load (inst)) => print!("{}", inst.fmt_value(pos)),
Inst::Pseudo(pseudo::Inst::Store (inst)) => print!("{}", inst.fmt_value(pos)),
inst => print!("{}", inst.fmt_value(pos)),
};
// If there's a comment, print it
if let Some(comment) = func.comments.get(&pos) {
print!(" # {comment}");
}
println!();
}
*/
println!("####################");
},
ExeItem::Data(data) => {
println!("####################");
ExeItem::Data { data, insts } => {
println!();
println!("{}:", data.name);
for description in data.desc.lines() {
println!("# {}", description);
println!("# {description}");
}
/*
#[allow(
clippy::as_conversions,
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)] // TODO: Check if this is fine
let pos_width = f64::from(data.size()).log10() as usize;
for pos in (data.pos.0..data.end_pos().0).map(Pos) {
let inst = exe.get(pos).expect("Unable to get data instruction");
println!(
"{:0width$}: {}",
SignedHex(pos - data.pos),
inst.fmt_value(pos, &*exe.bytes),
width = pos_width,
);
for (pos, inst) in insts {
println!("{}: {}", pos, inst.fmt_value(pos));
}
*/
println!("####################");
},
// If it's standalone, print it by it's own
ExeItem::Inst(pos, inst) => {
println!("{}: {}", pos, inst.fmt_value(pos));
ExeItem::Unknown { insts } => {
for (pos, inst) in insts {
println!("{pos}: {}", inst.fmt_value(pos));
}
},
}
}
*/
/*
// Build the full instructions iterator