diff --git a/dcb-tools/src/decompiler/main.rs b/dcb-tools/src/decompiler/main.rs index f9a0f4e..cdc4339 100644 --- a/dcb-tools/src/decompiler/main.rs +++ b/dcb-tools/src/decompiler/main.rs @@ -139,15 +139,21 @@ fn main() -> Result<(), anyhow::Error> { .collect(); // Build the full instructions iterator + // TODO: Revamp this, iterate over an enum of `Func | Data | Other` let full_iter = functions .with_instructions(instructions.iter().map(|(pos, instruction)| (*pos, instruction))) .scan(None, |last_instruction, output @ (_, cur_instruction, _)| { Some((output, last_instruction.replace(cur_instruction))) - }); + }) + .map(|((cur_pos, instruction, cur_func), last_instruction)| (cur_pos, instruction, last_instruction, cur_func)) + .scan(None, |last_func, output @ (_, _, cur_func, _)| { + Some((output, last_func.replace(cur_func))) + }) + .map(|((cur_pos, instruction, last_instruction, cur_func), last_func)| (cur_pos, instruction, last_instruction, cur_func, last_func)); // Read all instructions let mut skipped_nops = 0; - for ((cur_pos, instruction, cur_func), last_instruction) in full_iter { + for (cur_pos, instruction, last_instruction, cur_func, last_func) in full_iter { // Note: Required by `rust-analyzer` currently, it can't determine the type of `cur_func`. let cur_func: Option<&dcb::game::exe::Func> = cur_func; @@ -164,13 +170,30 @@ fn main() -> Result<(), anyhow::Error> { skipped_nops = 0; } + // If we just exited a function, space it out. + /* + if last_func.is_some() && cur_func.is_none() { + println!("####################"); + println!(); + } + */ + + // Space out data if it had a name + if let Some(data) = data_pos.get(cur_pos) { + if data.end_pos() == cur_pos && !data.name.is_empty() { + println!(); + } + } + // Check if we need to prefix if let Some(cur_func) = cur_func { if cur_func.start_pos == cur_pos { println!(); println!("####################"); println!("{}:", cur_func.name); - println!("# {}\n#", cur_func.signature); + if !cur_func.signature.is_empty() { + println!("# {}", cur_func.signature); + } for description in cur_func.desc.lines() { println!("# {}", description); } @@ -180,13 +203,17 @@ fn main() -> Result<(), anyhow::Error> { } } if let Some(data) = data_pos.get(cur_pos) { - if data.start_pos() == cur_pos { - println!("\t{}:", data.name()); + if data.start_pos == cur_pos { + println!("{}:", data.name); + println!("# {}", data.kind); + for description in data.desc.lines() { + println!("# {}", description); + } } } // Print the instruction and it's location. - print!("{cur_pos:#010x}: "); + print!("{cur_pos:#010x}:\t"); match instruction { Instruction::Simple( SimpleInstruction::J { target } | @@ -230,7 +257,7 @@ fn main() -> Result<(), anyhow::Error> { ) => match functions .get(Pos(*target)) .map(|func| (func.start_pos, &func.name)) - .or_else(|| data_pos.get(Pos(*target)).map(|data| (data.start_pos(), data.name()))) + .or_else(|| data_pos.get(Pos(*target)).map(|data| (data.start_pos, &data.name))) { Some((start_pos, name)) => { if start_pos == Pos(*target) { @@ -254,8 +281,8 @@ fn main() -> Result<(), anyhow::Error> { print!(" # {}", func.name); } if let Some(data) = data_pos.get(Pos(*target)) { - if data.start_pos() == Pos(*target) { - print!(" # {}", data.name()); + if data.start_pos == Pos(*target) { + print!(" # {}", data.name); } } } @@ -269,15 +296,6 @@ fn main() -> Result<(), anyhow::Error> { // And finish the line println!(); - - // If this is the last instruction in this function, space it out - // TODO: This can fail when the last instruction is more than 4 bytes - if let Some(cur_func) = cur_func { - if cur_func.end_pos == cur_pos + 4 { - println!("####################"); - println!(); - } - } } Ok(()) diff --git a/dcb/Cargo.toml b/dcb/Cargo.toml index 154f72a..0c5a6d8 100644 --- a/dcb/Cargo.toml +++ b/dcb/Cargo.toml @@ -21,6 +21,7 @@ indoc = "1.0" bitmatch = "0.1" maplit = "1.0" either = "1.6" +smallvec = "1.4" # Serde serde = { version = "1.0", features = ["derive"] } diff --git a/dcb/src/game/exe/data.rs b/dcb/src/game/exe/data.rs index 2ee0049..f03863d 100644 --- a/dcb/src/game/exe/data.rs +++ b/dcb/src/game/exe/data.rs @@ -12,45 +12,87 @@ use crate::game::exe::Pos; /// Executable data #[derive(Clone, Debug)] #[derive(serde::Serialize, serde::Deserialize)] -pub enum Data> { - /// An ascii string - Ascii { - /// Name - name: S, +pub struct Data> { + /// Name + pub name: S, - /// Description - desc: S, + /// Description + pub desc: S, - /// Start position - start_pos: Pos, + /// Start position + pub start_pos: Pos, + + /// Data kind + pub kind: DataKind, +} + +impl> Data { + /// Returns the end position of this data + pub fn end_pos(&self) -> Pos { + self.start_pos + self.kind.size() + } +} + +/// Data kind +#[derive(Clone, Debug)] +#[derive(serde::Serialize, serde::Deserialize)] +#[derive(derive_more::Display)] +pub enum DataKind { + /// Ascii string + // TODO: Maybe somehow get rid of the length? + #[display(fmt = "str")] + AsciiStr { + /// String length + len: u32, }, - /// Bytes - Bytes { - /// Name - name: S, + /// Word + #[display(fmt = "u32")] + Word, - /// Description - desc: S, + /// Half-word + #[display(fmt = "u16")] + HalfWord, - /// Start position - start_pos: Pos, + /// Byte + #[display(fmt = "u8")] + Byte, + + /// Array + #[display(fmt = "[{ty}; {len}]")] + Array { + /// Array type + ty: Box, + + /// Array length + len: u32, }, } +impl DataKind { + /// Returns the size of this data kind + #[must_use] + pub fn size(&self) -> u32 { + match self { + Self::AsciiStr { len } => len + 4 - (len % 4), + Self::Word => 4, + Self::HalfWord => 2, + Self::Byte => 1, + Self::Array { ty, len } => ty.size() * len, + } + } +} + impl> std::borrow::Borrow for Data { fn borrow(&self) -> &Pos { - match self { - Self::Ascii { start_pos, .. } => start_pos, - Self::Bytes { start_pos, .. } => start_pos, - } + &self.start_pos } } impl> PartialEq for Data { fn eq(&self, other: &Self) -> bool { // Only compare the start position - self.start_pos().eq(&other.start_pos()) + self.start_pos.eq(&other.start_pos) } } @@ -58,7 +100,7 @@ impl> Eq for Data {} impl> std::hash::Hash for Data { fn hash(&self, state: &mut H) { - self.start_pos().hash(state); + self.start_pos.hash(state); } } @@ -71,143 +113,136 @@ impl> PartialOrd for Data { impl> Ord for Data { fn cmp(&self, other: &Self) -> std::cmp::Ordering { // Only compare the start position - self.start_pos().cmp(&other.start_pos()) + self.start_pos.cmp(&other.start_pos) } } -impl> Data { - /// Accesses the name of this data - pub fn name(&self) -> &S { - match self { - Self::Ascii { name, .. } => name, - Self::Bytes { name, .. } => name, - } - } - - /// Accesses the description of this data - pub fn desc(&self) -> &S { - match self { - Self::Ascii { desc, .. } => desc, - Self::Bytes { desc, .. } => desc, - } - } - - /// Accesses the start position of this data - pub fn start_pos(&self) -> Pos { - match self { - Self::Ascii { start_pos, .. } => *start_pos, - Self::Bytes { start_pos, .. } => *start_pos, - } - } -} - - impl Data<&'static str> { /// Returns an iterator of all known data #[allow(clippy::too_many_lines)] // This will be big, as it's the list of ALL known data pub fn known() -> impl Iterator { std::array::IntoIter::new([ - Self::Bytes { + Self { name: "StackTop", desc: "Stack top address", start_pos: Pos(0x8006dd44), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "StackSize", desc: "Stack size", start_pos: Pos(0x8006dd48), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "ZeroStart", desc: "Start of the zero section in `start`", start_pos: Pos(0x80077a08), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "HeapStart", desc: "Start of the heap", start_pos: Pos(0x801ddf38), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "something1_data1", desc: "", start_pos: Pos(0x8006f984), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "something1_data2", desc: "", start_pos: Pos(0x80010000), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "something5_data1", desc: "", start_pos: Pos(0x8006fa20), + kind: DataKind::HalfWord, }, - Self::Bytes { + Self { name: "I_STAT_PTR", desc: "", start_pos: Pos(0x80070aac), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "I_MASK_PTR", desc: "", start_pos: Pos(0x80070ab0), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "DPCR_PTR", desc: "", start_pos: Pos(0x80070ab4), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "something5_data5", desc: "", start_pos: Pos(0x8006fa5c), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "FuncList1", desc: "", start_pos: Pos(0x80070a88), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "FuncList1Ptr", desc: "Pointer to FuncList1", start_pos: Pos(0x80070aa8), + kind: DataKind::Word, }, // Hardware registers // 0x1f80_1000 - 0x1f80_2fff - Self::Bytes { + Self { name: "I_STAT", desc: "Interrupt status register", start_pos: Pos(0x1f801070), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "I_MASK", desc: "Interrupt mask register", start_pos: Pos(0x1f801074), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "DPCR", desc: "DMA Control register", start_pos: Pos(0x1f8010f0), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "DICR", desc: "DMA Interrupt register", start_pos: Pos(0x1f8010f4), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "Timer0", desc: "", start_pos: Pos(0x1f801100), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "Timer1", desc: "", start_pos: Pos(0x1f801110), + kind: DataKind::Word, }, - Self::Bytes { + Self { name: "Timer2", desc: "", start_pos: Pos(0x1f801120), + kind: DataKind::Word, }, ]) } diff --git a/dcb/src/game/exe/data/all_data.rs b/dcb/src/game/exe/data/all_data.rs index 35c2574..e49f1c1 100644 --- a/dcb/src/game/exe/data/all_data.rs +++ b/dcb/src/game/exe/data/all_data.rs @@ -1,7 +1,7 @@ //! Data list // Imports -use super::Data; +use super::{Data, DataKind}; use crate::{ game::exe::{ instruction::{Directive, PseudoInstruction}, @@ -47,17 +47,11 @@ impl + Into> AllData { AllData( self.0 .into_iter() - .map(|data| match data { - Data::Ascii { name, desc, start_pos } => Data::Ascii { - name: name.into(), - desc: desc.into(), - start_pos, - }, - Data::Bytes { name, desc, start_pos } => Data::Bytes { - name: name.into(), - desc: desc.into(), - start_pos, - }, + .map(|data| Data { + name: data.name.into(), + desc: data.desc.into(), + start_pos: data.start_pos, + kind: data.kind, }) .collect(), ) @@ -110,18 +104,35 @@ impl AllData { _ => None, }) .zip(0..) - .map(|((pos, directive), idx)| match directive { - Directive::Ascii(_) => Data::Ascii { - name: format!("string_{idx}"), - desc: "".to_string(), - start_pos: pos, - }, + .map(|((pos, directive), idx)| { + #[allow(clippy::as_conversions, clippy::cast_possible_truncation)] // All strings will fit into a `u32` + match directive { + Directive::Ascii(ascii) => Data { + name: format!("string_{idx}"), + desc: String::new(), + start_pos: pos, + kind: DataKind::AsciiStr { len: ascii.len() as u32 }, + }, - Directive::Dw(_) => Data::Bytes { - name: format!("data_{idx}"), - desc: "".to_string(), - start_pos: pos, - }, + Directive::Dw(_) => Data { + name: format!("w{idx}"), + desc: String::new(), + start_pos: pos, + kind: DataKind::Word, + }, + Directive::Dh(_) => Data { + name: format!("h{idx}"), + desc: String::new(), + start_pos: pos, + kind: DataKind::HalfWord, + }, + Directive::Db(_) => Data { + name: format!("b{idx}"), + desc: String::new(), + start_pos: pos, + kind: DataKind::Byte, + }, + } }) .collect(), ) diff --git a/dcb/src/game/exe/func.rs b/dcb/src/game/exe/func.rs index 13d6295..9e22700 100644 --- a/dcb/src/game/exe/func.rs +++ b/dcb/src/game/exe/func.rs @@ -69,7 +69,7 @@ impl Func<&'static str> { std::array::IntoIter::new([ Self { name: "InitHeap", - signature: "fn(addr: *int, size: int)", + signature: "fn(addr: *u32, size: u32)", desc: "Calls A(0x39)", comments: hashmap! { Pos(0x8006a738) => "Register tailcall. Likely to prevent calling in KSEG0 and do it in KUSEG", @@ -101,7 +101,7 @@ impl Func<&'static str> { }, Self { name: "something1", - signature: "fn(arg: int)", + signature: "fn(arg: u32)", desc: indoc! {" This function checks if *something1_data1 is positive, if so decreases it by 1 and calls call_func_arr with (something1_data2, something1_data2). @@ -138,7 +138,7 @@ impl Func<&'static str> { }, Self { name: "something2", - signature: "fn(start: *int)", + signature: "fn(start: *u32)", desc: "", comments: hashmap! { Pos(0x80013e54) => "args: (start)", @@ -209,7 +209,7 @@ impl Func<&'static str> { }, Self { name: "save_registers", - signature: "fn(int* pos)", + signature: "fn(u32* pos)", desc: indoc! {" Saves the following registers in `pos[0x0 .. 0x30]`. $ra, $gp, $sp, $fp, @@ -223,7 +223,7 @@ impl Func<&'static str> { }, Self { name: "memset_zero", - signature: "fn(int* ptr, int size)", + signature: "fn(u32* ptr, u32 size)", desc: indoc! {" Zeroes out the memory at `ptr` for `size` words. "}, diff --git a/dcb/src/game/exe/func/funcs.rs b/dcb/src/game/exe/func/funcs.rs index eac26e1..08e64d8 100644 --- a/dcb/src/game/exe/func/funcs.rs +++ b/dcb/src/game/exe/func/funcs.rs @@ -149,8 +149,8 @@ impl Funcs { .collect(); Func { name: format!("func_{idx}"), - signature: "".to_string(), - desc: "".to_string(), + signature: String::new(), + desc: String::new(), comments: hashmap! {}, labels, start_pos: target, diff --git a/dcb/src/game/exe/instruction/directive.rs b/dcb/src/game/exe/instruction/directive.rs index 085674c..12cd89a 100644 --- a/dcb/src/game/exe/instruction/directive.rs +++ b/dcb/src/game/exe/instruction/directive.rs @@ -4,6 +4,8 @@ use super::{FromRawIter, Instruction, Raw}; use crate::game::exe::Pos; use ascii::{AsciiChar, AsciiStr, AsciiString}; +use int_conv::Split; +use smallvec::{smallvec, SmallVec}; use std::ops::{ Bound::{self, Excluded, Included, Unbounded}, RangeBounds, @@ -18,17 +20,85 @@ pub enum Directive { #[display(fmt = "dw {_0:#x}")] Dw(u32), + /// Write half-word + #[display(fmt = "dh {_0:#x}")] + Dh(u16), + + /// Write byte + #[display(fmt = "db {_0:#x}")] + Db(u8), + /// Ascii string #[display(fmt = ".ascii {_0:?}")] Ascii(AsciiString), } +/// A force decode range +pub struct ForceDecodeRange { + /// Start bound + start: Bound, + + /// End bound + end: Bound, + + /// Decoding kind + kind: ForceDecodeKind, +} + +impl RangeBounds for ForceDecodeRange { + fn start_bound(&self) -> Bound<&Pos> { + match self.start { + Included(ref start) => Included(start), + Excluded(ref start) => Excluded(start), + Unbounded => Unbounded, + } + } + + fn end_bound(&self) -> Bound<&Pos> { + match self.end { + Included(ref end) => Included(end), + Excluded(ref end) => Excluded(end), + Unbounded => Unbounded, + } + } +} + +/// Force decode range kind +pub enum ForceDecodeKind { + /// Single Word + W, + + /// Two half-words + HH, + + /// Half-word followed by bytes + HBB, + + /// Bytes followed by half-word + BBH, + + /// Bytes + BBBB, +} + impl Directive { - /// All range of positions that should be force decoded - /// as `dw`. - pub const FORCE_DW_RANGES: &'static [(Bound, Bound)] = &[ - (Included(Pos(0x80010000)), Excluded(Pos(0x80010008))), - (Included(Instruction::CODE_END), Unbounded), + /// Positions that should be force decoded using a specific variant. + pub const FORCE_DECODE_RANGES: &'static [ForceDecodeRange] = &[ + ForceDecodeRange { + start: Included(Pos(0x80010000)), + end: Excluded(Pos(0x80010008)), + kind: ForceDecodeKind::W, + }, + ForceDecodeRange { + start: Included(Pos(0x8006fa20)), + end: Excluded(Pos(0x8006fa24)), + kind: ForceDecodeKind::HH, + }, + ForceDecodeRange { + start: Included(Instruction::CODE_END), + end: Unbounded, + kind: ForceDecodeKind::W, + }, ]; /// Returns the size of this instruction @@ -37,7 +107,14 @@ impl Directive { #[allow(clippy::as_conversions, clippy::cast_possible_truncation)] // Our length will always fit into a `u32`. match self { Self::Dw(_) => 4, - Self::Ascii(ascii) => 4 * (ascii.len() as u32), + Self::Dh(_) => 2, + Self::Db(_) => 1, + // Round ascii strings' len up to the + // nearest word. + Self::Ascii(ascii) => { + let len = ascii.len() as u32; + len + 4 - (len % 4) + }, } } } @@ -57,18 +134,50 @@ fn check_nulls>(s: S) -> (S, usize, bool) { (s, null_idx, uniform_null) } - impl FromRawIter for Directive { - type Decoded = Option<(Pos, Self)>; + //type Decoded = Option<(Pos, Self)>; + // Note: We return at most 4 directives. + type Decoded = SmallVec<[(Pos, Self); 4]>; fn decode + Clone>(iter: &mut I) -> Self::Decoded { // Get the first raw - let raw = iter.next()?; + let raw = match iter.next() { + Some(raw) => raw, + None => return smallvec![], + }; // If we're past all the code, there are no more strings, // so just decode a `dw`. - if Self::FORCE_DW_RANGES.iter().any(|range| range.contains(&raw.pos)) { - return Some((raw.pos, Self::Dw(raw.repr))); + // Note: We're working in big endian when returning these. + if let Some(ForceDecodeRange { kind, .. }) = Self::FORCE_DECODE_RANGES.iter().find(|range| range.contains(&raw.pos)) { + return match kind { + ForceDecodeKind::W => smallvec![(raw.pos, Self::Dw(raw.repr))], + ForceDecodeKind::HH => { + let (lo, hi) = raw.repr.lo_hi(); + smallvec![(raw.pos, Self::Dh(hi)), (raw.pos + 2, Self::Dh(lo))] + }, + ForceDecodeKind::HBB => { + let (lo, hi) = raw.repr.lo_hi(); + let (lo_lo, lo_hi) = lo.lo_hi(); + smallvec![(raw.pos, Self::Dh(hi)), (raw.pos + 2, Self::Db(lo_hi)), (raw.pos + 3, Self::Db(lo_lo))] + }, + ForceDecodeKind::BBH => { + let (lo, hi) = raw.repr.lo_hi(); + let (hi_lo, hi_hi) = hi.lo_hi(); + smallvec![(raw.pos, Self::Db(hi_hi)), (raw.pos + 1, Self::Db(hi_lo)), (raw.pos + 2, Self::Dh(lo))] + }, + ForceDecodeKind::BBBB => { + let (lo, hi) = raw.repr.lo_hi(); + let (lo_lo, lo_hi) = lo.lo_hi(); + let (hi_lo, hi_hi) = hi.lo_hi(); + smallvec![ + (raw.pos, Self::Db(hi_hi)), + (raw.pos + 1, Self::Db(hi_lo)), + (raw.pos + 2, Self::Db(lo_hi)), + (raw.pos + 3, Self::Db(lo_lo)) + ] + }, + }; } // Try to get an ascii string from the raw and check for nulls @@ -77,7 +186,7 @@ impl FromRawIter for Directive { // at least 1 null and uniformly null, return just it Ok((mut ascii_string, null_idx @ 1..=3, true)) => { ascii_string.truncate(null_idx); - Some((raw.pos, Self::Ascii(ascii_string))) + smallvec![(raw.pos, Self::Ascii(ascii_string))] }, // If we got a string without any nulls, keep @@ -115,12 +224,12 @@ impl FromRawIter for Directive { } }; - Some((raw.pos, Self::Ascii(ascii_string))) + smallvec![(raw.pos, Self::Ascii(ascii_string))] }, // Else if it was full null, non-uniformly null or non-ascii, - // try to get a dw table - _ => Some((raw.pos, Self::Dw(raw.repr))), + // just return a normal word. + _ => smallvec![(raw.pos, Self::Dw(raw.repr))], } } } diff --git a/dcb/src/lib.rs b/dcb/src/lib.rs index d245ad6..28466f8 100644 --- a/dcb/src/lib.rs +++ b/dcb/src/lib.rs @@ -50,7 +50,8 @@ const_assume, bindings_after_at, array_value_iter, - or_patterns + or_patterns, + once_cell )] // Lints #![warn(clippy::restriction, clippy::pedantic, clippy::nursery)]