diff --git a/dcb-exe/src/func.rs b/dcb-exe/src/func.rs index 3924354..d038327 100644 --- a/dcb-exe/src/func.rs +++ b/dcb-exe/src/func.rs @@ -30,6 +30,10 @@ pub struct Func { #[serde(default)] pub desc: String, + /// Inline Comments + #[serde(default)] + pub inline_comments: BTreeMap, + /// Comments #[serde(default)] pub comments: BTreeMap, diff --git a/dcb-exe/src/func/table.rs b/dcb-exe/src/func/table.rs index 3b26cd0..54158e1 100644 --- a/dcb-exe/src/func/table.rs +++ b/dcb-exe/src/func/table.rs @@ -179,6 +179,7 @@ impl FuncTable { name: format!("func_{idx}"), signature: "fn()".to_owned(), desc: String::new(), + inline_comments: BTreeMap::new(), comments: BTreeMap::new(), labels, start_pos: func_pos, diff --git a/dcb-tools/dcb-compiler/src/main.rs b/dcb-tools/dcb-compiler/src/main.rs index 7896539..763a581 100644 --- a/dcb-tools/dcb-compiler/src/main.rs +++ b/dcb-tools/dcb-compiler/src/main.rs @@ -7,6 +7,8 @@ mod cli; // Imports use anyhow::Context; +use dcb_exe::inst::parse::InstParser; +use std::io::Write; fn main() -> Result<(), anyhow::Error> { // Initialize the logger @@ -17,10 +19,16 @@ fn main() -> Result<(), anyhow::Error> { let cli = cli::CliData::new(); // Open the input and output file - let _input_file = std::fs::File::open(&cli.input_path).context("Unable to open input file")?; - let _output_file = std::fs::File::open(&cli.output_file_path).context("Unable to open output file")?; + let input_file = std::fs::File::open(&cli.input_path).context("Unable to open input file")?; + let mut output_file = std::fs::File::create(&cli.output_file_path).context("Unable to open output file")?; - // Read the executable + // Read the input + let parser = InstParser::new(input_file); + + // For each instruction, output it + for inst in parser { + writeln!(output_file, "{:?}", inst).context("Unable to write to output file")?; + } /* let exe = Exe::new(); diff --git a/dcb-tools/dcb-decompiler/src/main.rs b/dcb-tools/dcb-decompiler/src/main.rs index 381116d..88c469d 
100644 --- a/dcb-tools/dcb-decompiler/src/main.rs +++ b/dcb-tools/dcb-decompiler/src/main.rs @@ -1,6 +1,14 @@ //! Decompiler -#![feature(box_syntax, backtrace, panic_info_message, array_chunks, format_args_capture, bindings_after_at)] +#![feature( + box_syntax, + backtrace, + panic_info_message, + array_chunks, + format_args_capture, + bindings_after_at, + iter_map_while +)] // Modules mod cli; @@ -12,7 +20,7 @@ use dcb_exe::{ reader::iter::ExeItem, ExeReader, Func, Pos, }; -use std::fmt; +use std::{collections::BTreeMap, fmt}; fn main() -> Result<(), anyhow::Error> { // Initialize the logger @@ -33,10 +41,19 @@ fn main() -> Result<(), anyhow::Error> { println!("Header:\n{}", exe.header()); } + // Instruction buffers + let mut inst_buffers: BTreeMap = BTreeMap::new(); + + // If currently in an inline-comment alignment + let mut cur_inline_comment_alignment_max_inst_len: Option = None; + for item in exe.iter() { match item { // For each function or header, print a header and all it's instructions ExeItem::Func { func, insts } => { + // Drop any old instruction buffers + inst_buffers = inst_buffers.split_off(&func.start_pos); + println!("\n##########"); println!("{}:", func.name); if !func.signature.is_empty() { @@ -45,22 +62,91 @@ fn main() -> Result<(), anyhow::Error> { for description in func.desc.lines() { println!("# {description}"); } - for (pos, inst) in insts { + + let insts: Vec<_> = insts.collect(); + for (cur_n, (pos, inst)) in insts.iter().enumerate() { + // If there's a comment, print it + if let Some(comment) = func.comments.get(pos) { + // Iterate over the lines in the comment + for line in comment.lines() { + println!("# {line}"); + } + } + // If there's a label, print it - if let Some(label) = func.labels.get(&pos) { + if let Some(label) = func.labels.get(pos) { println!("\t.{label}:"); } + // If we don't have a comment, remove the current alignment + if !func.inline_comments.contains_key(&pos) { + cur_inline_comment_alignment_max_inst_len = None; + 
} + + // If we don't have any alignment padding, and this instruction and the next have inline comments, + // set the inline alignment + if cur_inline_comment_alignment_max_inst_len.is_none() && + func.inline_comments.contains_key(&pos) && + func.inline_comments.contains_key(&(pos + 4)) + { + let max_inst_len = (0..) + .map_while(|n| { + // If the next instruction doesn't have a comment, return + let offset = 4 * n; + let pos = pos + offset; + if !func.inline_comments.contains_key(&pos) { + return None; + } + + // Then build the instruction + let inst = &insts.get(cur_n + n)?.1; + let inst = inst_buffers + .entry(pos) + .or_insert_with(|| self::inst_display(inst, &exe, Some(func), pos).to_string()); + let inst_len = inst.len(); + + Some(inst_len) + }) + .max() + .expect("Next instruction had an inline comment"); + + cur_inline_comment_alignment_max_inst_len = Some(max_inst_len); + } + // Write the position if cli.print_inst_pos { print!("{pos}:"); } - // Write the instruction - print!("\t{}", self::inst_display(&inst, &exe, Some(func), pos)); + // If the instruction is already in the buffer, use it without removing it + match inst_buffers.get(&pos) { + Some(inst) => print!("\t{inst}"), + None => print!("\t{}", self::inst_display(&inst, &exe, Some(func), *pos)), + } + + // If there's an inline comment, print it + if let Some(comment) = func.inline_comments.get(&pos) { + // Replace any newlines with '\n' + let modified_comment; + let comment = match comment.contains('\n') { + true => { + modified_comment = comment.replace("\n", "\\n"); + &modified_comment + }, + false => comment, + }; + + // If we have alignment padding, apply it + if let Some(max_inst_len) = cur_inline_comment_alignment_max_inst_len { + let inst = inst_buffers + .get(&pos) + .expect("Instruction wasn't in buffer during inline comment alignment"); + let padding = max_inst_len - inst.len(); + for _ in 0..padding { + print!(" "); + } + } - // If there's a comment, print it - if let Some(comment) = func.comments.get(&pos) 
{ print!(" # {comment}"); } diff --git a/resources/game_data.yaml b/resources/game_data.yaml index 411a1fd..89f9fed 100644 --- a/resources/game_data.yaml +++ b/resources/game_data.yaml @@ -84,6 +84,11 @@ pos: 0x80077a08 ty: u32 kind: Known +- name: cur_prng_value + desc: Current prng value + pos: 0x801ddc10 + ty: u32 + kind: Known - name: HeapStart desc: Start of the heap pos: 0x801ddf38 @@ -122,7 +127,10 @@ kind: Known - name: FuncList1 pos: 0x80070a88 - ty: u32 + ty: + Array: + ty: u32 + len: 8 kind: Known - name: FuncList1Ptr desc: Pointer to FuncList1 diff --git a/resources/game_funcs.yaml b/resources/game_funcs.yaml index 518e97c..4b75ea4 100644 --- a/resources/game_funcs.yaml +++ b/resources/game_funcs.yaml @@ -3,7 +3,7 @@ - name: start signature: fn() -> ! desc: Executable start - comments: + inline_comments: 0x80056280: Zero out ZeroStart .. HeapStart word by word. 0x80056284: ^ 0x80056288: ^ @@ -22,7 +22,7 @@ signature: fn() -> ! desc: Runs the main loop of the game. Never seems to return. - comments: + inline_comments: 0x80013e50: The return address is stored, but it seems it's never popped. 0x80013e54: Just sets `something1_executed` to 1. 0x80013e5c: Setup interrupts and DMA and possibly other stuff. @@ -43,7 +43,7 @@ desc: If something1_executed is 0, sets it to 1. Also calls `call_func_arr(something1_data2, something1_data2)` if 0, but this seems to be a nop. - comments: + inline_comments: 0x80056348: If *something1_executed != 0, skip 0x80056350: Else set it to 1. 0x80056368: "args: (something1_data2, something1_data2). Seems like a nop?" @@ -59,7 +59,7 @@ After running the game for a bit it doesn't seem to be triggered, all the way from the start of a new game to the battle arena fight. signature: "fn(start: *fn(), end: *fn())" - comments: + inline_comments: 0x800563a0: If `start >= end`, return 0x800563a8: Load the current function 0x800563b0: If it was null, skip the call. 
@@ -77,7 +77,7 @@ - name: something3 signature: fn() desc: Calls `something5`. - comments: + inline_comments: 0x80056604: "Loads FuncList1[3]" 0x8005660c: "Calls FuncList1[3] (i.e. something5)" start_pos: 0x800565f4 @@ -88,7 +88,7 @@ signature: fn() desc: Setups interrupt and DMA. If called again, simply returns. - comments: + inline_comments: 0x8005679c: Loads *something5_data1 0x800567a4: "If the loaded value is not zero, exit" 0x800567c0: Stop all interrupts by writing 0 to I_MASK @@ -111,7 +111,7 @@ - name: something6 signature: "fn(arg: u32)" desc: "" - comments: + inline_comments: 0x80056404: Get the gpu status from `GPU_STAT` 0x80056408: Read the horizontal retrace from timer1 0x80056418: Read the horizontal retrace again. @@ -142,7 +142,7 @@ # ? - name: something7 signature: "fn(a: u32, b: u32)" - comments: + inline_comments: 0x80056578: If something6_data1 >= a, return 0x8005659c: If (b << 15 - 1) != -1, goto try_again 0x800565dc: If something6_data1 < a, try again, else exit. @@ -174,7 +174,7 @@ - name: memset_zero signature: "fn(u32* ptr, u32 size)" desc: "Zeroes out the memory at `ptr` for `size` words.\n" - comments: + inline_comments: 0x80056c90: "If size == 0, return" 0x80056c94: size-- 0x80056c9c: "*ptr = 0" @@ -199,29 +199,72 @@ start_pos: 0x8006a6b0 end_pos: 0x8006a6fc -- name: main_loop_sub1 +- name: prng_next signature: fn() - desc: "Called on loop by `main_loop` forever, maybe an interrupt breaks - out of the loop, but not sure how this function is related to that.\n - Simply reads from 0x801ddc10, calculates `(value * 0x41c64e6d) as u32 + 0x3039`,\n - storing it back into 0x801ddc10, then returns `value << 10 | 0x7fff`, where `value` - is the new value, just stored back." + desc: |- + Advances the current prng, stored at `cur_prng_value`. + Returns `cur_prng_value << 0x10 | 0x7fff`. 
start_pos: 0x80069124 end_pos: 0x80069154 - name: something8 signature: "fn(a: u32, b: u32, c: u32, d: u32)" desc: "" - comments: + inline_comments: 0x80061910: "args: ($s1, $s0, c & 0xffff, d & 0xffff)" start_pos: 0x800618e4 end_pos: 0x80061954 +- name: modify_spu_delay1 + signature: "fn() -> u32" + desc: |- + Sets `SPU_DELAY` to `(SPU_DELAY & 0xf0fffff) | 0x2000ffff` + and returns the new value + inline_comments: + 0x8004b45c: "$v0 = *SPU_DELAY" + 0x8004b474: "*SPU_DELAY = $v0" + start_pos: 0x8004b428 + end_pos: 0x8004b450 + +- name: modify_spu_delay2 + signature: "fn() -> u32" + desc: |- + Sets `SPU_DELAY` to `(SPU_DELAY & 0xf0fffff) | 0x2200ffff` + and returns the new value + inline_comments: + 0x8004b45c: "$v0 = *SPU_DELAY" + 0x8004b474: "*SPU_DELAY = $v0" + start_pos: 0x8004b450 + end_pos: 0x8004b478 + +- name: calc_0x890e6fbd + signature: "fn() -> u32" + desc: |- + Simply calculates and returns `0x890e6fbd`. + inline_comments: + 0x8004b480: "$sp[0x4] = 0xd;" + 0x8004b488: "$sp[0x4] = 0x0;" + 0x8004b4a4: "$sp[0x4] *= 13;" + 0x8004b4b4: "$sp[0x0] += 1;" + 0x8004b4c4: "if $sp[0x0] <= 0x3c { goto .loop; }" + comments: + 0x8004b48c: |- + let value = 0xd; + for _ in 0..0x3c { + value *= 13; + } + return value; + labels: + 0x8004b48c: "loop" + 0x8004b4b8: "while_condition" + start_pos: 0x8004b478 + end_pos: 0x8004b4d4 + # A functions - name: InitHeap signature: "fn(addr: *u32, size: u32)" desc: Calls A(0x39) - comments: + inline_comments: 0x8006a738: Register tailcall. Likely to prevent calling in KSEG0 and do it in KUSEG 0x8006a73c: "arg: 0x39" start_pos: 0x8006a734 @@ -405,3 +448,297 @@ desc: Calls C(0x0a) start_pos: 0x8006a894 end_pos: 0x8006a8a0 + +# Std +- name: sprintf + signature: "fn sprintf(buffer: *char, format: *const char, ...) -> i32" + inline_comments: + 0x80069d6c: "if *buffer == '\\0' { goto .65; }" + 0x80069d84: "if *buffer == '%' { goto .58; }" + comments: + 0x80069d24: "Save arguments on wasted space." 
+ 0x80069d30: |- + Reserve stack space and save all additional + registers there + labels: + 0x80069d84: "0" + 0x80069db4: "1" + 0x80069de8: "2" + 0x80069e04: "3" + 0x80069e14: "4" + 0x80069e30: "5" + 0x80069e4c: "6" + 0x80069e84: "7" + 0x80069e98: "8" + 0x80069ed4: "9" + 0x80069ee0: "10" + 0x80069f34: "11" + 0x80069f70: "12" + 0x80069f7c: "13" + 0x80069f9c: "14" + 0x80069fbc: "15" + 0x80069fc0: "16" + 0x80069fe4: "17" + 0x80069ff0: "18" + 0x80069FFC: "66" + 0x8006a008: "19" + 0x8006a028: "20" + 0x8006a058: "21" + 0x8006a06c: "22" + 0x8006a080: "23" + 0x8006a0b0: "24" + 0x8006a0b4: "25" + 0x8006a0f4: "26" + 0x8006a108: "27" + 0x8006a118: "28" + 0x8006a14c: "29" + 0x8006a164: "30" + 0x8006a180: "31" + 0x8006a1a0: "32" + 0x8006a1d0: "33" + 0x8006a1f4: "34" + 0x8006a208: "35" + 0x8006a210: "36" + 0x8006a22c: "37" + 0x8006a268: "38" + 0x8006a280: "39" + 0x8006a2a0: "40" + 0x8006a2b4: "41" + 0x8006a2c0: "42" + 0x8006a2c8: "43" + 0x8006a2f8: "44" + 0x8006a328: "45" + 0x8006a33c: "46" + 0x8006a344: "47" + 0x8006a364: "48" + 0x8006a380: "49" + 0x8006a39c: "50" + 0x8006a3cc: "51" + 0x8006a3ec: "52" + 0x8006a43c: "53" + 0x8006a458: "54" + 0x8006a478: "55" + 0x8006a4a4: "56" + 0x8006a4ac: "57" + 0x8006a4b8: "58" + 0x8006a4c4: "59" + 0x8006a4ec: "60" + 0x8006a514: "61" + 0x8006a518: "62" + 0x8006a534: "63" + 0x8006a550: "64" + 0x8006a570: "65" + start_pos: 0x80069d24 + end_pos: 0x8006a5a4 + +- name: memcpy_args_reversed + signature: "fn(src: *const u8, dst: *mut u8, len: u32) -> *mut u8" + desc: |- + `memcpy` with the first two arguments swapped. 
+ inline_comments: + 0x80069044: "if src == NULL { return 0; }" + 0x80069048: "" + 0x8006904c: "if len <= 0 { return src; }" + comments: + 0x80069054: |- + do { + *dst = *src; + src += 1; + len -= 1; + dst += 1; + } while (len > 0) + labels: + 0x80069054: "loop" + 0x8006906c: "end" + 0x80069070: "on_null" + start_pos: 0x80069044 + end_pos: 0x80069078 + +- name: mem_zero + signature: "fn(dst: *mut u8, len: u32) -> *mut u8" + desc: |- + Zeroes out `len` bytes of `dst`. + If `len <= 0`, returns `NULL`. + Otherwise returns `dst`. + inline_comments: + 0x80069084: "if dst == NULL { return 0; }" + 0x80069088: "" + 0x8006908c: "" + 0x80069090: "if len > 0 { _ret = dst; }" + 0x80069094: "if len <= 0 { return 0; }" + comments: + 0x8006909c: |- + do { + *dst = 0; + len -= 1; + dst += 1; + } while (len > 0) + labels: + 0x8006909c: "loop" + 0x800690ac: "end" + start_pos: 0x80069084 + end_pos: 0x800690b4 + +- name: memcpy + signature: "fn(dst: *mut u8, src: *const u8, len: u32) -> *mut u8" + inline_comments: + 0x800690b4: "if ptr == NULL { return 0; }" + 0x800690b8: "" + 0x800690bc: "if len <= 0 { return 0; }" + comments: + 0x800690c4: |- + do { + *dst = *src; + src += 1; + len -= 1; + dst += 1; + } while (len > 0) + labels: + 0x800690c4: "loop" + 0x800690dc: "end" + 0x800690e0: "on_null" + start_pos: 0x800690b4 + end_pos: 0x800690e8 + +- name: memset + signature: "fn(dst: *mut u8, value: u32, len: u32) -> *mut u8" + inline_comments: + 0x800690f4: "if ptr == NULL { return 0; }" + 0x800690f8: "" + 0x800690fc: "" + 0x80069100: "if len > 0 { _ret = ptr; }" + 0x80069104: "if len <= 0 { return 0; }" + comments: + 0x8006910c: |- + do { + *dst = value; + len -= 1; + ptr += 1; + } while (len > 0) + labels: + 0x8006910c: "loop" + 0x8006911c: "end" + start_pos: 0x800690f4 + end_pos: 0x80069124 + +- name: strcat + signature: "fn(dst: *mut u8, src: *const u8) -> *mut u8" + desc: "" + inline_comments: + 0x8006917c: "if dst == NULL { return NULL; }" + 0x80069180: "" + 0x80069184: "if src == 
NULL { return NULL; }" + 0x8006918c: "let dst_len = strlen(dst);" + 0x80069190: "" + 0x80069194: "" + 0x80069198: "let src_len = strlen(src);" + 0x800691a4: "if dst + dst_len == src + src_len { return NULL; }" + 0x800691b4: "dst += 1; if *dst == 0 { goto go_back_1_dst; }" + 0x800691cc: "dst -= 1;" + 0x800691e8: "return dst;" + comments: + 0x800691ac: |- + Skips `dst` until it hits one-past '\\0'. + Then backs up one and begins appending `src` + 0x800691bc: |- + // Note: This causes `dst` to be one-past the null. + do { + let tmp = *dst; + dst += 1; + } while tmp != 0 + 0x800691d0: |- + // Note: The null is copied in this loop too. + do { + let tmp = *src; + *dst = src; + src += 1; + dst += 1; + } while tmp != 0 + labels: + 0x800691bc: "advance_dst_until_past_null" + 0x800691cc: "go_back_1_dst" + 0x800691d0: "loop" + 0x800691ec: "on_null" + 0x800691f0: "exit" + start_pos: 0x80069164 + end_pos: 0x80069208 + +- name: strlen + signature: "fn(ptr: *const u8) -> u32" + desc: "" + inline_comments: + 0x800692d8: "if ptr == NULL { return 0; }" + labels: + 0x800692e8: "increase_len" + 0x800692ec: "loop" + 0x80069300: "on_null" + start_pos: 0x800692d4 + end_pos: 0x80069308 + +# TODO: Investigate this more, seems weird +- name: strcmp + signature: "fn(lhs: *const u8, rhs: *const u8) -> u32" + desc: "" + inline_comments: + 0x80069214: "if lhs == NULL { goto .lhs_null; }" + 0x8006921c: "if rhs != NULL { goto .start_cmp; }" + 0x80069224: "if lhs == rhs { return 0; } // Same as `rhs == NULL`" + 0x80069228: "" + 0x8006922c: "if lhs == NULL { return -1; } // Always true?" + 0x80069234: "// Dead code?" 
+ 0x8006925c: "rhs += 1;" + 0x80069248: "lhs += 1;" + 0x8006926c: "return lhs[0] - rhs[-1]; // Note: This is because we don't advance `lhs` when we don't take the branch" + comments: + 0x8006924c: |- + // Note: Includes the code from `.2` and `.1` + loop { + let left = *lhs; + let right = *rhs; + rhs += 1; + if left == right { + if *right == 0 { return 0; } + } + } + 0x8006926c: |- + // This is because we update `rhs` prematurely in the previous loop. + // Note: This returns the lexicographic order by comparing the last character. + return lhs[0] - rhs[-1]; + labels: + 0x80069224: "lhs_null" + 0x8006923c: "1" + 0x80069244: "2" + 0x8006924c: "start_cmp" + 0x80069270: "exit" + start_pos: 0x80069214 + end_pos: 0x80069278 + +- name: strcpy + signature: "fn(dst: *mut u8, src: *const u8) -> *mut u8" + desc: "" + inline_comments: + 0x80069284: "if dst == NULL { return NULL; }" + 0x80069288: "" + 0x8006928c: "if dst == NULL { return NULL; }" + 0x800692bc: "// Returns the original `lhs`" + comments: + 0x80069294: |- + let tmp = *rhs; + rhs += 1; + *lhs = tmp; + lhs += 1; + if tmp == 0 { return lhs; } + 0x800692a8: |- + do { + let tmp = *rhs; + rhs += 1; + *lhs = tmp; + lhs += 1; + if tmp == 0 { return lhs; } + } + labels: + 0x800692a8: "loop" + 0x800692bc: "end" + 0x800692c0: "on_null" + start_pos: 0x80069284 + end_pos: 0x800692c8