From 3eed1e930a7bf9cec301df9e558bdae856ec8f63 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Thu, 29 Apr 2021 16:36:27 +0100 Subject: [PATCH] Moved known/foreign data/func loading into the executable. Slightly revised the data / func table interface. --- dcb-exe/src/data/table.rs | 69 ++++++---- dcb-exe/src/func.rs | 156 ++++++++++++++++++++- dcb-exe/src/func/table.rs | 197 +++++---------------------- dcb-exe/src/lib.rs | 3 +- dcb-exe/src/pos.rs | 3 + dcb-exe/src/reader.rs | 141 +++++++++---------- dcb-exe/src/reader/error.rs | 30 +--- dcb-exe/src/reader/iter.rs | 6 +- dcb-exe/src/reader/opts.rs | 14 ++ dcb-tools/dcb-decompiler/src/main.rs | 64 +++++++-- 10 files changed, 366 insertions(+), 317 deletions(-) create mode 100644 dcb-exe/src/reader/opts.rs diff --git a/dcb-exe/src/data/table.rs b/dcb-exe/src/data/table.rs index 5051e96..7d74fe0 100644 --- a/dcb-exe/src/data/table.rs +++ b/dcb-exe/src/data/table.rs @@ -15,7 +15,7 @@ pub use node::DataNode; // Imports use super::{Data, DataKind}; use crate::Pos; -use std::fmt; +use std::{fmt, iter::FromIterator}; /// Data table /// @@ -46,39 +46,11 @@ pub struct DataTable { impl DataTable { /// Creates an empty data table #[must_use] - pub fn empty() -> Self { + pub fn new() -> Self { let root = DataNode::new(Data::dummy()); Self { root } } - /// Creates a data table from data locations - pub fn new(data: impl IntoIterator) -> Self { - let mut table = Self::empty(); - table.extend(data); - table - } - - /// Extends this data table with data locations. - /// - /// Any data that cannot be inserted is discarded, see [`DataNode::insert`] for - /// more information. - pub fn extend(&mut self, data: impl IntoIterator) { - for data in data { - // Try to insert and log if we get an error. - if let Err(err) = self.root.insert(data) { - let log_level = match err.data().kind() { - DataKind::Known | DataKind::Foreign => log::Level::Warn, - DataKind::Heuristics => log::Level::Trace, - }; - log::log!( - log_level, - "Unable to add data:\n{:#}", - dcb_util::DisplayWrapper::new(|f| dcb_util::fmt_err(&err, f)) - ); - } - } - } - /// Retrieves the smallest data location containing `pos` #[must_use] pub fn get_containing(&self, pos: Pos) -> Option<&Data> { @@ -130,6 +102,43 @@ impl DataTable { } } +impl Default for DataTable { + fn default() -> Self { + Self::new() + } +} + +impl Extend for DataTable { + fn extend>(&mut self, data: T) { + for data in data { + self.extend_one(data); + } + } + + fn extend_one(&mut self, data: Data) { + // Try to insert and log if we get an error. + if let Err(err) = self.root.insert(data) { + let log_level = match err.data().kind() { + DataKind::Known | DataKind::Foreign => log::Level::Warn, + DataKind::Heuristics => log::Level::Trace, + }; + log::log!( + log_level, + "Unable to add data:\n{:#}", + dcb_util::DisplayWrapper::new(|f| dcb_util::fmt_err(&err, f)) + ); + } + } +} + +impl FromIterator for DataTable { + fn from_iter>(data: T) -> Self { + let mut table = Self::new(); + table.extend(data); + table + } +} + impl fmt::Display for DataTable { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for node in self.root.nodes() { diff --git a/dcb-exe/src/func.rs b/dcb-exe/src/func.rs index d038327..c73baa8 100644 --- a/dcb-exe/src/func.rs +++ b/dcb-exe/src/func.rs @@ -12,8 +12,15 @@ pub mod table; pub use table::FuncTable; // Imports -use crate::Pos; -use std::{borrow::Borrow, collections::BTreeMap}; +use crate::{ + inst::{basic, Directive, Inst, Register}, + DataTable, Pos, +}; +use std::{ + borrow::Borrow, + collections::{BTreeMap, BTreeSet}, + ops::{Bound, Range}, +}; /// A function within the executable #[derive(Clone, Debug)] @@ -57,6 +64,151 @@ impl Func { } } +impl Func { + /// Creates a new list of functions from an iterator over insts + #[must_use] + #[allow(clippy::too_many_lines)] // TODO: Refactor + pub fn search_instructions<'a>( + insts_range: Range, insts: impl Iterator)> + Clone, func_table: Option<&FuncTable>, + data_table: Option<&DataTable>, + ) -> BTreeSet { + // Get all returns + let returns: BTreeSet = insts + .clone() + .filter_map(|(pos, inst)| match inst { + // `jr $ra` + Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Reg(basic::jmp::reg::Inst { + target: Register::Ra, + kind: basic::jmp::reg::Kind::Jump, + }))) => Some(pos), + _ => None, + }) + .collect(); + + // Get all possible tailcalls + let tailcalls: BTreeSet = insts + .clone() + .filter_map(|(pos, inst)| match inst { + Inst::Basic(basic::Inst::Jmp( + // `j` + basic::jmp::Inst::Reg(basic::jmp::reg::Inst { + kind: basic::jmp::reg::Kind::Jump, + .. + }) | + // `jr` + basic::jmp::Inst::Imm(basic::jmp::imm::Inst { + kind: basic::jmp::imm::Kind::Jump, + .. + }), + )) => Some(pos), + _ => None, + }) + .collect(); + + // Get all labels + let labels: BTreeSet = insts + .clone() + .filter_map(|(pos, inst)| match inst { + // `j` + Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Imm( + inst @ basic::jmp::imm::Inst { + kind: basic::jmp::imm::Kind::Jump, + .. + }, + ))) => Some(inst.target(pos)), + // Conditional jumps + Inst::Basic(basic::Inst::Cond(inst)) => Some(inst.target(pos)), + _ => None, + }) + .filter(|target| insts_range.contains(target)) + .collect(); + + // Now check every `Jal` and `Dw` for possible function entrances + let function_entries: BTreeSet = insts + .filter_map(|(pos, inst)| match inst { + // `jar` + Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Imm( + inst @ basic::jmp::imm::Inst { + kind: basic::jmp::imm::Kind::JumpLink, + .. + }, + ))) if pos.0 % 4 == 0 => Some(inst.target(pos)), + // `dw` + Inst::Directive(Directive::Dw(address)) if address % 4 == 0 => Some(Pos(address)), + _ => None, + }) + .filter(|target| insts_range.contains(target)) + .filter(|&target| data_table.map_or(true, |data_table| data_table.get_containing(target).is_none())) + .collect(); + + let mut cur_funcs = BTreeSet::::new(); + for (idx, &func_pos) in function_entries.iter().enumerate() { + // Try to get the end position from the returns + // Note: +8 for return + inst after. + let mut end_pos: Pos = returns.range(func_pos..).next().copied().unwrap_or(func_pos) + 8; + + // If there's a function in between us and the return, use the last tailcall instead + if let Some(next_func_pos) = function_entries.range(func_pos + 4i32..end_pos).next() { + end_pos = tailcalls + .range(..next_func_pos) + .next_back() + .copied() + .unwrap_or(func_pos) + 8i32; + + // If we got a tailcall before this function, just end it 2 insts + if end_pos <= func_pos { + end_pos = func_pos + 8i32; + } + } + + // If this function would intersect any other, skip this one. + let intersects = cur_funcs + .range(..=func_pos) + .next_back() + .map_or(false, |func| func.end_pos > func_pos) || + cur_funcs + .range(func_pos..) + .next() + .map_or(false, |func| func.start_pos < end_pos) || + func_table.map_or(false, |func_table| { + func_table + .range(..=func_pos) + .next_back() + .map_or(false, |func| func.end_pos > func_pos) || + func_table + .range(func_pos..) + .next() + .map_or(false, |func| func.start_pos < end_pos) + }); + if intersects { + continue; + } + + // Get all labels within this function + // Note: We skip labels on the function location itself. + let labels = labels + .range((Bound::Excluded(func_pos), Bound::Excluded(end_pos))) + .enumerate() + .map(|(idx, &pos)| (pos, format!("{idx}"))) + .collect(); + + let func = Func { + name: format!("func_{idx}"), + signature: "fn()".to_owned(), + desc: String::new(), + inline_comments: BTreeMap::new(), + comments: BTreeMap::new(), + labels, + start_pos: func_pos, + end_pos, + }; + assert!(cur_funcs.insert(func)); + } + + cur_funcs + } +} + impl Borrow for Func { fn borrow(&self) -> &Pos { &self.start_pos diff --git a/dcb-exe/src/func/table.rs b/dcb-exe/src/func/table.rs index 32da6b5..96f902e 100644 --- a/dcb-exe/src/func/table.rs +++ b/dcb-exe/src/func/table.rs @@ -16,17 +16,8 @@ pub use error::GetKnownError; // Imports use super::Func; -use crate::{ - inst::{basic, Directive, Inst, Register}, - DataTable, Pos, -}; -use dcb_util::DiscardingSortedMergeIter; -use std::{ - collections::{BTreeMap, BTreeSet}, - fs::File, - iter::FromIterator, - ops::{Bound, Range, RangeBounds}, -}; +use crate::Pos; +use std::{collections::BTreeSet, fs::File, iter::FromIterator, ops::RangeBounds}; /// Function table /// @@ -37,18 +28,25 @@ use std::{ pub struct FuncTable(BTreeSet); impl FuncTable { - /// Merges two data tables, discarding duplicates from `other`. - /// - /// This can be useful when combining known functions and heuristically - /// discovered function, as the known functions are always kept, and the - /// duplicate discovered ones are discarded. + /// Creates an empty function table #[must_use] - pub fn merge_with(self, other: Self) -> Self { - // Note: We don't return the iterator, as we want the user to - // keep the guarantees supplied by this type. - DiscardingSortedMergeIter::new(self.0.into_iter(), other.0.into_iter()).collect() + pub const fn new() -> Self { + Self(BTreeSet::new()) } +} +// Constructors +impl FuncTable { + /// Returns all known functions + pub fn get_known() -> Result { + let file = File::open("resources/game_funcs.yaml").map_err(GetKnownError::File)?; + + serde_yaml::from_reader(file).map_err(GetKnownError::Parse) + } +} + +// Getters +impl FuncTable { /// Retrieves the function containing `pos` #[must_use] pub fn get_containing(&self, pos: Pos) -> Option<&Func> { @@ -69,153 +67,18 @@ impl FuncTable { } } -impl FuncTable { - /// Returns all known functions - pub fn get_known() -> Result { - let file = File::open("resources/game_funcs.yaml").map_err(GetKnownError::File)?; - - serde_yaml::from_reader(file).map_err(GetKnownError::Parse) +// Note: `BTreeSet` already discards duplicates on it's own. +impl Extend for FuncTable { + fn extend>(&mut self, funcs: T) { + self.0.extend(funcs); } - /// Creates a new list of functions from an iterator over insts - #[must_use] - #[allow(clippy::too_many_lines)] // TODO: Refactor - pub fn search_instructions<'a>( - insts_range: Range, insts: impl Iterator)> + Clone, known_func_table: &Self, - data_table: &DataTable, - ) -> Self { - // Get all returns - let returns: BTreeSet = insts - .clone() - .filter_map(|(pos, inst)| match inst { - // `jr $ra` - Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Reg(basic::jmp::reg::Inst { - target: Register::Ra, - kind: basic::jmp::reg::Kind::Jump, - }))) => Some(pos), - _ => None, - }) - .collect(); + fn extend_one(&mut self, func: Func) { + self.0.extend_one(func); + } - // Get all possible tailcalls - let tailcalls: BTreeSet = insts - .clone() - .filter_map(|(pos, inst)| match inst { - Inst::Basic(basic::Inst::Jmp( - // `j` - basic::jmp::Inst::Reg(basic::jmp::reg::Inst { - kind: basic::jmp::reg::Kind::Jump, - .. - }) | - // `jr` - basic::jmp::Inst::Imm(basic::jmp::imm::Inst { - kind: basic::jmp::imm::Kind::Jump, - .. - }), - )) => Some(pos), - _ => None, - }) - .collect(); - - // Get all labels - let labels: BTreeSet = insts - .clone() - .filter_map(|(pos, inst)| match inst { - // `j` - Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Imm( - inst @ basic::jmp::imm::Inst { - kind: basic::jmp::imm::Kind::Jump, - .. - }, - ))) => Some(inst.target(pos)), - // Conditional jumps - Inst::Basic(basic::Inst::Cond(inst)) => Some(inst.target(pos)), - _ => None, - }) - .filter(|target| insts_range.contains(target)) - .collect(); - - // Now check every `Jal` and `Dw` for possible function entrances - let function_entries: BTreeSet = insts - .filter_map(|(pos, inst)| match inst { - // `jar` - Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Imm( - inst @ basic::jmp::imm::Inst { - kind: basic::jmp::imm::Kind::JumpLink, - .. - }, - ))) if pos.0 % 4 == 0 => Some(inst.target(pos)), - // `dw` - Inst::Directive(Directive::Dw(address)) if address % 4 == 0 => Some(Pos(address)), - _ => None, - }) - .filter(|target| insts_range.contains(target)) - .filter(|&target| data_table.get_containing(target).is_none()) - .collect(); - - let mut cur_funcs = BTreeSet::::new(); - for (idx, &func_pos) in function_entries.iter().enumerate() { - // Try to get the end position from the returns - // Note: +8 for return + inst after. - let mut end_pos: Pos = returns.range(func_pos..).next().copied().unwrap_or(func_pos) + 8; - - // If there's a function in between us and the return, use the last tailcall instead - if let Some(next_func_pos) = function_entries.range(func_pos + 4i32..end_pos).next() { - end_pos = tailcalls - .range(..next_func_pos) - .next_back() - .copied() - .unwrap_or(func_pos) + 8i32; - - // If we got a tailcall before this function, just end it 2 insts - if end_pos <= func_pos { - end_pos = func_pos + 8i32; - } - } - - // If this function would intersect any other, skip this one. - if cur_funcs - .range(..=func_pos) - .next_back() - .map_or(false, |func| func.end_pos > func_pos) || - cur_funcs - .range(func_pos..) - .next() - .map_or(false, |func| func.start_pos < end_pos) || - known_func_table - .range(..=func_pos) - .next_back() - .map_or(false, |func| func.end_pos > func_pos) || - known_func_table - .range(func_pos..) - .next() - .map_or(false, |func| func.start_pos < end_pos) - { - continue; - } - - // Get all labels within this function - // Note: We skip labels on the function location itself. - let labels = labels - .range((Bound::Excluded(func_pos), Bound::Excluded(end_pos))) - .enumerate() - .map(|(idx, &pos)| (pos, format!("{idx}"))) - .collect(); - - let func = Func { - name: format!("func_{idx}"), - signature: "fn()".to_owned(), - desc: String::new(), - inline_comments: BTreeMap::new(), - comments: BTreeMap::new(), - labels, - start_pos: func_pos, - end_pos, - }; - assert!(cur_funcs.insert(func)); - } - - cur_funcs.into_iter().collect() + fn extend_reserve(&mut self, additional: usize) { + self.0.extend_reserve(additional); } } @@ -224,3 +87,9 @@ impl FromIterator for FuncTable { Self(iter.into_iter().collect()) } } + +impl Default for FuncTable { + fn default() -> Self { + Self::new() + } +} diff --git a/dcb-exe/src/lib.rs b/dcb-exe/src/lib.rs index 397e6f5..a90dada 100644 --- a/dcb-exe/src/lib.rs +++ b/dcb-exe/src/lib.rs @@ -18,7 +18,8 @@ unwrap_infallible, min_type_alias_impl_trait, external_doc, - assert_matches + assert_matches, + extend_one )] // Lints #![warn(clippy::restriction, clippy::pedantic, clippy::nursery)] diff --git a/dcb-exe/src/pos.rs b/dcb-exe/src/pos.rs index 6c95729..60e22ec 100644 --- a/dcb-exe/src/pos.rs +++ b/dcb-exe/src/pos.rs @@ -13,6 +13,9 @@ pub struct Pos(pub u32); impl Pos { /// Calculated the offset between two positions + /// + /// # Panics + /// Panics if the result would be negative. #[must_use] pub fn offset_from(self, start_pos: Self) -> usize { usize::try_from(self - start_pos).expect("Negative offset") diff --git a/dcb-exe/src/reader.rs b/dcb-exe/src/reader.rs index 8625352..cb37d83 100644 --- a/dcb-exe/src/reader.rs +++ b/dcb-exe/src/reader.rs @@ -1,33 +1,84 @@ -//! Executable reader +//! Executable reader. // Modules pub mod error; pub mod iter; +pub mod opts; // Exports -pub use error::{DeserializeError, GetKnownError}; +pub use error::DeserializeError; +pub use opts::DeserializeOpts; // Imports -use crate::{inst, Data, DataTable, FuncTable, Header, Pos}; +use crate::{inst, Data, DataTable, Func, FuncTable, Header, Pos}; use dcb_bytes::{ByteArray, Bytes}; use std::{convert::TryFrom, io, ops::Range}; -/// The game executable +/// Executable reader +/// +/// Serves to read all information from the executable, +/// decode it and provide an interface to retrieve data +/// and functions, including their instructions. #[derive(Clone, Debug)] pub struct ExeReader { /// The executable header header: Header, - /// All instruction bytes within the executable. + /// All bytes of the executable (excluding header.) bytes: Box<[u8]>, - /// The data table. + /// Data table data_table: DataTable, - /// The function table. + /// Function table func_table: FuncTable, } +// Constructors +impl ExeReader { + /// Deserializes the executable from a file. + /// + /// # Options + /// Allows external data and function tables to be used during this deserialization. + pub fn deserialize(file: &mut R, opts: DeserializeOpts) -> Result { + // Read header + let header = { + let mut bytes = [0u8; <
::ByteArray as ByteArray>::SIZE]; + file.read_exact(&mut bytes).map_err(DeserializeError::ReadHeader)?; + Header::from_bytes(&bytes).map_err(DeserializeError::ParseHeader)? + }; + + // Read all of the bytes + let mut bytes = + vec![0u8; usize::try_from(header.size).expect("Len didn't fit into `usize`")].into_boxed_slice(); + file.read_exact(bytes.as_mut()).map_err(DeserializeError::ReadData)?; + + // Check if we were given any initial tables, else initialize them + let mut data_table = opts.data_table.unwrap_or_else(DataTable::new); + let mut func_table = opts.func_table.unwrap_or_else(FuncTable::new); + + // Then parse all heuristic tables + let insts = inst::DecodeIter::new(&*bytes, &data_table, &func_table, header.start_pos); + let insts_range = { + let start = header.start_pos; + let end = header.start_pos + header.size; + start..end + }; + let heuristics_data = Data::search_instructions(insts_range.clone(), insts.clone()); + let heuristics_func_table = Func::search_instructions(insts_range, insts, Some(&func_table), Some(&data_table)); + data_table.extend(heuristics_data); + func_table.extend(heuristics_func_table); + + Ok(Self { + header, + bytes, + data_table, + func_table, + }) + } +} + +// Getters impl ExeReader { /// Returns this executable's header #[must_use] @@ -53,7 +104,8 @@ impl ExeReader { &self.func_table } - /// Returns this executable's instruction range + /// Returns the range of positions of this executable's + /// instructions. #[must_use] pub fn insts_range(&self) -> Range { let start = self.header.start_pos; @@ -61,21 +113,18 @@ impl ExeReader { start..end } - /// Creates an iterator over this executable + /// Creates an iterator over this executable's data and functions. #[must_use] pub const fn iter(&self) -> iter::Iter { iter::Iter::new(self) } - /// Returns a parsing iterator for all instructions + /// Returns an iterator that decodes instructions within a certain range. + /// + /// # Panics + /// Panics if `range` is not a valid range within this executable. #[must_use] - pub fn parse_iter(&self) -> inst::DecodeIter { - self.parse_iter_from(self.insts_range()) - } - - /// Returns a parsing iterator starting from a range - #[must_use] - pub fn parse_iter_from(&self, range: Range) -> inst::DecodeIter { + pub fn decode_iter(&self, range: Range) -> inst::DecodeIter { let start = range.start.offset_from(self.header.start_pos); let end = range.end.offset_from(self.header.start_pos); let bytes = &self.bytes[start..end]; @@ -83,61 +132,3 @@ impl ExeReader { inst::DecodeIter::new(bytes, &self.data_table, &self.func_table, range.start) } } - -impl ExeReader { - /// Deserializes the executable from file - pub fn deserialize(file: &mut R) -> Result { - // Read header - let mut header_bytes = [0u8; <
::ByteArray as ByteArray>::SIZE]; - file.read_exact(&mut header_bytes) - .map_err(DeserializeError::ReadHeader)?; - let header = Header::from_bytes(&header_bytes).map_err(DeserializeError::ParseHeader)?; - - // Get the instruction range - let insts_range = { - let start = header.start_pos; - let end = header.start_pos + header.size; - start..end - }; - - // Read all of the bytes - let mut bytes = - vec![0u8; usize::try_from(header.size).expect("Len didn't fit into `usize`")].into_boxed_slice(); - file.read_exact(bytes.as_mut()).map_err(DeserializeError::ReadData)?; - - // Read the known data and func table - let mut known_data_table = self::get_known_data_table().map_err(DeserializeError::KnownDataTable)?; - let known_func_table = FuncTable::get_known().map_err(DeserializeError::KnownFuncTable)?; - - // Parse all instructions - let insts = inst::DecodeIter::new(&*bytes, &known_data_table, &known_func_table, header.start_pos); - - // Then parse all heuristic tables - let heuristics_data = Data::search_instructions(insts_range.clone(), insts.clone()); - let heuristics_func_table = - FuncTable::search_instructions(insts_range, insts, &known_func_table, &known_data_table); - known_data_table.extend(heuristics_data); - let func_table = known_func_table.merge_with(heuristics_func_table); - - Ok(Self { - header, - bytes, - data_table: known_data_table, - func_table, - }) - } -} - -/// Returns all known data locations -fn get_known_data_table() -> Result { - let game_data_file = std::fs::File::open("resources/game_data.yaml").map_err(GetKnownError::OpenGame)?; - let game_data: Vec = serde_yaml::from_reader(game_data_file).map_err(GetKnownError::ParseGame)?; - - let foreign_data_file = std::fs::File::open("resources/foreign_data.yaml").map_err(GetKnownError::OpenForeign)?; - let foreign_data: Vec = serde_yaml::from_reader(foreign_data_file).map_err(GetKnownError::ParseForeign)?; - - let mut data_table = DataTable::new(game_data); - data_table.extend(foreign_data); - - Ok(data_table) -} diff --git a/dcb-exe/src/reader/error.rs b/dcb-exe/src/reader/error.rs index 31958d7..fce4ed9 100644 --- a/dcb-exe/src/reader/error.rs +++ b/dcb-exe/src/reader/error.rs @@ -1,7 +1,7 @@ //! Errors // Imports -use crate::{func, header}; +use crate::header; /// Error type for [`ExeReader::deserialize`](super::ExeReader::deserialize) #[derive(Debug, thiserror::Error)] @@ -21,32 +21,4 @@ pub enum DeserializeError { /// Unable to read data #[error("Unable to read data")] ReadData(#[source] std::io::Error), - - /// Unable to get known data - #[error("Unable to get known data table")] - KnownDataTable(#[source] GetKnownError), - - /// Unable to get known data - #[error("Unable to get known func table")] - KnownFuncTable(#[source] func::table::GetKnownError), -} - -/// Error type for getting the known function table -#[derive(Debug, thiserror::Error)] -pub enum GetKnownError { - /// Unable to open game data file - #[error("Unable to open game data file")] - OpenGame(#[source] std::io::Error), - - /// Unable to parse game data file - #[error("Unable to parse game data file")] - ParseGame(#[source] serde_yaml::Error), - - /// Unable to open foreign data file - #[error("Unable to open foreign data file")] - OpenForeign(#[source] std::io::Error), - - /// Unable to parse foreign data file - #[error("Unable to parse foreign data file")] - ParseForeign(#[source] serde_yaml::Error), } diff --git a/dcb-exe/src/reader/iter.rs b/dcb-exe/src/reader/iter.rs index 73a41bb..3fac103 100644 --- a/dcb-exe/src/reader/iter.rs +++ b/dcb-exe/src/reader/iter.rs @@ -75,7 +75,7 @@ impl<'a> Iterator for Iter<'a> { return Some(ExeItem::Data { data, - insts: self.exe.parse_iter_from(cur_pos..end_pos), + insts: self.exe.decode_iter(cur_pos..end_pos), }); } @@ -84,7 +84,7 @@ impl<'a> Iterator for Iter<'a> { self.cur_pos = func.end_pos; return Some(ExeItem::Func { func, - insts: self.exe.parse_iter_from(cur_pos..func.end_pos), + insts: self.exe.decode_iter(cur_pos..func.end_pos), }); } @@ -108,7 +108,7 @@ impl<'a> Iterator for Iter<'a> { Some(ExeItem::Unknown { - insts: self.exe.parse_iter_from(cur_pos..end_pos), + insts: self.exe.decode_iter(cur_pos..end_pos), }) } } diff --git a/dcb-exe/src/reader/opts.rs b/dcb-exe/src/reader/opts.rs new file mode 100644 index 0000000..624b6fa --- /dev/null +++ b/dcb-exe/src/reader/opts.rs @@ -0,0 +1,14 @@ +//! Deserialization options + +// Imports +use crate::{DataTable, FuncTable}; + +/// Options for deserialization +#[derive(Default, Debug)] +pub struct DeserializeOpts { + /// Existing data table to use + pub data_table: Option, + + /// Existing function table to use + pub func_table: Option, +} diff --git a/dcb-tools/dcb-decompiler/src/main.rs b/dcb-tools/dcb-decompiler/src/main.rs index 277c67b..a2d5cd4 100644 --- a/dcb-tools/dcb-decompiler/src/main.rs +++ b/dcb-tools/dcb-decompiler/src/main.rs @@ -1,6 +1,6 @@ //! Decompiler -#![feature(format_args_capture, iter_map_while)] +#![feature(try_blocks, format_args_capture, iter_map_while)] // Modules mod cli; @@ -9,11 +9,11 @@ mod cli; use anyhow::Context; use dcb_exe::{ inst::{parse::LineArgExpr, DisplayCtx, Inst, InstDisplay, InstFmtArg, ParseCtx}, - reader::iter::ExeItem, - ExeReader, Func, Pos, + reader::{iter::ExeItem, DeserializeOpts}, + Data, ExeReader, Func, Pos, }; use itertools::{Itertools, Position}; -use std::{collections::BTreeMap, fmt, path::PathBuf}; +use std::{collections::BTreeMap, fmt, fs, path::PathBuf}; fn main() -> Result<(), anyhow::Error> { // Initialize the logger @@ -28,11 +28,55 @@ fn main() -> Result<(), anyhow::Error> { let cli = cli::CliData::new(); // Open the input file - let mut input_file = std::fs::File::open(&cli.input_path).context("Unable to open input file")?; + let mut input_file = fs::File::open(&cli.input_path).context("Unable to open input file")?; + + // Load the known and foreign data / func tables + let known_data: Result, _> = try { + let file = fs::File::open("resources/game_data.yaml").context("Unable to open game data file")?; + serde_yaml::from_reader(file).context("Unable to parse game data file")? + }; + let known_data = known_data.unwrap_or_else(|err: anyhow::Error| { + log::warn!("Unable to load game data:\n{:?}", err); + vec![] + }); + let foreign_data: Result, _> = try { + let file = fs::File::open("resources/foreign_data.yaml").context("Unable to open foreign data file")?; + serde_yaml::from_reader(file).context("Unable to parse foreign data file")? + }; + let foreign_data = foreign_data.unwrap_or_else(|err: anyhow::Error| { + log::warn!("Unable to load foreign data:\n{:?}", err); + vec![] + }); + let data_table = known_data.into_iter().chain(foreign_data).collect(); + + let func_table: Result, _> = try { + let file = fs::File::open("resources/game_funcs.yaml").context("Unable to open functions file")?; + serde_yaml::from_reader(file).context("Unable to parse functions file")? + }; + let func_table = func_table.unwrap_or_else(|err: anyhow::Error| { + log::warn!("Unable to load functions:\n{:?}", err); + vec![] + }); + let func_table = func_table.into_iter().collect(); + + // Read all arg overrides + let inst_arg_overrides: Result<_, _> = try { + let file = fs::File::open("resources/inst_args_override.yaml") + .context("Unable to open instruction args override file")?; + serde_yaml::from_reader(file).context("Unable to parse instruction args override file")? + }; + let inst_arg_overrides: BTreeMap = inst_arg_overrides.unwrap_or_else(|err: anyhow::Error| { + log::warn!("Unable to load instruction overrides:\n{:?}", err); + BTreeMap::new() + }); // Read the executable log::debug!("Deserializing executable"); - let exe = ExeReader::deserialize(&mut input_file).context("Unable to parse game executable")?; + let exe = ExeReader::deserialize(&mut input_file, DeserializeOpts { + data_table: Some(data_table), + func_table: Some(func_table), + }) + .context("Unable to parse game executable")?; if cli.print_header { let header_file_path = { @@ -40,16 +84,10 @@ fn main() -> Result<(), anyhow::Error> { path.push(".header"); PathBuf::from(path) }; - let header_file = std::fs::File::create(header_file_path).context("Unable to create header file")?; + let header_file = fs::File::create(header_file_path).context("Unable to create header file")?; serde_yaml::to_writer(header_file, exe.header()).context("Unable to write header to file")?; } - // Read all arg overrides - let inst_arg_overrides_file = std::fs::File::open("resources/inst_args_override.yaml") - .context("Unable to open instruction args override file")?; - let inst_arg_overrides: BTreeMap = - serde_yaml::from_reader(inst_arg_overrides_file).context("Unable to parse instruction args override file")?; - // Instruction buffer let mut inst_buffers: BTreeMap = BTreeMap::new();