Moved known/foreign data/func loading into the executable.

Slightly revised the data / func table interface.
This commit is contained in:
2021-04-29 16:36:27 +01:00
parent 4b559b7ce1
commit 3eed1e930a
10 changed files with 366 additions and 317 deletions

View File

@@ -15,7 +15,7 @@ pub use node::DataNode;
// Imports
use super::{Data, DataKind};
use crate::Pos;
use std::fmt;
use std::{fmt, iter::FromIterator};
/// Data table
///
@@ -46,39 +46,11 @@ pub struct DataTable {
impl DataTable {
/// Creates an empty data table
#[must_use]
pub fn empty() -> Self {
pub fn new() -> Self {
let root = DataNode::new(Data::dummy());
Self { root }
}
/// Creates a data table from data locations
pub fn new(data: impl IntoIterator<Item = Data>) -> Self {
let mut table = Self::empty();
table.extend(data);
table
}
/// Extends this data table with data locations.
///
/// Any data that cannot be inserted is discarded, see [`DataNode::insert`] for
/// more information.
pub fn extend(&mut self, data: impl IntoIterator<Item = Data>) {
for data in data {
// Try to insert and log if we get an error.
if let Err(err) = self.root.insert(data) {
let log_level = match err.data().kind() {
DataKind::Known | DataKind::Foreign => log::Level::Warn,
DataKind::Heuristics => log::Level::Trace,
};
log::log!(
log_level,
"Unable to add data:\n{:#}",
dcb_util::DisplayWrapper::new(|f| dcb_util::fmt_err(&err, f))
);
}
}
}
/// Retrieves the smallest data location containing `pos`
#[must_use]
pub fn get_containing(&self, pos: Pos) -> Option<&Data> {
@@ -130,6 +102,43 @@ impl DataTable {
}
}
/// The default data table is the empty table.
impl Default for DataTable {
/// Returns the same table as [`DataTable::new`].
fn default() -> Self {
Self::new()
}
}
impl Extend<Data> for DataTable {
    /// Inserts every item yielded by `data`, one at a time, through
    /// [`Extend::extend_one`].
    fn extend<T: IntoIterator<Item = Data>>(&mut self, data: T) {
        data.into_iter().for_each(|item| self.extend_one(item));
    }

    /// Inserts a single data location into the table.
    ///
    /// Data that cannot be inserted is not kept; the failure is only logged.
    fn extend_one(&mut self, data: Data) {
        // Nothing else to do when the insertion succeeds.
        let err = match self.root.insert(data) {
            Ok(_) => return,
            Err(err) => err,
        };

        // Heuristic data is reported at trace level, known and foreign data at warn level.
        let log_level = match err.data().kind() {
            DataKind::Heuristics => log::Level::Trace,
            DataKind::Known | DataKind::Foreign => log::Level::Warn,
        };

        log::log!(
            log_level,
            "Unable to add data:\n{:#}",
            dcb_util::DisplayWrapper::new(|f| dcb_util::fmt_err(&err, f))
        );
    }
}
impl FromIterator<Data> for DataTable {
    /// Builds a table by inserting each item of `data` into an empty table.
    fn from_iter<T: IntoIterator<Item = Data>>(data: T) -> Self {
        data.into_iter().fold(Self::new(), |mut table, item| {
            table.extend_one(item);
            table
        })
    }
}
impl fmt::Display for DataTable {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for node in self.root.nodes() {

View File

@@ -12,8 +12,15 @@ pub mod table;
pub use table::FuncTable;
// Imports
use crate::Pos;
use std::{borrow::Borrow, collections::BTreeMap};
use crate::{
inst::{basic, Directive, Inst, Register},
DataTable, Pos,
};
use std::{
borrow::Borrow,
collections::{BTreeMap, BTreeSet},
ops::{Bound, Range},
};
/// A function within the executable
#[derive(Clone, Debug)]
@@ -57,6 +64,151 @@ impl Func {
}
}
impl Func {
/// Heuristically searches `insts` for functions.
///
/// # Arguments
/// - `insts_range`: Range of positions covered by `insts`. Jump targets and
///   candidate entries outside of it are ignored.
/// - `insts`: Decoded instructions along with their positions. The iterator is
///   cloned so it can be traversed once per pass.
/// - `func_table`: If given, candidates that would intersect any function in it
///   are skipped.
/// - `data_table`: If given, candidates whose position is contained in any data
///   within it are skipped.
///
/// # Returns
/// The set of discovered functions, named `func_{idx}` where `idx` is the index
/// of the entry among all candidate entries (including skipped ones). Each
/// function's labels are the jump targets strictly inside it, named after
/// their index within the function.
///
/// # Panics
/// Panics if a function cannot be inserted into the result set because an
/// equal function is already present.
#[must_use]
#[allow(clippy::too_many_lines)] // TODO: Refactor
pub fn search_instructions<'a>(
insts_range: Range<Pos>, insts: impl Iterator<Item = (Pos, Inst<'a>)> + Clone, func_table: Option<&FuncTable>,
data_table: Option<&DataTable>,
) -> BTreeSet<Self> {
// Get all returns
let returns: BTreeSet<Pos> = insts
.clone()
.filter_map(|(pos, inst)| match inst {
// `jr $ra`
Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Reg(basic::jmp::reg::Inst {
target: Register::Ra,
kind: basic::jmp::reg::Kind::Jump,
}))) => Some(pos),
_ => None,
})
.collect();
// Get all possible tailcalls
// Note: Any non-linking jump, register or immediate, is considered a possible tailcall.
let tailcalls: BTreeSet<Pos> = insts
.clone()
.filter_map(|(pos, inst)| match inst {
Inst::Basic(basic::Inst::Jmp(
// `jr`
basic::jmp::Inst::Reg(basic::jmp::reg::Inst {
kind: basic::jmp::reg::Kind::Jump,
..
}) |
// `j`
basic::jmp::Inst::Imm(basic::jmp::imm::Inst {
kind: basic::jmp::imm::Kind::Jump,
..
}),
)) => Some(pos),
_ => None,
})
.collect();
// Get all labels
// Note: Labels are the targets of `j` and conditional jumps that land within `insts_range`.
let labels: BTreeSet<Pos> = insts
.clone()
.filter_map(|(pos, inst)| match inst {
// `j`
Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Imm(
inst @ basic::jmp::imm::Inst {
kind: basic::jmp::imm::Kind::Jump,
..
},
))) => Some(inst.target(pos)),
// Conditional jumps
Inst::Basic(basic::Inst::Cond(inst)) => Some(inst.target(pos)),
_ => None,
})
.filter(|target| insts_range.contains(target))
.collect();
// Now check every `Jal` and `Dw` for possible function entrances
// Note: Entries outside `insts_range` or inside known data are discarded.
let function_entries: BTreeSet<Pos> = insts
.filter_map(|(pos, inst)| match inst {
// `jal`
// Note: The alignment check is on the instruction's own position, not its target.
Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Imm(
inst @ basic::jmp::imm::Inst {
kind: basic::jmp::imm::Kind::JumpLink,
..
},
))) if pos.0 % 4 == 0 => Some(inst.target(pos)),
// `dw`
Inst::Directive(Directive::Dw(address)) if address % 4 == 0 => Some(Pos(address)),
_ => None,
})
.filter(|target| insts_range.contains(target))
.filter(|&target| data_table.map_or(true, |data_table| data_table.get_containing(target).is_none()))
.collect();
let mut cur_funcs = BTreeSet::<Self>::new();
for (idx, &func_pos) in function_entries.iter().enumerate() {
// Try to get the end position from the returns
// Note: +8 for return + inst after.
// Note: If there is no return at or after `func_pos`, the function ends 8 bytes after its start.
let mut end_pos: Pos = returns.range(func_pos..).next().copied().unwrap_or(func_pos) + 8;
// If there's a function in between us and the return, use the last tailcall instead
if let Some(next_func_pos) = function_entries.range(func_pos + 4i32..end_pos).next() {
end_pos = tailcalls
.range(..next_func_pos)
.next_back()
.copied()
.unwrap_or(func_pos) + 8i32;
// If we got a tailcall before this function, just end it 2 insts
if end_pos <= func_pos {
end_pos = func_pos + 8i32;
}
}
// If this function would intersect any other, skip this one.
// Note: Checks both the functions found so far and, if given, the ones in `func_table`,
//       looking at the closest function at-or-before and at-or-after `func_pos`.
let intersects = cur_funcs
.range(..=func_pos)
.next_back()
.map_or(false, |func| func.end_pos > func_pos) ||
cur_funcs
.range(func_pos..)
.next()
.map_or(false, |func| func.start_pos < end_pos) ||
func_table.map_or(false, |func_table| {
func_table
.range(..=func_pos)
.next_back()
.map_or(false, |func| func.end_pos > func_pos) ||
func_table
.range(func_pos..)
.next()
.map_or(false, |func| func.start_pos < end_pos)
});
if intersects {
continue;
}
// Get all labels within this function
// Note: We skip labels on the function location itself.
let labels = labels
.range((Bound::Excluded(func_pos), Bound::Excluded(end_pos)))
.enumerate()
.map(|(idx, &pos)| (pos, format!("{idx}")))
.collect();
let func = Func {
name: format!("func_{idx}"),
signature: "fn()".to_owned(),
desc: String::new(),
inline_comments: BTreeMap::new(),
comments: BTreeMap::new(),
labels,
start_pos: func_pos,
end_pos,
};
// NOTE(review): presumably this can't fail, since entries are unique and
// functions look to be ordered by start position — confirm against `Ord for Func`.
assert!(cur_funcs.insert(func));
}
cur_funcs
}
}
impl Borrow<Pos> for Func {
fn borrow(&self) -> &Pos {
&self.start_pos

View File

@@ -16,17 +16,8 @@ pub use error::GetKnownError;
// Imports
use super::Func;
use crate::{
inst::{basic, Directive, Inst, Register},
DataTable, Pos,
};
use dcb_util::DiscardingSortedMergeIter;
use std::{
collections::{BTreeMap, BTreeSet},
fs::File,
iter::FromIterator,
ops::{Bound, Range, RangeBounds},
};
use crate::Pos;
use std::{collections::BTreeSet, fs::File, iter::FromIterator, ops::RangeBounds};
/// Function table
///
@@ -37,18 +28,25 @@ use std::{
pub struct FuncTable(BTreeSet<Func>);
impl FuncTable {
/// Merges two data tables, discarding duplicates from `other`.
///
/// This can be useful when combining known functions and heuristically
/// discovered function, as the known functions are always kept, and the
/// duplicate discovered ones are discarded.
/// Creates an empty function table
#[must_use]
pub fn merge_with(self, other: Self) -> Self {
// Note: We don't return the iterator, as we want the user to
// keep the guarantees supplied by this type.
DiscardingSortedMergeIter::new(self.0.into_iter(), other.0.into_iter()).collect()
pub const fn new() -> Self {
Self(BTreeSet::new())
}
}
// Constructors
impl FuncTable {
    /// Loads the known functions from `resources/game_funcs.yaml`.
    ///
    /// # Errors
    /// Returns [`GetKnownError::File`] if the file cannot be opened and
    /// [`GetKnownError::Parse`] if its contents cannot be parsed.
    pub fn get_known() -> Result<Self, GetKnownError> {
        File::open("resources/game_funcs.yaml")
            .map_err(GetKnownError::File)
            .and_then(|file| serde_yaml::from_reader(file).map_err(GetKnownError::Parse))
    }
}
// Getters
impl FuncTable {
/// Retrieves the function containing `pos`
#[must_use]
pub fn get_containing(&self, pos: Pos) -> Option<&Func> {
@@ -69,153 +67,18 @@ impl FuncTable {
}
}
impl FuncTable {
/// Returns all known functions
pub fn get_known() -> Result<Self, GetKnownError> {
let file = File::open("resources/game_funcs.yaml").map_err(GetKnownError::File)?;
serde_yaml::from_reader(file).map_err(GetKnownError::Parse)
// Note: `BTreeSet` already discards duplicates on its own.
impl Extend<Func> for FuncTable {
fn extend<T: IntoIterator<Item = Func>>(&mut self, funcs: T) {
self.0.extend(funcs);
}
/// Creates a new list of functions from an iterator over insts
#[must_use]
#[allow(clippy::too_many_lines)] // TODO: Refactor
pub fn search_instructions<'a>(
insts_range: Range<Pos>, insts: impl Iterator<Item = (Pos, Inst<'a>)> + Clone, known_func_table: &Self,
data_table: &DataTable,
) -> Self {
// Get all returns
let returns: BTreeSet<Pos> = insts
.clone()
.filter_map(|(pos, inst)| match inst {
// `jr $ra`
Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Reg(basic::jmp::reg::Inst {
target: Register::Ra,
kind: basic::jmp::reg::Kind::Jump,
}))) => Some(pos),
_ => None,
})
.collect();
fn extend_one(&mut self, func: Func) {
self.0.extend_one(func);
}
// Get all possible tailcalls
let tailcalls: BTreeSet<Pos> = insts
.clone()
.filter_map(|(pos, inst)| match inst {
Inst::Basic(basic::Inst::Jmp(
// `j`
basic::jmp::Inst::Reg(basic::jmp::reg::Inst {
kind: basic::jmp::reg::Kind::Jump,
..
}) |
// `jr`
basic::jmp::Inst::Imm(basic::jmp::imm::Inst {
kind: basic::jmp::imm::Kind::Jump,
..
}),
)) => Some(pos),
_ => None,
})
.collect();
// Get all labels
let labels: BTreeSet<Pos> = insts
.clone()
.filter_map(|(pos, inst)| match inst {
// `j`
Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Imm(
inst @ basic::jmp::imm::Inst {
kind: basic::jmp::imm::Kind::Jump,
..
},
))) => Some(inst.target(pos)),
// Conditional jumps
Inst::Basic(basic::Inst::Cond(inst)) => Some(inst.target(pos)),
_ => None,
})
.filter(|target| insts_range.contains(target))
.collect();
// Now check every `Jal` and `Dw` for possible function entrances
let function_entries: BTreeSet<Pos> = insts
.filter_map(|(pos, inst)| match inst {
// `jar`
Inst::Basic(basic::Inst::Jmp(basic::jmp::Inst::Imm(
inst @ basic::jmp::imm::Inst {
kind: basic::jmp::imm::Kind::JumpLink,
..
},
))) if pos.0 % 4 == 0 => Some(inst.target(pos)),
// `dw`
Inst::Directive(Directive::Dw(address)) if address % 4 == 0 => Some(Pos(address)),
_ => None,
})
.filter(|target| insts_range.contains(target))
.filter(|&target| data_table.get_containing(target).is_none())
.collect();
let mut cur_funcs = BTreeSet::<Func>::new();
for (idx, &func_pos) in function_entries.iter().enumerate() {
// Try to get the end position from the returns
// Note: +8 for return + inst after.
let mut end_pos: Pos = returns.range(func_pos..).next().copied().unwrap_or(func_pos) + 8;
// If there's a function in between us and the return, use the last tailcall instead
if let Some(next_func_pos) = function_entries.range(func_pos + 4i32..end_pos).next() {
end_pos = tailcalls
.range(..next_func_pos)
.next_back()
.copied()
.unwrap_or(func_pos) + 8i32;
// If we got a tailcall before this function, just end it 2 insts
if end_pos <= func_pos {
end_pos = func_pos + 8i32;
}
}
// If this function would intersect any other, skip this one.
if cur_funcs
.range(..=func_pos)
.next_back()
.map_or(false, |func| func.end_pos > func_pos) ||
cur_funcs
.range(func_pos..)
.next()
.map_or(false, |func| func.start_pos < end_pos) ||
known_func_table
.range(..=func_pos)
.next_back()
.map_or(false, |func| func.end_pos > func_pos) ||
known_func_table
.range(func_pos..)
.next()
.map_or(false, |func| func.start_pos < end_pos)
{
continue;
}
// Get all labels within this function
// Note: We skip labels on the function location itself.
let labels = labels
.range((Bound::Excluded(func_pos), Bound::Excluded(end_pos)))
.enumerate()
.map(|(idx, &pos)| (pos, format!("{idx}")))
.collect();
let func = Func {
name: format!("func_{idx}"),
signature: "fn()".to_owned(),
desc: String::new(),
inline_comments: BTreeMap::new(),
comments: BTreeMap::new(),
labels,
start_pos: func_pos,
end_pos,
};
assert!(cur_funcs.insert(func));
}
cur_funcs.into_iter().collect()
fn extend_reserve(&mut self, additional: usize) {
self.0.extend_reserve(additional);
}
}
@@ -224,3 +87,9 @@ impl FromIterator<Func> for FuncTable {
Self(iter.into_iter().collect())
}
}
/// The default function table is the empty table.
impl Default for FuncTable {
/// Returns the same table as [`FuncTable::new`].
fn default() -> Self {
Self::new()
}
}

View File

@@ -18,7 +18,8 @@
unwrap_infallible,
min_type_alias_impl_trait,
external_doc,
assert_matches
assert_matches,
extend_one
)]
// Lints
#![warn(clippy::restriction, clippy::pedantic, clippy::nursery)]

View File

@@ -13,6 +13,9 @@ pub struct Pos(pub u32);
impl Pos {
/// Calculates the offset between two positions
///
/// # Panics
/// Panics if the result would be negative.
#[must_use]
pub fn offset_from(self, start_pos: Self) -> usize {
usize::try_from(self - start_pos).expect("Negative offset")

View File

@@ -1,33 +1,84 @@
//! Executable reader
//! Executable reader.
// Modules
pub mod error;
pub mod iter;
pub mod opts;
// Exports
pub use error::{DeserializeError, GetKnownError};
pub use error::DeserializeError;
pub use opts::DeserializeOpts;
// Imports
use crate::{inst, Data, DataTable, FuncTable, Header, Pos};
use crate::{inst, Data, DataTable, Func, FuncTable, Header, Pos};
use dcb_bytes::{ByteArray, Bytes};
use std::{convert::TryFrom, io, ops::Range};
/// The game executable
/// Executable reader
///
/// Serves to read all information from the executable,
/// decode it and provide an interface to retrieve data
/// and functions, including their instructions.
#[derive(Clone, Debug)]
pub struct ExeReader {
/// The executable header
header: Header,
/// All instruction bytes within the executable.
/// All bytes of the executable (excluding header.)
bytes: Box<[u8]>,
/// The data table.
/// Data table
data_table: DataTable,
/// The function table.
/// Function table
func_table: FuncTable,
}
// Constructors
impl ExeReader {
    /// Reads an executable from `file` and decodes it.
    ///
    /// The header is read and parsed first, then `header.size` bytes of
    /// contents are read. Data and functions found heuristically in those
    /// contents are added on top of the tables supplied through `opts`.
    ///
    /// # Options
    /// `opts` may carry existing data and function tables to start from; any
    /// table left as `None` starts out empty.
    ///
    /// # Errors
    /// Returns an error if the header or the contents cannot be read, or if
    /// the header cannot be parsed.
    ///
    /// # Panics
    /// Panics if `header.size` does not fit into a `usize`.
    pub fn deserialize<R: io::Read + io::Seek>(file: &mut R, opts: DeserializeOpts) -> Result<Self, DeserializeError> {
        // Header
        let mut header_bytes = [0u8; <<Header as Bytes>::ByteArray as ByteArray>::SIZE];
        file.read_exact(&mut header_bytes).map_err(DeserializeError::ReadHeader)?;
        let header = Header::from_bytes(&header_bytes).map_err(DeserializeError::ParseHeader)?;

        // Contents
        let len = usize::try_from(header.size).expect("Len didn't fit into `usize`");
        let mut bytes = vec![0u8; len].into_boxed_slice();
        file.read_exact(&mut *bytes).map_err(DeserializeError::ReadData)?;

        // Start from the supplied tables, falling back to empty ones
        let mut data_table = opts.data_table.unwrap_or_default();
        let mut func_table = opts.func_table.unwrap_or_default();

        // Run the heuristics over every decoded instruction
        let insts_range = header.start_pos..(header.start_pos + header.size);
        let insts = inst::DecodeIter::new(&*bytes, &data_table, &func_table, header.start_pos);
        let found_data = Data::search_instructions(insts_range.clone(), insts.clone());
        let found_funcs = Func::search_instructions(insts_range, insts, Some(&func_table), Some(&data_table));

        // And merge whatever they found into the tables
        data_table.extend(found_data);
        func_table.extend(found_funcs);

        Ok(Self {
            header,
            bytes,
            data_table,
            func_table,
        })
    }
}
// Getters
impl ExeReader {
/// Returns this executable's header
#[must_use]
@@ -53,7 +104,8 @@ impl ExeReader {
&self.func_table
}
/// Returns this executable's instruction range
/// Returns the range of positions of this executable's
/// instructions.
#[must_use]
pub fn insts_range(&self) -> Range<Pos> {
let start = self.header.start_pos;
@@ -61,21 +113,18 @@ impl ExeReader {
start..end
}
/// Creates an iterator over this executable
/// Creates an iterator over this executable's data and functions.
#[must_use]
pub const fn iter(&self) -> iter::Iter {
iter::Iter::new(self)
}
/// Returns a parsing iterator for all instructions
/// Returns an iterator that decodes instructions within a certain range.
///
/// # Panics
/// Panics if `range` is not a valid range within this executable.
#[must_use]
pub fn parse_iter(&self) -> inst::DecodeIter {
self.parse_iter_from(self.insts_range())
}
/// Returns a parsing iterator starting from a range
#[must_use]
pub fn parse_iter_from(&self, range: Range<Pos>) -> inst::DecodeIter {
pub fn decode_iter(&self, range: Range<Pos>) -> inst::DecodeIter {
let start = range.start.offset_from(self.header.start_pos);
let end = range.end.offset_from(self.header.start_pos);
let bytes = &self.bytes[start..end];
@@ -83,61 +132,3 @@ impl ExeReader {
inst::DecodeIter::new(bytes, &self.data_table, &self.func_table, range.start)
}
}
impl ExeReader {
/// Deserializes the executable from file
///
/// Reads and parses the header, reads `header.size` bytes of contents,
/// loads the known data and function tables and then extends them with
/// the heuristically discovered data and functions.
///
/// # Errors
/// Returns an error if the header or contents cannot be read, if the
/// header cannot be parsed, or if either known table cannot be loaded.
///
/// # Panics
/// Panics if `header.size` does not fit into a `usize`.
pub fn deserialize<R: io::Read + io::Seek>(file: &mut R) -> Result<Self, DeserializeError> {
// Read header
let mut header_bytes = [0u8; <<Header as Bytes>::ByteArray as ByteArray>::SIZE];
file.read_exact(&mut header_bytes)
.map_err(DeserializeError::ReadHeader)?;
let header = Header::from_bytes(&header_bytes).map_err(DeserializeError::ParseHeader)?;
// Get the instruction range
let insts_range = {
let start = header.start_pos;
let end = header.start_pos + header.size;
start..end
};
// Read all of the bytes
let mut bytes =
vec![0u8; usize::try_from(header.size).expect("Len didn't fit into `usize`")].into_boxed_slice();
file.read_exact(bytes.as_mut()).map_err(DeserializeError::ReadData)?;
// Read the known data and func table
let mut known_data_table = self::get_known_data_table().map_err(DeserializeError::KnownDataTable)?;
let known_func_table = FuncTable::get_known().map_err(DeserializeError::KnownFuncTable)?;
// Parse all instructions
// Note: The iterator borrows both known tables, so it must be consumed
//       before either of them is modified below.
let insts = inst::DecodeIter::new(&*bytes, &known_data_table, &known_func_table, header.start_pos);
// Then parse all heuristic tables
let heuristics_data = Data::search_instructions(insts_range.clone(), insts.clone());
let heuristics_func_table =
FuncTable::search_instructions(insts_range, insts, &known_func_table, &known_data_table);
// Merge the heuristics into the known tables
known_data_table.extend(heuristics_data);
let func_table = known_func_table.merge_with(heuristics_func_table);
Ok(Self {
header,
bytes,
data_table: known_data_table,
func_table,
})
}
}
/// Loads the known data table.
///
/// Game data is read from `resources/game_data.yaml` first, then foreign
/// data from `resources/foreign_data.yaml` is added on top of it.
fn get_known_data_table() -> Result<DataTable, GetKnownError> {
    let game_data: Vec<Data> = std::fs::File::open("resources/game_data.yaml")
        .map_err(GetKnownError::OpenGame)
        .and_then(|file| serde_yaml::from_reader(file).map_err(GetKnownError::ParseGame))?;

    let foreign_data: Vec<Data> = std::fs::File::open("resources/foreign_data.yaml")
        .map_err(GetKnownError::OpenForeign)
        .and_then(|file| serde_yaml::from_reader(file).map_err(GetKnownError::ParseForeign))?;

    let mut table = DataTable::new(game_data);
    table.extend(foreign_data);
    Ok(table)
}

View File

@@ -1,7 +1,7 @@
//! Errors
// Imports
use crate::{func, header};
use crate::header;
/// Error type for [`ExeReader::deserialize`](super::ExeReader::deserialize)
#[derive(Debug, thiserror::Error)]
@@ -21,32 +21,4 @@ pub enum DeserializeError {
/// Unable to read data
#[error("Unable to read data")]
ReadData(#[source] std::io::Error),
/// Unable to get known data
#[error("Unable to get known data table")]
KnownDataTable(#[source] GetKnownError),
/// Unable to get known func table
#[error("Unable to get known func table")]
KnownFuncTable(#[source] func::table::GetKnownError),
}
/// Error type for getting the known data table
///
/// Covers opening and parsing both the game data file and the foreign data file.
#[derive(Debug, thiserror::Error)]
pub enum GetKnownError {
/// Unable to open game data file
#[error("Unable to open game data file")]
OpenGame(#[source] std::io::Error),
/// Unable to parse game data file
#[error("Unable to parse game data file")]
ParseGame(#[source] serde_yaml::Error),
/// Unable to open foreign data file
#[error("Unable to open foreign data file")]
OpenForeign(#[source] std::io::Error),
/// Unable to parse foreign data file
#[error("Unable to parse foreign data file")]
ParseForeign(#[source] serde_yaml::Error),
}

View File

@@ -75,7 +75,7 @@ impl<'a> Iterator for Iter<'a> {
return Some(ExeItem::Data {
data,
insts: self.exe.parse_iter_from(cur_pos..end_pos),
insts: self.exe.decode_iter(cur_pos..end_pos),
});
}
@@ -84,7 +84,7 @@ impl<'a> Iterator for Iter<'a> {
self.cur_pos = func.end_pos;
return Some(ExeItem::Func {
func,
insts: self.exe.parse_iter_from(cur_pos..func.end_pos),
insts: self.exe.decode_iter(cur_pos..func.end_pos),
});
}
@@ -108,7 +108,7 @@ impl<'a> Iterator for Iter<'a> {
Some(ExeItem::Unknown {
insts: self.exe.parse_iter_from(cur_pos..end_pos),
insts: self.exe.decode_iter(cur_pos..end_pos),
})
}
}

View File

@@ -0,0 +1,14 @@
//! Deserialization options
// Imports
use crate::{DataTable, FuncTable};
/// Options for deserialization
///
/// Both fields are optional; the default value leaves both as `None`.
#[derive(Default, Debug)]
pub struct DeserializeOpts {
/// Existing data table to use
///
/// If `None`, deserialization starts from an empty data table.
pub data_table: Option<DataTable>,
/// Existing function table to use
///
/// If `None`, deserialization starts from an empty function table.
pub func_table: Option<FuncTable>,
}