Removed custom implementation of DataType.

Removed `DataType::AsciiChar` and added `DataType::AsciiStr`.
`inst::Inst::decode` now takes a data and func table.
This commit is contained in:
Filipe Rodrigues 2021-01-12 15:19:07 +00:00
parent fcd376c9a5
commit 29643a72db
8 changed files with 132 additions and 216 deletions

View File

@ -25,7 +25,7 @@ use dcb_io::GameFile;
use std::{
convert::TryFrom,
io::{Read, Seek, Write},
ops::{self, Range},
ops::Range,
};
/// The game executable
@ -90,8 +90,18 @@ impl Exe {
/// Returns a parsing iterator for all instructions
#[must_use]
pub const fn parse_iter(&self) -> inst::ParseIter {
inst::ParseIter::new(&*self.bytes, self.header.start_pos)
pub fn parse_iter(&self) -> inst::ParseIter {
self.parse_iter_from(self.insts_range())
}
/// Returns a parsing iterator over the instructions within the position range `range`
#[must_use]
pub fn parse_iter_from(&self, range: Range<Pos>) -> inst::ParseIter {
let start = range.start.offset_from(self.header.start_pos);
let end = range.end.offset_from(self.header.start_pos);
let bytes = &self.bytes[start..end];
inst::ParseIter::new(bytes, &self.data_table, &self.func_table, range.start)
}
}
@ -123,12 +133,12 @@ impl Exe {
let known_func_table = FuncTable::get_known().map_err(DeserializeError::KnownFuncTable)?;
// Parse all instructions
let insts = inst::ParseIter::new(&*bytes, header.start_pos);
let insts = inst::ParseIter::new(&*bytes, &known_data_table, &known_func_table, header.start_pos);
// Then parse all heuristic tables
let heuristics_data_table = DataTable::search_instructions(insts_range.clone(), insts.clone());
let data_table = known_data_table.merge_with(heuristics_data_table);
let heuristics_func_table = FuncTable::search_instructions(insts_range, insts);
let data_table = known_data_table.merge_with(heuristics_data_table);
let func_table = known_func_table.merge_with(heuristics_func_table);
Ok(Self {
@ -139,14 +149,3 @@ impl Exe {
})
}
}
impl ops::Index<Range<Pos>> for Exe {
type Output = [u8];
fn index(&self, index: Range<Pos>) -> &Self::Output {
let start = index.start.offset_from(self.header.start_pos);
let end = index.end.offset_from(self.header.start_pos);
&self.bytes[start..end]
}
}

View File

@ -129,7 +129,7 @@ impl DataTable {
name: format!("string_{idx}"),
desc: String::new(),
pos,
ty: DataType::Array { ty: Box::new(DataType::AsciiChar), len: string.len() },
ty: DataType::AsciiStr { len: string.len() },
},
Directive::Dw(_) => Data {
name: format!("data_w{idx}"),

View File

@ -1,33 +1,28 @@
//! Data types
// Imports
use ::std::fmt;
use ::serde::de::{Deserialize, Deserializer};
use serde::de::Visitor;
/// Data types
#[derive(PartialEq, Eq, Clone, Hash, Debug)]
#[derive(derive_more::Display)]
#[derive(serde::Serialize, serde::Deserialize)]
pub enum DataType {
/// Ascii string
#[display(fmt = "str")]
AsciiChar,
AsciiStr {
/// String length
len: usize,
},
/// Word
#[display(fmt = "u32")]
#[serde(rename = "u32")]
Word,
/// Half-word
#[display(fmt = "u16")]
#[serde(rename = "u16")]
HalfWord,
/// Byte
#[display(fmt = "u8")]
#[serde(rename = "u8")]
Byte,
/// Array
#[display(fmt = "[{ty}; {len}]")]
Array {
/// Array type
ty: Box<DataType>,
@ -44,129 +39,9 @@ impl DataType {
match self {
Self::Word => 4,
Self::HalfWord => 2,
Self::Byte | Self::AsciiChar => 1,
Self::Byte => 1,
Self::AsciiStr { len } => *len,
Self::Array { ty, len } => len * ty.size(),
}
}
}
/// Error for [`FromStr`](std::str::FromStr) impl.
#[derive(Debug, thiserror::Error)]
pub enum FromStrError {
/// Missing ']' in array.
#[error("Missing ']' after '[...'")]
MissingArraySuffix,
/// Missing array separator, ';'.
#[error("Missing ';' in array '[...;...]'")]
MissingArraySep,
/// Invalid array type
#[error("Invalid array type")]
InvalidArrayTy(#[source] Box<Self>),
/// Invalid array length
#[error("Invalid array length '{len}'")]
InvalidArrayLen {
/// Invalid length
len: String,
/// Underlying error
#[source]
err: std::num::ParseIntError,
},
/// Unknown type
#[error("Unknown type '{ty}'")]
UnknownTy {
/// The unknown type.
ty: String,
},
}
impl std::str::FromStr for DataType {
type Err = FromStrError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
// If it starts with '[', read it as an array
if let Some(s) = s.strip_prefix('[') {
// Strip the closing ']', then split at the last ';' (the first one found searching from the end).
let s = s.strip_suffix(']').ok_or(FromStrError::MissingArraySuffix)?;
let (ty, len) = s
.char_indices()
.rev()
.find_map(|(pos, c)| c.eq(&';').then(|| s.split_at(pos)))
.ok_or(FromStrError::MissingArraySep)?;
// Ignore the leading ';' on the second.
let len = &len[1..];
// Trim both strings
let ty = ty.trim();
let len = len.trim();
let ty = Self::from_str(ty).map_err(|err| FromStrError::InvalidArrayTy(Box::new(err)))?;
let ty = Box::new(ty);
let len = self::parse_usize(len).map_err(|err| FromStrError::InvalidArrayLen { len: len.to_owned(), err })?;
return Ok(Self::Array { ty, len });
}
// Else check the type
match s {
"AsciiChar" => Ok(Self::AsciiChar),
"u8" => Ok(Self::Byte),
"u16" => Ok(Self::HalfWord),
"u32" => Ok(Self::Word),
_ => Err(FromStrError::UnknownTy { ty: s.to_owned() }),
}
}
}
impl<'de> Deserialize<'de> for DataType {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_str(DataTypeVisitor)
}
}
/// Visitor
pub struct DataTypeVisitor;
impl<'de> Visitor<'de> for DataTypeVisitor {
type Value = DataType;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a data type")
}
fn visit_str<E>(self, value: &str) -> Result<DataType, E>
where
E: serde::de::Error,
{
value.parse().map_err(E::custom)
}
}
impl serde::Serialize for DataType {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_str(&self.to_string())
}
}
/// Helper function to parse a `usize` from a string in base 2 (`0b`), 8 (`0o`), 10 (no prefix) or 16 (`0x`).
pub fn parse_usize(s: &str) -> Result<usize, std::num::ParseIntError> {
let (s, base) = match s.trim().as_bytes() {
[b'0', b'x', rest @ ..] => (rest, 16),
[b'0', b'o', rest @ ..] => (rest, 8),
[b'0', b'b', rest @ ..] => (rest, 2),
s => (s, 10),
};
let s = std::str::from_utf8(s).expect("Failed to convert `str` -> `[u8]` -> `str`");
usize::from_str_radix(s, base)
}

View File

@ -38,6 +38,7 @@ pub use target::InstTarget;
// Imports
use self::{basic::Decodable as _, pseudo::Decodable as _};
use super::{DataTable, FuncTable};
use crate::Pos;
/// An assembler instruction.
@ -58,7 +59,10 @@ impl<'a> Inst<'a> {}
impl<'a> Inst<'a> {
/// Decodes an instruction from bytes and its position.
pub fn decode(pos: Pos, bytes: &'a [u8]) -> Option<Self> {
pub fn decode(pos: Pos, bytes: &'a [u8], data_table: &'a DataTable, _func_table: &'a FuncTable) -> Option<Self> {
// If there's data in this position, make sure to read the correct type
if let Some(_data) = data_table.get(pos) {}
// If we're not aligned to a word, decode a directive
if !pos.is_word_aligned() {
let directive = Directive::decode(pos, bytes)?;

View File

@ -70,46 +70,10 @@ pub enum ForceDecodeKind {
Byte,
}
impl<'a> Directive<'a> {
/*
/// Positions that should be force decoded using a specific variant.
// TODO: Get this at run-time via a file.
pub const FORCE_DECODE_RANGES: &'static [ForceDecodeRange] = &[
ForceDecodeRange {
start: Included(Pos(0x80010000)),
end: Excluded(Pos(0x80010008)),
kind: ForceDecodeKind::Word,
},
ForceDecodeRange {
start: Included(Pos(0x8006fa20)),
end: Excluded(Pos(0x8006fa24)),
kind: ForceDecodeKind::HalfWord,
},
ForceDecodeRange {
start: Included(Inst::CODE_END),
end: Unbounded,
kind: ForceDecodeKind::Word,
},
];
*/
}
impl<'a> Directive<'a> {
/// Decodes a directive
#[must_use]
pub fn decode(pos: Pos, bytes: &'a [u8]) -> Option<Self> {
/*
// Check if we need to force decode it
if let Some(ForceDecodeRange { kind, .. }) = Self::FORCE_DECODE_RANGES.iter().find(|range| range.contains(&pos)) {
#[rustfmt::skip]
return match kind {
ForceDecodeKind::Word => bytes.next_u32().map(Self::Dw),
ForceDecodeKind::HalfWord => bytes.next_u16().map(Self::Dh),
ForceDecodeKind::Byte => bytes.next_u8 ().map(Self::Db),
};
}
*/
// If we're not half-word aligned, read a byte
if !pos.is_half_word_aligned() {
return Some(Self::Db(bytes.next_u8()?));

View File

@ -2,7 +2,10 @@
// Imports
use super::{Inst, InstSize};
use crate::Pos;
use crate::{
exe::{DataTable, FuncTable},
Pos,
};
/// Parsing iterator.
///
@ -14,13 +17,24 @@ pub struct ParseIter<'a> {
/// Starting position of bytes
cur_pos: Pos,
/// Data table
data_table: &'a DataTable,
/// Func table
func_table: &'a FuncTable,
}
impl<'a> ParseIter<'a> {
/// Creates a new parsing iterator
#[must_use]
pub const fn new(bytes: &'a [u8], start_pos: Pos) -> Self {
Self { bytes, cur_pos: start_pos }
pub const fn new(bytes: &'a [u8], data_table: &'a DataTable, func_table: &'a FuncTable, start_pos: Pos) -> Self {
Self {
bytes,
cur_pos: start_pos,
data_table,
func_table,
}
}
/// Returns the current position of the iterator
@ -35,7 +49,7 @@ impl<'a> Iterator for ParseIter<'a> {
fn next(&mut self) -> Option<Self::Item> {
// Try to read an instruction
let inst = Inst::decode(self.cur_pos, self.bytes)?;
let inst = Inst::decode(self.cur_pos, self.bytes, self.data_table, self.func_table)?;
let pos = self.cur_pos;
// Then skip it in our bytes

View File

@ -76,7 +76,7 @@ impl<'a> Iterator for Iter<'a> {
return Some(ExeItem::Data {
data,
insts: ParseIter::new(&self.exe[cur_pos..end_pos], cur_pos),
insts: self.exe.parse_iter_from(cur_pos..end_pos),
});
}
@ -85,7 +85,7 @@ impl<'a> Iterator for Iter<'a> {
self.cur_pos = func.end_pos;
return Some(ExeItem::Func {
func,
insts: ParseIter::new(&self.exe[cur_pos..func.end_pos], cur_pos),
insts: self.exe.parse_iter_from(cur_pos..func.end_pos),
});
}
@ -109,7 +109,7 @@ impl<'a> Iterator for Iter<'a> {
Some(ExeItem::Unknown {
insts: ParseIter::new(&self.exe[cur_pos..end_pos], cur_pos),
insts: self.exe.parse_iter_from(cur_pos..end_pos),
})
}
}

View File

@ -2,19 +2,31 @@
- name: CrossMoveEffectStringsRaw
desc: The cross move effect string table with raw symbols
pos: 0x8006e47c
ty: "[u32; 16]"
ty:
Array:
ty: u32
len: 16
- name: CrossMoveEffectStrings
desc: The cross move effect string table with escaped symbols
pos: 0x8006e4bc
ty: "[u32; 16]"
ty:
Array:
ty: u32
len: 16
- name: PartnerEffectStrings
desc: Partner effects string table.
pos: 0x8006edb4
ty: "[u32; 82]"
ty:
Array:
ty: u32
len: 82
- name: CrossMoveEffectStrings
desc: The cross move effect string table with escaped symbols
pos: 0x8006e4bc
ty: "[u32; 18]"
ty:
Array:
ty: u32
len: 18
- name: HeapStartKuseg
desc: Stores the start of the heap in KUSEG (Minus 0x4).
@ -83,20 +95,32 @@
- name: FuncList1Ptr
desc: Pointer to FuncList1
pos: 0x80070aa8
ty: "[u32; 7]"
ty:
Array:
ty: u32
len: 7
# Expansion region 1
- name: ExpansionRegion1Header
pos: 0x1f00000
ty: "[u32; 0x40]"
ty:
Array:
ty: u32
len: 0x40
- name: ExpansionRegion1Data
pos: 0x1f00100
ty: "[u32; 0x1ffC0]"
ty:
Array:
ty: u32
len: 0x1ffC0
# Scratchpad
- name: Scratchpad
pos: 0x1f800000
ty: "[u32; 0x400]"
ty:
Array:
ty: u32
len: 0x400
# Memory control 1
- name: Expansion1BaseAddress
@ -193,31 +217,52 @@
- name: DMA_MDECin
desc: DMA0 channel 0
pos: 0x1f801080
ty: "[u32; 4]"
ty:
Array:
ty: u32
len: 4
- name: DMA_MDECout
desc: DMA1 channel 1
pos: 0x1f801090
ty: "[u32; 4]"
ty:
Array:
ty: u32
len: 4
- name: DMA_GPU
desc: DMA2 channel 2
pos: 0x1f8010a0
ty: "[u32; 4]"
ty:
Array:
ty: u32
len: 4
- name: DMA_CDROM
desc: DMA3 channel 3
pos: 0x1f8010b0
ty: "[u32; 4]"
ty:
Array:
ty: u32
len: 4
- name: DMA_SPU
desc: DMA4 channel 4
pos: 0x1f8010c0
ty: "[u32; 4]"
ty:
Array:
ty: u32
len: 4
- name: DMA_PIO
desc: DMA5 channel 5, Expansion port
pos: 0x1f8010d0
ty: "[u32; 4]"
ty:
Array:
ty: u32
len: 4
- name: DMA_OTC
desc: DMA6 channel 6, Reverse clear OT (GPU)
pos: 0x1f8010e0
ty: "[u32; 4]"
ty:
Array:
ty: u32
len: 4
- name: DPCR
desc: DMA Control register
pos: 0x1f8010f0
@ -273,7 +318,10 @@
# interleaved memory layout.
- name: SPU_Voices
pos: 0x1f801c00
ty: "[u32; 0x180]"
ty:
Array:
ty: u32
len: 0x180
# SPU Control registers
- name: SPU_MainVolumeLR
@ -429,17 +477,29 @@
- name: SPU_VoiceCurrentVolumeLR
desc: Voices current volume
pos: 0x1f801e00
ty: "[u32; 0x60]"
ty:
Array:
ty: u32
len: 0x60
- name: SPU_Unknown2
pos: 0x1f801e60
ty: "[u32; 0x4]"
ty:
Array:
ty: u32
len: 0x4
- name: SPU_Unknown3
pos: 0x1f801e80
ty: "[u32; 0x60]"
ty:
Array:
ty: u32
len: 0x60
# TODO: Expansion region 2
# BIOS Region
- name: BIOS
pos: 0x1fc00000
ty: "[u32; 0x20000]"
ty:
Array:
ty: u32
len: 0x20000