Revamped game::exe::{Data, DataTable}.

Renamed `MergeSortedIter` to `DiscardingSortedMergeIter`.
2026-02-09 03:40:23 +00:00 · 2020-10-30 07:09:31 +00:00
parent 998ef01b54
commit 0e5ea3967f
11 changed files with 394 additions and 347 deletions
--- a/dcb-tools/src/decompiler/main.rs
+++ b/dcb-tools/src/decompiler/main.rs
@@ -79,7 +79,7 @@ use anyhow::Context;
 use byteorder::{ByteOrder, LittleEndian};
 use dcb::{
 	game::exe::{
-		data::AllData,
+		data::DataTable,
 		func::Funcs,
 		instruction::{
 			Directive,
@@ -131,12 +131,11 @@ fn main() -> Result<(), anyhow::Error> {
 		.collect();

 	// Get all data
-	let data_pos: AllData<String> = AllData::known()
+	let data_pos: DataTable<String> = DataTable::known()
 		.into_string()
-		.merge(AllData::from_instructions(
+		.merge(DataTable::search_instructions(
 			instructions.iter().map(|(pos, instruction)| (*pos, instruction)),
-		))
-		.collect();
+		));

 	// Build the full instructions iterator
 	// TODO: Revamp this, iterate over an enum of `Func | Data | Other`
@@ -153,7 +152,7 @@ fn main() -> Result<(), anyhow::Error> {

 	// Read all instructions
 	let mut skipped_nops = 0;
-	for (cur_pos, instruction, last_instruction, cur_func, last_func) in full_iter {
+	for (cur_pos, instruction, last_instruction, cur_func, _last_func) in full_iter {
 		// Note: Required by `rust-analyzer` currently, it can't determine the type of `cur_func`.
 		let cur_func: Option<&dcb::game::exe::Func<String>> = cur_func;

@@ -203,7 +202,7 @@ fn main() -> Result<(), anyhow::Error> {
 			}
 		}
 		if let Some(data) = data_pos.get(cur_pos) {
-			if data.start_pos == cur_pos {
+			if data.pos == cur_pos {
 				println!("{}:", data.name);
 				println!("# {}", data.kind);
 				for description in data.desc.lines() {
@@ -257,7 +256,7 @@ fn main() -> Result<(), anyhow::Error> {
 			) => match functions
 				.get(Pos(*target))
 				.map(|func| (func.start_pos, &func.name))
-				.or_else(|| data_pos.get(Pos(*target)).map(|data| (data.start_pos, &data.name)))
+				.or_else(|| data_pos.get(Pos(*target)).map(|data| (data.pos, &data.name)))
 			{
 				Some((start_pos, name)) => {
 					if start_pos == Pos(*target) {
@@ -281,7 +280,7 @@ fn main() -> Result<(), anyhow::Error> {
 				print!(" # {}", func.name);
 			}
 			if let Some(data) = data_pos.get(Pos(*target)) {
-				if data.start_pos == Pos(*target) {
+				if data.pos == Pos(*target) {
 					print!(" # {}", data.name);
 				}
 			}
--- a/dcb/src/game.rs
+++ b/dcb/src/game.rs
@@ -23,5 +23,5 @@ pub mod validation;
 // Exports
 pub use card::{Digimon, Digivolve, Item, Table as CardTable};
 pub use deck::{Deck, Table as DeckTable};
-pub use exe::{Exe, Header as ExeHeader};
+pub use exe::{Exe, Header as ExeHeader, Pos as ExePos};
 pub use validation::{Validatable, Validation};
--- a/dcb/src/game/exe.rs
+++ b/dcb/src/game/exe.rs
@@ -12,7 +12,7 @@ pub mod instruction;
 pub mod pos;

 // Exports
-pub use data::Data;
+pub use data::{Data, DataKind, DataTable};
 pub use error::DeserializeError;
 pub use func::Func;
 pub use header::Header;
--- a/dcb/src/game/exe/data.rs
+++ b/dcb/src/game/exe/data.rs
@@ -1,15 +1,26 @@
-//! Executable data
+//! Executable data locations
+//!
+//! This module stores known data locations
+//! within the executable, as well as info on
+//! them, provided by the [`Data`] type.
+//!
+//! The full list of known data locations may
+//! be found at [`Data::known`].

 // Modules
-pub mod all_data;
+pub mod kind;
+pub mod known;
+pub mod table;

 // Exports
-pub use all_data::AllData;
+pub use kind::DataKind;
+pub use table::DataTable;

 // Imports
 use crate::game::exe::Pos;
+use std::borrow::Borrow;

-/// Executable data
+/// Data location
 #[derive(Clone, Debug)]
 #[derive(serde::Serialize, serde::Deserialize)]
 pub struct Data<S: AsRef<str>> {
@@ -20,7 +31,7 @@ pub struct Data<S: AsRef<str>> {
 	pub desc: S,

 	/// Start position
-	pub start_pos: Pos,
+	pub pos: Pos,

 	/// Data kind
 	pub kind: DataKind,
@@ -29,221 +40,57 @@ pub struct Data<S: AsRef<str>> {
 impl<S: AsRef<str>> Data<S> {
 	/// Returns the end position of this data
 	pub fn end_pos(&self) -> Pos {
-		self.start_pos + self.kind.size()
+		self.pos + self.kind.size()
 	}
 }

-/// Data kind
-#[derive(Clone, Debug)]
-#[derive(serde::Serialize, serde::Deserialize)]
-#[derive(derive_more::Display)]
-pub enum DataKind {
-	/// Ascii string
-	// TODO: Maybe somehow get rid of the length?
-	#[display(fmt = "str")]
-	AsciiStr {
-		/// String length
-		len: u32,
-	},
-
-	/// Word
-	#[display(fmt = "u32")]
-	Word,
-
-	/// Half-word
-	#[display(fmt = "u16")]
-	HalfWord,
-
-	/// Byte
-	#[display(fmt = "u8")]
-	Byte,
-
-	/// Array
-	#[display(fmt = "[{ty}; {len}]")]
-	Array {
-		/// Array type
-		ty: Box<DataKind>,
-
-		/// Array length
-		len: u32,
-	},
-}
-
-impl DataKind {
-	/// Returns the size of this data kind
-	#[must_use]
-	pub fn size(&self) -> u32 {
-		match self {
-			Self::AsciiStr { len } => len + 4 - (len % 4),
-			Self::Word => 4,
-			Self::HalfWord => 2,
-			Self::Byte => 1,
-			Self::Array { ty, len } => ty.size() * len,
+#[allow(clippy::use_self)] // False positive
+impl<S: AsRef<str> + Into<String>> Data<S> {
+	/// Returns this data with owned `String`s.
+	pub fn into_string(self) -> Data<String> {
+		Data {
+			name: self.name.into(),
+			desc: self.desc.into(),
+			pos:  self.pos,
+			kind: self.kind,
 		}
 	}
 }

-impl<S: AsRef<str>> std::borrow::Borrow<Pos> for Data<S> {
+impl<S: AsRef<str>> Borrow<Pos> for Data<S> {
 	fn borrow(&self) -> &Pos {
-		&self.start_pos
+		&self.pos
 	}
 }

+/// Two data locations are equal if their position is the same.
 impl<S: AsRef<str>> PartialEq for Data<S> {
 	fn eq(&self, other: &Self) -> bool {
-		// Only compare the start position
-		self.start_pos.eq(&other.start_pos)
+		self.pos.eq(&other.pos)
 	}
 }

 impl<S: AsRef<str>> Eq for Data<S> {}

+/// Only the position is hashed, just as in the [`PartialEq`] impl.
 impl<S: AsRef<str>> std::hash::Hash for Data<S> {
 	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-		self.start_pos.hash(state);
+		self.pos.hash(state);
 	}
 }

+/// Only the position matters for the order
 impl<S: AsRef<str>> PartialOrd for Data<S> {
 	fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
 		// Delegate to `eq` since we have a total order.
 		Some(self.cmp(other))
 	}
 }
+
+/// Only the position matters for the order
 impl<S: AsRef<str>> Ord for Data<S> {
 	fn cmp(&self, other: &Self) -> std::cmp::Ordering {
 		// Only compare the start position
-		self.start_pos.cmp(&other.start_pos)
-	}
-}
-
-impl Data<&'static str> {
-	/// Returns an iterator of all known data
-	#[allow(clippy::too_many_lines)] // This will be big, as it's the list of ALL known data
-	pub fn known() -> impl Iterator<Item = Self> {
-		std::array::IntoIter::new([
-			Self {
-				name:      "StackTop",
-				desc:      "Stack top address",
-				start_pos: Pos(0x8006dd44),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "StackSize",
-				desc:      "Stack size",
-				start_pos: Pos(0x8006dd48),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "ZeroStart",
-				desc:      "Start of the zero section in `start`",
-				start_pos: Pos(0x80077a08),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "HeapStart",
-				desc:      "Start of the heap",
-				start_pos: Pos(0x801ddf38),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "something1_data1",
-				desc:      "",
-				start_pos: Pos(0x8006f984),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "something1_data2",
-				desc:      "",
-				start_pos: Pos(0x80010000),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "something5_data1",
-				desc:      "",
-				start_pos: Pos(0x8006fa20),
-				kind:      DataKind::HalfWord,
-			},
-			Self {
-				name:      "I_STAT_PTR",
-				desc:      "",
-				start_pos: Pos(0x80070aac),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "I_MASK_PTR",
-				desc:      "",
-				start_pos: Pos(0x80070ab0),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "DPCR_PTR",
-				desc:      "",
-				start_pos: Pos(0x80070ab4),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "something5_data5",
-				desc:      "",
-				start_pos: Pos(0x8006fa5c),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "FuncList1",
-				desc:      "",
-				start_pos: Pos(0x80070a88),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "FuncList1Ptr",
-				desc:      "Pointer to FuncList1",
-				start_pos: Pos(0x80070aa8),
-				kind:      DataKind::Word,
-			},
-			// Hardware registers
-			// 0x1f80_1000 - 0x1f80_2fff
-			Self {
-				name:      "I_STAT",
-				desc:      "Interrupt status register",
-				start_pos: Pos(0x1f801070),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "I_MASK",
-				desc:      "Interrupt mask register",
-				start_pos: Pos(0x1f801074),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "DPCR",
-				desc:      "DMA Control register",
-				start_pos: Pos(0x1f8010f0),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "DICR",
-				desc:      "DMA Interrupt register",
-				start_pos: Pos(0x1f8010f4),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "Timer0",
-				desc:      "",
-				start_pos: Pos(0x1f801100),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "Timer1",
-				desc:      "",
-				start_pos: Pos(0x1f801110),
-				kind:      DataKind::Word,
-			},
-			Self {
-				name:      "Timer2",
-				desc:      "",
-				start_pos: Pos(0x1f801120),
-				kind:      DataKind::Word,
-			},
-		])
+		self.pos.cmp(&other.pos)
 	}
 }
--- a/dcb/src/game/exe/data/all_data.rs
+++ b/dcb/src/game/exe/data/all_data.rs
@@ -1,140 +0,0 @@
-//! Data list
-
-// Imports
-use super::{Data, DataKind};
-use crate::{
-	game::exe::{
-		instruction::{Directive, PseudoInstruction},
-		Instruction, Pos,
-	},
-	util::merge_iter::MergeSortedIter,
-};
-use std::{
-	collections::{btree_set, BTreeSet},
-	iter::FromIterator,
-};
-
-/// List of data
-pub struct AllData<S: AsRef<str>>(BTreeSet<Data<S>>);
-
-impl<S: AsRef<str>> FromIterator<Data<S>> for AllData<S> {
-	fn from_iter<T: IntoIterator<Item = Data<S>>>(iter: T) -> Self {
-		Self(iter.into_iter().collect())
-	}
-}
-
-impl<S: AsRef<str>> AllData<S> {
-	/// Merges two function lists, discarding any duplicates
-	/// from `other`.
-	#[must_use]
-	pub fn merge(self, other: Self) -> MergeSortedIter<Data<S>, btree_set::IntoIter<Data<S>>, btree_set::IntoIter<Data<S>>> {
-		MergeSortedIter::new(self.0.into_iter(), other.0.into_iter())
-	}
-
-	/// Retrieves the closest data section to `pos`, searching
-	/// from `pos` backwards.
-	#[must_use]
-	pub fn get(&self, pos: Pos) -> Option<&Data<S>> {
-		self.0.range(..=pos).next_back()
-	}
-}
-
-#[allow(clippy::use_self)] // We're not using `AllData<S>`, but `AllData<String>`
-impl<S: AsRef<str> + Into<String>> AllData<S> {
-	/// Converts all strings to `String`.
-	#[must_use]
-	pub fn into_string(self) -> AllData<String> {
-		AllData(
-			self.0
-				.into_iter()
-				.map(|data| Data {
-					name:      data.name.into(),
-					desc:      data.desc.into(),
-					start_pos: data.start_pos,
-					kind:      data.kind,
-				})
-				.collect(),
-		)
-	}
-}
-
-
-impl AllData<&'static str> {
-	/// Returns all known functions
-	#[must_use]
-	pub fn known() -> Self {
-		Self(Data::known().collect())
-	}
-}
-
-
-impl AllData<String> {
-	/// Creates a new list of data from an iterator over instructions
-	#[must_use]
-	pub fn from_instructions<'a>(instructions: impl Iterator<Item = (Pos, &'a Instruction)> + Clone) -> Self {
-		// Get all directive references
-		let directive_references: BTreeSet<Pos> = instructions
-			.clone()
-			.filter_map(|(_, instruction)| match instruction {
-				Instruction::Pseudo(
-					PseudoInstruction::La { target: offset, .. } |
-					PseudoInstruction::Li32 { imm: offset, .. } |
-					PseudoInstruction::LbImm { offset, .. } |
-					PseudoInstruction::LbuImm { offset, .. } |
-					PseudoInstruction::LhImm { offset, .. } |
-					PseudoInstruction::LhuImm { offset, .. } |
-					PseudoInstruction::LwlImm { offset, .. } |
-					PseudoInstruction::LwImm { offset, .. } |
-					PseudoInstruction::LwrImm { offset, .. } |
-					PseudoInstruction::SbImm { offset, .. } |
-					PseudoInstruction::ShImm { offset, .. } |
-					PseudoInstruction::SwlImm { offset, .. } |
-					PseudoInstruction::SwImm { offset, .. } |
-					PseudoInstruction::SwrImm { offset, .. },
-				) |
-				Instruction::Directive(Directive::Dw(offset)) => Some(Pos(*offset)),
-				_ => None,
-			})
-			.collect();
-
-		Self(
-			instructions
-				.filter_map(|(pos, instruction)| match instruction {
-					Instruction::Directive(directive) if directive_references.contains(&pos) => Some((pos, directive)),
-					_ => None,
-				})
-				.zip(0..)
-				.map(|((pos, directive), idx)| {
-					#[allow(clippy::as_conversions, clippy::cast_possible_truncation)] // All strings will fit into a `u32`
-					match directive {
-						Directive::Ascii(ascii) => Data {
-							name:      format!("string_{idx}"),
-							desc:      String::new(),
-							start_pos: pos,
-							kind:      DataKind::AsciiStr { len: ascii.len() as u32 },
-						},
-
-						Directive::Dw(_) => Data {
-							name:      format!("w{idx}"),
-							desc:      String::new(),
-							start_pos: pos,
-							kind:      DataKind::Word,
-						},
-						Directive::Dh(_) => Data {
-							name:      format!("h{idx}"),
-							desc:      String::new(),
-							start_pos: pos,
-							kind:      DataKind::HalfWord,
-						},
-						Directive::Db(_) => Data {
-							name:      format!("b{idx}"),
-							desc:      String::new(),
-							start_pos: pos,
-							kind:      DataKind::Byte,
-						},
-					}
-				})
-				.collect(),
-		)
-	}
-}
--- a/dcb/src/game/exe/data/kind.rs
+++ b/dcb/src/game/exe/data/kind.rs
@@ -0,0 +1,54 @@
+//! Data kind
+//!
+//! Every piece of data within the executable
+//! may have a certain kind, an ascii string,
+//! a table of words, a single byte, etc.
+
+/// Data kind
+#[derive(PartialEq, Eq, Clone, Hash, Debug)]
+#[derive(serde::Serialize, serde::Deserialize)]
+#[derive(derive_more::Display)]
+pub enum DataKind {
+	/// Ascii string
+	#[display(fmt = "str")]
+	AsciiStr {
+		/// String length
+		len: u32,
+	},
+
+	/// Word
+	#[display(fmt = "u32")]
+	Word,
+
+	/// Half-word
+	#[display(fmt = "u16")]
+	HalfWord,
+
+	/// Byte
+	#[display(fmt = "u8")]
+	Byte,
+
+	/// Array
+	#[display(fmt = "[{ty}; {len}]")]
+	Array {
+		/// Array type
+		ty: Box<DataKind>,
+
+		/// Array length
+		len: u32,
+	},
+}
+
+impl DataKind {
+	/// Returns the size of this data kind
+	#[must_use]
+	pub fn size(&self) -> u32 {
+		match self {
+			Self::AsciiStr { len } => len + 4 - (len % 4),
+			Self::Word => 4,
+			Self::HalfWord => 2,
+			Self::Byte => 1,
+			Self::Array { ty, len } => ty.size() * len,
+		}
+	}
+}
--- a/dcb/src/game/exe/data/known.rs
+++ b/dcb/src/game/exe/data/known.rs
@@ -0,0 +1,141 @@
+//! Known data locations
+//!
+//! This module stores the [`Data::known`] function
+//! that returns all known data locations.
+//!
+//! It is a separate module, as the known data locations
+//! occupy a large amount of space.
+
+// Imports
+use super::{Data, DataKind, Pos};
+
+impl Data<&'static str> {
+	/// Returns an iterator of all known data
+	#[allow(clippy::too_many_lines)] // This will be big, as it's the list of ALL known data
+	pub fn known() -> impl Iterator<Item = Self> {
+		std::array::IntoIter::new([
+			Self {
+				name: "StackTop",
+				desc: "Stack top address",
+				pos:  Pos(0x8006dd44),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "StackSize",
+				desc: "Stack size",
+				pos:  Pos(0x8006dd48),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "ZeroStart",
+				desc: "Start of the zero section in `start`",
+				pos:  Pos(0x80077a08),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "HeapStart",
+				desc: "Start of the heap",
+				pos:  Pos(0x801ddf38),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "something1_data1",
+				desc: "",
+				pos:  Pos(0x8006f984),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "something1_data2",
+				desc: "",
+				pos:  Pos(0x80010000),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "something5_data1",
+				desc: "",
+				pos:  Pos(0x8006fa20),
+				kind: DataKind::HalfWord,
+			},
+			Self {
+				name: "I_STAT_PTR",
+				desc: "",
+				pos:  Pos(0x80070aac),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "I_MASK_PTR",
+				desc: "",
+				pos:  Pos(0x80070ab0),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "DPCR_PTR",
+				desc: "",
+				pos:  Pos(0x80070ab4),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "something5_data5",
+				desc: "",
+				pos:  Pos(0x8006fa5c),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "FuncList1",
+				desc: "",
+				pos:  Pos(0x80070a88),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "FuncList1Ptr",
+				desc: "Pointer to FuncList1",
+				pos:  Pos(0x80070aa8),
+				kind: DataKind::Word,
+			},
+			// Hardware registers
+			// 0x1f80_1000 - 0x1f80_2fff
+			Self {
+				name: "I_STAT",
+				desc: "Interrupt status register",
+				pos:  Pos(0x1f801070),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "I_MASK",
+				desc: "Interrupt mask register",
+				pos:  Pos(0x1f801074),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "DPCR",
+				desc: "DMA Control register",
+				pos:  Pos(0x1f8010f0),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "DICR",
+				desc: "DMA Interrupt register",
+				pos:  Pos(0x1f8010f4),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "Timer0",
+				desc: "",
+				pos:  Pos(0x1f801100),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "Timer1",
+				desc: "",
+				pos:  Pos(0x1f801110),
+				kind: DataKind::Word,
+			},
+			Self {
+				name: "Timer2",
+				desc: "",
+				pos:  Pos(0x1f801120),
+				kind: DataKind::Word,
+			},
+		])
+	}
+}
--- a/dcb/src/game/exe/data/table.rs
+++ b/dcb/src/game/exe/data/table.rs
@@ -0,0 +1,145 @@
+//! Data table
+//!
+//! This module defines the [`DataTable`] type, which
+//! stores all data within the executable.
+//!
+//! Typically this data will be a mix of the known data,
+//! available through [`DataTable::known`] and heuristically
+//! discovered data through instruction references, available
+//! through [`DataTable::search_instructions`].
+
+// Imports
+use super::{Data, DataKind};
+use crate::{
+	game::exe::{
+		instruction::{Directive, PseudoInstruction},
+		Instruction, Pos,
+	},
+	util::DiscardingSortedMergeIter,
+};
+use std::{collections::BTreeSet, convert::TryInto, iter::FromIterator};
+
+/// Data table
+///
+/// Stores all data locations sorted by their address, unique per position.
+/// NOTE(review): documented as non-overlapping too, but no overlap check is visible here — confirm.
+pub struct DataTable<S: AsRef<str>>(BTreeSet<Data<S>>);
+
+impl<S: AsRef<str>> DataTable<S> {
+	/// Merges two data tables, discarding duplicates from `other`.
+	///
+	/// This can be useful when combining known data and heuristically
+	/// discovered data, as the known entries in `self` are always kept, and
+	/// the duplicate discovered ones from `other` are discarded.
+	#[must_use]
+	pub fn merge(self, other: Self) -> Self {
+		// Note: We don't return the iterator, as we want the user to
+		//       keep the guarantees supplied by this type.
+		DiscardingSortedMergeIter::new(self.0.into_iter(), other.0.into_iter()).collect()
+	}
+
+	/// Retrieves the data location containing `pos`, i.e. `data.pos <= pos < data.end_pos()`
+	#[must_use]
+	pub fn get(&self, pos: Pos) -> Option<&Data<S>> {
+		// Closest entry starting at or before `pos`; `end_pos` is one past the last byte, hence `<`
+		self.0.range(..=pos).next_back().filter(|data| pos < data.end_pos())
+	}
+}
+
+#[allow(clippy::use_self)] // False positive
+impl<S: AsRef<str> + Into<String>> DataTable<S> {
+	/// Converts this table to use owned strings.
+	// TODO: Replace this with a impl<S: AsRef<str>> From<DataTable<S>> for DataTable<String> impl
+	//       once specialization is around.
+	#[must_use]
+	pub fn into_string(self) -> DataTable<String> {
+		DataTable(self.0.into_iter().map(Data::into_string).collect())
+	}
+}
+
+impl<S: AsRef<str>> FromIterator<Data<S>> for DataTable<S> {
+	fn from_iter<T: IntoIterator<Item = Data<S>>>(iter: T) -> Self {
+		Self(iter.into_iter().collect())
+	}
+}
+
+impl DataTable<&'static str> {
+	/// Returns all known data locations
+	///
+	/// Alias for `Data::known().collect()`.
+	#[must_use]
+	pub fn known() -> Self {
+		Data::known().collect()
+	}
+}
+
+
+impl DataTable<String> {
+	/// Searches all instructions for references to
+	/// executable data using certain heuristics.
+	#[must_use]
+	pub fn search_instructions<'a>(instructions: impl Iterator<Item = (Pos, &'a Instruction)> + Clone) -> Self {
+		// Get all possible references to data
+		let data_references: BTreeSet<Pos> = instructions
+			.clone()
+			.filter_map(|(_, instruction)| match instruction {
+				Instruction::Pseudo(
+					PseudoInstruction::La { target: offset, .. } |
+					PseudoInstruction::Li32 { imm: offset, .. } |
+					PseudoInstruction::LbImm { offset, .. } |
+					PseudoInstruction::LbuImm { offset, .. } |
+					PseudoInstruction::LhImm { offset, .. } |
+					PseudoInstruction::LhuImm { offset, .. } |
+					PseudoInstruction::LwlImm { offset, .. } |
+					PseudoInstruction::LwImm { offset, .. } |
+					PseudoInstruction::LwrImm { offset, .. } |
+					PseudoInstruction::SbImm { offset, .. } |
+					PseudoInstruction::ShImm { offset, .. } |
+					PseudoInstruction::SwlImm { offset, .. } |
+					PseudoInstruction::SwImm { offset, .. } |
+					PseudoInstruction::SwrImm { offset, .. },
+				) |
+				Instruction::Directive(Directive::Dw(offset)) => Some(Pos(*offset)),
+				_ => None,
+			})
+			.collect();
+
+		// Then filter the instructions for data locations.
+		instructions
+			// Filter all non-directives
+			.filter_map(|(pos, instruction)| match instruction {
+				Instruction::Directive(directive) if data_references.contains(&pos) => Some((pos, directive)),
+				_ => None,
+			})
+			.zip(0..)
+			.map(|((pos, directive), idx)| {
+				match directive {
+					Directive::Ascii(ascii) => Data {
+						name: format!("string_{idx}"),
+						desc: String::new(),
+						pos,
+						kind: DataKind::AsciiStr { len: ascii.len().try_into().expect("String length didn't fit into a `u32`") },
+					},
+					Directive::Dw(_) => Data {
+						name: format!("data_w{idx}"),
+						desc: String::new(),
+						pos,
+						kind: DataKind::Word,
+					},
+					Directive::Dh(_) => Data {
+						name: format!("data_h{idx}"),
+						desc: String::new(),
+						pos,
+						kind: DataKind::HalfWord,
+					},
+					Directive::Db(_) => Data {
+						name: format!("data_b{idx}"),
+						desc: String::new(),
+						pos,
+						kind: DataKind::Byte,
+					},
+				}
+			})
+			.collect()
+	}
+}
--- a/dcb/src/game/exe/func/funcs.rs
+++ b/dcb/src/game/exe/func/funcs.rs
@@ -7,7 +7,7 @@ use crate::{
 		instruction::{Directive, PseudoInstruction, Register, SimpleInstruction},
 		Instruction, Pos,
 	},
-	util::merge_iter::MergeSortedIter,
+	util::discarding_sorted_merge_iter::DiscardingSortedMergeIter,
 };
 use maplit::hashmap;
 use std::{collections::BTreeSet, iter::FromIterator, vec};
@@ -25,8 +25,8 @@ impl<S: AsRef<str>> Funcs<S> {
 	/// Merges two function lists, discarding any duplicates
 	/// from `other`.
 	#[must_use]
-	pub fn merge(self, other: Self) -> MergeSortedIter<Func<S>, vec::IntoIter<Func<S>>, vec::IntoIter<Func<S>>> {
-		MergeSortedIter::new(self.0.into_iter(), other.0.into_iter())
+	pub fn merge(self, other: Self) -> DiscardingSortedMergeIter<Func<S>, vec::IntoIter<Func<S>>, vec::IntoIter<Func<S>>> {
+		DiscardingSortedMergeIter::new(self.0.into_iter(), other.0.into_iter())
 	}

 	/// Adapts an instruction iterator to extract the current function
--- a/dcb/src/util.rs
+++ b/dcb/src/util.rs
@@ -11,11 +11,12 @@ pub mod array_split;
 pub mod null_ascii_string;
 #[macro_use]
 pub mod impl_bytes;
-pub mod merge_iter;
+pub mod discarding_sorted_merge_iter;
 pub mod signed_hex;

 // Exports
 pub use array_split::{array_split, array_split_mut};
+pub use discarding_sorted_merge_iter::DiscardingSortedMergeIter;
 pub use signed_hex::SignedHex;

 /// Returns the absolute different between `a` and `b`, `a - b` as a `i64`.
--- a/dcb/src/util/discarding_sorted_merge_iter.rs
+++ b/dcb/src/util/discarding_sorted_merge_iter.rs
@@ -7,7 +7,7 @@ use std::cmp::Ordering;
 /// Merging sorted iterator
 ///
 /// Will discard duplicate items.
-pub struct MergeSortedIter<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T>> {
+pub struct DiscardingSortedMergeIter<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T>> {
 	/// Left iterator
 	lhs: Li,

@@ -18,7 +18,7 @@ pub struct MergeSortedIter<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T
 	last: Option<Either<T, T>>,
 }

-impl<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T>> MergeSortedIter<T, Li, Ri> {
+impl<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T>> DiscardingSortedMergeIter<T, Li, Ri> {
 	/// Creates a new merging iterator
 	pub fn new(lhs: Li, rhs: Ri) -> Self {
 		Self { lhs, rhs, last: None }
@@ -44,7 +44,7 @@ impl<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T>> MergeSortedIter<T,
 	}
 }

-impl<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T>> Iterator for MergeSortedIter<T, Li, Ri> {
+impl<T: Ord, Li: Iterator<Item = T>, Ri: Iterator<Item = T>> Iterator for DiscardingSortedMergeIter<T, Li, Ri> {
 	type Item = T;

 	fn next(&mut self) -> Option<Self::Item> {