From ec0c862bc5d237142c06b71277da4fddaf0dc079 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Tue, 25 Oct 2022 00:29:58 +0100 Subject: [PATCH] `ddw3-uniso` can now generate mkpsxiso-compatible xml files for building. --- tools/Cargo.lock | 7 ++ tools/ddw3-uniso/Cargo.toml | 3 + tools/ddw3-uniso/src/args.rs | 9 ++ tools/ddw3-uniso/src/main.rs | 229 +++++++++++++++++++++++++++++++---- 4 files changed, 226 insertions(+), 22 deletions(-) diff --git a/tools/Cargo.lock b/tools/Cargo.lock index 006e8d949..fdd0a00cd 100644 --- a/tools/Cargo.lock +++ b/tools/Cargo.lock @@ -183,6 +183,7 @@ dependencies = [ "serde", "serde_yaml", "tracing", + "xml-rs", ] [[package]] @@ -699,3 +700,9 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "xml-rs" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" diff --git a/tools/ddw3-uniso/Cargo.toml b/tools/ddw3-uniso/Cargo.toml index b88600bf7..fb36b5f70 100755 --- a/tools/ddw3-uniso/Cargo.toml +++ b/tools/ddw3-uniso/Cargo.toml @@ -34,3 +34,6 @@ derive_more = "0.99.17" # Serde serde = { version = "1.0.0", features = ["derive"] } serde_yaml = "0.9.13" + +# Xml +xml-rs = "0.8.4" diff --git a/tools/ddw3-uniso/src/args.rs b/tools/ddw3-uniso/src/args.rs index 37f076580..16a57b283 100755 --- a/tools/ddw3-uniso/src/args.rs +++ b/tools/ddw3-uniso/src/args.rs @@ -26,4 +26,13 @@ pub struct Args { /// might yield additional entries not reached through the path table. #[clap(long = "path-table-recurse", requires = "use_path_table")] pub path_table_recurse: bool, + + /// Don't actually create any files + #[clap(long = "dry-run")] + pub dry_run: bool, + + /// Creates a `mkpsxiso`-compatible `xml` file that allows rebuilding the + /// extracted iso. 
+ #[clap(long = "mkpsxiso-xml")] + pub create_mkpsxiso_xml: Option<PathBuf>, } diff --git a/tools/ddw3-uniso/src/main.rs b/tools/ddw3-uniso/src/main.rs index 3bc7d92ae..cac7d85b4 100755 --- a/tools/ddw3-uniso/src/main.rs +++ b/tools/ddw3-uniso/src/main.rs @@ -1,5 +1,7 @@ //! Iso extractor from `.bin` files. +// TODO: Allow dumping the license? + // Features #![feature(unwrap_infallible)] @@ -8,17 +10,18 @@ mod args; // Imports use { + self::args::Args, anyhow::Context, - args::Args, clap::Parser, ddw3_iso9660::{path_table::PathTableReader, string::FileStrWithoutVersion, Dir, FilesystemReader}, ddw3_util::IoSlice, std::{ collections::HashMap, fs, - io, + io::{self, BufWriter}, path::{Path, PathBuf}, }, + xml::{common::XmlVersion, writer::XmlEvent}, }; fn main() -> Result<(), anyhow::Error> { @@ -44,6 +47,7 @@ fn main() -> Result<(), anyhow::Error> { let fs_reader = FilesystemReader::new(&mut input_file).context("Unable to create filesystem reader")?; // Check if we should extract using the root directory or the path table + let mut extracted_root_dir = ExtractedDir::new(); match args.use_path_table { // Extract using the root directory false => { @@ -52,8 +56,15 @@ fn main() -> Result<(), anyhow::Error> { .root_dir() .read_dir(&mut input_file) .context("Unable to read root directory entry")?; - self::extract_dir(&root_dir, &mut input_file, &output_dir, true) - .context("Unable to extract root directory")? + self::extract_dir( + &root_dir, + &mut input_file, + &output_dir, + true, + args.dry_run, + &mut extracted_root_dir, + ) + .context("Unable to extract root directory")? }, // Extract using the path table @@ -65,11 +76,98 @@ fn main() -> Result<(), anyhow::Error> { primary_volume.path_table_location, primary_volume.path_table_size, args.path_table_recurse, + args.dry_run, + &mut extracted_root_dir, ) .context("Unable to extract path table")?
}, } + // Then create the mkpsxiso xml if requested + if let Some(xml_path) = args.create_mkpsxiso_xml { + self::create_mkpsxiso_xml(&xml_path, &fs_reader, &extracted_root_dir) + .context("Unable to output mkpsxiso xml")?; + } + + Ok(()) +} + +fn create_mkpsxiso_xml( + xml_path: &Path, + fs_reader: &FilesystemReader, + extracted_root_dir: &ExtractedDir, +) -> Result<(), anyhow::Error> { + let primary_volume = fs_reader.primary_volume_descriptor(); + + let xml_file = fs::File::create(xml_path).context("Unable to create output file")?; + let xml_file = BufWriter::new(xml_file); + let mut xml = xml::EventWriter::new(xml_file); + + xml.write(XmlEvent::StartDocument { + version: XmlVersion::Version10, + encoding: Some("UTF-8"), + standalone: None, + })?; + + xml.write(XmlEvent::start_element("iso_project"))?; + { + xml.write(XmlEvent::start_element("track").attr("type", "data"))?; + { + xml.write( + XmlEvent::start_element("identifiers") + .attr("system", &primary_volume.system_id.as_lossy_str()) + .attr("application", &primary_volume.application_id.as_lossy_str()) + .attr("volume", &primary_volume.volume_id.as_lossy_str()) + .attr("volume_set", &primary_volume.volume_set_id.as_lossy_str()) + .attr("publisher", &primary_volume.publisher_id.as_lossy_str()) + .attr("data_preparer", &primary_volume.data_preparer_id.as_lossy_str()) + .attr("copyright", &primary_volume.copyright_file_id.as_lossy_str()), + )?; + xml.write(XmlEvent::end_element())?; + + xml.write(XmlEvent::start_element("directory_tree"))?; + { + fn visit_directory_tree( + xml: &mut xml::EventWriter<impl io::Write>, + dir: &ExtractedDir, + ) -> xml::writer::Result<()> { + // TODO: This doesn't account for interleaved entries from different directories + let mut entries = dir.entries.iter().collect::<Vec<_>>(); + entries.sort_by_key(|(_, entry)| match *entry { + ExtractedDirEntry::Dir { sector_pos, .. } => sector_pos, + ExtractedDirEntry::File { sector_pos, ..
} => sector_pos, + }); + + for (name, entry) in entries { + match entry { + ExtractedDirEntry::Dir { dir, .. } => { + xml.write(XmlEvent::start_element("dir").attr("name", name))?; + visit_directory_tree(xml, dir)?; + xml.write(XmlEvent::end_element())?; + }, + ExtractedDirEntry::File { output, .. } => { + xml.write( + XmlEvent::start_element("file") + .attr("name", name) + .attr("type", "data") + .attr("source", &output.to_string_lossy()), + )?; + xml.write(XmlEvent::end_element())?; + }, + } + } + + Ok(()) + } + + visit_directory_tree(&mut xml, extracted_root_dir)?; + } + xml.write(XmlEvent::end_element())?; + } + xml.write(XmlEvent::end_element())?; + } + xml.write(XmlEvent::end_element())?; + Ok(()) } @@ -80,6 +178,8 @@ fn extract_path_table( offset: u32, size: u32, recurse: bool, + dry_run: bool, + extracted_root_dir: &mut ExtractedDir, ) -> Result<(), anyhow::Error> { let path_table_reader = IoSlice::new_with_offset_len(&mut *input_file, u64::from(offset) * 0x800, u64::from(size)) .context("Unable to slice path table")?; @@ -97,6 +197,51 @@ fn extract_path_table( .with_context(|| format!("Path table entry had non-existing parent: {}", entry.parent_entry_idx))?; let entry_path = parent_path.join(&*entry.name.as_lossy_str()); + // Then read the directory + input_file + .seek(io::SeekFrom::Start(u64::from(entry.extent) * 0x800)) + .context("Unable to seek to directory")?; + let dir = Dir::from_reader(input_file).context("Unable to parse directory")?; + + // Finally extract it + let mut extracted_sub_dir = ExtractedDir::new(); + self::extract_dir(&dir, input_file, &entry_path, recurse, dry_run, &mut extracted_sub_dir) + .context("Unable to extract directory")?; + + // Then find the directory it came from + match entry.name.as_lossy_str().into_owned() { + // If it's empty, replace the root directory + entry_name if entry_name.is_empty() => *extracted_root_dir = extracted_sub_dir, + + // Else add it as a sub-directory + entry_name => { + // TODO: Not have to do 
this for each entry? + let mut cur_extracted_dir = &mut *extracted_root_dir; + for name in parent_path + .strip_prefix(output_dir) + .expect("Parent path was outside output directory") + .components() + { + let name = name.as_os_str().to_str().expect("Component wasn't valid utf-8"); + + cur_extracted_dir = match cur_extracted_dir + .entries + .get_mut(name) + .context("Parent directory didn't appear before child directory")? + { + ExtractedDirEntry::Dir { dir, .. } => dir, + ExtractedDirEntry::File { output, .. } => + anyhow::bail!("Found file {output:?} while getting directory {name:?}"), + } + } + + cur_extracted_dir.entries.insert(entry_name, ExtractedDirEntry::Dir { + dir: extracted_sub_dir, + sector_pos: entry.extent, + }); + }, + } + // Then try to register it in the entry paths. // Note: This will fail if the id has overflowed, which is fine a long as we don't have any children // directories. @@ -110,15 +255,6 @@ fn extract_path_table( ), } - // Then read the directory - input_file - .seek(io::SeekFrom::Start(u64::from(entry.extent) * 0x800)) - .context("Unable to seek to directory")?; - let dir = Dir::from_reader(input_file).context("Unable to parse directory")?; - - // Finally extract it - self::extract_dir(&dir, input_file, &entry_path, recurse).context("Unable to extract directory")?; - // And update our current entry index cur_entry_idx = cur_entry_idx.and_then(|idx| idx.checked_add(1)); } @@ -132,10 +268,14 @@ fn extract_dir( input_file: &mut R, output_dir: &Path, recurse: bool, + dry_run: bool, + extracted_dir: &mut ExtractedDir, ) -> Result<(), anyhow::Error> { - // Create the directory - std::fs::create_dir_all(output_dir) - .with_context(|| format!("Unable to create directory {}", output_dir.display()))?; + // Create the directory (if not in dry-run) + if !dry_run { + std::fs::create_dir_all(output_dir) + .with_context(|| format!("Unable to create directory {}", output_dir.display()))?; + } for entry in &dir.entries { // Note: If the file has no 
extension, it will still have a trailing `.`, so @@ -150,8 +290,18 @@ fn extract_dir( let dir = entry.read_dir(input_file).context("Unable to read entry directory")?; println!("{}/ ({} entries)", entry_path.display(), dir.entries.len()); - self::extract_dir(&dir, input_file, &entry_path, recurse) - .with_context(|| format!("Unable to extract directory {entry_path:?}"))? + // Extract the sub-directory + let mut extracted_sub_dir = ExtractedDir::new(); + self::extract_dir(&dir, input_file, &entry_path, recurse, dry_run, &mut extracted_sub_dir) + .with_context(|| format!("Unable to extract directory {entry_path:?}"))?; + + // Then add it to the parent directory + extracted_dir + .entries + .insert(entry_name.to_owned(), ExtractedDirEntry::Dir { + dir: extracted_sub_dir, + sector_pos: entry.sector_pos, + }); }, // Else skip @@ -160,13 +310,48 @@ fn extract_dir( // Extract file false => { println!("{}", entry_path.display()); - let mut iso_file = entry.read_file(input_file).context("Unable to read file")?; - let mut output_file = fs::File::create(&entry_path).context("Unable to open output file")?; - std::io::copy(&mut iso_file, &mut output_file) - .with_context(|| format!("Unable to write output file {entry_path:?}"))?; + + // Extract the file, if not in a dry run + if !dry_run { + let mut iso_file = entry.read_file(input_file).context("Unable to read file")?; + let mut output_file = fs::File::create(&entry_path).context("Unable to open output file")?; + std::io::copy(&mut iso_file, &mut output_file) + .with_context(|| format!("Unable to write output file {entry_path:?}"))?; + } + + // Then add it to the files extracted + extracted_dir + .entries + .insert(entry_name.to_owned(), ExtractedDirEntry::File { + output: entry_path, + sector_pos: entry.sector_pos, + }); }, } } Ok(()) } + +/// Directory we've extracted +struct ExtractedDir { + /// All entries + entries: HashMap<String, ExtractedDirEntry>, +} + +impl ExtractedDir { + fn new() -> Self { + Self { + entries: HashMap::new(), + } + } +} + 
+/// Directory entries we've extracted +enum ExtractedDirEntry { + /// Directory + Dir { dir: ExtractedDir, sector_pos: u32 }, + + /// File + File { output: PathBuf, sector_pos: u32 }, +}