From 16305f8c95686a221430eecd2f098a7cecdc04b3 Mon Sep 17 00:00:00 2001
From: Truman Kilen
Date: Tue, 7 Feb 2023 22:21:09 -0600
Subject: [PATCH] Unify index types and add version flag to pack command

---
 Cargo.toml            |   3 +-
 repak/Cargo.toml      |   3 +-
 repak/src/lib.rs      |  12 ++-
 repak/src/pak.rs      | 242 ++++++++++++++++++------------------------
 repak_cli/Cargo.toml  |   1 +
 repak_cli/src/main.rs |  20 +++-
 6 files changed, 140 insertions(+), 141 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 0e24af6..5b8958b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,8 +9,9 @@ version = "0.1.0"
 edition = "2021"
 
 [workspace.dependencies]
-base64 = "0.21.0"
 aes = "0.8.2"
+base64 = "0.21.0"
+strum = { version = "0.24", features = ["derive"] }
 
 # generated by 'cargo dist init'
 [profile.dist]
diff --git a/repak/Cargo.toml b/repak/Cargo.toml
index 8e72c43..d2a3645 100644
--- a/repak/Cargo.toml
+++ b/repak/Cargo.toml
@@ -5,14 +5,15 @@ authors.workspace = true
 license.workspace = true
 version.workspace = true
 edition.workspace = true
+strum.workspace = true
 
 [dependencies]
 byteorder = "1.4"
-strum = { version = "0.24", features = ["derive"] }
 aes = "0.8"
 flate2 = "1.0"
 thiserror = "1.0"
 sha1 = "0.10.5"
+strum = { workspace = true }
 
 [dev-dependencies]
 base64 = { workspace = true }
diff --git a/repak/src/lib.rs b/repak/src/lib.rs
index 4dc9374..bfb4208 100644
--- a/repak/src/lib.rs
+++ b/repak/src/lib.rs
@@ -10,7 +10,17 @@ pub use {error::*, pak::*};
 pub const MAGIC: u32 = 0x5A6F12E1;
 
 #[derive(
-    Clone, Copy, PartialEq, Eq, PartialOrd, Debug, strum::Display, strum::FromRepr, strum::EnumIter,
+    Clone,
+    Copy,
+    PartialEq,
+    Eq,
+    PartialOrd,
+    Debug,
+    strum::Display,
+    strum::FromRepr,
+    strum::EnumIter,
+    strum::EnumString,
+    strum::EnumVariantNames,
 )]
 pub enum Version {
     V0,
diff --git a/repak/src/pak.rs b/repak/src/pak.rs
index 5ebe35b..b16a917 100644
--- a/repak/src/pak.rs
+++ b/repak/src/pak.rs
@@ -26,63 +26,42 @@ pub struct Pak {
 }
 
 impl Pak {
-    fn new(version: Version, mount_point: String) -> Self {
+    fn new(version: Version, mount_point: String, path_hash_seed: Option<u64>) -> Self {
         Pak {
             version,
             mount_point,
-            index: Index::new(version),
+            index: Index::new(path_hash_seed),
         }
     }
 }
 
-#[derive(Debug)]
-pub enum Index {
-    V1(IndexV1),
-    V2(IndexV2),
+#[derive(Debug, Default)]
+pub struct Index {
+    path_hash_seed: Option<u64>,
+    entries: BTreeMap<String, super::entry::Entry>,
 }
 
 impl Index {
-    fn new(version: Version) -> Self {
-        if version < Version::V10 {
-            Self::V1(IndexV1::default())
-        } else {
-            Self::V2(IndexV2::default())
+    fn new(path_hash_seed: Option<u64>) -> Self {
+        Index {
+            path_hash_seed,
+            ..Index::default()
         }
     }
 
     fn entries(&self) -> &BTreeMap<String, super::entry::Entry> {
-        match self {
-            Index::V1(index) => &index.entries,
-            Index::V2(index) => &index.entries,
-        }
+        &self.entries
     }
 
     fn into_entries(self) -> BTreeMap<String, super::entry::Entry> {
-        match self {
-            Index::V1(index) => index.entries,
-            Index::V2(index) => index.entries,
-        }
+        self.entries
    }
 
     fn add_entry(&mut self, path: &str, entry: super::entry::Entry) {
-        match self {
-            Index::V1(index) => index.entries.insert(path.to_string(), entry),
-            Index::V2(index) => index.entries.insert(path.to_string(), entry),
-        };
+        self.entries.insert(path.to_string(), entry);
    }
 }
 
-#[derive(Debug, Default)]
-pub struct IndexV1 {
-    entries: BTreeMap<String, super::entry::Entry>,
-}
-
-#[derive(Debug, Default)]
-pub struct IndexV2 {
-    path_hash_seed: u64,
-    entries: BTreeMap<String, super::entry::Entry>,
-}
-
 fn decrypt(key: &Option, bytes: &mut [u8]) -> Result<(), super::Error> {
     if let Some(key) = &key {
         use aes::cipher::BlockDecrypt;
@@ -152,9 +131,10 @@ impl PakWriter {
         key: Option,
         version: Version,
         mount_point: String,
+        path_hash_seed: Option<u64>,
     ) -> Self {
         PakWriter {
-            pak: Pak::new(version, mount_point),
+            pak: Pak::new(version, mount_point, path_hash_seed),
             writer,
             key,
         }
@@ -312,10 +292,10 @@ impl Pak {
         assert_eq!(index.read_u32::<LE>()?, 0, "remaining index bytes are 0");
         // TODO possibly remaining unencoded entries?
 
-            Index::V2(IndexV2 {
-                path_hash_seed,
+            Index {
+                path_hash_seed: Some(path_hash_seed),
                 entries: entries_by_path,
-            })
+            }
         } else {
             let mut entries = BTreeMap::new();
             for _ in 0..len {
@@ -324,7 +304,10 @@ impl Pak {
                     super::entry::Entry::read(&mut index, version)?,
                 );
             }
-            Index::V1(IndexV1 { entries })
+            Index {
+                path_hash_seed: None,
+                entries,
+            }
         };
 
         Ok(Pak {
@@ -345,103 +328,101 @@ impl Pak {
         let mut index_writer = io::Cursor::new(&mut index_buf);
         index_writer.write_string(&self.mount_point)?;
 
-        let secondary_index = match &self.index {
-            Index::V1(index) => {
-                let record_count = index.entries.len() as u32;
-                index_writer.write_u32::<LE>(record_count)?;
-                for (path, entry) in &index.entries {
-                    index_writer.write_string(path)?;
-                    entry.write(
-                        &mut index_writer,
-                        self.version,
-                        super::entry::EntryLocation::Index,
-                    )?;
-                }
-                None
+        let secondary_index = if self.version < super::Version::V10 {
+            let record_count = self.index.entries.len() as u32;
+            index_writer.write_u32::<LE>(record_count)?;
+            for (path, entry) in &self.index.entries {
+                index_writer.write_string(path)?;
+                entry.write(
+                    &mut index_writer,
+                    self.version,
+                    super::entry::EntryLocation::Index,
+                )?;
             }
-            Index::V2(index) => {
-                let record_count = index.entries.len() as u32;
-                index_writer.write_u32::<LE>(record_count)?;
-                index_writer.write_u64::<LE>(index.path_hash_seed)?;
+            None
+        } else {
+            let record_count = self.index.entries.len() as u32;
+            let path_hash_seed = self.index.path_hash_seed.unwrap_or_default();
+            index_writer.write_u32::<LE>(record_count)?;
+            index_writer.write_u64::<LE>(path_hash_seed)?;
 
-                // The index is organized sequentially as:
-                // - Index Header, which contains:
-                //     - Mount Point (u32 len + string w/ terminating byte)
-                //     - Entry Count (u32)
-                //     - Path Hash Seed (u64)
-                //     - Has Path Hash Index (u32); if true, then:
-                //         - Path Hash Index Offset (u64)
-                //         - Path Hash Index Size (u64)
-                //         - Path Hash Index Hash ([u8; 20])
-                //     - Has Full Directory Index (u32); if true, then:
-                //         - Full Directory Index Offset (u64)
-                //         - Full Directory Index Size (u64)
-                //         - Full Directory Index Hash ([u8; 20])
-                //     - Encoded Index Records Size
-                //     - (Unused) File Count
-                // - Path Hash Index
-                // - Full Directory Index
-                // - Encoded Index Records; each encoded index record is (0xC bytes) from:
-                //     - Flags (u32)
-                //     - Offset (u32)
-                //     - Size (u32)
-                let bytes_before_phi = {
-                    let mut size = 0;
-                    size += 4; // mount point len
-                    size += self.mount_point.len() as u64 + 1; // mount point string w/ NUL byte
-                    size += 8; // path hash seed
-                    size += 4; // record count
-                    size += 4; // has path hash index (since we're generating, always true)
-                    size += 8 + 8 + 20; // path hash index offset, size and hash
-                    size += 4; // has full directory index (since we're generating, always true)
-                    size += 8 + 8 + 20; // full directory index offset, size and hash
-                    size += 4; // encoded entry size
-                    size += index.entries.len() as u64 * {
-                        4 // flags
+            // The index is organized sequentially as:
+            // - Index Header, which contains:
+            //     - Mount Point (u32 len + string w/ terminating byte)
+            //     - Entry Count (u32)
+            //     - Path Hash Seed (u64)
+            //     - Has Path Hash Index (u32); if true, then:
+            //         - Path Hash Index Offset (u64)
+            //         - Path Hash Index Size (u64)
+            //         - Path Hash Index Hash ([u8; 20])
+            //     - Has Full Directory Index (u32); if true, then:
+            //         - Full Directory Index Offset (u64)
+            //         - Full Directory Index Size (u64)
+            //         - Full Directory Index Hash ([u8; 20])
+            //     - Encoded Index Records Size
+            //     - (Unused) File Count
+            // - Path Hash Index
+            // - Full Directory Index
+            // - Encoded Index Records; each encoded index record is (0xC bytes) from:
+            //     - Flags (u32)
+            //     - Offset (u32)
+            //     - Size (u32)
+            let bytes_before_phi = {
+                let mut size = 0;
+                size += 4; // mount point len
+                size += self.mount_point.len() as u64 + 1; // mount point string w/ NUL byte
+                size += 8; // path hash seed
+                size += 4; // record count
+                size += 4; // has path hash index (since we're generating, always true)
+                size += 8 + 8 + 20; // path hash index offset, size and hash
+                size += 4; // has full directory index (since we're generating, always true)
+                size += 8 + 8 + 20; // full directory index offset, size and hash
+                size += 4; // encoded entry size
+                size += self.index.entries.len() as u64 * {
+                    4 // flags
                     + 4 // offset
                     + 4 // size
-                    };
-                    size += 4; // unused file count
-                    size
                 };
+                size += 4; // unused file count
+                size
+            };
 
-                let path_hash_index_offset = index_offset + bytes_before_phi;
+            let path_hash_index_offset = index_offset + bytes_before_phi;
 
-                let mut phi_buf = vec![];
-                let mut phi_writer = io::Cursor::new(&mut phi_buf);
-                generate_path_hash_index(&mut phi_writer, index.path_hash_seed, &index.entries)?;
+            let mut phi_buf = vec![];
+            let mut phi_writer = io::Cursor::new(&mut phi_buf);
+            generate_path_hash_index(&mut phi_writer, path_hash_seed, &self.index.entries)?;
 
-                let full_directory_index_offset = path_hash_index_offset + phi_buf.len() as u64;
+            let full_directory_index_offset = path_hash_index_offset + phi_buf.len() as u64;
 
-                let mut fdi_buf = vec![];
-                let mut fdi_writer = io::Cursor::new(&mut fdi_buf);
-                generate_full_directory_index(&mut fdi_writer, &index.entries)?;
+            let mut fdi_buf = vec![];
+            let mut fdi_writer = io::Cursor::new(&mut fdi_buf);
+            generate_full_directory_index(&mut fdi_writer, &self.index.entries)?;
 
-                index_writer.write_u32::<LE>(1)?; // we have path hash index
-                index_writer.write_u64::<LE>(path_hash_index_offset)?;
-                index_writer.write_u64::<LE>(phi_buf.len() as u64)?; // path hash index size
-                index_writer.write_all(&hash(&phi_buf))?;
+            index_writer.write_u32::<LE>(1)?; // we have path hash index
+            index_writer.write_u64::<LE>(path_hash_index_offset)?;
+            index_writer.write_u64::<LE>(phi_buf.len() as u64)?; // path hash index size
+            index_writer.write_all(&hash(&phi_buf))?;
 
-                index_writer.write_u32::<LE>(1)?; // we have full directory index
-                index_writer.write_u64::<LE>(full_directory_index_offset)?;
-                index_writer.write_u64::<LE>(fdi_buf.len() as u64)?; // path hash index size
-                index_writer.write_all(&hash(&fdi_buf))?;
+            index_writer.write_u32::<LE>(1)?; // we have full directory index
+            index_writer.write_u64::<LE>(full_directory_index_offset)?;
+            index_writer.write_u64::<LE>(fdi_buf.len() as u64)?; // path hash index size
+            index_writer.write_all(&hash(&fdi_buf))?;
 
-                let encoded_entries_size = index.entries.len() as u32 * ENCODED_ENTRY_SIZE;
-                index_writer.write_u32::<LE>(encoded_entries_size)?;
+            let encoded_entries_size = self.index.entries.len() as u32 * ENCODED_ENTRY_SIZE;
+            index_writer.write_u32::<LE>(encoded_entries_size)?;
 
-                for entry in index.entries.values() {
-                    entry.write(
-                        &mut index_writer,
-                        self.version,
-                        super::entry::EntryLocation::Index,
-                    )?;
-                }
-
-                index_writer.write_u32::<LE>(0)?;
-
-                Some((phi_buf, fdi_buf))
+            for entry in self.index.entries.values() {
+                entry.write(
+                    &mut index_writer,
+                    self.version,
+                    super::entry::EntryLocation::Index,
+                )?;
             }
+
+            index_writer.write_u32::<LE>(0)?;
+
+            Some((phi_buf, fdi_buf))
         };
 
         let index_hash = hash(&index_buf);
@@ -580,8 +561,6 @@ fn encrypt(key: Aes256Enc, bytes: &mut [u8]) {
 
 #[cfg(test)]
 mod test {
-    use super::IndexV2;
-
     #[test]
     fn test_rewrite_pak_v8b() {
         use std::io::Cursor;
@@ -595,6 +574,7 @@ mod test {
             None,
             super::Version::V8B,
             pak_reader.mount_point().to_owned(),
+            None,
         );
 
         for path in pak_reader.files() {
@@ -621,6 +601,7 @@ mod test {
             None,
             super::Version::V11,
             pak_reader.mount_point().to_owned(),
+            Some(0x205C5A7D),
         );
 
         for path in pak_reader.files() {
@@ -630,19 +611,6 @@ mod test {
                 .unwrap();
         }
 
-        // There's a caveat: UnrealPak uses the absolute path (in UTF-16LE) of the output pak
-        // passed to strcrc32() as the PathHashSeed. We don't want to require the user to do this.
-        if let super::Index::V2(index) = pak_writer.pak.index {
-            pak_writer.pak.index = super::Index::V2(IndexV2 {
-                path_hash_seed: u64::from_le_bytes([
-                    0x7D, 0x5A, 0x5C, 0x20, 0x00, 0x00, 0x00, 0x00,
-                ]),
-                ..index
-            });
-        } else {
-            panic!()
-        };
-
         let out_bytes = pak_writer.write_index().unwrap().into_inner();
 
         assert_eq!(&bytes[..], &out_bytes[..]);
diff --git a/repak_cli/Cargo.toml b/repak_cli/Cargo.toml
index 33df91a..dd35894 100644
--- a/repak_cli/Cargo.toml
+++ b/repak_cli/Cargo.toml
@@ -17,3 +17,4 @@ clap = { version = "4.1.4", features = ["derive"] }
 path-clean = "0.1.0"
 rayon = "1.6.1"
 repak = { version = "0.1.0", path = "../repak" }
+strum = { workspace = true }
diff --git a/repak_cli/src/main.rs b/repak_cli/src/main.rs
index 2a1fdf0..2cffcbd 100644
--- a/repak_cli/src/main.rs
+++ b/repak_cli/src/main.rs
@@ -2,9 +2,11 @@ use std::fs::{self, File};
 use std::io::{self, BufReader, BufWriter};
 use std::path::{Path, PathBuf};
 
+use clap::builder::TypedValueParser;
 use clap::{Parser, Subcommand};
 use path_clean::PathClean;
 use rayon::prelude::*;
+use strum::VariantNames;
 
 #[derive(Parser, Debug)]
 struct ActionInfo {
@@ -65,6 +67,18 @@ struct ActionPack {
     #[arg(short, long, default_value = "../../../")]
     mount_point: String,
 
+    /// Version
+    #[arg(
+        long,
+        default_value_t = repak::Version::V8B,
+        value_parser = clap::builder::PossibleValuesParser::new(repak::Version::VARIANTS).map(|s| s.parse::<repak::Version>().unwrap())
+    )]
+    version: repak::Version,
+
+    /// Path hash seed for >= V10
+    #[arg(short, long, default_value = "0")]
+    path_hash_seed: u64,
+
     /// Verbose
     #[arg(short, long, default_value = "false")]
     verbose: bool,
@@ -92,6 +106,9 @@ struct Args {
 fn main() -> Result<(), repak::Error> {
     let args = Args::parse();
 
+    //let aasdf = repak::Version::iter().map(|v| format!("{v}"));
+    //clap::builder::PossibleValuesParser::new(aasdf.map(|a| a.as_str()));
+
     match args.action {
         Action::Info(args) => info(args),
         Action::List(args) => list(args),
@@ -208,8 +225,9 @@ fn pack(args: ActionPack) -> Result<(), repak::Error> {
     let mut pak = repak::PakWriter::new(
         BufWriter::new(File::create(output)?),
        None,
-        repak::Version::V8B,
+        args.version,
         args.mount_point,
+        Some(args.path_hash_seed),
     );
 
     for p in paths {
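
Usage sketch (not part of the patch; the output path, version, and seed values
below are illustrative): with this change the pack CLI no longer hard-codes
Version::V8B, and callers pick the pak version and, for V10+ indexes, the path
hash seed through the extra PakWriter::new argument.

    // Assumes an enclosing function whose error type can absorb the io/repak
    // errors via `?`, as fn pack() does in repak_cli.
    let mut pak_writer = repak::PakWriter::new(
        std::io::BufWriter::new(std::fs::File::create("output.pak")?),
        None,                     // no AES key
        repak::Version::V11,      // pak version is now caller-selected
        "../../../".to_string(),  // mount point (the CLI default)
        Some(0x205C5A7D),         // path hash seed, only written for >= V10
    );

On the command line the same options surface on the pack subcommand as
--version (one of the strum variant names, e.g. V11, defaulting to V8B) and
--path-hash-seed (defaulting to 0).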