Unify index types and add version flag to pack command

Truman Kilen 2023-02-07 22:21:09 -06:00
parent 399f2f0187
commit 16305f8c95
6 changed files with 140 additions and 141 deletions

View file

@@ -9,8 +9,9 @@ version = "0.1.0"
 edition = "2021"

 [workspace.dependencies]
-base64 = "0.21.0"
 aes = "0.8.2"
+base64 = "0.21.0"
+strum = { version = "0.24", features = ["derive"] }

 # generated by 'cargo dist init'
 [profile.dist]

View file

@@ -5,14 +5,15 @@ authors.workspace = true
 license.workspace = true
 version.workspace = true
 edition.workspace = true
+strum.workspace = true

 [dependencies]
 byteorder = "1.4"
-strum = { version = "0.24", features = ["derive"] }
 aes = "0.8"
 flate2 = "1.0"
 thiserror = "1.0"
 sha1 = "0.10.5"
+strum = { workspace = true }

 [dev-dependencies]
 base64 = { workspace = true }

View file

@@ -10,7 +10,17 @@ pub use {error::*, pak::*};
 pub const MAGIC: u32 = 0x5A6F12E1;

 #[derive(
-    Clone, Copy, PartialEq, Eq, PartialOrd, Debug, strum::Display, strum::FromRepr, strum::EnumIter,
+    Clone,
+    Copy,
+    PartialEq,
+    Eq,
+    PartialOrd,
+    Debug,
+    strum::Display,
+    strum::FromRepr,
+    strum::EnumIter,
+    strum::EnumString,
+    strum::EnumVariantNames,
 )]
 pub enum Version {
     V0,

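The two added derives are what the new pack flag relies on: strum::EnumString generates a FromStr impl for Version, and strum::EnumVariantNames exposes the variant names via the strum::VariantNames trait. A minimal sketch of what that enables, with a trimmed-down stand-in for repak::Version and strum 0.24's default variant naming assumed:

use std::str::FromStr;
use strum::VariantNames; // trait that provides the VARIANTS constant

// Stand-in for repak::Version with only a few of its variants.
#[derive(Debug, PartialEq, strum::EnumString, strum::EnumVariantNames)]
enum Version {
    V8B,
    V10,
    V11,
}

fn main() {
    // EnumVariantNames: the string list handed to clap's PossibleValuesParser below.
    assert_eq!(Version::VARIANTS, &["V8B", "V10", "V11"]);
    // EnumString: turn the user-supplied string back into an enum value.
    assert_eq!(Version::from_str("V11"), Ok(Version::V11));
}

The existing PartialOrd derive still matters as well: pak.rs now branches on self.version < super::Version::V10 when deciding which index layout to write.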
View file

@@ -26,63 +26,42 @@ pub struct Pak {
 }

 impl Pak {
-    fn new(version: Version, mount_point: String) -> Self {
+    fn new(version: Version, mount_point: String, path_hash_seed: Option<u64>) -> Self {
         Pak {
             version,
             mount_point,
-            index: Index::new(version),
+            index: Index::new(path_hash_seed),
         }
     }
 }

-#[derive(Debug)]
-pub enum Index {
-    V1(IndexV1),
-    V2(IndexV2),
+#[derive(Debug, Default)]
+pub struct Index {
+    path_hash_seed: Option<u64>,
+    entries: BTreeMap<String, super::entry::Entry>,
 }

 impl Index {
-    fn new(version: Version) -> Self {
-        if version < Version::V10 {
-            Self::V1(IndexV1::default())
-        } else {
-            Self::V2(IndexV2::default())
+    fn new(path_hash_seed: Option<u64>) -> Self {
+        Index {
+            path_hash_seed,
+            ..Index::default()
         }
     }

     fn entries(&self) -> &BTreeMap<String, super::entry::Entry> {
-        match self {
-            Index::V1(index) => &index.entries,
-            Index::V2(index) => &index.entries,
-        }
+        &self.entries
     }

     fn into_entries(self) -> BTreeMap<String, super::entry::Entry> {
-        match self {
-            Index::V1(index) => index.entries,
-            Index::V2(index) => index.entries,
-        }
+        self.entries
     }

     fn add_entry(&mut self, path: &str, entry: super::entry::Entry) {
-        match self {
-            Index::V1(index) => index.entries.insert(path.to_string(), entry),
-            Index::V2(index) => index.entries.insert(path.to_string(), entry),
-        };
+        self.entries.insert(path.to_string(), entry);
     }
 }

-#[derive(Debug, Default)]
-pub struct IndexV1 {
-    entries: BTreeMap<String, super::entry::Entry>,
-}
-
-#[derive(Debug, Default)]
-pub struct IndexV2 {
-    path_hash_seed: u64,
-    entries: BTreeMap<String, super::entry::Entry>,
-}
-
 fn decrypt(key: &Option<aes::Aes256Dec>, bytes: &mut [u8]) -> Result<(), super::Error> {
     if let Some(key) = &key {
         use aes::cipher::BlockDecrypt;
@@ -152,9 +131,10 @@ impl<W: Write + io::Seek> PakWriter<W> {
         key: Option<aes::Aes256Enc>,
         version: Version,
         mount_point: String,
+        path_hash_seed: Option<u64>,
     ) -> Self {
         PakWriter {
-            pak: Pak::new(version, mount_point),
+            pak: Pak::new(version, mount_point, path_hash_seed),
             writer,
             key,
         }
@@ -312,10 +292,10 @@ impl Pak {

             assert_eq!(index.read_u32::<LE>()?, 0, "remaining index bytes are 0"); // TODO possibly remaining unencoded entries?

-            Index::V2(IndexV2 {
-                path_hash_seed,
+            Index {
+                path_hash_seed: Some(path_hash_seed),
                 entries: entries_by_path,
-            })
+            }
         } else {
             let mut entries = BTreeMap::new();
             for _ in 0..len {
@@ -324,7 +304,10 @@ impl Pak {
                     super::entry::Entry::read(&mut index, version)?,
                 );
             }
-            Index::V1(IndexV1 { entries })
+            Index {
+                path_hash_seed: None,
+                entries,
+            }
         };

         Ok(Pak {
@@ -345,103 +328,101 @@ impl Pak {
         let mut index_writer = io::Cursor::new(&mut index_buf);
         index_writer.write_string(&self.mount_point)?;

-        let secondary_index = match &self.index {
-            Index::V1(index) => {
-                let record_count = index.entries.len() as u32;
-                index_writer.write_u32::<LE>(record_count)?;
-                for (path, entry) in &index.entries {
-                    index_writer.write_string(path)?;
-                    entry.write(
-                        &mut index_writer,
-                        self.version,
-                        super::entry::EntryLocation::Index,
-                    )?;
-                }
-                None
-            }
-            Index::V2(index) => {
-                let record_count = index.entries.len() as u32;
-                index_writer.write_u32::<LE>(record_count)?;
-                index_writer.write_u64::<LE>(index.path_hash_seed)?;
+        let secondary_index = if self.version < super::Version::V10 {
+            let record_count = self.index.entries.len() as u32;
+            index_writer.write_u32::<LE>(record_count)?;
+            for (path, entry) in &self.index.entries {
+                index_writer.write_string(path)?;
+                entry.write(
+                    &mut index_writer,
+                    self.version,
+                    super::entry::EntryLocation::Index,
+                )?;
+            }
+            None
+        } else {
+            let record_count = self.index.entries.len() as u32;
+            let path_hash_seed = self.index.path_hash_seed.unwrap_or_default();
+            index_writer.write_u32::<LE>(record_count)?;
+            index_writer.write_u64::<LE>(path_hash_seed)?;

-                // The index is organized sequentially as:
-                // - Index Header, which contains:
-                // - Mount Point (u32 len + string w/ terminating byte)
-                // - Entry Count (u32)
-                // - Path Hash Seed (u64)
-                // - Has Path Hash Index (u32); if true, then:
-                // - Path Hash Index Offset (u64)
-                // - Path Hash Index Size (u64)
-                // - Path Hash Index Hash ([u8; 20])
-                // - Has Full Directory Index (u32); if true, then:
-                // - Full Directory Index Offset (u64)
-                // - Full Directory Index Size (u64)
-                // - Full Directory Index Hash ([u8; 20])
-                // - Encoded Index Records Size
-                // - (Unused) File Count
-                // - Path Hash Index
-                // - Full Directory Index
-                // - Encoded Index Records; each encoded index record is (0xC bytes) from:
-                // - Flags (u32)
-                // - Offset (u32)
-                // - Size (u32)
-                let bytes_before_phi = {
-                    let mut size = 0;
-                    size += 4; // mount point len
-                    size += self.mount_point.len() as u64 + 1; // mount point string w/ NUL byte
-                    size += 8; // path hash seed
-                    size += 4; // record count
-                    size += 4; // has path hash index (since we're generating, always true)
-                    size += 8 + 8 + 20; // path hash index offset, size and hash
-                    size += 4; // has full directory index (since we're generating, always true)
-                    size += 8 + 8 + 20; // full directory index offset, size and hash
-                    size += 4; // encoded entry size
-                    size += index.entries.len() as u64 * {
-                        4 // flags
-                        + 4 // offset
-                        + 4 // size
-                    };
-                    size += 4; // unused file count
-                    size
-                };
+            // The index is organized sequentially as:
+            // - Index Header, which contains:
+            // - Mount Point (u32 len + string w/ terminating byte)
+            // - Entry Count (u32)
+            // - Path Hash Seed (u64)
+            // - Has Path Hash Index (u32); if true, then:
+            // - Path Hash Index Offset (u64)
+            // - Path Hash Index Size (u64)
+            // - Path Hash Index Hash ([u8; 20])
+            // - Has Full Directory Index (u32); if true, then:
+            // - Full Directory Index Offset (u64)
+            // - Full Directory Index Size (u64)
+            // - Full Directory Index Hash ([u8; 20])
+            // - Encoded Index Records Size
+            // - (Unused) File Count
+            // - Path Hash Index
+            // - Full Directory Index
+            // - Encoded Index Records; each encoded index record is (0xC bytes) from:
+            // - Flags (u32)
+            // - Offset (u32)
+            // - Size (u32)
+            let bytes_before_phi = {
+                let mut size = 0;
+                size += 4; // mount point len
+                size += self.mount_point.len() as u64 + 1; // mount point string w/ NUL byte
+                size += 8; // path hash seed
+                size += 4; // record count
+                size += 4; // has path hash index (since we're generating, always true)
+                size += 8 + 8 + 20; // path hash index offset, size and hash
+                size += 4; // has full directory index (since we're generating, always true)
+                size += 8 + 8 + 20; // full directory index offset, size and hash
+                size += 4; // encoded entry size
+                size += self.index.entries.len() as u64 * {
+                    4 // flags
+                    + 4 // offset
+                    + 4 // size
+                };
+                size += 4; // unused file count
+                size
+            };

-                let path_hash_index_offset = index_offset + bytes_before_phi;
+            let path_hash_index_offset = index_offset + bytes_before_phi;

-                let mut phi_buf = vec![];
-                let mut phi_writer = io::Cursor::new(&mut phi_buf);
-                generate_path_hash_index(&mut phi_writer, index.path_hash_seed, &index.entries)?;
+            let mut phi_buf = vec![];
+            let mut phi_writer = io::Cursor::new(&mut phi_buf);
+            generate_path_hash_index(&mut phi_writer, path_hash_seed, &self.index.entries)?;

-                let full_directory_index_offset = path_hash_index_offset + phi_buf.len() as u64;
+            let full_directory_index_offset = path_hash_index_offset + phi_buf.len() as u64;

-                let mut fdi_buf = vec![];
-                let mut fdi_writer = io::Cursor::new(&mut fdi_buf);
-                generate_full_directory_index(&mut fdi_writer, &index.entries)?;
+            let mut fdi_buf = vec![];
+            let mut fdi_writer = io::Cursor::new(&mut fdi_buf);
+            generate_full_directory_index(&mut fdi_writer, &self.index.entries)?;

-                index_writer.write_u32::<LE>(1)?; // we have path hash index
-                index_writer.write_u64::<LE>(path_hash_index_offset)?;
-                index_writer.write_u64::<LE>(phi_buf.len() as u64)?; // path hash index size
-                index_writer.write_all(&hash(&phi_buf))?;
+            index_writer.write_u32::<LE>(1)?; // we have path hash index
+            index_writer.write_u64::<LE>(path_hash_index_offset)?;
+            index_writer.write_u64::<LE>(phi_buf.len() as u64)?; // path hash index size
+            index_writer.write_all(&hash(&phi_buf))?;

-                index_writer.write_u32::<LE>(1)?; // we have full directory index
-                index_writer.write_u64::<LE>(full_directory_index_offset)?;
-                index_writer.write_u64::<LE>(fdi_buf.len() as u64)?; // path hash index size
-                index_writer.write_all(&hash(&fdi_buf))?;
+            index_writer.write_u32::<LE>(1)?; // we have full directory index
+            index_writer.write_u64::<LE>(full_directory_index_offset)?;
+            index_writer.write_u64::<LE>(fdi_buf.len() as u64)?; // path hash index size
+            index_writer.write_all(&hash(&fdi_buf))?;

-                let encoded_entries_size = index.entries.len() as u32 * ENCODED_ENTRY_SIZE;
-                index_writer.write_u32::<LE>(encoded_entries_size)?;
+            let encoded_entries_size = self.index.entries.len() as u32 * ENCODED_ENTRY_SIZE;
+            index_writer.write_u32::<LE>(encoded_entries_size)?;

-                for entry in index.entries.values() {
-                    entry.write(
-                        &mut index_writer,
-                        self.version,
-                        super::entry::EntryLocation::Index,
-                    )?;
-                }
+            for entry in self.index.entries.values() {
+                entry.write(
+                    &mut index_writer,
+                    self.version,
+                    super::entry::EntryLocation::Index,
+                )?;
+            }

-                index_writer.write_u32::<LE>(0)?;
+            index_writer.write_u32::<LE>(0)?;

-                Some((phi_buf, fdi_buf))
-            }
+            Some((phi_buf, fdi_buf))
         };

         let index_hash = hash(&index_buf);
@@ -580,8 +561,6 @@ fn encrypt(key: Aes256Enc, bytes: &mut [u8]) {

 #[cfg(test)]
 mod test {
-    use super::IndexV2;
-
     #[test]
     fn test_rewrite_pak_v8b() {
         use std::io::Cursor;
@@ -595,6 +574,7 @@ mod test {
             None,
             super::Version::V8B,
             pak_reader.mount_point().to_owned(),
+            None,
         );

         for path in pak_reader.files() {
@@ -621,6 +601,7 @@ mod test {
             None,
             super::Version::V11,
             pak_reader.mount_point().to_owned(),
+            Some(0x205C5A7D),
         );

         for path in pak_reader.files() {
@@ -630,19 +611,6 @@ mod test {
                 .unwrap();
         }

-        // There's a caveat: UnrealPak uses the absolute path (in UTF-16LE) of the output pak
-        // passed to strcrc32() as the PathHashSeed. We don't want to require the user to do this.
-        if let super::Index::V2(index) = pak_writer.pak.index {
-            pak_writer.pak.index = super::Index::V2(IndexV2 {
-                path_hash_seed: u64::from_le_bytes([
-                    0x7D, 0x5A, 0x5C, 0x20, 0x00, 0x00, 0x00, 0x00,
-                ]),
-                ..index
-            });
-        } else {
-            panic!()
-        };
-
         let out_bytes = pak_writer.write_index().unwrap().into_inner();

         assert_eq!(&bytes[..], &out_bytes[..]);

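With the IndexV1/IndexV2 split collapsed into a single Index, the version-specific choice happens at write time and the caller supplies the path hash seed up front. A rough sketch of the new five-argument constructor, based on how the tests and the CLI call it (the output path and the seed value are placeholders):

use std::fs::File;
use std::io::BufWriter;

fn main() -> Result<(), repak::Error> {
    let pak_writer = repak::PakWriter::new(
        BufWriter::new(File::create("out.pak")?),
        None,                    // no AES key
        repak::Version::V11,     // V10+ paks carry a path hash index
        "../../../".to_string(),
        Some(0),                 // path hash seed; None is fine for versions below V10
    );
    // ... entries would be written here before finalizing ...
    pak_writer.write_index()?;
    Ok(())
}

This also replaces the old test-only workaround of patching IndexV2::path_hash_seed after construction: the V11 round-trip test now simply passes Some(0x205C5A7D) to the constructor.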
View file

@@ -17,3 +17,4 @@ clap = { version = "4.1.4", features = ["derive"] }
 path-clean = "0.1.0"
 rayon = "1.6.1"
 repak = { version = "0.1.0", path = "../repak" }
+strum = { workspace = true }

View file

@@ -2,9 +2,11 @@ use std::fs::{self, File};
 use std::io::{self, BufReader, BufWriter};
 use std::path::{Path, PathBuf};

+use clap::builder::TypedValueParser;
 use clap::{Parser, Subcommand};
 use path_clean::PathClean;
 use rayon::prelude::*;
+use strum::VariantNames;

 #[derive(Parser, Debug)]
 struct ActionInfo {
@@ -65,6 +67,18 @@ struct ActionPack {
     #[arg(short, long, default_value = "../../../")]
     mount_point: String,

+    /// Version
+    #[arg(
+        long,
+        default_value_t = repak::Version::V8B,
+        value_parser = clap::builder::PossibleValuesParser::new(repak::Version::VARIANTS).map(|s| s.parse::<repak::Version>().unwrap())
+    )]
+    version: repak::Version,
+
+    /// Path hash seed for >= V10
+    #[arg(short, long, default_value = "0")]
+    path_hash_seed: u64,
+
     /// Verbose
     #[arg(short, long, default_value = "false")]
     verbose: bool,
@@ -92,6 +106,9 @@ struct Args {
 fn main() -> Result<(), repak::Error> {
     let args = Args::parse();

+    //let aasdf = repak::Version::iter().map(|v| format!("{v}"));
+    //clap::builder::PossibleValuesParser::new(aasdf.map(|a| a.as_str()));
+
     match args.action {
         Action::Info(args) => info(args),
         Action::List(args) => list(args),
@@ -208,8 +225,9 @@ fn pack(args: ActionPack) -> Result<(), repak::Error> {
     let mut pak = repak::PakWriter::new(
         BufWriter::new(File::create(output)?),
         None,
-        repak::Version::V8B,
+        args.version,
         args.mount_point,
+        Some(args.path_hash_seed),
     );

     for p in paths {
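For reference, the clap wiring above can be exercised on its own: PossibleValuesParser restricts input to Version::VARIANTS, and the .map adapter (the reason clap::builder::TypedValueParser is now imported) converts the accepted string back through strum's FromStr. A self-contained sketch with a mocked-up Version enum standing in for repak::Version:

use clap::builder::TypedValueParser;
use clap::Parser;
use strum::VariantNames;

// Mock of repak::Version so the sketch compiles on its own.
#[derive(Clone, Copy, Debug, strum::Display, strum::EnumString, strum::EnumVariantNames)]
enum Version {
    V8B,
    V11,
}

#[derive(Parser, Debug)]
struct ActionPack {
    #[arg(
        long,
        default_value_t = Version::V8B,
        value_parser = clap::builder::PossibleValuesParser::new(Version::VARIANTS)
            .map(|s| s.parse::<Version>().unwrap())
    )]
    version: Version,
}

fn main() {
    // Equivalent of invoking `repak pack --version V11 ...` on the command line.
    let args = ActionPack::parse_from(["pack", "--version", "V11"]);
    println!("packing as {}", args.version);
}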