From fefd02a3692c66538754d44e145c8a5ad8e09486 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?=
 <39484203+jieyouxu@users.noreply.github.com>
Date: Fri, 3 Feb 2023 01:58:27 +0800
Subject: [PATCH] Implement support for rewriting plain v11 paks (#2)

---
 repak/src/entry.rs | 119 ++++++++++++-----
 repak/src/error.rs |   2 +
 repak/src/pak.rs   | 313 +++++++++++++++++++++++++++++++++++++++------
 3 files changed, 364 insertions(+), 70 deletions(-)
diff --git a/repak/src/entry.rs b/repak/src/entry.rs
index 8ba1070..64ffd03 100644
--- a/repak/src/entry.rs
+++ b/repak/src/entry.rs
@@ -2,7 +2,7 @@ use super::{ext::ReadExt, ext::WriteExt, Compression, Version, VersionMajor};
 use byteorder::{ReadBytesExt, WriteBytesExt, LE};
 use std::io;
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Clone, Copy)]
 pub enum EntryLocation {
     Data,
     Index,
@@ -119,47 +119,102 @@ impl Entry {
             },
         })
     }
+    /// Serializes the entry: flag-prefixed compact form for V10+ index entries, else fixed layout.
     pub fn write<W: io::Write>(
         &self,
         writer: &mut W,
         version: super::Version,
         location: EntryLocation,
     ) -> Result<(), super::Error> {
-        writer.write_u64::<LE>(match location {
-            EntryLocation::Data => 0,
-            EntryLocation::Index => self.offset,
-        })?;
-        writer.write_u64::<LE>(self.compressed)?;
-        writer.write_u64::<LE>(self.uncompressed)?;
-        let compression: u8 = match self.compression {
-            Compression::None => 0,
-            Compression::Zlib => 1,
-            Compression::Gzip => todo!(),
-            Compression::Oodle => todo!(),
-        };
-        match version {
-            Version::V8A => writer.write_u8(compression)?,
-            _ => writer.write_u32::<LE>(compression.into())?,
-        }
+        if version >= super::Version::V10 && location == EntryLocation::Index {
+            let compression_block_size = self.block_uncompressed.unwrap_or_default();
+            let compression_blocks_count = if self.compression != Compression::None {
+                self.blocks.as_ref().unwrap().len() as u32 // panics if a compressed entry has no blocks
+            } else {
+                0
+            };
+            let is_size_32_bit_safe = self.compressed <= u32::MAX as u64;
+            let is_uncompressed_size_32_bit_safe = self.uncompressed <= u32::MAX as u64;
+            let is_offset_32_bit_safe = self.offset <= u32::MAX as u64;
 
-        if version.version_major() == VersionMajor::Initial {
-            writer.write_u64::<LE>(self.timestamp.unwrap_or_default())?;
-        }
-        if let Some(hash) = self.hash {
-            writer.write_all(&hash)?;
-        } else {
-            panic!("hash missing");
-        }
-        if version.version_major() >= VersionMajor::CompressionEncryption {
-            if let Some(blocks) = &self.blocks {
-                for block in blocks {
-                    block.write(writer)?;
+            let flags = (compression_block_size) // NOTE(review): not masked to 6 bits -- confirm
+                | (compression_blocks_count << 6) // block count, starting at bit 6
+                | ((self.encrypted as u32) << 22) // bit 22: encrypted
+                | ((self.compression as u32) << 23) // NOTE(review): raw enum discriminant -- confirm
+                | ((is_size_32_bit_safe as u32) << 29) // bit 29: compressed size fits in u32
+                | ((is_uncompressed_size_32_bit_safe as u32) << 30) // bit 30: uncompressed size fits in u32
+                | ((is_offset_32_bit_safe as u32) << 31); // bit 31: offset fits in u32
+
+            writer.write_u32::<LE>(flags)?;
+            // Offset and sizes take 4 bytes when their "32-bit safe" flag is set, otherwise 8.
+            if is_offset_32_bit_safe {
+                writer.write_u32::<LE>(self.offset as u32)?;
+            } else {
+                writer.write_u64::<LE>(self.offset)?;
+            }
+
+            if is_uncompressed_size_32_bit_safe {
+                writer.write_u32::<LE>(self.uncompressed as u32)?
+            } else {
+                writer.write_u64::<LE>(self.uncompressed)?
+            }
+
+            if self.compression != Compression::None { // compressed size and blocks only when compressed
+                if is_size_32_bit_safe {
+                    writer.write_u32::<LE>(self.compressed as u32)?;
+                } else {
+                    writer.write_u64::<LE>(self.compressed)?;
+                }
+                // Block sizes are written only for multi-block entries or a single encrypted block.
+                assert!(self.blocks.is_some());
+                let blocks = self.blocks.as_ref().unwrap();
+                if blocks.len() > 1 || (blocks.len() == 1 && self.encrypted) {
+                    for b in blocks {
+                        let block_size = b.end - b.start;
+                        writer.write_u64::<LE>(block_size)? // NOTE(review): confirm reader expects 8-byte sizes
+                    }
                 }
             }
-            writer.write_bool(self.encrypted)?;
-            writer.write_u32::<LE>(self.block_uncompressed.unwrap_or_default())?;
+
+            Ok(())
+        } else { // fixed-layout record: versions below V10, or `EntryLocation::Data`
+            writer.write_u64::<LE>(match location {
+                EntryLocation::Data => 0,
+                EntryLocation::Index => self.offset,
+            })?;
+            writer.write_u64::<LE>(self.compressed)?;
+            writer.write_u64::<LE>(self.uncompressed)?;
+            let compression: u8 = match self.compression {
+                Compression::None => 0,
+                Compression::Zlib => 1,
+                Compression::Gzip => todo!(), // not implemented: panics
+                Compression::Oodle => todo!(), // not implemented: panics
+            };
+            match version {
+                Version::V8A => writer.write_u8(compression)?, // V8A stores the method in one byte
+                _ => writer.write_u32::<LE>(compression.into())?,
+            }
+
+            if version.version_major() == VersionMajor::Initial {
+                writer.write_u64::<LE>(self.timestamp.unwrap_or_default())?;
+            }
+            if let Some(hash) = self.hash {
+                writer.write_all(&hash)?;
+            } else {
+                panic!("hash missing");
+            }
+            if version.version_major() >= VersionMajor::CompressionEncryption {
+                if let Some(blocks) = &self.blocks {
+                    for block in blocks {
+                        block.write(writer)?;
+                    }
+                }
+                writer.write_bool(self.encrypted)?;
+                writer.write_u32::<LE>(self.block_uncompressed.unwrap_or_default())?;
+            }
+
+            Ok(())
         }
-        Ok(())
     }
 
     pub fn read_encoded<R: io::Read>(
diff --git a/repak/src/error.rs b/repak/src/error.rs
index b09c80b..edbad69 100644
--- a/repak/src/error.rs
+++ b/repak/src/error.rs
@@ -28,6 +28,8 @@ pub enum Error {
     },
     #[error("pak is encrypted but no key was provided")]
     Encrypted,
+    #[error("error with OsString")]
+    OsString(std::ffi::OsString),
     #[error("{0}")]
     Other(&'static str),
 }
diff --git a/repak/src/pak.rs b/repak/src/pak.rs
index 0893ca9..d5fd249 100644
--- a/repak/src/pak.rs
+++ b/repak/src/pak.rs
@@ -1,5 +1,6 @@
 use super::ext::{ReadExt, WriteExt};
 use super::{Version, VersionMajor};
+use aes::Aes256Enc;
 use byteorder::{ReadBytesExt, WriteBytesExt, LE};
 use std::collections::BTreeMap;
 use std::io::{self, Read, Seek, Write};
@@ -10,12 +11,14 @@ pub struct PakReader<R: Read + Seek> {
     reader: R,
     key: Option<aes::Aes256Dec>,
 }
+
 #[derive(Debug)]
 pub struct PakWriter<W: Write + Seek> {
     pak: Pak,
     writer: W,
     key: Option<aes::Aes256Enc>,
 }
+
 #[derive(Debug)]
 pub struct Pak {
     version: Version,
@@ -51,14 +54,14 @@ impl Index {
     fn entries(&self) -> &BTreeMap<String, super::entry::Entry> {
         match self {
             Index::V1(index) => &index.entries,
-            Index::V2(index) => &index.entries_by_path,
+            Index::V2(index) => &index.entries,
         }
     }
 
     fn add_entry(&mut self, path: &str, entry: super::entry::Entry) {
         match self {
             Index::V1(index) => index.entries.insert(path.to_string(), entry),
-            Index::V2(_index) => todo!(),
+            Index::V2(index) => index.entries.insert(path.to_string(), entry),
         };
     }
 }
@@ -71,10 +74,7 @@ pub struct IndexV1 {
 #[derive(Debug, Default)]
 pub struct IndexV2 {
     path_hash_seed: u64,
-    path_hash_index: Option<Vec<u8>>,
-    full_directory_index: Option<BTreeMap<String, BTreeMap<String, u32>>>,
-    encoded_entries: Vec<u8>,
-    entries_by_path: BTreeMap<String, super::entry::Entry>,
+    entries: BTreeMap<String, super::entry::Entry>,
 }
 
 fn decrypt(key: &Option<aes::Aes256Dec>, bytes: &mut [u8]) -> Result<(), super::Error> {
@@ -231,23 +231,34 @@ impl Pak {
         let index = if version.version_major() >= VersionMajor::PathHashIndex {
             let path_hash_seed = index.read_u64::<LE>()?;
 
-            let path_hash_index = if index.read_u32::<LE>()? != 0 {
+            // Left in for potential desire to verify path index hashes.
+            let _path_hash_index = if index.read_u32::<LE>()? != 0 {
                 let path_hash_index_offset = index.read_u64::<LE>()?;
                 let path_hash_index_size = index.read_u64::<LE>()?;
                 let _path_hash_index_hash = index.read_len(20)?;
 
                 reader.seek(io::SeekFrom::Start(path_hash_index_offset))?;
-                let mut path_hash_index = reader.read_len(path_hash_index_size as usize)?;
+                let mut path_hash_index_buf = reader.read_len(path_hash_index_size as usize)?;
                 // TODO verify hash
 
                 if footer.encrypted {
-                    decrypt(&key, &mut path_hash_index)?;
+                    decrypt(&key, &mut path_hash_index_buf)?;
                 }
+
+                let mut path_hash_index = vec![];
+                let mut phi_reader = io::Cursor::new(&mut path_hash_index_buf);
+                for _ in 0..len {
+                    let hash = phi_reader.read_u64::<LE>()?;
+                    let encoded_entry_offset = phi_reader.read_u32::<LE>()?;
+                    path_hash_index.push((hash, encoded_entry_offset));
+                }
+
                 Some(path_hash_index)
             } else {
                 None
             };
 
+            // Left in for potential desire to verify full directory index hashes.
             let full_directory_index = if index.read_u32::<LE>()? != 0 {
                 let full_directory_index_offset = index.read_u64::<LE>()?;
                 let full_directory_index_size = index.read_u64::<LE>()?;
@@ -290,12 +301,6 @@ impl Pak {
                         encoded_entries.seek(io::SeekFrom::Start(*encoded_offset as u64))?;
                         let entry =
                             super::entry::Entry::read_encoded(&mut encoded_entries, version)?;
-
-                        // entry next to file contains full metadata
-                        //reader.seek(io::SeekFrom::Start(entry.offset))?;
-                        //let _ = super::entry::Entry::new(&mut reader, version)?;
-
-                        // concat directory with file name to match IndexV1 but should provide a more direct access method
                         let path = format!(
                             "{}{}",
                             dir_name.strip_prefix('/').unwrap_or(dir_name),
@@ -310,10 +315,7 @@ impl Pak {
 
             Index::V2(IndexV2 {
                 path_hash_seed,
-                path_hash_index,
-                full_directory_index,
-                encoded_entries,
-                entries_by_path,
+                entries: entries_by_path,
             })
         } else {
             let mut entries = BTreeMap::new();
@@ -332,6 +334,7 @@ impl Pak {
             index,
         })
     }
+
     fn write<W: Write + Seek>(
         &self,
         writer: &mut W,
@@ -339,30 +342,118 @@ impl Pak {
     ) -> Result<(), super::Error> {
         let index_offset = writer.stream_position()?;
 
-        let mut index_cur = std::io::Cursor::new(vec![]);
-        index_cur.write_string(&self.mount_point)?;
+        let mut index_buf = vec![];
+        let mut index_writer = io::Cursor::new(&mut index_buf);
+        index_writer.write_string(&self.mount_point)?;
 
-        match &self.index {
+        let secondary_index = match &self.index {
             Index::V1(index) => {
-                index_cur.write_u32::<LE>(index.entries.len() as u32)?;
+                let record_count = index.entries.len() as u32;
+                index_writer.write_u32::<LE>(record_count)?;
                 for (path, entry) in &index.entries {
-                    index_cur.write_string(path)?;
+                    index_writer.write_string(path)?;
                     entry.write(
-                        &mut index_cur,
+                        &mut index_writer,
                         self.version,
                         super::entry::EntryLocation::Index,
                     )?;
                 }
+                None
             }
-            Index::V2(_index) => todo!(),
+            Index::V2(index) => {
+                let record_count = index.entries.len() as u32;
+                index_writer.write_u32::<LE>(record_count)?;
+                index_writer.write_u64::<LE>(index.path_hash_seed)?;
+
+                // The index is organized sequentially as:
+                // - Index Header, which contains:
+                //     - Mount Point (u32 len + string w/ terminating byte)
+                //     - Entry Count (u32)
+                //     - Path Hash Seed (u64)
+                //     - Has Path Hash Index (u32); if true, then:
+                //         - Path Hash Index Offset (u64)
+                //         - Path Hash Index Size (u64)
+                //         - Path Hash Index Hash ([u8; 20])
+                //     - Has Full Directory Index (u32); if true, then:
+                //         - Full Directory Index Offset (u64)
+                //         - Full Directory Index Size (u64)
+                //         - Full Directory Index Hash ([u8; 20])
+                //     - Encoded Index Records Size (u32)
+                //     - Encoded Index Records; each plain record is 0xC bytes:
+                //         - Flags (u32)
+                //         - Offset (u32)
+                //         - Size (u32)
+                //     - (Unused) File Count (u32)
+                // - Path Hash Index
+                // - Full Directory Index
+                let bytes_before_phi = {
+                    let mut size = 0;
+                    size += 4; // mount point len
+                    size += self.mount_point.len() as u64 + 1; // mount point string w/ NUL byte
+                    size += 8; // path hash seed
+                    size += 4; // record count
+                    size += 4; // has path hash index (since we're generating, always true)
+                    size += 8 + 8 + 20; // path hash index offset, size and hash
+                    size += 4; // has full directory index (since we're generating, always true)
+                    size += 8 + 8 + 20; // full directory index offset, size and hash
+                    size += 4; // encoded entry size
+                    size += index.entries.len() as u64 * {
+                        4 // flags
+                        + 4 // offset
+                        + 4 // size
+                    };
+                    size += 4; // unused file count
+                    size
+                };
+
+                let path_hash_index_offset = index_offset + bytes_before_phi;
+
+                let mut phi_buf = vec![];
+                let mut phi_writer = io::Cursor::new(&mut phi_buf);
+                generate_path_hash_index(&mut phi_writer, index.path_hash_seed, &index.entries)?;
+
+                let full_directory_index_offset = path_hash_index_offset + phi_buf.len() as u64;
+
+                let mut fdi_buf = vec![];
+                let mut fdi_writer = io::Cursor::new(&mut fdi_buf);
+                generate_full_directory_index(&mut fdi_writer, &index.entries)?;
+
+                index_writer.write_u32::<LE>(1)?; // we have path hash index
+                index_writer.write_u64::<LE>(path_hash_index_offset)?;
+                index_writer.write_u64::<LE>(phi_buf.len() as u64)?; // path hash index size
+                index_writer.write_all(&hash(&phi_buf))?;
+
+                index_writer.write_u32::<LE>(1)?; // we have full directory index
+                index_writer.write_u64::<LE>(full_directory_index_offset)?;
+                index_writer.write_u64::<LE>(fdi_buf.len() as u64)?; // full directory index size
+                index_writer.write_all(&hash(&fdi_buf))?;
+
+                let encoded_entries_size = index.entries.len() as u32 * ENCODED_ENTRY_SIZE;
+                index_writer.write_u32::<LE>(encoded_entries_size)?;
+
+                for entry in index.entries.values() {
+                    entry.write(
+                        &mut index_writer,
+                        self.version,
+                        super::entry::EntryLocation::Index,
+                    )?;
+                }
+
+                index_writer.write_u32::<LE>(0)?;
+
+                Some((phi_buf, fdi_buf))
+            }
+        };
+
+        let index_hash = hash(&index_buf);
+
+        writer.write_all(&index_buf)?;
+
+        if let Some((phi_buf, fdi_buf)) = secondary_index {
+            writer.write_all(&phi_buf[..])?;
+            writer.write_all(&fdi_buf[..])?;
         }
 
-        let index_data = index_cur.into_inner();
-
-        use sha1::{Digest, Sha1};
-        let mut hasher = Sha1::new();
-        hasher.update(&index_data);
-
         let footer = super::footer::Footer {
             encryption_uuid: None,
             encrypted: false,
@@ -370,23 +461,130 @@ impl Pak {
             version: self.version,
             version_major: self.version.version_major(),
             index_offset,
-            index_size: index_data.len() as u64,
-            hash: hasher.finalize().into(),
+            index_size: index_buf.len() as u64,
+            hash: index_hash,
             frozen: false,
             compression: vec![],
         };
 
-        writer.write_all(&index_data)?;
-
         footer.write(writer)?;
 
         Ok(())
     }
 }
 
+/// Returns the 20-byte SHA-1 digest of `data`.
+fn hash(data: &[u8]) -> [u8; 20] {
+    use sha1::{Digest, Sha1};
+    // One-shot form of creating a hasher, feeding it `data` and finalizing.
+    Sha1::digest(data).into()
+}
+
+/// Size in bytes of one encoded index record made of three `u32` fields:
+/// flags, offset and size.
+/// NOTE(review): `Entry::write` can emit wider or extra fields (64-bit values,
+/// compressed entries) -- confirm callers only rely on this for plain entries.
+const ENCODED_ENTRY_SIZE: u32 = 3 * std::mem::size_of::<u32>() as u32;
+
+/// Writes the path hash index: a `u32` entry count, then one
+/// (`u64` path hash, `u32` encoded-entry offset) pair per path, then a `u32` zero.
+/// Offsets assume every encoded entry occupies `ENCODED_ENTRY_SIZE` bytes.
+fn generate_path_hash_index<W: Write>(
+    writer: &mut W,
+    path_hash_seed: u64,
+    entries: &BTreeMap<String, super::entry::Entry>,
+) -> Result<(), super::Error> {
+    writer.write_u32::<LE>(entries.len() as u32)?;
+    let mut encoded_offset = 0u32;
+    for path in entries.keys() {
+        // Paths are hashed over their UTF-16LE byte representation.
+        let mut wide = Vec::with_capacity(path.len() * 2);
+        wide.extend(path.encode_utf16().flat_map(u16::to_le_bytes));
+        writer.write_u64::<LE>(fnv64(&wide, path_hash_seed))?;
+        writer.write_u32::<LE>(encoded_offset)?;
+        encoded_offset += ENCODED_ENTRY_SIZE;
+    }
+    // A zero `u32` follows the last pair.
+    writer.write_u32::<LE>(0)?;
+    Ok(())
+}
+
+/// FNV-1a style 64-bit hash over `data`.
+/// The running hash starts at the FNV offset basis plus `offset` (wrapping).
+fn fnv64(data: &[u8], offset: u64) -> u64 {
+    const OFFSET: u64 = 0xcbf29ce484222325;
+    const PRIME: u64 = 0x00000100000001b3;
+    // Per byte: XOR it in, then multiply by the prime with wrapping overflow.
+    data.iter().fold(OFFSET.wrapping_add(offset), |acc, &byte| {
+        (acc ^ u64::from(byte)).wrapping_mul(PRIME)
+    })
+}
+
+/// Writes the full directory index for `entries` to `writer`.
+///
+/// Layout: a `u32` directory count, then for each directory its name, a `u32`
+/// file count, and for each file its name followed by the `u32` offset of its
+/// encoded entry. Directory names keep their trailing slash; paths without a
+/// slash are filed under `"/"`.
+///
+/// Offsets assume every encoded entry occupies `ENCODED_ENTRY_SIZE` bytes and
+/// that entries are encoded in the iteration order of `entries`.
+///
+/// # Errors
+///
+/// Propagates any error raised while writing to `writer`.
+fn generate_full_directory_index<W: Write>(
+    writer: &mut W,
+    entries: &BTreeMap<String, super::entry::Entry>,
+) -> Result<(), super::Error> {
+    let mut directories: BTreeMap<String, BTreeMap<String, u32>> = BTreeMap::new();
+    let mut encoded_offset = 0u32;
+    for path in entries.keys() {
+        // Split just after the last slash so the directory part retains it.
+        let (directory, filename) = match path.rfind('/') {
+            Some(slash) => path.split_at(slash + 1),
+            None => ("/", path.as_str()),
+        };
+        directories
+            .entry(directory.to_owned())
+            .or_default()
+            .insert(filename.to_owned(), encoded_offset);
+        encoded_offset += ENCODED_ENTRY_SIZE;
+    }
+
+    writer.write_u32::<LE>(directories.len() as u32)?;
+    for (directory, files) in &directories {
+        writer.write_string(directory)?;
+        writer.write_u32::<LE>(files.len() as u32)?;
+        for (filename, offset) in files {
+            writer.write_string(filename)?;
+            writer.write_u32::<LE>(*offset)?;
+        }
+    }
+    Ok(())
+}
+
+/// Appends zero bytes to `v` until its length is a multiple of `alignment`.
+/// Leaves `v` untouched when already aligned; panics if `alignment` is zero.
+fn pad_zeros_to_alignment(v: &mut Vec<u8>, alignment: usize) {
+    assert!(alignment >= 1);
+    // Grow to the next multiple of `alignment`, not by that many extra bytes.
+    v.resize(((v.len() + alignment - 1) / alignment) * alignment, 0);
+}
+
+/// Encrypts `bytes` in place with `key`, one 16-byte AES block at a time.
+/// NOTE(review): `bytes.len()` presumably must be a multiple of 16 -- confirm.
+fn encrypt(key: Aes256Enc, bytes: &mut [u8]) {
+    use aes::cipher::BlockEncrypt;
+    bytes.chunks_mut(16).for_each(|block| key.encrypt_block(aes::Block::from_mut_slice(block)));
+}
+
+#[cfg(test)]
 mod test {
+    use super::IndexV2;
+
     #[test]
-    fn test_rewrite_pak() {
+    fn test_rewrite_pak_v8b() {
         use std::io::Cursor;
         let bytes = include_bytes!("../tests/packs/pack_v8b.pak");
 
@@ -407,6 +605,45 @@ mod test {
         }
 
         let out_bytes = pak_writer.write_index().unwrap().into_inner();
-        assert_eq!(bytes.to_vec(), out_bytes);
+        assert_eq!(&bytes[..], &out_bytes[..]);
+    }
+
+    /// Round-trips `pack_v11.pak` through `PakReader` and `PakWriter` and
+    /// expects byte-identical output.
+    #[test]
+    fn test_rewrite_pak_v11() {
+        use std::io::Cursor;
+        let original = include_bytes!("../tests/packs/pack_v11.pak");
+
+        let mut source = super::PakReader::new_any(Cursor::new(original), None).unwrap();
+        let mount_point = source.mount_point().to_owned();
+        let mut pak_writer =
+            super::PakWriter::new(Cursor::new(vec![]), None, super::Version::V11, mount_point);
+
+        // Copy every file from the source pak into the writer.
+        for path in source.files() {
+            let contents = source.get(&path).unwrap();
+            pak_writer
+                .write_file(&path, &mut Cursor::new(contents))
+                .unwrap();
+        }
+
+        // UnrealPak seeds the path hash with strcrc32() of the output pak's absolute
+        // path (UTF-16LE). Users should not have to reproduce that path, so the seed
+        // is overridden with a fixed value here.
+        let seeded = match pak_writer.pak.index {
+            super::Index::V2(index) => IndexV2 {
+                path_hash_seed: u64::from_le_bytes([
+                    0x7D, 0x5A, 0x5C, 0x20, 0x00, 0x00, 0x00, 0x00,
+                ]),
+                ..index
+            },
+            _ => panic!(),
+        };
+        pak_writer.pak.index = super::Index::V2(seeded);
+
+        let rewritten = pak_writer.write_index().unwrap().into_inner();
+
+        assert_eq!(&original[..], &rewritten[..]);
+    }
 }