From fefd02a3692c66538754d44e145c8a5ad8e09486 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= <39484203+jieyouxu@users.noreply.github.com> Date: Fri, 3 Feb 2023 01:58:27 +0800 Subject: [PATCH] Implement support for rewriting plain v11 paks (#2) --- repak/src/entry.rs | 119 ++++++++++++----- repak/src/error.rs | 2 + repak/src/pak.rs | 313 +++++++++++++++++++++++++++++++++++++++------ 3 files changed, 364 insertions(+), 70 deletions(-) diff --git a/repak/src/entry.rs b/repak/src/entry.rs index 8ba1070..64ffd03 100644 --- a/repak/src/entry.rs +++ b/repak/src/entry.rs @@ -2,7 +2,7 @@ use super::{ext::ReadExt, ext::WriteExt, Compression, Version, VersionMajor}; use byteorder::{ReadBytesExt, WriteBytesExt, LE}; use std::io; -#[derive(Debug)] +#[derive(Debug, PartialEq, Clone, Copy)] pub enum EntryLocation { Data, Index, @@ -119,47 +119,102 @@ impl Entry { }, }) } + pub fn write( &self, writer: &mut W, version: super::Version, location: EntryLocation, ) -> Result<(), super::Error> { - writer.write_u64::(match location { - EntryLocation::Data => 0, - EntryLocation::Index => self.offset, - })?; - writer.write_u64::(self.compressed)?; - writer.write_u64::(self.uncompressed)?; - let compression: u8 = match self.compression { - Compression::None => 0, - Compression::Zlib => 1, - Compression::Gzip => todo!(), - Compression::Oodle => todo!(), - }; - match version { - Version::V8A => writer.write_u8(compression)?, - _ => writer.write_u32::(compression.into())?, - } + if version >= super::Version::V10 && location == EntryLocation::Index { + let compression_block_size = self.block_uncompressed.unwrap_or_default(); + let compression_blocks_count = if self.compression != Compression::None { + self.blocks.as_ref().unwrap().len() as u32 + } else { + 0 + }; + let is_size_32_bit_safe = self.compressed <= u32::MAX as u64; + let is_uncompressed_size_32_bit_safe = self.uncompressed <= u32::MAX as u64; + let is_offset_32_bit_safe = self.offset <= u32::MAX as u64; - if version.version_major() == VersionMajor::Initial { - writer.write_u64::(self.timestamp.unwrap_or_default())?; - } - if let Some(hash) = self.hash { - writer.write_all(&hash)?; - } else { - panic!("hash missing"); - } - if version.version_major() >= VersionMajor::CompressionEncryption { - if let Some(blocks) = &self.blocks { - for block in blocks { - block.write(writer)?; + let flags = (compression_block_size) + | (compression_blocks_count << 6) + | ((self.encrypted as u32) << 22) + | ((self.compression as u32) << 23) + | ((is_size_32_bit_safe as u32) << 29) + | ((is_uncompressed_size_32_bit_safe as u32) << 30) + | ((is_offset_32_bit_safe as u32) << 31); + + writer.write_u32::(flags)?; + + if is_offset_32_bit_safe { + writer.write_u32::(self.offset as u32)?; + } else { + writer.write_u64::(self.offset)?; + } + + if is_uncompressed_size_32_bit_safe { + writer.write_u32::(self.uncompressed as u32)? + } else { + writer.write_u64::(self.uncompressed)? + } + + if self.compression != Compression::None { + if is_size_32_bit_safe { + writer.write_u32::(self.compressed as u32)?; + } else { + writer.write_u64::(self.compressed)?; + } + + assert!(self.blocks.is_some()); + let blocks = self.blocks.as_ref().unwrap(); + if blocks.len() > 1 || (blocks.len() == 1 && self.encrypted) { + for b in blocks { + let block_size = b.end - b.start; + writer.write_u64::(block_size)? + } } } - writer.write_bool(self.encrypted)?; - writer.write_u32::(self.block_uncompressed.unwrap_or_default())?; + + Ok(()) + } else { + writer.write_u64::(match location { + EntryLocation::Data => 0, + EntryLocation::Index => self.offset, + })?; + writer.write_u64::(self.compressed)?; + writer.write_u64::(self.uncompressed)?; + let compression: u8 = match self.compression { + Compression::None => 0, + Compression::Zlib => 1, + Compression::Gzip => todo!(), + Compression::Oodle => todo!(), + }; + match version { + Version::V8A => writer.write_u8(compression)?, + _ => writer.write_u32::(compression.into())?, + } + + if version.version_major() == VersionMajor::Initial { + writer.write_u64::(self.timestamp.unwrap_or_default())?; + } + if let Some(hash) = self.hash { + writer.write_all(&hash)?; + } else { + panic!("hash missing"); + } + if version.version_major() >= VersionMajor::CompressionEncryption { + if let Some(blocks) = &self.blocks { + for block in blocks { + block.write(writer)?; + } + } + writer.write_bool(self.encrypted)?; + writer.write_u32::(self.block_uncompressed.unwrap_or_default())?; + } + + Ok(()) } - Ok(()) } pub fn read_encoded( diff --git a/repak/src/error.rs b/repak/src/error.rs index b09c80b..edbad69 100644 --- a/repak/src/error.rs +++ b/repak/src/error.rs @@ -28,6 +28,8 @@ pub enum Error { }, #[error("pak is encrypted but no key was provided")] Encrypted, + #[error("error with OsString")] + OsString(std::ffi::OsString), #[error("{0}")] Other(&'static str), } diff --git a/repak/src/pak.rs b/repak/src/pak.rs index 0893ca9..d5fd249 100644 --- a/repak/src/pak.rs +++ b/repak/src/pak.rs @@ -1,5 +1,6 @@ use super::ext::{ReadExt, WriteExt}; use super::{Version, VersionMajor}; +use aes::Aes256Enc; use byteorder::{ReadBytesExt, WriteBytesExt, LE}; use std::collections::BTreeMap; use std::io::{self, Read, Seek, Write}; @@ -10,12 +11,14 @@ pub struct PakReader { reader: R, key: Option, } + #[derive(Debug)] pub struct PakWriter { pak: Pak, writer: W, key: Option, } + #[derive(Debug)] pub struct Pak { version: Version, @@ -51,14 +54,14 @@ impl Index { fn entries(&self) -> &BTreeMap { match self { Index::V1(index) => &index.entries, - Index::V2(index) => &index.entries_by_path, + Index::V2(index) => &index.entries, } } fn add_entry(&mut self, path: &str, entry: super::entry::Entry) { match self { Index::V1(index) => index.entries.insert(path.to_string(), entry), - Index::V2(_index) => todo!(), + Index::V2(index) => index.entries.insert(path.to_string(), entry), }; } } @@ -71,10 +74,7 @@ pub struct IndexV1 { #[derive(Debug, Default)] pub struct IndexV2 { path_hash_seed: u64, - path_hash_index: Option>, - full_directory_index: Option>>, - encoded_entries: Vec, - entries_by_path: BTreeMap, + entries: BTreeMap, } fn decrypt(key: &Option, bytes: &mut [u8]) -> Result<(), super::Error> { @@ -231,23 +231,34 @@ impl Pak { let index = if version.version_major() >= VersionMajor::PathHashIndex { let path_hash_seed = index.read_u64::()?; - let path_hash_index = if index.read_u32::()? != 0 { + // Left in for potential desire to verify path index hashes. + let _path_hash_index = if index.read_u32::()? != 0 { let path_hash_index_offset = index.read_u64::()?; let path_hash_index_size = index.read_u64::()?; let _path_hash_index_hash = index.read_len(20)?; reader.seek(io::SeekFrom::Start(path_hash_index_offset))?; - let mut path_hash_index = reader.read_len(path_hash_index_size as usize)?; + let mut path_hash_index_buf = reader.read_len(path_hash_index_size as usize)?; // TODO verify hash if footer.encrypted { - decrypt(&key, &mut path_hash_index)?; + decrypt(&key, &mut path_hash_index_buf)?; } + + let mut path_hash_index = vec![]; + let mut phi_reader = io::Cursor::new(&mut path_hash_index_buf); + for _ in 0..len { + let hash = phi_reader.read_u64::()?; + let encoded_entry_offset = phi_reader.read_u32::()?; + path_hash_index.push((hash, encoded_entry_offset)); + } + Some(path_hash_index) } else { None }; + // Left in for potential desire to verify full directory index hashes. let full_directory_index = if index.read_u32::()? != 0 { let full_directory_index_offset = index.read_u64::()?; let full_directory_index_size = index.read_u64::()?; @@ -290,12 +301,6 @@ impl Pak { encoded_entries.seek(io::SeekFrom::Start(*encoded_offset as u64))?; let entry = super::entry::Entry::read_encoded(&mut encoded_entries, version)?; - - // entry next to file contains full metadata - //reader.seek(io::SeekFrom::Start(entry.offset))?; - //let _ = super::entry::Entry::new(&mut reader, version)?; - - // concat directory with file name to match IndexV1 but should provide a more direct access method let path = format!( "{}{}", dir_name.strip_prefix('/').unwrap_or(dir_name), @@ -310,10 +315,7 @@ impl Pak { Index::V2(IndexV2 { path_hash_seed, - path_hash_index, - full_directory_index, - encoded_entries, - entries_by_path, + entries: entries_by_path, }) } else { let mut entries = BTreeMap::new(); @@ -332,6 +334,7 @@ impl Pak { index, }) } + fn write( &self, writer: &mut W, @@ -339,30 +342,118 @@ impl Pak { ) -> Result<(), super::Error> { let index_offset = writer.stream_position()?; - let mut index_cur = std::io::Cursor::new(vec![]); - index_cur.write_string(&self.mount_point)?; + let mut index_buf = vec![]; + let mut index_writer = io::Cursor::new(&mut index_buf); + index_writer.write_string(&self.mount_point)?; - match &self.index { + let secondary_index = match &self.index { Index::V1(index) => { - index_cur.write_u32::(index.entries.len() as u32)?; + let record_count = index.entries.len() as u32; + index_writer.write_u32::(record_count)?; for (path, entry) in &index.entries { - index_cur.write_string(path)?; + index_writer.write_string(path)?; entry.write( - &mut index_cur, + &mut index_writer, self.version, super::entry::EntryLocation::Index, )?; } + None } - Index::V2(_index) => todo!(), + Index::V2(index) => { + let record_count = index.entries.len() as u32; + index_writer.write_u32::(record_count)?; + index_writer.write_u64::(index.path_hash_seed)?; + + // The index is organized sequentially as: + // - Index Header, which contains: + // - Mount Point (u32 len + string w/ terminating byte) + // - Entry Count (u32) + // - Path Hash Seed (u64) + // - Has Path Hash Index (u32); if true, then: + // - Path Hash Index Offset (u64) + // - Path Hash Index Size (u64) + // - Path Hash Index Hash ([u8; 20]) + // - Has Full Directory Index (u32); if true, then: + // - Full Directory Index Offset (u64) + // - Full Directory Index Size (u64) + // - Full Directory Index Hash ([u8; 20]) + // - Encoded Index Records Size + // - (Unused) File Count + // - Path Hash Index + // - Full Directory Index + // - Encoded Index Records; each encoded index record is (0xC bytes) from: + // - Flags (u32) + // - Offset (u32) + // - Size (u32) + let bytes_before_phi = { + let mut size = 0; + size += 4; // mount point len + size += self.mount_point.len() as u64 + 1; // mount point string w/ NUL byte + size += 8; // path hash seed + size += 4; // record count + size += 4; // has path hash index (since we're generating, always true) + size += 8 + 8 + 20; // path hash index offset, size and hash + size += 4; // has full directory index (since we're generating, always true) + size += 8 + 8 + 20; // full directory index offset, size and hash + size += 4; // encoded entry size + size += index.entries.len() as u64 * { + 4 // flags + + 4 // offset + + 4 // size + }; + size += 4; // unused file count + size + }; + + let path_hash_index_offset = index_offset + bytes_before_phi; + + let mut phi_buf = vec![]; + let mut phi_writer = io::Cursor::new(&mut phi_buf); + generate_path_hash_index(&mut phi_writer, index.path_hash_seed, &index.entries)?; + + let full_directory_index_offset = path_hash_index_offset + phi_buf.len() as u64; + + let mut fdi_buf = vec![]; + let mut fdi_writer = io::Cursor::new(&mut fdi_buf); + generate_full_directory_index(&mut fdi_writer, &index.entries)?; + + index_writer.write_u32::(1)?; // we have path hash index + index_writer.write_u64::(path_hash_index_offset)?; + index_writer.write_u64::(phi_buf.len() as u64)?; // path hash index size + index_writer.write_all(&hash(&phi_buf))?; + + index_writer.write_u32::(1)?; // we have full directory index + index_writer.write_u64::(full_directory_index_offset)?; + index_writer.write_u64::(fdi_buf.len() as u64)?; // path hash index size + index_writer.write_all(&hash(&fdi_buf))?; + + let encoded_entries_size = index.entries.len() as u32 * ENCODED_ENTRY_SIZE; + index_writer.write_u32::(encoded_entries_size)?; + + for entry in index.entries.values() { + entry.write( + &mut index_writer, + self.version, + super::entry::EntryLocation::Index, + )?; + } + + index_writer.write_u32::(0)?; + + Some((phi_buf, fdi_buf)) + } + }; + + let index_hash = hash(&index_buf); + + writer.write_all(&index_buf)?; + + if let Some((phi_buf, fdi_buf)) = secondary_index { + writer.write_all(&phi_buf[..])?; + writer.write_all(&fdi_buf[..])?; } - let index_data = index_cur.into_inner(); - - use sha1::{Digest, Sha1}; - let mut hasher = Sha1::new(); - hasher.update(&index_data); - let footer = super::footer::Footer { encryption_uuid: None, encrypted: false, @@ -370,23 +461,130 @@ impl Pak { version: self.version, version_major: self.version.version_major(), index_offset, - index_size: index_data.len() as u64, - hash: hasher.finalize().into(), + index_size: index_buf.len() as u64, + hash: index_hash, frozen: false, compression: vec![], }; - writer.write_all(&index_data)?; - footer.write(writer)?; Ok(()) } } +fn hash(data: &[u8]) -> [u8; 20] { + use sha1::{Digest, Sha1}; + let mut hasher = Sha1::new(); + hasher.update(data); + hasher.finalize().into() +} + +const ENCODED_ENTRY_SIZE: u32 = { + 4 // flags + + 4 // offset + + 4 // size +}; + +fn generate_path_hash_index( + writer: &mut W, + path_hash_seed: u64, + entries: &BTreeMap, +) -> Result<(), super::Error> { + writer.write_u32::(entries.len() as u32)?; + let mut offset = 0u32; + for path in entries.keys() { + let utf16le_path = path + .encode_utf16() + .flat_map(|c| c.to_le_bytes()) + .collect::>(); + let path_hash = fnv64(&utf16le_path, path_hash_seed); + writer.write_u64::(path_hash)?; + writer.write_u32::(offset)?; + offset += ENCODED_ENTRY_SIZE; + } + + writer.write_u32::(0)?; + + Ok(()) +} + +fn fnv64(data: &[u8], offset: u64) -> u64 { + const OFFSET: u64 = 0xcbf29ce484222325; + const PRIME: u64 = 0x00000100000001b3; + let mut hash = OFFSET.wrapping_add(offset); + for &b in data { + hash ^= b as u64; + hash = hash.wrapping_mul(PRIME); + } + hash +} + +fn generate_full_directory_index( + writer: &mut W, + entries: &BTreeMap, +) -> Result<(), super::Error> { + let mut offset = 0u32; + let mut fdi = BTreeMap::new(); + for path in entries.keys() { + let (directory, filename) = { + let i = path.rfind('/').map(|i| i + 1); // we want to include the slash on the directory + match i { + Some(i) => { + let (l, r) = path.split_at(i); + (l.to_owned(), r.to_owned()) + } + None => ("/".to_owned(), path.to_owned()), + } + }; + + fdi.entry(directory) + .and_modify(|d: &mut BTreeMap| { + d.insert(filename.clone(), offset); + }) + .or_insert_with(|| { + let mut files_and_offsets = BTreeMap::new(); + files_and_offsets.insert(filename.clone(), offset); + files_and_offsets + }); + + offset += ENCODED_ENTRY_SIZE; + } + + writer.write_u32::(fdi.len() as u32)?; + for (directory, files) in &fdi { + writer.write_string(directory)?; + writer.write_u32::(files.len() as u32)?; + for (filename, offset) in files { + writer.write_string(filename)?; + writer.write_u32::(*offset)?; + } + } + + Ok(()) +} + +fn pad_zeros_to_alignment(v: &mut Vec, alignment: usize) { + assert!(alignment >= 1); + if v.len() % alignment != 0 { + v.extend(std::iter::repeat(0).take(((v.len() + alignment - 1) / alignment) * alignment)) + } + assert!(v.len() % alignment == 0); +} + +fn encrypt(key: Aes256Enc, bytes: &mut [u8]) { + use aes::cipher::BlockEncrypt; + for chunk in bytes.chunks_mut(16) { + key.encrypt_block(aes::Block::from_mut_slice(chunk)) + } +} + +#[cfg(test)] mod test { + use super::IndexV2; + #[test] - fn test_rewrite_pak() { + fn test_rewrite_pak_v8b() { use std::io::Cursor; let bytes = include_bytes!("../tests/packs/pack_v8b.pak"); @@ -407,6 +605,45 @@ mod test { } let out_bytes = pak_writer.write_index().unwrap().into_inner(); - assert_eq!(bytes.to_vec(), out_bytes); + assert_eq!(&bytes[..], &out_bytes[..]); + } + + #[test] + fn test_rewrite_pak_v11() { + use std::io::Cursor; + let bytes = include_bytes!("../tests/packs/pack_v11.pak"); + + let mut reader = super::PakReader::new_any(Cursor::new(bytes), None).unwrap(); + let writer = Cursor::new(vec![]); + let mut pak_writer = super::PakWriter::new( + writer, + None, + super::Version::V11, + reader.mount_point().to_owned(), + ); + + for path in reader.files() { + let data = reader.get(&path).unwrap(); + pak_writer + .write_file(&path, &mut std::io::Cursor::new(data)) + .unwrap(); + } + + // There's a caveat: UnrealPak uses the absolute path (in UTF-16LE) of the output pak + // passed to strcrc32() as the PathHashSeed. We don't want to require the user to do this. + if let super::Index::V2(index) = pak_writer.pak.index { + pak_writer.pak.index = super::Index::V2(IndexV2 { + path_hash_seed: u64::from_le_bytes([ + 0x7D, 0x5A, 0x5C, 0x20, 0x00, 0x00, 0x00, 0x00, + ]), + ..index + }); + } else { + panic!() + }; + + let out_bytes = pak_writer.write_index().unwrap().into_inner(); + + assert_eq!(&bytes[..], &out_bytes[..]); } }