Implement support for rewriting plain v11 paks (#2)

This commit is contained in:
许杰友 Jieyou Xu (Joe) 2023-02-03 01:58:27 +08:00 committed by GitHub
parent d2bd98bb53
commit fefd02a369
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 364 additions and 70 deletions

View file

@ -2,7 +2,7 @@ use super::{ext::ReadExt, ext::WriteExt, Compression, Version, VersionMajor};
use byteorder::{ReadBytesExt, WriteBytesExt, LE};
use std::io;
#[derive(Debug)]
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum EntryLocation {
Data,
Index,
@ -119,12 +119,65 @@ impl Entry {
},
})
}
pub fn write<W: io::Write>(
&self,
writer: &mut W,
version: super::Version,
location: EntryLocation,
) -> Result<(), super::Error> {
if version >= super::Version::V10 && location == EntryLocation::Index {
let compression_block_size = self.block_uncompressed.unwrap_or_default();
let compression_blocks_count = if self.compression != Compression::None {
self.blocks.as_ref().unwrap().len() as u32
} else {
0
};
let is_size_32_bit_safe = self.compressed <= u32::MAX as u64;
let is_uncompressed_size_32_bit_safe = self.uncompressed <= u32::MAX as u64;
let is_offset_32_bit_safe = self.offset <= u32::MAX as u64;
let flags = (compression_block_size)
| (compression_blocks_count << 6)
| ((self.encrypted as u32) << 22)
| ((self.compression as u32) << 23)
| ((is_size_32_bit_safe as u32) << 29)
| ((is_uncompressed_size_32_bit_safe as u32) << 30)
| ((is_offset_32_bit_safe as u32) << 31);
writer.write_u32::<LE>(flags)?;
if is_offset_32_bit_safe {
writer.write_u32::<LE>(self.offset as u32)?;
} else {
writer.write_u64::<LE>(self.offset)?;
}
if is_uncompressed_size_32_bit_safe {
writer.write_u32::<LE>(self.uncompressed as u32)?
} else {
writer.write_u64::<LE>(self.uncompressed)?
}
if self.compression != Compression::None {
if is_size_32_bit_safe {
writer.write_u32::<LE>(self.compressed as u32)?;
} else {
writer.write_u64::<LE>(self.compressed)?;
}
assert!(self.blocks.is_some());
let blocks = self.blocks.as_ref().unwrap();
if blocks.len() > 1 || (blocks.len() == 1 && self.encrypted) {
for b in blocks {
let block_size = b.end - b.start;
writer.write_u64::<LE>(block_size)?
}
}
}
Ok(())
} else {
writer.write_u64::<LE>(match location {
EntryLocation::Data => 0,
EntryLocation::Index => self.offset,
@ -159,8 +212,10 @@ impl Entry {
writer.write_bool(self.encrypted)?;
writer.write_u32::<LE>(self.block_uncompressed.unwrap_or_default())?;
}
Ok(())
}
}
pub fn read_encoded<R: io::Read>(
reader: &mut R,

View file

@ -28,6 +28,8 @@ pub enum Error {
},
#[error("pak is encrypted but no key was provided")]
Encrypted,
#[error("error with OsString")]
OsString(std::ffi::OsString),
#[error("{0}")]
Other(&'static str),
}

View file

@ -1,5 +1,6 @@
use super::ext::{ReadExt, WriteExt};
use super::{Version, VersionMajor};
use aes::Aes256Enc;
use byteorder::{ReadBytesExt, WriteBytesExt, LE};
use std::collections::BTreeMap;
use std::io::{self, Read, Seek, Write};
@ -10,12 +11,14 @@ pub struct PakReader<R: Read + Seek> {
reader: R,
key: Option<aes::Aes256Dec>,
}
#[derive(Debug)]
/// Writer-side state for producing a pak: the in-memory [`Pak`] description, the output sink,
/// and an optional AES-256 encryption key.
pub struct PakWriter<W: Write + Seek> {
/// In-memory description of the pak being built (the tests reach its index via `pak.index`).
pak: Pak,
/// Destination the pak bytes are written to; the struct bound requires `Write + Seek`.
writer: W,
/// Optional AES-256 encryption key; the tests construct the writer with `None`.
// NOTE(review): presumably `None` means the output is written unencrypted; confirm against the write path.
key: Option<aes::Aes256Enc>,
}
#[derive(Debug)]
pub struct Pak {
version: Version,
@ -51,14 +54,14 @@ impl Index {
fn entries(&self) -> &BTreeMap<String, super::entry::Entry> {
match self {
Index::V1(index) => &index.entries,
Index::V2(index) => &index.entries_by_path,
Index::V2(index) => &index.entries,
}
}
fn add_entry(&mut self, path: &str, entry: super::entry::Entry) {
match self {
Index::V1(index) => index.entries.insert(path.to_string(), entry),
Index::V2(_index) => todo!(),
Index::V2(index) => index.entries.insert(path.to_string(), entry),
};
}
}
@ -71,10 +74,7 @@ pub struct IndexV1 {
#[derive(Debug, Default)]
pub struct IndexV2 {
path_hash_seed: u64,
path_hash_index: Option<Vec<u8>>,
full_directory_index: Option<BTreeMap<String, BTreeMap<String, u32>>>,
encoded_entries: Vec<u8>,
entries_by_path: BTreeMap<String, super::entry::Entry>,
entries: BTreeMap<String, super::entry::Entry>,
}
fn decrypt(key: &Option<aes::Aes256Dec>, bytes: &mut [u8]) -> Result<(), super::Error> {
@ -231,23 +231,34 @@ impl Pak {
let index = if version.version_major() >= VersionMajor::PathHashIndex {
let path_hash_seed = index.read_u64::<LE>()?;
let path_hash_index = if index.read_u32::<LE>()? != 0 {
// Left in for potential desire to verify path index hashes.
let _path_hash_index = if index.read_u32::<LE>()? != 0 {
let path_hash_index_offset = index.read_u64::<LE>()?;
let path_hash_index_size = index.read_u64::<LE>()?;
let _path_hash_index_hash = index.read_len(20)?;
reader.seek(io::SeekFrom::Start(path_hash_index_offset))?;
let mut path_hash_index = reader.read_len(path_hash_index_size as usize)?;
let mut path_hash_index_buf = reader.read_len(path_hash_index_size as usize)?;
// TODO verify hash
if footer.encrypted {
decrypt(&key, &mut path_hash_index)?;
decrypt(&key, &mut path_hash_index_buf)?;
}
let mut path_hash_index = vec![];
let mut phi_reader = io::Cursor::new(&mut path_hash_index_buf);
for _ in 0..len {
let hash = phi_reader.read_u64::<LE>()?;
let encoded_entry_offset = phi_reader.read_u32::<LE>()?;
path_hash_index.push((hash, encoded_entry_offset));
}
Some(path_hash_index)
} else {
None
};
// Left in for potential desire to verify full directory index hashes.
let full_directory_index = if index.read_u32::<LE>()? != 0 {
let full_directory_index_offset = index.read_u64::<LE>()?;
let full_directory_index_size = index.read_u64::<LE>()?;
@ -290,12 +301,6 @@ impl Pak {
encoded_entries.seek(io::SeekFrom::Start(*encoded_offset as u64))?;
let entry =
super::entry::Entry::read_encoded(&mut encoded_entries, version)?;
// entry next to file contains full metadata
//reader.seek(io::SeekFrom::Start(entry.offset))?;
//let _ = super::entry::Entry::new(&mut reader, version)?;
// concat directory with file name to match IndexV1 but should provide a more direct access method
let path = format!(
"{}{}",
dir_name.strip_prefix('/').unwrap_or(dir_name),
@ -310,10 +315,7 @@ impl Pak {
Index::V2(IndexV2 {
path_hash_seed,
path_hash_index,
full_directory_index,
encoded_entries,
entries_by_path,
entries: entries_by_path,
})
} else {
let mut entries = BTreeMap::new();
@ -332,6 +334,7 @@ impl Pak {
index,
})
}
fn write<W: Write + Seek>(
&self,
writer: &mut W,
@ -339,29 +342,117 @@ impl Pak {
) -> Result<(), super::Error> {
let index_offset = writer.stream_position()?;
let mut index_cur = std::io::Cursor::new(vec![]);
index_cur.write_string(&self.mount_point)?;
let mut index_buf = vec![];
let mut index_writer = io::Cursor::new(&mut index_buf);
index_writer.write_string(&self.mount_point)?;
match &self.index {
let secondary_index = match &self.index {
Index::V1(index) => {
index_cur.write_u32::<LE>(index.entries.len() as u32)?;
let record_count = index.entries.len() as u32;
index_writer.write_u32::<LE>(record_count)?;
for (path, entry) in &index.entries {
index_cur.write_string(path)?;
index_writer.write_string(path)?;
entry.write(
&mut index_cur,
&mut index_writer,
self.version,
super::entry::EntryLocation::Index,
)?;
}
None
}
Index::V2(_index) => todo!(),
Index::V2(index) => {
let record_count = index.entries.len() as u32;
index_writer.write_u32::<LE>(record_count)?;
index_writer.write_u64::<LE>(index.path_hash_seed)?;
// The index is organized sequentially as:
// - Index Header, which contains:
// - Mount Point (u32 len + string w/ terminating byte)
// - Entry Count (u32)
// - Path Hash Seed (u64)
// - Has Path Hash Index (u32); if true, then:
// - Path Hash Index Offset (u64)
// - Path Hash Index Size (u64)
// - Path Hash Index Hash ([u8; 20])
// - Has Full Directory Index (u32); if true, then:
// - Full Directory Index Offset (u64)
// - Full Directory Index Size (u64)
// - Full Directory Index Hash ([u8; 20])
// - Encoded Index Records Size
// - (Unused) File Count
// - Path Hash Index
// - Full Directory Index
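// - Encoded Index Records; each encoded index record is (0xC bytes) from:
// - Flags (u32)
// - Offset (u32)
// - Size (u32)
// NOTE: as written below, the encoded index records and the unused file count directly
// follow the encoded records size inside the primary index buffer; the path hash index
// and the full directory index are appended after that buffer.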
let bytes_before_phi = {
let mut size = 0;
size += 4; // mount point len
size += self.mount_point.len() as u64 + 1; // mount point string w/ NUL byte
size += 8; // path hash seed
size += 4; // record count
size += 4; // has path hash index (since we're generating, always true)
size += 8 + 8 + 20; // path hash index offset, size and hash
size += 4; // has full directory index (since we're generating, always true)
size += 8 + 8 + 20; // full directory index offset, size and hash
size += 4; // encoded entry size
size += index.entries.len() as u64 * {
4 // flags
+ 4 // offset
+ 4 // size
};
size += 4; // unused file count
size
};
let path_hash_index_offset = index_offset + bytes_before_phi;
let mut phi_buf = vec![];
let mut phi_writer = io::Cursor::new(&mut phi_buf);
generate_path_hash_index(&mut phi_writer, index.path_hash_seed, &index.entries)?;
let full_directory_index_offset = path_hash_index_offset + phi_buf.len() as u64;
let mut fdi_buf = vec![];
let mut fdi_writer = io::Cursor::new(&mut fdi_buf);
generate_full_directory_index(&mut fdi_writer, &index.entries)?;
index_writer.write_u32::<LE>(1)?; // we have path hash index
index_writer.write_u64::<LE>(path_hash_index_offset)?;
index_writer.write_u64::<LE>(phi_buf.len() as u64)?; // path hash index size
index_writer.write_all(&hash(&phi_buf))?;
index_writer.write_u32::<LE>(1)?; // we have full directory index
index_writer.write_u64::<LE>(full_directory_index_offset)?;
index_writer.write_u64::<LE>(fdi_buf.len() as u64)?; // full directory index size
index_writer.write_all(&hash(&fdi_buf))?;
let encoded_entries_size = index.entries.len() as u32 * ENCODED_ENTRY_SIZE;
index_writer.write_u32::<LE>(encoded_entries_size)?;
for entry in index.entries.values() {
entry.write(
&mut index_writer,
self.version,
super::entry::EntryLocation::Index,
)?;
}
let index_data = index_cur.into_inner();
index_writer.write_u32::<LE>(0)?;
use sha1::{Digest, Sha1};
let mut hasher = Sha1::new();
hasher.update(&index_data);
Some((phi_buf, fdi_buf))
}
};
let index_hash = hash(&index_buf);
writer.write_all(&index_buf)?;
if let Some((phi_buf, fdi_buf)) = secondary_index {
writer.write_all(&phi_buf[..])?;
writer.write_all(&fdi_buf[..])?;
}
let footer = super::footer::Footer {
encryption_uuid: None,
@ -370,23 +461,130 @@ impl Pak {
version: self.version,
version_major: self.version.version_major(),
index_offset,
index_size: index_data.len() as u64,
hash: hasher.finalize().into(),
index_size: index_buf.len() as u64,
hash: index_hash,
frozen: false,
compression: vec![],
};
writer.write_all(&index_data)?;
footer.write(writer)?;
Ok(())
}
}
/// Returns the 20-byte SHA-1 digest of `data`.
fn hash(data: &[u8]) -> [u8; 20] {
    use sha1::{Digest, Sha1};

    // One-shot digest: equivalent to creating a hasher, feeding it `data`, and finalizing.
    Sha1::digest(data).into()
}
/// Size in bytes of one encoded index record as this writer lays it out:
/// three `u32` fields (flags, offset, size), i.e. `0xC` bytes.
const ENCODED_ENTRY_SIZE: u32 = {
    /// Width in bytes of each field in the record.
    const FIELD_WIDTH: u32 = 4;
    /// Number of fields: flags, offset, size.
    const FIELD_COUNT: u32 = 3;
    FIELD_COUNT * FIELD_WIDTH
};
/// Serializes the path hash index for `entries` into `writer`.
///
/// Output layout (all integers little-endian):
/// - entry count (`u32`)
/// - per entry, in the map's key order: the `fnv64` hash of the path's UTF-16LE bytes seeded
///   with `path_hash_seed` (`u64`), then the entry's offset into the encoded records (`u32`)
/// - a trailing `u32` zero
///
/// Offsets advance by `ENCODED_ENTRY_SIZE` per entry, so every encoded record is assumed to
/// have that fixed size.
///
/// # Errors
///
/// Propagates any failure reported by `writer`.
fn generate_path_hash_index<W: Write>(
    writer: &mut W,
    path_hash_seed: u64,
    entries: &BTreeMap<String, super::entry::Entry>,
) -> Result<(), super::Error> {
    writer.write_u32::<LE>(entries.len() as u32)?;

    let mut encoded_entry_offset = 0u32;
    for path in entries.keys() {
        // The hash is computed over the UTF-16LE encoding of the path, not its UTF-8 bytes.
        let mut path_bytes = Vec::with_capacity(path.len() * 2);
        for unit in path.encode_utf16() {
            path_bytes.extend_from_slice(&unit.to_le_bytes());
        }

        writer.write_u64::<LE>(fnv64(&path_bytes, path_hash_seed))?;
        writer.write_u32::<LE>(encoded_entry_offset)?;
        encoded_entry_offset += ENCODED_ENTRY_SIZE;
    }

    // NOTE(review): trailing zero u32; its meaning in the format is not shown here, confirm.
    writer.write_u32::<LE>(0)?;
    Ok(())
}
/// Computes a 64-bit FNV-1a style hash of `data` (xor each byte in, then multiply by the FNV
/// prime), starting from the standard FNV offset basis plus `offset` (wrapping).
///
/// With `offset == 0` this is plain FNV-1a 64.
fn fnv64(data: &[u8], offset: u64) -> u64 {
    const OFFSET: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x00000100000001b3;

    data.iter().fold(OFFSET.wrapping_add(offset), |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
/// Serializes the full directory index for `entries` into `writer`.
///
/// Each path is split at its last `/` into a directory (keeping the trailing slash) and a file
/// name; paths without any `/` are filed under the directory `"/"`. Every file maps to its
/// offset into the encoded records, which advances by `ENCODED_ENTRY_SIZE` per entry in the
/// key order of `entries`.
///
/// Output layout (integers little-endian, strings via `write_string`):
/// - directory count (`u32`)
/// - per directory: name, file count (`u32`), then per file: name and offset (`u32`)
///
/// # Errors
///
/// Propagates any failure reported by `writer`.
fn generate_full_directory_index<W: Write>(
    writer: &mut W,
    entries: &BTreeMap<String, super::entry::Entry>,
) -> Result<(), super::Error> {
    let mut directories: BTreeMap<String, BTreeMap<String, u32>> = BTreeMap::new();
    let mut encoded_entry_offset = 0u32;

    for path in entries.keys() {
        let (directory, filename) = match path.rfind('/') {
            // Split just past the slash so the directory keeps its trailing '/'.
            Some(slash) => path.split_at(slash + 1),
            None => ("/", path.as_str()),
        };
        directories
            .entry(directory.to_owned())
            .or_default()
            .insert(filename.to_owned(), encoded_entry_offset);
        encoded_entry_offset += ENCODED_ENTRY_SIZE;
    }

    writer.write_u32::<LE>(directories.len() as u32)?;
    for (directory, files) in &directories {
        writer.write_string(directory)?;
        writer.write_u32::<LE>(files.len() as u32)?;
        for (filename, offset) in files {
            writer.write_string(filename)?;
            writer.write_u32::<LE>(*offset)?;
        }
    }
    Ok(())
}
/// Appends zero bytes to `v` until its length is a multiple of `alignment`.
///
/// A buffer whose length is already aligned (including an empty one) is left untouched.
///
/// # Panics
///
/// Panics if `alignment` is zero.
fn pad_zeros_to_alignment(v: &mut Vec<u8>, alignment: usize) {
    assert!(alignment >= 1);
    let remainder = v.len() % alignment;
    if remainder != 0 {
        // Add only the bytes missing up to the next multiple. Extending by the rounded-up
        // total length instead would overshoot and leave the buffer unaligned.
        let padded_len = v.len() + (alignment - remainder);
        v.resize(padded_len, 0);
    }
    assert!(v.len() % alignment == 0);
}
/// Encrypts `bytes` in place with `key`, processing the buffer in independent 16-byte chunks.
///
/// NOTE(review): `aes::Block::from_mut_slice` presumably requires each chunk to be exactly
/// 16 bytes, so callers would need to pad `bytes` to a multiple of 16 first; confirm.
fn encrypt(key: Aes256Enc, bytes: &mut [u8]) {
    use aes::cipher::BlockEncrypt;

    bytes
        .chunks_mut(16)
        .for_each(|chunk| key.encrypt_block(aes::Block::from_mut_slice(chunk)));
}
#[cfg(test)]
mod test {
use super::IndexV2;
#[test]
fn test_rewrite_pak() {
fn test_rewrite_pak_v8b() {
use std::io::Cursor;
let bytes = include_bytes!("../tests/packs/pack_v8b.pak");
@ -407,6 +605,45 @@ mod test {
}
let out_bytes = pak_writer.write_index().unwrap().into_inner();
assert_eq!(bytes.to_vec(), out_bytes);
assert_eq!(&bytes[..], &out_bytes[..]);
}
#[test]
fn test_rewrite_pak_v11() {
use std::io::Cursor;
let bytes = include_bytes!("../tests/packs/pack_v11.pak");
let mut reader = super::PakReader::new_any(Cursor::new(bytes), None).unwrap();
let writer = Cursor::new(vec![]);
let mut pak_writer = super::PakWriter::new(
writer,
None,
super::Version::V11,
reader.mount_point().to_owned(),
);
for path in reader.files() {
let data = reader.get(&path).unwrap();
pak_writer
.write_file(&path, &mut std::io::Cursor::new(data))
.unwrap();
}
// There's a caveat: UnrealPak uses the absolute path (in UTF-16LE) of the output pak
// passed to strcrc32() as the PathHashSeed. We don't want to require the user to do this.
if let super::Index::V2(index) = pak_writer.pak.index {
pak_writer.pak.index = super::Index::V2(IndexV2 {
path_hash_seed: u64::from_le_bytes([
0x7D, 0x5A, 0x5C, 0x20, 0x00, 0x00, 0x00, 0x00,
]),
..index
});
} else {
panic!()
};
let out_bytes = pak_writer.write_index().unwrap().into_inner();
assert_eq!(&bytes[..], &out_bytes[..]);
}
}