From 0b5bded4b44367d3b9eae3aedb25870929307887 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Mon, 16 Jan 2023 19:10:25 -0600 Subject: [PATCH] Implement parsing of v11 --- src/entry.rs | 129 +++++++++++++++++++++++++++++++++++++-- src/lib.rs | 1 + src/pak.rs | 169 +++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 276 insertions(+), 23 deletions(-) diff --git a/src/entry.rs b/src/entry.rs index 9969d59..b73bcc2 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -17,6 +17,11 @@ impl Block { } } +fn align(offset: u64) -> u64 { + // add alignment (aes block size: 16) then zero out alignment bits + (offset + 15) & !15 +} + #[derive(Debug)] pub struct Entry { pub offset: u64, @@ -24,13 +29,40 @@ pub struct Entry { pub uncompressed: u64, pub compression: Compression, pub timestamp: Option, - pub hash: [u8; 20], + pub hash: Option<[u8; 20]>, pub blocks: Option>, pub encrypted: bool, pub block_uncompressed: Option, } impl Entry { + pub fn get_serialized_size( + version: super::Version, + compression: Compression, + block_count: u32, + ) -> u64 { + let mut size = 0; + size += 8; // offset + size += 8; // compressed + size += 8; // uncompressed + size += 4; // compression + size += match version == Version::Initial { + true => 8, // timestamp + false => 0, + }; + size += 20; // hash + size += match compression != Compression::None { + true => 4 + (8 + 8) * block_count as u64, // blocks + false => 0, + }; + size += 1; // encrypted + size += match version >= Version::CompressionEncryption { + true => 4, // blocks uncompressed + false => 0, + }; + size + } + pub fn new(reader: &mut R, version: super::Version) -> Result { // since i need the compression flags, i have to store these as variables which is mildly annoying let offset = reader.read_u64::()?; @@ -49,7 +81,7 @@ impl Entry { true => Some(reader.read_u64::()?), false => None, }, - hash: reader.read_guid()?, + hash: Some(reader.read_guid()?), blocks: match version >= Version::CompressionEncryption && compression != Compression::None { @@ -64,6 +96,96 @@ impl Entry { }) } + pub fn new_encoded( + reader: &mut R, + version: super::Version, + ) -> Result { + let bits = reader.read_u32::()?; + let compression = match (bits >> 23) & 0x3f { + 0x01 | 0x10 | 0x20 => Compression::Zlib, + _ => Compression::None, + }; + + let encrypted = (bits & (1 << 22)) != 0; + let compression_block_count: u32 = (bits >> 6) & 0xffff; + let mut block_uncompressed = bits & 0x3f; + + if block_uncompressed == 0x3f { + block_uncompressed = reader.read_u32::()?; + } else { + block_uncompressed = block_uncompressed << 11; + } + + let mut var_int = |bit: u32| -> Result<_, super::Error> { + Ok(if (bits & (1 << bit)) != 0 { + reader.read_u32::()? as u64 + } else { + reader.read_u64::()? + }) + }; + + let offset = var_int(31)?; + let uncompressed = var_int(30)?; + let compressed = match compression { + Compression::None => uncompressed, + _ => var_int(29)?, + }; + + block_uncompressed = if compression_block_count <= 0 { + 0 + } else if uncompressed < block_uncompressed.into() { + uncompressed.try_into().unwrap() + } else { + block_uncompressed + }; + + let offset_base = + match version >= super::Version::RelativeChunkOffsets { + true => 0, + false => offset, + } + Entry::get_serialized_size(version, compression, compression_block_count); + + let blocks = if compression_block_count == 1 && !encrypted { + Some(vec![Block { + start: offset_base, + end: offset_base + compressed, + }]) + } else if compression_block_count > 0 { + let mut index = offset_base; + Some( + (0..compression_block_count) + .into_iter() + .map(|_| { + let mut block_size = reader.read_u32::()? as u64; + let block = Block { + start: index, + end: index + block_size, + }; + if encrypted { + block_size = align(block_size); + } + index += block_size; + Ok(block) + }) + .collect::, super::Error>>()?, + ) + } else { + None + }; + + Ok(Entry { + offset, + compressed, + uncompressed, + timestamp: None, + compression, + hash: None, + blocks, + encrypted, + block_uncompressed: Some(block_uncompressed), + }) + } + pub fn read( &self, reader: &mut R, @@ -75,8 +197,7 @@ impl Entry { Entry::new(reader, version)?; let data_offset = reader.stream_position()?; let mut data = reader.read_len(match self.encrypted { - // add alignment (aes block size: 16) then zero out alignment bits - true => (self.compressed + 15) & !15, + true => align(self.compressed), false => self.compressed, } as usize)?; if self.encrypted { diff --git a/src/lib.rs b/src/lib.rs index 5625ecd..91ad64e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ pub const MAGIC: u32 = 0x5A6F12E1; #[derive( Clone, Copy, PartialEq, Eq, PartialOrd, Debug, strum::Display, strum::FromRepr, strum::EnumIter, )] + pub enum Version { Unknown, // v0 unknown (mostly just for padding) Initial, // v1 initial specification diff --git a/src/pak.rs b/src/pak.rs index dde1016..9b5c79e 100644 --- a/src/pak.rs +++ b/src/pak.rs @@ -1,15 +1,57 @@ use super::Version; -use std::io; +use hashbrown::HashMap; +use std::io::{self, Seek}; #[derive(Debug)] pub struct Pak { version: Version, mount_point: String, key: Option, - entries: hashbrown::HashMap, + index: Index, reader: R, } +#[derive(Debug)] +pub enum Index { + V1(IndexV1), + V2(IndexV2), +} + +impl Index { + fn entries(&self) -> &HashMap { + match self { + Index::V1(index) => &index.entries, + Index::V2(index) => &index.entries_by_path, + } + } +} + +#[derive(Debug)] +pub struct IndexV1 { + entries: HashMap, +} + +#[derive(Debug)] +pub struct IndexV2 { + path_hash_seed: u64, + path_hash_index: Option>, + full_directory_index: Option>>, + encoded_entries: Vec, + entries_by_path: HashMap, +} + +fn decrypt(key: &Option, bytes: &mut [u8]) -> Result<(), super::Error> { + if let Some(key) = &key { + use aes::cipher::BlockDecrypt; + for chunk in bytes.chunks_mut(16) { + key.decrypt_block(aes::Block::from_mut_slice(chunk)) + } + Ok(()) + } else { + Err(super::Error::Encrypted) + } +} + impl Pak { pub fn new( mut reader: R, @@ -24,31 +66,119 @@ impl Pak { // read index to get all the entry info reader.seek(io::SeekFrom::Start(footer.index_offset))?; let mut index = reader.read_len(footer.index_size as usize)?; + // decrypt index if needed if footer.encrypted { - let Some(key) = &key else { - return Err(super::Error::Encrypted); - }; - use aes::cipher::BlockDecrypt; - for chunk in index.chunks_mut(16) { - key.decrypt_block(aes::Block::from_mut_slice(chunk)) - } + decrypt(&key, &mut index)?; } + let mut index = io::Cursor::new(index); let mount_point = index.read_string()?; let len = index.read_u32::()? as usize; - let mut entries = hashbrown::HashMap::with_capacity(len); - for _ in 0..len { - entries.insert( - index.read_string()?, - super::entry::Entry::new(&mut index, version)?, - ); - } + + let index = if version >= Version::PathHashIndex { + let path_hash_seed = index.read_u64::()?; + + let path_hash_index = if index.read_u32::()? != 0 { + let path_hash_index_offset = index.read_u64::()?; + let path_hash_index_size = index.read_u64::()?; + let _path_hash_index_hash = index.read_len(20)?; + + reader.seek(io::SeekFrom::Start(path_hash_index_offset))?; + let mut path_hash_index = reader.read_len(path_hash_index_size as usize)?; + // TODO verify hash + + if footer.encrypted { + decrypt(&key, &mut path_hash_index)?; + } + Some(path_hash_index) + } else { + None + }; + + let full_directory_index = if index.read_u32::()? != 0 { + let full_directory_index_offset = index.read_u64::()?; + let full_directory_index_size = index.read_u64::()?; + let _full_directory_index_hash = index.read_len(20)?; + + reader.seek(io::SeekFrom::Start(full_directory_index_offset))?; + let mut full_directory_index = + reader.read_len(full_directory_index_size as usize)?; + // TODO verify hash + + if footer.encrypted { + decrypt(&key, &mut full_directory_index)?; + } + let mut fdi = io::Cursor::new(full_directory_index); + + let dir_count = fdi.read_u32::()? as usize; + let mut directories = HashMap::with_capacity(dir_count); + for _ in 0..dir_count { + let dir_name = fdi.read_string()?; + let file_count = fdi.read_u32::()? as usize; + let mut files = HashMap::with_capacity(file_count); + for _ in 0..file_count { + let file_name = fdi.read_string()?; + files.insert(file_name, fdi.read_u32::()?); + } + directories.insert(dir_name, files); + } + Some(directories) + } else { + None + }; + let size = index.read_u32::()? as usize; + let encoded_entries = index.read_len(size)?; + + let mut entries_by_path = HashMap::new(); + if let Some(fdi) = &full_directory_index { + let mut encoded_entries = io::Cursor::new(&encoded_entries); + for (dir_name, dir) in fdi { + for (file_name, encoded_offset) in dir { + encoded_entries.seek(io::SeekFrom::Start(*encoded_offset as u64))?; + let entry = + super::entry::Entry::new_encoded(&mut encoded_entries, version)?; + + // entry next to file contains full metadata + //reader.seek(io::SeekFrom::Start(entry.offset))?; + //let _ = super::entry::Entry::new(&mut reader, version)?; + + // concat directory with file name to match IndexV1 but should provide a more direct access method + let path = format!( + "{}{}", + dir_name.strip_prefix("/").unwrap_or(dir_name), + file_name + ); + entries_by_path.insert(path, entry); + } + } + } + + assert_eq!(index.read_u32::()?, 0, "remaining index bytes are 0"); // TODO possibly remaining unencoded entries? + + Index::V2(IndexV2 { + path_hash_seed, + path_hash_index, + full_directory_index, + encoded_entries, + entries_by_path, + }) + } else { + let mut entries = HashMap::with_capacity(len); + for _ in 0..len { + entries.insert( + index.read_string()?, + super::entry::Entry::new(&mut index, version)?, + ); + } + Index::V1(IndexV1 { entries }) + }; + Ok(Self { version, mount_point, key, - entries, + index, reader, }) } @@ -68,14 +198,15 @@ impl Pak { } pub fn read(&mut self, path: &str, writer: &mut W) -> Result<(), super::Error> { - match self.entries.get(path) { + match self.index.entries().get(path) { Some(entry) => entry.read(&mut self.reader, self.version, self.key.as_ref(), writer), None => Err(super::Error::Other("no file found at given path")), } } pub fn files(&self) -> std::vec::IntoIter { - self.entries + self.index + .entries() .keys() .cloned() .collect::>()