Implement parsing of v11

Truman Kilen 2023-01-16 19:10:25 -06:00
parent 254e0ce4c0
commit 0b5bded4b4
3 changed files with 276 additions and 23 deletions

View file

@@ -17,6 +17,11 @@ impl Block {
}
}
fn align(offset: u64) -> u64 {
// add alignment (aes block size: 16) then zero out alignment bits
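// e.g. align(1) == 16, align(16) == 16, align(33) == 48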
(offset + 15) & !15
}
#[derive(Debug)]
pub struct Entry {
pub offset: u64,
@@ -24,13 +29,40 @@ pub struct Entry {
pub uncompressed: u64,
pub compression: Compression,
pub timestamp: Option<u64>,
pub hash: Option<[u8; 20]>,
pub blocks: Option<Vec<Block>>,
pub encrypted: bool,
pub block_uncompressed: Option<u32>,
}
impl Entry {
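// size in bytes of an Entry as serialized into the pak; new_encoded below uses
// this as the base offset when reconstructing compression block ranges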
pub fn get_serialized_size(
version: super::Version,
compression: Compression,
block_count: u32,
) -> u64 {
let mut size = 0;
size += 8; // offset
size += 8; // compressed
size += 8; // uncompressed
size += 4; // compression
size += match version == Version::Initial {
true => 8, // timestamp
false => 0,
};
size += 20; // hash
size += match compression != Compression::None {
true => 4 + (8 + 8) * block_count as u64, // blocks
false => 0,
};
size += 1; // encrypted
size += match version >= Version::CompressionEncryption {
true => 4, // blocks uncompressed
false => 0,
};
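// e.g. a post-CompressionEncryption entry compressed into a single block:
// 8 + 8 + 8 + 4 + 20 + (4 + 16) + 1 + 4 = 73 bytes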
size
}
pub fn new<R: io::Read>(reader: &mut R, version: super::Version) -> Result<Self, super::Error> {
// since I need the compression flags, I have to store these as variables, which is mildly annoying
let offset = reader.read_u64::<LE>()?;
@@ -49,7 +81,7 @@ impl Entry {
true => Some(reader.read_u64::<LE>()?),
false => None,
},
hash: Some(reader.read_guid()?),
blocks: match version >= Version::CompressionEncryption
&& compression != Compression::None
{
@@ -64,6 +96,96 @@ impl Entry {
})
}
pub fn new_encoded<R: io::Read>(
reader: &mut R,
version: super::Version,
) -> Result<Self, super::Error> {
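// the first u32 is a packed bitfield describing how the rest of the encoded
// entry is laid out (field positions inferred from the reads below):
//   bit  31     offset stored as u32 instead of u64
//   bit  30     uncompressed size stored as u32 instead of u64
//   bit  29     compressed size stored as u32 instead of u64
//   bits 23-28  compression method
//   bit  22     encrypted flag
//   bits 6-21   compression block count
//   bits 0-5    uncompressed block size in 2 KiB units (0x3f means an explicit u32 follows)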
let bits = reader.read_u32::<LE>()?;
let compression = match (bits >> 23) & 0x3f {
0x01 | 0x10 | 0x20 => Compression::Zlib,
_ => Compression::None,
};
let encrypted = (bits & (1 << 22)) != 0;
let compression_block_count: u32 = (bits >> 6) & 0xffff;
let mut block_uncompressed = bits & 0x3f;
if block_uncompressed == 0x3f {
block_uncompressed = reader.read_u32::<LE>()?;
} else {
block_uncompressed = block_uncompressed << 11;
}
let mut var_int = |bit: u32| -> Result<_, super::Error> {
Ok(if (bits & (1 << bit)) != 0 {
reader.read_u32::<LE>()? as u64
} else {
reader.read_u64::<LE>()?
})
};
let offset = var_int(31)?;
let uncompressed = var_int(30)?;
let compressed = match compression {
Compression::None => uncompressed,
_ => var_int(29)?,
};
block_uncompressed = if compression_block_count == 0 {
0
} else if uncompressed < block_uncompressed.into() {
uncompressed.try_into().unwrap()
} else {
block_uncompressed
};
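// block offsets are relative to the entry itself from RelativeChunkOffsets
// onward, so only the serialized entry header size is added; older versions
// use absolute pak offsets and need the entry offset added back in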
let offset_base =
match version >= super::Version::RelativeChunkOffsets {
true => 0,
false => offset,
} + Entry::get_serialized_size(version, compression, compression_block_count);
let blocks = if compression_block_count == 1 && !encrypted {
Some(vec![Block {
start: offset_base,
end: offset_base + compressed,
}])
} else if compression_block_count > 0 {
let mut index = offset_base;
Some(
(0..compression_block_count)
.into_iter()
.map(|_| {
let mut block_size = reader.read_u32::<LE>()? as u64;
let block = Block {
start: index,
end: index + block_size,
};
if encrypted {
block_size = align(block_size);
}
index += block_size;
Ok(block)
})
.collect::<Result<Vec<_>, super::Error>>()?,
)
} else {
None
};
Ok(Entry {
offset,
compressed,
uncompressed,
timestamp: None,
compression,
hash: None,
blocks,
encrypted,
block_uncompressed: Some(block_uncompressed),
})
}
pub fn read<R: io::Read + io::Seek, W: io::Write>(
&self,
reader: &mut R,
@@ -75,8 +197,7 @@ impl Entry {
Entry::new(reader, version)?;
let data_offset = reader.stream_position()?;
let mut data = reader.read_len(match self.encrypted {
true => align(self.compressed),
false => self.compressed,
} as usize)?;
if self.encrypted {

View file

@@ -13,6 +13,7 @@ pub const MAGIC: u32 = 0x5A6F12E1;
#[derive(
Clone, Copy, PartialEq, Eq, PartialOrd, Debug, strum::Display, strum::FromRepr, strum::EnumIter,
)]
pub enum Version {
Unknown, // v0 unknown (mostly just for padding)
Initial, // v1 initial specification

View file

@@ -1,15 +1,57 @@
use super::Version;
use hashbrown::HashMap;
use std::io::{self, Seek};
#[derive(Debug)]
pub struct Pak<R: io::Read + io::Seek> {
version: Version,
mount_point: String,
key: Option<aes::Aes256Dec>,
index: Index,
reader: R,
}
#[derive(Debug)]
pub enum Index {
V1(IndexV1),
V2(IndexV2),
}
impl Index {
fn entries(&self) -> &HashMap<String, super::entry::Entry> {
match self {
Index::V1(index) => &index.entries,
Index::V2(index) => &index.entries_by_path,
}
}
}
#[derive(Debug)]
pub struct IndexV1 {
entries: HashMap<String, super::entry::Entry>,
}
#[derive(Debug)]
pub struct IndexV2 {
path_hash_seed: u64,
path_hash_index: Option<Vec<u8>>,
full_directory_index: Option<HashMap<String, HashMap<String, u32>>>,
encoded_entries: Vec<u8>,
entries_by_path: HashMap<String, super::entry::Entry>,
}
fn decrypt(key: &Option<aes::Aes256Dec>, bytes: &mut [u8]) -> Result<(), super::Error> {
if let Some(key) = &key {
use aes::cipher::BlockDecrypt;
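// pak data is encrypted as independent 16-byte AES blocks (ECB style); this
// assumes the buffer has already been padded to a multiple of the block size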
for chunk in bytes.chunks_mut(16) {
key.decrypt_block(aes::Block::from_mut_slice(chunk))
}
Ok(())
} else {
Err(super::Error::Encrypted)
}
}
impl<R: io::Read + io::Seek> Pak<R> {
pub fn new(
mut reader: R,
@@ -24,31 +66,119 @@ impl<R: io::Read + io::Seek> Pak<R> {
// read index to get all the entry info
reader.seek(io::SeekFrom::Start(footer.index_offset))?;
let mut index = reader.read_len(footer.index_size as usize)?;
// decrypt index if needed
if footer.encrypted {
decrypt(&key, &mut index)?;
}
let mut index = io::Cursor::new(index);
let mount_point = index.read_string()?;
let len = index.read_u32::<LE>()? as usize;
let index = if version >= Version::PathHashIndex {
let path_hash_seed = index.read_u64::<LE>()?;
let path_hash_index = if index.read_u32::<LE>()? != 0 {
let path_hash_index_offset = index.read_u64::<LE>()?;
let path_hash_index_size = index.read_u64::<LE>()?;
let _path_hash_index_hash = index.read_len(20)?;
reader.seek(io::SeekFrom::Start(path_hash_index_offset))?;
let mut path_hash_index = reader.read_len(path_hash_index_size as usize)?;
// TODO verify hash
if footer.encrypted {
decrypt(&key, &mut path_hash_index)?;
}
Some(path_hash_index)
} else {
None
};
let full_directory_index = if index.read_u32::<LE>()? != 0 {
let full_directory_index_offset = index.read_u64::<LE>()?;
let full_directory_index_size = index.read_u64::<LE>()?;
let _full_directory_index_hash = index.read_len(20)?;
reader.seek(io::SeekFrom::Start(full_directory_index_offset))?;
let mut full_directory_index =
reader.read_len(full_directory_index_size as usize)?;
// TODO verify hash
if footer.encrypted {
decrypt(&key, &mut full_directory_index)?;
}
let mut fdi = io::Cursor::new(full_directory_index);
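// full directory index layout: directory count, then for each directory its
// name followed by a file count and (file name, offset into the encoded
// entries blob) pairs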
let dir_count = fdi.read_u32::<LE>()? as usize;
let mut directories = HashMap::with_capacity(dir_count);
for _ in 0..dir_count {
let dir_name = fdi.read_string()?;
let file_count = fdi.read_u32::<LE>()? as usize;
let mut files = HashMap::with_capacity(file_count);
for _ in 0..file_count {
let file_name = fdi.read_string()?;
files.insert(file_name, fdi.read_u32::<LE>()?);
}
directories.insert(dir_name, files);
}
Some(directories)
} else {
None
};
let size = index.read_u32::<LE>()? as usize;
let encoded_entries = index.read_len(size)?;
let mut entries_by_path = HashMap::new();
if let Some(fdi) = &full_directory_index {
let mut encoded_entries = io::Cursor::new(&encoded_entries);
for (dir_name, dir) in fdi {
for (file_name, encoded_offset) in dir {
encoded_entries.seek(io::SeekFrom::Start(*encoded_offset as u64))?;
let entry =
super::entry::Entry::new_encoded(&mut encoded_entries, version)?;
// entry next to file contains full metadata
//reader.seek(io::SeekFrom::Start(entry.offset))?;
//let _ = super::entry::Entry::new(&mut reader, version)?;
// concatenate the directory and file name so lookups match IndexV1 paths; a more direct access method could be exposed later
let path = format!(
"{}{}",
dir_name.strip_prefix("/").unwrap_or(dir_name),
file_name
);
entries_by_path.insert(path, entry);
}
}
}
assert_eq!(index.read_u32::<LE>()?, 0, "remaining index bytes are 0"); // TODO possibly remaining unencoded entries?
Index::V2(IndexV2 {
path_hash_seed,
path_hash_index,
full_directory_index,
encoded_entries,
entries_by_path,
})
} else {
let mut entries = HashMap::with_capacity(len);
for _ in 0..len {
entries.insert(
index.read_string()?,
super::entry::Entry::new(&mut index, version)?,
);
}
Index::V1(IndexV1 { entries })
};
Ok(Self {
version,
mount_point,
key,
index,
reader,
})
}
@@ -68,14 +198,15 @@ impl<R: io::Read + io::Seek> Pak<R> {
}
pub fn read<W: io::Write>(&mut self, path: &str, writer: &mut W) -> Result<(), super::Error> {
match self.index.entries().get(path) {
Some(entry) => entry.read(&mut self.reader, self.version, self.key.as_ref(), writer),
None => Err(super::Error::Other("no file found at given path")),
}
}
pub fn files(&self) -> std::vec::IntoIter<String> {
self.index
.entries()
.keys()
.cloned()
.collect::<Vec<String>>()