From 0a06dbcf3118930a0583b1c8ea845dd2f65c05bf Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Thu, 11 Jan 2024 16:17:57 -0600 Subject: [PATCH] Add compressed pak writing --- repak/src/entry.rs | 184 +++++++++++++++++++++++++++++++++++++------- repak/src/footer.rs | 20 +++-- repak/src/lib.rs | 6 +- repak/src/pak.rs | 71 +++++++++-------- 4 files changed, 210 insertions(+), 71 deletions(-) diff --git a/repak/src/entry.rs b/repak/src/entry.rs index 04e47f4..7246547 100644 --- a/repak/src/entry.rs +++ b/repak/src/entry.rs @@ -1,3 +1,5 @@ +use crate::Error; + use super::{ext::BoolExt, ext::ReadExt, Compression, Version, VersionMajor}; use byteorder::{ReadBytesExt, WriteBytesExt, LE}; use std::io; @@ -39,7 +41,7 @@ pub(crate) struct Entry { pub offset: u64, pub compressed: u64, pub uncompressed: u64, - pub compression: Option, + pub compression_slot: Option, pub timestamp: Option, pub hash: Option<[u8; 20]>, pub blocks: Option>, @@ -84,6 +86,135 @@ impl Entry { size } + pub(crate) fn write_file( + writer: &mut W, + version: Version, + compression_slots: &mut Vec>, + allowed_compression: &[Compression], + data: impl AsRef<[u8]>, + ) -> Result { + // TODO hash needs to be post-compression + use sha1::{Digest, Sha1}; + let mut hasher = Sha1::new(); + hasher.update(&data); + + let offset = writer.stream_position()?; + let len = data.as_ref().len() as u64; + + // TODO possibly select best compression based on some criteria instead of picking first + let compression = allowed_compression.first().cloned(); + + let compression_slot = if let Some(compression) = compression { + // find existing + let slot = compression_slots + .iter() + .enumerate() + .find(|(_, s)| **s == Some(compression)); + Some(if let Some((i, _)) = slot { + // existing found + i + } else { + if version.version_major() < VersionMajor::FNameBasedCompression { + return Err(Error::Other(format!( + "cannot use {compression:?} prior to FNameBasedCompression (pak version 8)" + ))); + } + + // find empty slot + if let Some((i, empty_slot)) = compression_slots + .iter_mut() + .enumerate() + .find(|(_, s)| s.is_none()) + { + // empty found, set it to used compression type + *empty_slot = Some(compression); + i + } else { + // no empty slot found, add a new one + compression_slots.push(Some(compression)); + compression_slots.len() - 1 + } + } as u32) + } else { + None + }; + + let (blocks, compressed) = match compression { + #[cfg(not(feature = "compression"))] + Some(_) => { + unreachable!("should not be able to reach this point without compression feature") + } + #[cfg(feature = "compression")] + Some(compression) => { + use std::io::Write; + + let entry_size = Entry::get_serialized_size(version, compression_slot, 1); + let data_offset = offset + entry_size; + + let compressed = match compression { + Compression::Zlib => { + let mut compress = flate2::write::ZlibEncoder::new( + Vec::new(), + flate2::Compression::fast(), + ); + compress.write_all(data.as_ref())?; + compress.finish()? + } + Compression::Gzip => { + let mut compress = + flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::fast()); + compress.write_all(data.as_ref())?; + compress.finish()? + } + Compression::Zstd => zstd::stream::encode_all(data.as_ref(), 0)?, + Compression::Oodle => { + return Err(Error::Other("writing Oodle compression unsupported".into())) + } + }; + + let compute_offset = |index: usize| -> u64 { + match version.version_major() >= VersionMajor::RelativeChunkOffsets { + true => index as u64 + (data_offset - offset), + false => index as u64 + data_offset, + } + }; + + let blocks = vec![Block { + start: compute_offset(0), + end: compute_offset(compressed.len()), + }]; + + (Some(blocks), Some(compressed)) + } + None => (None, None), + }; + + let entry = super::entry::Entry { + offset, + compressed: compressed + .as_ref() + .map(|c: &Vec| c.len() as u64) + .unwrap_or_default(), + uncompressed: len, + compression_slot, + timestamp: None, + hash: Some(hasher.finalize().into()), + blocks, + flags: 0, + compression_block_size: compressed.as_ref().map(|_| len as u32).unwrap_or_default(), + }; + + entry.write(writer, version, EntryLocation::Data)?; + + if let Some(compressed) = compressed { + writer.write_all(&compressed)?; + } else { + writer.write_all(data.as_ref())?; + } + + Ok(entry) + } + pub fn read( reader: &mut R, version: super::Version, @@ -114,7 +245,7 @@ impl Entry { offset, compressed, uncompressed, - compression, + compression_slot: compression, timestamp, hash, blocks, @@ -135,7 +266,7 @@ impl Entry { })?; writer.write_u64::(self.compressed)?; writer.write_u64::(self.uncompressed)?; - let compression = self.compression.map_or(0, |n| n + 1); + let compression = self.compression_slot.map_or(0, |n| n + 1); match version { Version::V8A => writer.write_u8(compression.try_into().unwrap())?, _ => writer.write_u32::(compression)?, @@ -232,7 +363,7 @@ impl Entry { compressed, uncompressed, timestamp: None, - compression, + compression_slot: compression, hash: None, blocks, flags: encrypted as u8, @@ -245,7 +376,7 @@ impl Entry { if (compression_block_size << 11) != self.compression_block_size { compression_block_size = 0x3f; } - let compression_blocks_count = if self.compression.is_some() { + let compression_blocks_count = if self.compression_slot.is_some() { self.blocks.as_ref().unwrap().len() as u32 } else { 0 @@ -257,7 +388,7 @@ impl Entry { let flags = (compression_block_size) | (compression_blocks_count << 6) | ((self.is_encrypted() as u32) << 22) - | (self.compression.map_or(0, |n| n + 1) << 23) + | (self.compression_slot.map_or(0, |n| n + 1) << 23) | ((is_size_32_bit_safe as u32) << 29) | ((is_uncompressed_size_32_bit_safe as u32) << 30) | ((is_offset_32_bit_safe as u32) << 31); @@ -280,7 +411,7 @@ impl Entry { writer.write_u64::(self.uncompressed)? } - if self.compression.is_some() { + if self.compression_slot.is_some() { if is_size_32_bit_safe { writer.write_u32::(self.compressed as u32)?; } else { @@ -304,7 +435,7 @@ impl Entry { &self, reader: &mut R, version: Version, - compression: &[Compression], + compression: &[Option], #[allow(unused)] key: &super::Key, #[allow(unused)] oodle: &super::Oodle, buf: &mut W, @@ -335,23 +466,22 @@ impl Entry { } #[cfg(any(feature = "compression", feature = "oodle"))] - let ranges = match &self.blocks { - Some(blocks) => blocks - .iter() - .map( - |block| match version.version_major() >= VersionMajor::RelativeChunkOffsets { - true => { - (block.start - (data_offset - self.offset)) as usize - ..(block.end - (data_offset - self.offset)) as usize - } - false => { - (block.start - data_offset) as usize..(block.end - data_offset) as usize - } - }, - ) - .collect::>(), - #[allow(clippy::single_range_in_vec_init)] - None => vec![0..data.len()], + let ranges = { + let offset = |index: u64| -> usize { + (match version.version_major() >= VersionMajor::RelativeChunkOffsets { + true => index - (data_offset - self.offset), + false => index - data_offset, + }) as usize + }; + + match &self.blocks { + Some(blocks) => blocks + .iter() + .map(|block| offset(block.start)..offset(block.end)) + .collect::>(), + #[allow(clippy::single_range_in_vec_init)] + None => vec![0..data.len()], + } }; #[cfg(feature = "compression")] @@ -363,8 +493,8 @@ impl Entry { }; } - match self.compression.map(|c| compression[c as usize]) { - None | Some(Compression::None) => buf.write_all(&data)?, + match self.compression_slot.and_then(|c| compression[c as usize]) { + None => buf.write_all(&data)?, #[cfg(feature = "compression")] Some(Compression::Zlib) => decompress!(flate2::read::ZlibDecoder<&[u8]>), #[cfg(feature = "compression")] diff --git a/repak/src/footer.rs b/repak/src/footer.rs index b7bcb2d..0bc35d5 100644 --- a/repak/src/footer.rs +++ b/repak/src/footer.rs @@ -15,7 +15,7 @@ pub struct Footer { pub index_size: u64, pub hash: [u8; 20], pub frozen: bool, - pub compression: Vec, + pub compression: Vec>, } impl Footer { @@ -47,13 +47,13 @@ impl Footer { .filter_map(|&ch| (ch != 0).then_some(ch as char)) .collect::(), ) - .unwrap_or_default(), + .ok(), ) } - if version < Version::V8A { - compression.push(Compression::Zlib); - compression.push(Compression::Gzip); - compression.push(Compression::Oodle); + if version.version_major() < VersionMajor::FNameBasedCompression { + compression.push(Some(Compression::Zlib)); + compression.push(Some(Compression::Gzip)); + compression.push(Some(Compression::Oodle)); } compression }; @@ -103,11 +103,9 @@ impl Footer { // TODO: handle if compression.len() > algo_size for i in 0..algo_size { let mut name = [0; 32]; - if let Some(algo) = self.compression.get(i) { - if algo != &Compression::None { - for (i, b) in algo.to_string().as_bytes().iter().enumerate() { - name[i] = *b; - } + if let Some(algo) = self.compression.get(i).cloned().flatten() { + for (i, b) in algo.to_string().as_bytes().iter().enumerate() { + name[i] = *b; } } writer.write_all(&name)?; diff --git a/repak/src/lib.rs b/repak/src/lib.rs index 159c06e..d950a1c 100644 --- a/repak/src/lib.rs +++ b/repak/src/lib.rs @@ -119,10 +119,10 @@ impl Version { } } -#[derive(Default, Clone, Copy, PartialEq, Eq, Debug, strum::Display, strum::EnumString)] +#[derive( + Clone, Copy, PartialEq, Eq, Debug, strum::Display, strum::EnumString, strum::EnumVariantNames, +)] pub enum Compression { - #[default] - None, Zlib, Gzip, Oodle, diff --git a/repak/src/pak.rs b/repak/src/pak.rs index dac7b85..d7fa80a 100644 --- a/repak/src/pak.rs +++ b/repak/src/pak.rs @@ -1,3 +1,6 @@ +use crate::entry::Entry; +use crate::Compression; + use super::ext::{ReadExt, WriteExt}; use super::{Version, VersionMajor}; use byteorder::{ReadBytesExt, WriteBytesExt, LE}; @@ -8,6 +11,7 @@ use std::io::{self, Read, Seek, Write}; pub struct PakBuilder { key: super::Key, oodle: super::Oodle, + allowed_compression: Vec, } impl PakBuilder { @@ -24,6 +28,11 @@ impl PakBuilder { self.oodle = super::Oodle::Some(oodle_getter); self } + #[cfg(feature = "compression")] + pub fn compression(mut self, compression: impl IntoIterator) -> Self { + self.allowed_compression = compression.into_iter().collect(); + self + } pub fn reader(self, reader: &mut R) -> Result { PakReader::new_any_inner(reader, self.key, self.oodle) } @@ -41,7 +50,14 @@ impl PakBuilder { mount_point: String, path_hash_seed: Option, ) -> PakWriter { - PakWriter::new_inner(writer, self.key, version, mount_point, path_hash_seed) + PakWriter::new_inner( + writer, + self.key, + version, + mount_point, + path_hash_seed, + self.allowed_compression, + ) } } @@ -57,6 +73,7 @@ pub struct PakWriter { pak: Pak, writer: W, key: super::Key, + allowed_compression: Vec, } #[derive(Debug)] @@ -67,7 +84,7 @@ pub(crate) struct Pak { index: Index, encrypted_index: bool, encryption_guid: Option, - compression: Vec, + compression: Vec>, } impl Pak { @@ -79,7 +96,15 @@ impl Pak { index: Index::new(path_hash_seed), encrypted_index: false, encryption_guid: None, - compression: vec![], + compression: (if version.version_major() < VersionMajor::FNameBasedCompression { + vec![ + Some(Compression::Zlib), + Some(Compression::Gzip), + Some(Compression::Oodle), + ] + } else { + vec![] + }), } } } @@ -202,6 +227,7 @@ impl PakReader { ) -> Result, super::Error> { writer.seek(io::SeekFrom::Start(self.pak.index_offset.unwrap()))?; Ok(PakWriter { + allowed_compression: self.pak.compression.iter().filter_map(|c| *c).collect(), pak: self.pak, key: self.key, writer, @@ -216,11 +242,13 @@ impl PakWriter { version: Version, mount_point: String, path_hash_seed: Option, + allowed_compression: Vec, ) -> Self { PakWriter { pak: Pak::new(version, mount_point, path_hash_seed), writer, key, + allowed_compression, } } @@ -229,34 +257,17 @@ impl PakWriter { } pub fn write_file(&mut self, path: &str, data: impl AsRef<[u8]>) -> Result<(), super::Error> { - use sha1::{Digest, Sha1}; - let mut hasher = Sha1::new(); - hasher.update(&data); + self.pak.index.add_entry( + path, + Entry::write_file( + &mut self.writer, + self.pak.version, + &mut self.pak.compression, + &self.allowed_compression, + data, + )?, + ); - let offset = self.writer.stream_position()?; - let len = data.as_ref().len() as u64; - - let entry = super::entry::Entry { - offset, - compressed: len, - uncompressed: len, - compression: None, - timestamp: None, - hash: Some(hasher.finalize().into()), - blocks: None, - flags: 0, - compression_block_size: 0, - }; - - entry.write( - &mut self.writer, - self.pak.version, - super::entry::EntryLocation::Data, - )?; - - self.pak.index.add_entry(path, entry); - - self.writer.write_all(data.as_ref())?; Ok(()) }