Add compressed pak writing

This commit is contained in:
Truman Kilen 2024-01-11 16:17:57 -06:00
parent 7b647d9179
commit 0a06dbcf31
4 changed files with 210 additions and 71 deletions

View file

@ -1,3 +1,5 @@
use crate::Error;
use super::{ext::BoolExt, ext::ReadExt, Compression, Version, VersionMajor}; use super::{ext::BoolExt, ext::ReadExt, Compression, Version, VersionMajor};
use byteorder::{ReadBytesExt, WriteBytesExt, LE}; use byteorder::{ReadBytesExt, WriteBytesExt, LE};
use std::io; use std::io;
@ -39,7 +41,7 @@ pub(crate) struct Entry {
pub offset: u64, pub offset: u64,
pub compressed: u64, pub compressed: u64,
pub uncompressed: u64, pub uncompressed: u64,
pub compression: Option<u32>, pub compression_slot: Option<u32>,
pub timestamp: Option<u64>, pub timestamp: Option<u64>,
pub hash: Option<[u8; 20]>, pub hash: Option<[u8; 20]>,
pub blocks: Option<Vec<Block>>, pub blocks: Option<Vec<Block>>,
@ -84,6 +86,135 @@ impl Entry {
size size
} }
pub(crate) fn write_file<W: io::Write + io::Seek>(
writer: &mut W,
version: Version,
compression_slots: &mut Vec<Option<Compression>>,
allowed_compression: &[Compression],
data: impl AsRef<[u8]>,
) -> Result<Self, super::Error> {
// TODO hash needs to be post-compression
use sha1::{Digest, Sha1};
let mut hasher = Sha1::new();
hasher.update(&data);
let offset = writer.stream_position()?;
let len = data.as_ref().len() as u64;
// TODO possibly select best compression based on some criteria instead of picking first
let compression = allowed_compression.first().cloned();
let compression_slot = if let Some(compression) = compression {
// find existing
let slot = compression_slots
.iter()
.enumerate()
.find(|(_, s)| **s == Some(compression));
Some(if let Some((i, _)) = slot {
// existing found
i
} else {
if version.version_major() < VersionMajor::FNameBasedCompression {
return Err(Error::Other(format!(
"cannot use {compression:?} prior to FNameBasedCompression (pak version 8)"
)));
}
// find empty slot
if let Some((i, empty_slot)) = compression_slots
.iter_mut()
.enumerate()
.find(|(_, s)| s.is_none())
{
// empty found, set it to used compression type
*empty_slot = Some(compression);
i
} else {
// no empty slot found, add a new one
compression_slots.push(Some(compression));
compression_slots.len() - 1
}
} as u32)
} else {
None
};
let (blocks, compressed) = match compression {
#[cfg(not(feature = "compression"))]
Some(_) => {
unreachable!("should not be able to reach this point without compression feature")
}
#[cfg(feature = "compression")]
Some(compression) => {
use std::io::Write;
let entry_size = Entry::get_serialized_size(version, compression_slot, 1);
let data_offset = offset + entry_size;
let compressed = match compression {
Compression::Zlib => {
let mut compress = flate2::write::ZlibEncoder::new(
Vec::new(),
flate2::Compression::fast(),
);
compress.write_all(data.as_ref())?;
compress.finish()?
}
Compression::Gzip => {
let mut compress =
flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::fast());
compress.write_all(data.as_ref())?;
compress.finish()?
}
Compression::Zstd => zstd::stream::encode_all(data.as_ref(), 0)?,
Compression::Oodle => {
return Err(Error::Other("writing Oodle compression unsupported".into()))
}
};
let compute_offset = |index: usize| -> u64 {
match version.version_major() >= VersionMajor::RelativeChunkOffsets {
true => index as u64 + (data_offset - offset),
false => index as u64 + data_offset,
}
};
let blocks = vec![Block {
start: compute_offset(0),
end: compute_offset(compressed.len()),
}];
(Some(blocks), Some(compressed))
}
None => (None, None),
};
let entry = super::entry::Entry {
offset,
compressed: compressed
.as_ref()
.map(|c: &Vec<u8>| c.len() as u64)
.unwrap_or_default(),
uncompressed: len,
compression_slot,
timestamp: None,
hash: Some(hasher.finalize().into()),
blocks,
flags: 0,
compression_block_size: compressed.as_ref().map(|_| len as u32).unwrap_or_default(),
};
entry.write(writer, version, EntryLocation::Data)?;
if let Some(compressed) = compressed {
writer.write_all(&compressed)?;
} else {
writer.write_all(data.as_ref())?;
}
Ok(entry)
}
pub fn read<R: io::Read>( pub fn read<R: io::Read>(
reader: &mut R, reader: &mut R,
version: super::Version, version: super::Version,
@ -114,7 +245,7 @@ impl Entry {
offset, offset,
compressed, compressed,
uncompressed, uncompressed,
compression, compression_slot: compression,
timestamp, timestamp,
hash, hash,
blocks, blocks,
@ -135,7 +266,7 @@ impl Entry {
})?; })?;
writer.write_u64::<LE>(self.compressed)?; writer.write_u64::<LE>(self.compressed)?;
writer.write_u64::<LE>(self.uncompressed)?; writer.write_u64::<LE>(self.uncompressed)?;
let compression = self.compression.map_or(0, |n| n + 1); let compression = self.compression_slot.map_or(0, |n| n + 1);
match version { match version {
Version::V8A => writer.write_u8(compression.try_into().unwrap())?, Version::V8A => writer.write_u8(compression.try_into().unwrap())?,
_ => writer.write_u32::<LE>(compression)?, _ => writer.write_u32::<LE>(compression)?,
@ -232,7 +363,7 @@ impl Entry {
compressed, compressed,
uncompressed, uncompressed,
timestamp: None, timestamp: None,
compression, compression_slot: compression,
hash: None, hash: None,
blocks, blocks,
flags: encrypted as u8, flags: encrypted as u8,
@ -245,7 +376,7 @@ impl Entry {
if (compression_block_size << 11) != self.compression_block_size { if (compression_block_size << 11) != self.compression_block_size {
compression_block_size = 0x3f; compression_block_size = 0x3f;
} }
let compression_blocks_count = if self.compression.is_some() { let compression_blocks_count = if self.compression_slot.is_some() {
self.blocks.as_ref().unwrap().len() as u32 self.blocks.as_ref().unwrap().len() as u32
} else { } else {
0 0
@ -257,7 +388,7 @@ impl Entry {
let flags = (compression_block_size) let flags = (compression_block_size)
| (compression_blocks_count << 6) | (compression_blocks_count << 6)
| ((self.is_encrypted() as u32) << 22) | ((self.is_encrypted() as u32) << 22)
| (self.compression.map_or(0, |n| n + 1) << 23) | (self.compression_slot.map_or(0, |n| n + 1) << 23)
| ((is_size_32_bit_safe as u32) << 29) | ((is_size_32_bit_safe as u32) << 29)
| ((is_uncompressed_size_32_bit_safe as u32) << 30) | ((is_uncompressed_size_32_bit_safe as u32) << 30)
| ((is_offset_32_bit_safe as u32) << 31); | ((is_offset_32_bit_safe as u32) << 31);
@ -280,7 +411,7 @@ impl Entry {
writer.write_u64::<LE>(self.uncompressed)? writer.write_u64::<LE>(self.uncompressed)?
} }
if self.compression.is_some() { if self.compression_slot.is_some() {
if is_size_32_bit_safe { if is_size_32_bit_safe {
writer.write_u32::<LE>(self.compressed as u32)?; writer.write_u32::<LE>(self.compressed as u32)?;
} else { } else {
@ -304,7 +435,7 @@ impl Entry {
&self, &self,
reader: &mut R, reader: &mut R,
version: Version, version: Version,
compression: &[Compression], compression: &[Option<Compression>],
#[allow(unused)] key: &super::Key, #[allow(unused)] key: &super::Key,
#[allow(unused)] oodle: &super::Oodle, #[allow(unused)] oodle: &super::Oodle,
buf: &mut W, buf: &mut W,
@ -335,23 +466,22 @@ impl Entry {
} }
#[cfg(any(feature = "compression", feature = "oodle"))] #[cfg(any(feature = "compression", feature = "oodle"))]
let ranges = match &self.blocks { let ranges = {
Some(blocks) => blocks let offset = |index: u64| -> usize {
.iter() (match version.version_major() >= VersionMajor::RelativeChunkOffsets {
.map( true => index - (data_offset - self.offset),
|block| match version.version_major() >= VersionMajor::RelativeChunkOffsets { false => index - data_offset,
true => { }) as usize
(block.start - (data_offset - self.offset)) as usize };
..(block.end - (data_offset - self.offset)) as usize
} match &self.blocks {
false => { Some(blocks) => blocks
(block.start - data_offset) as usize..(block.end - data_offset) as usize .iter()
} .map(|block| offset(block.start)..offset(block.end))
}, .collect::<Vec<_>>(),
) #[allow(clippy::single_range_in_vec_init)]
.collect::<Vec<_>>(), None => vec![0..data.len()],
#[allow(clippy::single_range_in_vec_init)] }
None => vec![0..data.len()],
}; };
#[cfg(feature = "compression")] #[cfg(feature = "compression")]
@ -363,8 +493,8 @@ impl Entry {
}; };
} }
match self.compression.map(|c| compression[c as usize]) { match self.compression_slot.and_then(|c| compression[c as usize]) {
None | Some(Compression::None) => buf.write_all(&data)?, None => buf.write_all(&data)?,
#[cfg(feature = "compression")] #[cfg(feature = "compression")]
Some(Compression::Zlib) => decompress!(flate2::read::ZlibDecoder<&[u8]>), Some(Compression::Zlib) => decompress!(flate2::read::ZlibDecoder<&[u8]>),
#[cfg(feature = "compression")] #[cfg(feature = "compression")]

View file

@ -15,7 +15,7 @@ pub struct Footer {
pub index_size: u64, pub index_size: u64,
pub hash: [u8; 20], pub hash: [u8; 20],
pub frozen: bool, pub frozen: bool,
pub compression: Vec<Compression>, pub compression: Vec<Option<Compression>>,
} }
impl Footer { impl Footer {
@ -47,13 +47,13 @@ impl Footer {
.filter_map(|&ch| (ch != 0).then_some(ch as char)) .filter_map(|&ch| (ch != 0).then_some(ch as char))
.collect::<String>(), .collect::<String>(),
) )
.unwrap_or_default(), .ok(),
) )
} }
if version < Version::V8A { if version.version_major() < VersionMajor::FNameBasedCompression {
compression.push(Compression::Zlib); compression.push(Some(Compression::Zlib));
compression.push(Compression::Gzip); compression.push(Some(Compression::Gzip));
compression.push(Compression::Oodle); compression.push(Some(Compression::Oodle));
} }
compression compression
}; };
@ -103,11 +103,9 @@ impl Footer {
// TODO: handle if compression.len() > algo_size // TODO: handle if compression.len() > algo_size
for i in 0..algo_size { for i in 0..algo_size {
let mut name = [0; 32]; let mut name = [0; 32];
if let Some(algo) = self.compression.get(i) { if let Some(algo) = self.compression.get(i).cloned().flatten() {
if algo != &Compression::None { for (i, b) in algo.to_string().as_bytes().iter().enumerate() {
for (i, b) in algo.to_string().as_bytes().iter().enumerate() { name[i] = *b;
name[i] = *b;
}
} }
} }
writer.write_all(&name)?; writer.write_all(&name)?;

View file

@ -119,10 +119,10 @@ impl Version {
} }
} }
#[derive(Default, Clone, Copy, PartialEq, Eq, Debug, strum::Display, strum::EnumString)] #[derive(
Clone, Copy, PartialEq, Eq, Debug, strum::Display, strum::EnumString, strum::EnumVariantNames,
)]
pub enum Compression { pub enum Compression {
#[default]
None,
Zlib, Zlib,
Gzip, Gzip,
Oodle, Oodle,

View file

@ -1,3 +1,6 @@
use crate::entry::Entry;
use crate::Compression;
use super::ext::{ReadExt, WriteExt}; use super::ext::{ReadExt, WriteExt};
use super::{Version, VersionMajor}; use super::{Version, VersionMajor};
use byteorder::{ReadBytesExt, WriteBytesExt, LE}; use byteorder::{ReadBytesExt, WriteBytesExt, LE};
@ -8,6 +11,7 @@ use std::io::{self, Read, Seek, Write};
pub struct PakBuilder { pub struct PakBuilder {
key: super::Key, key: super::Key,
oodle: super::Oodle, oodle: super::Oodle,
allowed_compression: Vec<Compression>,
} }
impl PakBuilder { impl PakBuilder {
@ -24,6 +28,11 @@ impl PakBuilder {
self.oodle = super::Oodle::Some(oodle_getter); self.oodle = super::Oodle::Some(oodle_getter);
self self
} }
/// Sets the compression algorithms allowed when writing, replacing any previously
/// configured list.
///
/// The list is handed to writers created from this builder. Only available when the
/// crate is built with the `compression` feature.
#[cfg(feature = "compression")]
pub fn compression(mut self, compression: impl IntoIterator<Item = Compression>) -> Self {
    let allowed: Vec<Compression> = compression.into_iter().collect();
    self.allowed_compression = allowed;
    self
}
pub fn reader<R: Read + Seek>(self, reader: &mut R) -> Result<PakReader, super::Error> { pub fn reader<R: Read + Seek>(self, reader: &mut R) -> Result<PakReader, super::Error> {
PakReader::new_any_inner(reader, self.key, self.oodle) PakReader::new_any_inner(reader, self.key, self.oodle)
} }
@ -41,7 +50,14 @@ impl PakBuilder {
mount_point: String, mount_point: String,
path_hash_seed: Option<u64>, path_hash_seed: Option<u64>,
) -> PakWriter<W> { ) -> PakWriter<W> {
PakWriter::new_inner(writer, self.key, version, mount_point, path_hash_seed) PakWriter::new_inner(
writer,
self.key,
version,
mount_point,
path_hash_seed,
self.allowed_compression,
)
} }
} }
@ -57,6 +73,7 @@ pub struct PakWriter<W: Write + Seek> {
pak: Pak, pak: Pak,
writer: W, writer: W,
key: super::Key, key: super::Key,
allowed_compression: Vec<Compression>,
} }
#[derive(Debug)] #[derive(Debug)]
@ -67,7 +84,7 @@ pub(crate) struct Pak {
index: Index, index: Index,
encrypted_index: bool, encrypted_index: bool,
encryption_guid: Option<u128>, encryption_guid: Option<u128>,
compression: Vec<super::Compression>, compression: Vec<Option<Compression>>,
} }
impl Pak { impl Pak {
@ -79,7 +96,15 @@ impl Pak {
index: Index::new(path_hash_seed), index: Index::new(path_hash_seed),
encrypted_index: false, encrypted_index: false,
encryption_guid: None, encryption_guid: None,
compression: vec![], compression: (if version.version_major() < VersionMajor::FNameBasedCompression {
vec![
Some(Compression::Zlib),
Some(Compression::Gzip),
Some(Compression::Oodle),
]
} else {
vec![]
}),
} }
} }
} }
@ -202,6 +227,7 @@ impl PakReader {
) -> Result<PakWriter<W>, super::Error> { ) -> Result<PakWriter<W>, super::Error> {
writer.seek(io::SeekFrom::Start(self.pak.index_offset.unwrap()))?; writer.seek(io::SeekFrom::Start(self.pak.index_offset.unwrap()))?;
Ok(PakWriter { Ok(PakWriter {
allowed_compression: self.pak.compression.iter().filter_map(|c| *c).collect(),
pak: self.pak, pak: self.pak,
key: self.key, key: self.key,
writer, writer,
@ -216,11 +242,13 @@ impl<W: Write + Seek> PakWriter<W> {
version: Version, version: Version,
mount_point: String, mount_point: String,
path_hash_seed: Option<u64>, path_hash_seed: Option<u64>,
allowed_compression: Vec<Compression>,
) -> Self { ) -> Self {
PakWriter { PakWriter {
pak: Pak::new(version, mount_point, path_hash_seed), pak: Pak::new(version, mount_point, path_hash_seed),
writer, writer,
key, key,
allowed_compression,
} }
} }
@ -229,34 +257,17 @@ impl<W: Write + Seek> PakWriter<W> {
} }
pub fn write_file(&mut self, path: &str, data: impl AsRef<[u8]>) -> Result<(), super::Error> { pub fn write_file(&mut self, path: &str, data: impl AsRef<[u8]>) -> Result<(), super::Error> {
use sha1::{Digest, Sha1}; self.pak.index.add_entry(
let mut hasher = Sha1::new(); path,
hasher.update(&data); Entry::write_file(
&mut self.writer,
self.pak.version,
&mut self.pak.compression,
&self.allowed_compression,
data,
)?,
);
let offset = self.writer.stream_position()?;
let len = data.as_ref().len() as u64;
let entry = super::entry::Entry {
offset,
compressed: len,
uncompressed: len,
compression: None,
timestamp: None,
hash: Some(hasher.finalize().into()),
blocks: None,
flags: 0,
compression_block_size: 0,
};
entry.write(
&mut self.writer,
self.pak.version,
super::entry::EntryLocation::Data,
)?;
self.pak.index.add_entry(path, entry);
self.writer.write_all(data.as_ref())?;
Ok(()) Ok(())
} }