Do not require moving data into parallel writer

Truman Kilen 2025-01-20 17:39:58 -06:00
parent bdeb0df8c7
commit 194e800270
4 changed files with 150 additions and 109 deletions
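The functional change: ParallelPakWriter::write_file now accepts any D: AsRef<[u8]> + Send + Sync + 'scope instead of requiring an owned Vec<u8> (previously wrapped in an Arc and moved into the channel), so callers can hand in borrowed buffers without copying. A minimal sketch of the new call shape, modelled on the test added at the end of this commit; the demo function, paths, and byte values here are illustrative:

use std::io::Cursor;

fn demo() -> Result<(), repak::Error> {
    let mut cur = Cursor::new(vec![]);
    let mut writer = repak::PakBuilder::new().writer(
        &mut cur,
        repak::Version::V11,
        "../../../".to_string(),
        Some(0x12345678),
    );

    let owned = vec![1u8, 2, 3];
    let borrowed = vec![4u8, 5, 6];
    writer.parallel(|w| -> Result<(), repak::Error> {
        // Owned data is moved in, exactly as before.
        w.write_file("by/value".to_string(), true, owned)?;
        // Borrowed data only has to outlive the parallel scope; no copy is made.
        w.write_file("by/reference".to_string(), true, &borrowed)?;
        Ok(())
    })?;
    Ok(())
}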

View file

@@ -1,3 +1,5 @@
use std::io::Write;
use crate::{
entry::{Block, Entry},
Compression, Error, Hash, Version, VersionMajor,
@@ -5,17 +7,21 @@ use crate::{
type Result<T, E = Error> = std::result::Result<T, E>;
pub(crate) struct PartialEntry {
pub(crate) struct PartialEntry<D: AsRef<[u8]>> {
compression: Option<Compression>,
compressed_size: u64,
uncompressed_size: u64,
compression_block_size: u32,
pub(crate) blocks: Vec<PartialBlock>,
data: PartialEntryData<D>,
hash: Hash,
}
pub(crate) struct PartialBlock {
uncompressed_size: usize,
pub(crate) data: Vec<u8>,
data: Vec<u8>,
}
pub(crate) enum PartialEntryData<D> {
Slice(D),
Blocks(Vec<PartialBlock>),
}
#[cfg(feature = "compression")]
@@ -55,7 +61,7 @@ fn get_compression_slot(
} as u32)
}
impl PartialEntry {
impl<D: AsRef<[u8]>> PartialEntry<D> {
pub(crate) fn build_entry(
&self,
version: Version,
@@ -70,16 +76,19 @@ impl PartialEntry {
#[cfg(not(feature = "compression"))]
let compression_slot = None;
let blocks = (!self.blocks.is_empty()).then(|| {
let blocks = match &self.data {
PartialEntryData::Slice(_) => None,
PartialEntryData::Blocks(blocks) => {
let entry_size =
Entry::get_serialized_size(version, compression_slot, self.blocks.len() as u32);
Entry::get_serialized_size(version, compression_slot, blocks.len() as u32);
let mut offset = entry_size;
if version.version_major() < VersionMajor::RelativeChunkOffsets {
offset += file_offset;
};
self.blocks
Some(
blocks
.iter()
.map(|block| {
let start = offset;
@@ -87,8 +96,10 @@ impl PartialEntry {
let end = offset;
Block { start, end }
})
.collect()
});
.collect(),
)
}
};
Ok(Entry {
offset: file_offset,
@@ -102,22 +113,38 @@ impl PartialEntry {
compression_block_size: self.compression_block_size,
})
}
pub(crate) fn write_data<S: Write>(&self, stream: &mut S) -> Result<()> {
match &self.data {
PartialEntryData::Slice(data) => {
stream.write_all(data.as_ref())?;
}
PartialEntryData::Blocks(blocks) => {
for block in blocks {
stream.write_all(&block.data)?;
}
}
}
Ok(())
}
}
pub(crate) fn build_partial_entry(
pub(crate) fn build_partial_entry<D>(
allowed_compression: &[Compression],
data: &[u8],
) -> Result<PartialEntry> {
data: D,
) -> Result<PartialEntry<D>>
where
D: AsRef<[u8]>,
{
// TODO hash needs to be post-compression/encryption
use sha1::{Digest, Sha1};
let mut hasher = Sha1::new();
// TODO possibly select best compression based on some criteria instead of picking first
let compression = allowed_compression.first().cloned();
let uncompressed_size = data.len() as u64;
let uncompressed_size = data.as_ref().len() as u64;
let compression_block_size;
let (blocks, compressed_size) = match compression {
let (data, compressed_size) = match compression {
#[cfg(not(feature = "compression"))]
Some(_) => {
unreachable!("should not be able to reach this point without compression feature")
@@ -129,7 +156,7 @@ pub(crate) fn build_partial_entry(
compression_block_size = 0x10000;
let mut compressed_size = 0;
let mut blocks = vec![];
for chunk in data.chunks(compression_block_size as usize) {
for chunk in data.as_ref().chunks(compression_block_size as usize) {
let data = compress(compression, chunk)?;
compressed_size += data.len() as u64;
hasher.update(&data);
@@ -139,12 +166,12 @@ pub(crate) fn build_partial_entry(
})
}
(blocks, compressed_size)
(PartialEntryData::Blocks(blocks), compressed_size)
}
None => {
compression_block_size = 0;
hasher.update(data);
(vec![], uncompressed_size)
hasher.update(data.as_ref());
(PartialEntryData::Slice(data), uncompressed_size)
}
};
@@ -153,7 +180,7 @@ pub(crate) fn build_partial_entry(
compressed_size,
uncompressed_size,
compression_block_size,
blocks,
data,
hash: Hash(hasher.finalize().into()),
})
}
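With build_partial_entry generic over D: AsRef<[u8]>, the uncompressed path no longer copies the input into blocks; it hands the caller's value back inside PartialEntryData::Slice. A hedged, crate-internal sketch of the two call shapes this enables (the demo wrapper and byte values are illustrative):

// Hedged sketch (crate-internal): both an owned Vec<u8> and a borrowed &[u8]
// satisfy D: AsRef<[u8]>. With an empty allowed-compression list the data is
// kept in PartialEntryData::Slice rather than being copied into blocks.
fn demo() -> Result<(), Error> {
    let owned: Vec<u8> = vec![1, 2, 3];
    let _keeps_vec = build_partial_entry(&[], owned)?; // PartialEntry<Vec<u8>>

    let bytes = [4u8, 5, 6];
    let _keeps_borrow = build_partial_entry(&[], &bytes[..])?; // PartialEntry<&[u8]>
    Ok(())
}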

View file

@@ -109,13 +109,7 @@ impl Entry {
let stream_position = writer.stream_position()?;
let entry = partial_entry.build_entry(version, compression_slots, stream_position)?;
entry.write(writer, version, crate::entry::EntryLocation::Data)?;
if partial_entry.blocks.is_empty() {
writer.write_all(data)?;
} else {
for block in partial_entry.blocks {
writer.write_all(&block.data)?;
}
}
partial_entry.write_data(writer)?;
Ok(entry)
}

View file

@@ -7,7 +7,6 @@ use super::{Version, VersionMajor};
use byteorder::{ReadBytesExt, WriteBytesExt, LE};
use std::collections::BTreeMap;
use std::io::{self, Read, Seek, Write};
use std::sync::Arc;
#[derive(Default, Clone, Copy)]
pub(crate) struct Hash(pub(crate) [u8; 20]);
@@ -88,10 +87,6 @@ pub struct PakWriter<W: Write + Seek> {
allowed_compression: Vec<Compression>,
}
pub struct ParallelPakWriter {
tx: std::sync::mpsc::SyncSender<(String, bool, Arc<Vec<u8>>)>,
}
#[derive(Debug)]
pub(crate) struct Pak {
version: Version,
@@ -147,8 +142,8 @@ impl Index {
self.entries
}
fn add_entry(&mut self, path: &str, entry: super::entry::Entry) {
self.entries.insert(path.to_string(), entry);
fn add_entry(&mut self, path: String, entry: super::entry::Entry) {
self.entries.insert(path, entry);
}
}
@@ -280,7 +275,7 @@ impl<W: Write + Seek> PakWriter<W> {
data: impl AsRef<[u8]>,
) -> Result<(), super::Error> {
self.pak.index.add_entry(
path,
path.to_string(),
Entry::write_file(
&mut self.writer,
self.pak.version,
@@ -297,40 +292,29 @@ impl<W: Write + Seek> PakWriter<W> {
Ok(())
}
pub fn parallel<F, E>(&mut self, f: F) -> Result<&mut Self, E>
pub fn parallel<'scope, F, E>(&mut self, f: F) -> Result<&mut Self, E>
where
F: Send + Sync + FnOnce(&mut ParallelPakWriter) -> Result<(), E>,
F: Send + Sync + FnOnce(&mut ParallelPakWriter<'scope>) -> Result<(), E>,
E: From<Error> + Send,
{
{
use pariter::IteratorExt as _;
let allowed_compression = self.allowed_compression.as_slice();
pariter::scope(|scope: &pariter::Scope<'_>| -> Result<(), E> {
let (tx, rx) = std::sync::mpsc::sync_channel(0);
pariter::scope(|scope| -> Result<(), E> {
let handle = scope.spawn(|_| -> Result<(), E> {
f(&mut ParallelPakWriter { tx })?;
Ok(())
});
let handle = scope.spawn(|_| f(&mut ParallelPakWriter { tx }));
let result = rx
.into_iter()
.parallel_map_scoped(
scope,
|(path, allow_compress, data): (String, bool, Arc<Vec<u8>>)| -> Result<_, Error> {
let allowed_compression = if allow_compress {
self.allowed_compression.as_slice()
} else {
&[]
};
let partial_entry = build_partial_entry(allowed_compression, &data)?;
let data = partial_entry.blocks.is_empty().then(|| Arc::new(data));
Ok((path, data, partial_entry))
},
)
.parallel_map_scoped(scope, |(path, compress, data)| -> Result<_, Error> {
let compression = compress.then_some(allowed_compression).unwrap_or_default();
let partial_entry = build_partial_entry(compression, data)?;
Ok((path, partial_entry))
})
.try_for_each(|message| -> Result<(), Error> {
let stream_position = self.writer.stream_position()?;
let (path, data, partial_entry) = message?;
let (path, partial_entry) = message?;
let entry = partial_entry.build_entry(
self.pak.version,
@@ -344,15 +328,8 @@ impl<W: Write + Seek> PakWriter<W> {
crate::entry::EntryLocation::Data,
)?;
self.pak.index.add_entry(&path, entry);
if let Some(data) = data {
self.writer.write_all(&data)?;
} else {
for block in partial_entry.blocks {
self.writer.write_all(&block.data)?;
}
}
self.pak.index.add_entry(path, entry);
partial_entry.write_data(&mut self.writer)?;
Ok(())
});
@@ -365,7 +342,6 @@ impl<W: Write + Seek> PakWriter<W> {
}
})
.unwrap()?;
}
Ok(self)
}
@@ -375,13 +351,30 @@ impl<W: Write + Seek> PakWriter<W> {
}
}
impl ParallelPakWriter {
pub fn write_file(&self, path: String, compress: bool, data: Vec<u8>) -> Result<(), Error> {
self.tx.send((path, compress, Arc::new(data))).unwrap();
pub struct ParallelPakWriter<'scope> {
tx: std::sync::mpsc::SyncSender<(String, bool, Data<'scope>)>,
}
impl<'scope> ParallelPakWriter<'scope> {
pub fn write_file<D: AsRef<[u8]> + Send + Sync + 'scope>(
&self,
path: String,
compress: bool,
data: D,
) -> Result<(), Error> {
self.tx
.send((path, compress, Data(Box::new(data))))
.unwrap();
Ok(())
}
}
struct Data<'d>(Box<dyn AsRef<[u8]> + Send + Sync + 'd>);
impl AsRef<[u8]> for Data<'_> {
fn as_ref(&self) -> &[u8] {
self.0.as_ref().as_ref()
}
}
impl Pak {
fn read<R: Read + Seek>(
reader: &mut R,
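Because write_file is generic over D but the mpsc channel needs one concrete payload type, each D is boxed behind dyn AsRef<[u8]> + Send + Sync + 'scope via the new Data wrapper. A standalone sketch of that type-erasure trick; std::thread::scope stands in here for the pariter::scope used by the writer, and the byte values are illustrative:

// Standalone sketch of the type-erasure used above: the channel carries one
// concrete payload type, while write_file is generic over D, so every D is
// boxed behind `dyn AsRef<[u8]>` plus Send + Sync and the scope lifetime.
struct Data<'d>(Box<dyn AsRef<[u8]> + Send + Sync + 'd>);

impl AsRef<[u8]> for Data<'_> {
    fn as_ref(&self) -> &[u8] {
        // Deref the Box, then delegate to the wrapped value's own AsRef impl.
        self.0.as_ref().as_ref()
    }
}

fn main() {
    let borrowed = vec![1u8, 2, 3];
    let (tx, rx) = std::sync::mpsc::sync_channel(0);
    std::thread::scope(|s| {
        s.spawn(|| {
            // A borrowed &Vec<u8> and an owned Vec<u8> travel through the same channel.
            tx.send(Data(Box::new(&borrowed))).unwrap();
            tx.send(Data(Box::new(vec![4u8, 5, 6]))).unwrap();
        });
        assert_eq!(rx.recv().unwrap().as_ref(), &[1, 2, 3]);
        assert_eq!(rx.recv().unwrap().as_ref(), &[4, 5, 6]);
    });
}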

View file

@@ -88,6 +88,33 @@ mod test {
}
}
#[test]
fn test_parallel_writer() -> Result<(), repak::Error> {
let mut cur = Cursor::new(vec![]);
let mut writer = repak::PakBuilder::new().writer(
&mut cur,
repak::Version::V11,
"../../../".to_string(),
Some(0x12345678),
);
let outside_scope1 = vec![1, 2, 3];
let outside_scope2 = vec![4, 5, 6];
writer.parallel(|writer| -> Result<(), repak::Error> {
let inside_scope = vec![7, 8, 9];
writer.write_file("pass/takes/ownership".to_string(), true, outside_scope1)?;
writer.write_file("pass/outlives/scope".to_string(), true, &outside_scope2)?;
writer.write_file("pass/takes/ownership".to_string(), true, inside_scope)?;
// writer.write_file("fail/doesnt/outlive/scope".to_string(), true, &inside_scope)?;
Ok(())
})?;
Ok(())
}
static AES_KEY: &str = "lNJbw660IOC+kU7cnVQ1oeqrXyhk4J6UAZrCBbcnp94=";
fn test_read(version: repak::Version, _file_name: &str, bytes: &[u8]) {