Rework PakReader to allow for parallel reads

This commit is contained in:
Truman Kilen 2023-02-07 20:54:04 -06:00
parent fefd02a369
commit 399f2f0187
4 changed files with 68 additions and 61 deletions

View file

@ -6,9 +6,8 @@ use std::collections::BTreeMap;
use std::io::{self, Read, Seek, Write}; use std::io::{self, Read, Seek, Write};
#[derive(Debug)] #[derive(Debug)]
pub struct PakReader<R: Read + Seek> { pub struct PakReader {
pak: Pak, pak: Pak,
reader: R,
key: Option<aes::Aes256Dec>, key: Option<aes::Aes256Dec>,
} }
@ -58,6 +57,13 @@ impl Index {
} }
} }
/// Consumes the index and hands back its owned path-to-entry map.
///
/// Each variant stores its entries in an `entries` field; this moves that
/// map out of whichever variant is present, without cloning.
fn into_entries(self) -> BTreeMap<String, super::entry::Entry> {
    match self {
        Index::V1(v1) => v1.entries,
        Index::V2(v2) => v2.entries,
    }
}
fn add_entry(&mut self, path: &str, entry: super::entry::Entry) { fn add_entry(&mut self, path: &str, entry: super::entry::Entry) {
match self { match self {
Index::V1(index) => index.entries.insert(path.to_string(), entry), Index::V1(index) => index.entries.insert(path.to_string(), entry),
@ -89,12 +95,15 @@ fn decrypt(key: &Option<aes::Aes256Dec>, bytes: &mut [u8]) -> Result<(), super::
} }
} }
impl<R: Read + Seek> PakReader<R> { impl PakReader {
pub fn new_any(mut reader: R, key: Option<aes::Aes256Dec>) -> Result<Self, super::Error> { pub fn new_any<R: Read + Seek>(
mut reader: R,
key: Option<aes::Aes256Dec>,
) -> Result<Self, super::Error> {
for ver in Version::iter() { for ver in Version::iter() {
match Pak::read(&mut reader, ver, key.clone()) { match Pak::read(&mut reader, ver, key.clone()) {
Ok(pak) => { Ok(pak) => {
return Ok(PakReader { pak, reader, key }); return Ok(PakReader { pak, key });
} }
_ => continue, _ => continue,
} }
@ -102,10 +111,6 @@ impl<R: Read + Seek> PakReader<R> {
Err(super::Error::Other("version unsupported")) Err(super::Error::Other("version unsupported"))
} }
pub fn into_reader(self) -> R {
self.reader
}
pub fn version(&self) -> super::Version { pub fn version(&self) -> super::Version {
self.pak.version self.pak.version
} }
@ -114,36 +119,30 @@ impl<R: Read + Seek> PakReader<R> {
&self.pak.mount_point &self.pak.mount_point
} }
pub fn get(&mut self, path: &str) -> Result<Vec<u8>, super::Error> { pub fn get<R: Read + Seek>(
&mut self,
path: &str,
reader: &mut R,
) -> Result<Vec<u8>, super::Error> {
let mut data = Vec::new(); let mut data = Vec::new();
self.read_file(path, &mut data)?; self.read_file(path, reader, &mut data)?;
Ok(data) Ok(data)
} }
pub fn read_file<W: io::Write>( pub fn read_file<R: Read + Seek, W: io::Write>(
&mut self, &self,
path: &str, path: &str,
reader: &mut R,
writer: &mut W, writer: &mut W,
) -> Result<(), super::Error> { ) -> Result<(), super::Error> {
match self.pak.index.entries().get(path) { match self.pak.index.entries().get(path) {
Some(entry) => entry.read_file( Some(entry) => entry.read_file(reader, self.pak.version, self.key.as_ref(), writer),
&mut self.reader,
self.pak.version,
self.key.as_ref(),
writer,
),
None => Err(super::Error::Other("no file found at given path")), None => Err(super::Error::Other("no file found at given path")),
} }
} }
pub fn files(&self) -> std::vec::IntoIter<String> { pub fn files(&self) -> Vec<String> {
self.pak self.pak.index.entries().keys().cloned().collect()
.index
.entries()
.keys()
.cloned()
.collect::<Vec<String>>()
.into_iter()
} }
} }
@ -588,17 +587,18 @@ mod test {
use std::io::Cursor; use std::io::Cursor;
let bytes = include_bytes!("../tests/packs/pack_v8b.pak"); let bytes = include_bytes!("../tests/packs/pack_v8b.pak");
let mut reader = super::PakReader::new_any(Cursor::new(bytes), None).unwrap(); let mut reader = Cursor::new(bytes);
let mut pak_reader = super::PakReader::new_any(&mut reader, None).unwrap();
let writer = Cursor::new(vec![]); let writer = Cursor::new(vec![]);
let mut pak_writer = super::PakWriter::new( let mut pak_writer = super::PakWriter::new(
writer, writer,
None, None,
super::Version::V8B, super::Version::V8B,
reader.mount_point().to_owned(), pak_reader.mount_point().to_owned(),
); );
for path in reader.files() { for path in pak_reader.files() {
let data = reader.get(&path).unwrap(); let data = pak_reader.get(&path, &mut reader).unwrap();
pak_writer pak_writer
.write_file(&path, &mut std::io::Cursor::new(data)) .write_file(&path, &mut std::io::Cursor::new(data))
.unwrap(); .unwrap();
@ -613,17 +613,18 @@ mod test {
use std::io::Cursor; use std::io::Cursor;
let bytes = include_bytes!("../tests/packs/pack_v11.pak"); let bytes = include_bytes!("../tests/packs/pack_v11.pak");
let mut reader = super::PakReader::new_any(Cursor::new(bytes), None).unwrap(); let mut reader = Cursor::new(bytes);
let mut pak_reader = super::PakReader::new_any(&mut reader, None).unwrap();
let writer = Cursor::new(vec![]); let writer = Cursor::new(vec![]);
let mut pak_writer = super::PakWriter::new( let mut pak_writer = super::PakWriter::new(
writer, writer,
None, None,
super::Version::V11, super::Version::V11,
reader.mount_point().to_owned(), pak_reader.mount_point().to_owned(),
); );
for path in reader.files() { for path in pak_reader.files() {
let data = reader.get(&path).unwrap(); let data = pak_reader.get(&path, &mut reader).unwrap();
pak_writer pak_writer
.write_file(&path, &mut std::io::Cursor::new(data)) .write_file(&path, &mut std::io::Cursor::new(data))
.unwrap(); .unwrap();

View file

@ -119,11 +119,9 @@ macro_rules! encryptindex {
let mut inner_reader = std::io::Cursor::new(include_bytes!(concat!("packs/pack_", $version, $compress, $encrypt, $encryptindex, ".pak"))); let mut inner_reader = std::io::Cursor::new(include_bytes!(concat!("packs/pack_", $version, $compress, $encrypt, $encryptindex, ".pak")));
let len = inner_reader.seek(SeekFrom::End(0)).unwrap(); let len = inner_reader.seek(SeekFrom::End(0)).unwrap();
let mut reader = ReadCounter::new_size(inner_reader, len as usize);
let mut pak = repak::PakReader::new_any( let pak = repak::PakReader::new_any(&mut reader, Some(key)).unwrap();
ReadCounter::new_size(inner_reader, len as usize),
Some(key),
).unwrap();
assert_eq!(pak.mount_point(), "../mount/point/root/"); assert_eq!(pak.mount_point(), "../mount/point/root/");
assert_eq!(pak.version(), $exp_version); assert_eq!(pak.version(), $exp_version);
@ -134,7 +132,7 @@ macro_rules! encryptindex {
for file in files { for file in files {
let mut buf = vec![]; let mut buf = vec![];
let mut writer = std::io::Cursor::new(&mut buf); let mut writer = std::io::Cursor::new(&mut buf);
pak.read_file(&file, &mut writer).unwrap(); pak.read_file(&file, &mut reader, &mut writer).unwrap();
match file.as_str() { match file.as_str() {
"test.txt" => assert_eq!(buf, include_bytes!("pack/root/test.txt"), "test.txt incorrect contents"), "test.txt" => assert_eq!(buf, include_bytes!("pack/root/test.txt"), "test.txt incorrect contents"),
"test.png" => assert_eq!(buf, include_bytes!("pack/root/test.png"), "test.png incorrect contents"), "test.png" => assert_eq!(buf, include_bytes!("pack/root/test.png"), "test.png incorrect contents"),
@ -144,7 +142,7 @@ macro_rules! encryptindex {
} }
} }
for r in pak.into_reader().into_reads() { for r in reader.into_reads() {
// sanity check. a pak file can be constructed with a lot of dead space // sanity check. a pak file can be constructed with a lot of dead space
// which wouldn't have to be read, but so far all bytes in paks generated // which wouldn't have to be read, but so far all bytes in paks generated
// by UnrealPak are meaningful // by UnrealPak are meaningful

View file

@ -15,4 +15,5 @@ aes = { workspace = true }
base64 = { workspace = true } base64 = { workspace = true }
clap = { version = "4.1.4", features = ["derive"] } clap = { version = "4.1.4", features = ["derive"] }
path-clean = "0.1.0" path-clean = "0.1.0"
rayon = "1.6.1"
repak = { version = "0.1.0", path = "../repak" } repak = { version = "0.1.0", path = "../repak" }

View file

@ -4,6 +4,7 @@ use std::path::{Path, PathBuf};
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use path_clean::PathClean; use path_clean::PathClean;
use rayon::prelude::*;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
struct ActionInfo { struct ActionInfo {
@ -133,7 +134,7 @@ fn list(args: ActionInfo) -> Result<(), repak::Error> {
} }
fn unpack(args: ActionUnpack) -> Result<(), repak::Error> { fn unpack(args: ActionUnpack) -> Result<(), repak::Error> {
let mut pak = repak::PakReader::new_any( let pak = repak::PakReader::new_any(
BufReader::new(File::open(&args.input)?), BufReader::new(File::open(&args.input)?),
args.aes_key.map(|k| aes_key(k.as_str())).transpose()?, args.aes_key.map(|k| aes_key(k.as_str())).transpose()?,
)?; )?;
@ -151,25 +152,31 @@ fn unpack(args: ActionUnpack) -> Result<(), repak::Error> {
} }
let mount_point = PathBuf::from(pak.mount_point()); let mount_point = PathBuf::from(pak.mount_point());
let prefix = Path::new(&args.strip_prefix); let prefix = Path::new(&args.strip_prefix);
for file in pak.files() { pak.files().into_par_iter().try_for_each_init(
if args.verbose { || File::open(&args.input),
println!("extracting {}", &file); |file, path| -> Result<(), repak::Error> {
} if args.verbose {
let file_path = output.join( println!("extracting {path}");
mount_point }
.join(&file) let file_path = output.join(
.strip_prefix(prefix) mount_point
.map_err(|_| repak::Error::Other("prefix does not match"))?, .join(&path)
); .strip_prefix(prefix)
if !file_path.clean().starts_with(&output) { .map_err(|_| repak::Error::Other("prefix does not match"))?,
return Err(repak::Error::Other( );
"tried to write file outside of output directory", if !file_path.clean().starts_with(&output) {
)); return Err(repak::Error::Other(
} "tried to write file outside of output directory",
fs::create_dir_all(file_path.parent().expect("will be a file"))?; ));
pak.read_file(&file, &mut fs::File::create(file_path)?)?; }
} fs::create_dir_all(file_path.parent().expect("will be a file"))?;
Ok(()) pak.read_file(
&path,
&mut BufReader::new(file.as_ref().unwrap()), // TODO: avoid this unwrap
&mut fs::File::create(file_path)?,
)
},
)
} }
fn pack(args: ActionPack) -> Result<(), repak::Error> { fn pack(args: ActionPack) -> Result<(), repak::Error> {