From 255486b96215ccfa13e3d14738b6ae79b152e296 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Fri, 17 Jan 2025 18:12:18 -0600 Subject: [PATCH 01/22] Change Oodle source and add compression wrapper --- Cargo.lock | 53 +--- oodle_loader/Cargo.toml | 13 +- oodle_loader/src/lib.rs | 635 +++++++++++++++++----------------------- repak/src/entry.rs | 2 +- repak/src/lib.rs | 2 +- repak/src/pak.rs | 2 +- 6 files changed, 270 insertions(+), 437 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5659237..cc3d9d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -229,21 +229,6 @@ dependencies = [ "libc", ] -[[package]] -name = "crc" -version = "3.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" -dependencies = [ - "crc-catalog", -] - -[[package]] -name = "crc-catalog" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" - [[package]] name = "crc32fast" version = "1.4.2" @@ -416,12 +401,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "hex-literal" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" - [[package]] name = "icu_collections" version = "1.5.0" @@ -658,16 +637,6 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" -[[package]] -name = "lzma-rs" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e" -dependencies = [ - "byteorder", - "crc", -] - [[package]] name = "memchr" version = "2.7.4" @@ -689,15 +658,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" -[[package]] -name = "object" -version = "0.36.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" -dependencies = [ - "memchr", -] - [[package]] name = "once_cell" version = "1.20.2" @@ -710,13 +670,8 @@ version = "0.2.2" dependencies = [ "anyhow", "hex", - "hex-literal", - "libc", "libloading", - "lzma-rs", - "object", - "seq-macro", - "sha1", + "sha2", "ureq", ] @@ -941,12 +896,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "seq-macro" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" - [[package]] name = "serde" version = "1.0.217" diff --git a/oodle_loader/Cargo.toml b/oodle_loader/Cargo.toml index 7e57ccd..4dbfff7 100644 --- a/oodle_loader/Cargo.toml +++ b/oodle_loader/Cargo.toml @@ -6,18 +6,9 @@ license.workspace = true version.workspace = true edition.workspace = true -[target.'cfg(windows)'.dependencies] -libloading = "0.8" - -[target.'cfg(unix)'.dependencies] -object = { version = "0.36.7", default-features = false, features = ["std", "read"] } -libc = "0.2.169" -seq-macro = "0.3.5" - [dependencies] -sha1 = { workspace = true } +libloading = "0.8" ureq = "2.12" -hex-literal = "0.4" hex = { workspace = true } anyhow = "1.0.95" 
-lzma-rs = "0.3.0" +sha2 = "0.10.8" diff --git a/oodle_loader/src/lib.rs b/oodle_loader/src/lib.rs index 2803770..7066088 100644 --- a/oodle_loader/src/lib.rs +++ b/oodle_loader/src/lib.rs @@ -1,405 +1,298 @@ -use anyhow::{anyhow, Context, Result}; +use anyhow::{bail, Result}; -use std::sync::OnceLock; +use std::{ + io::{Read, Write}, + sync::OnceLock, +}; -type OodleDecompress = fn(comp_buf: &[u8], raw_buf: &mut [u8]) -> i32; +pub use oodle_lz::{CompressionLevel, Compressor}; -#[allow(non_camel_case_types)] -type OodleLZ_Decompress = unsafe extern "win64" fn( - compBuf: *const u8, - compBufSize: usize, - rawBuf: *mut u8, - rawLen: usize, - fuzzSafe: u32, - checkCRC: u32, - verbosity: u32, - decBufBase: u64, - decBufSize: usize, - fpCallback: u64, - callbackUserData: u64, - decoderMemory: *mut u8, - decoderMemorySize: usize, - threadPhase: u32, -) -> i32; +mod oodle_lz { + #[derive(Debug, Clone, Copy)] + #[repr(i32)] + pub enum Compressor { + /// None = memcpy, pass through uncompressed bytes + None = 3, -pub fn decompress() -> Result> { - #[cfg(windows)] - return Ok(windows_oodle::decompress_wrapper_windows); - #[cfg(unix)] - return Ok(linux_oodle::oodle_loader_linux()); -} - -fn call_decompress(comp_buf: &[u8], raw_buf: &mut [u8], decompress: OodleLZ_Decompress) -> i32 { - unsafe { - decompress( - comp_buf.as_ptr(), - comp_buf.len(), - raw_buf.as_mut_ptr(), - raw_buf.len(), - 1, - 1, - 0, - 0, - 0, - 0, - 0, - std::ptr::null_mut(), - 0, - 3, - ) - } -} - -static OODLE_HASH: [u8; 20] = hex_literal::hex!("4bcc73614cb8fd2b0bce8d0f91ee5f3202d9d624"); -static OODLE_DLL_NAME: &str = "oo2core_9_win64.dll"; - -fn fetch_oodle() -> Result { - use sha1::{Digest, Sha1}; - - let oodle_path = std::env::current_exe()?.with_file_name(OODLE_DLL_NAME); - - if !oodle_path.exists() { - let mut compressed = vec![]; - ureq::get("https://origin.warframe.com/origin/50F7040A/index.txt.lzma") - .call()? - .into_reader() - .read_to_end(&mut compressed)?; - - let mut decompressed = vec![]; - lzma_rs::lzma_decompress(&mut std::io::Cursor::new(compressed), &mut decompressed).unwrap(); - let index = String::from_utf8(decompressed)?; - let line = index - .lines() - .find(|l| l.contains(OODLE_DLL_NAME)) - .with_context(|| format!("{OODLE_DLL_NAME} not found in index"))?; - let path = line.split_once(',').context("failed to parse index")?.0; - - let mut compressed = vec![]; - ureq::get(&format!("https://content.warframe.com{path}")) - .call()? - .into_reader() - .read_to_end(&mut compressed)?; - - let mut decompressed = vec![]; - lzma_rs::lzma_decompress(&mut std::io::Cursor::new(compressed), &mut decompressed).unwrap(); - - std::fs::write(&oodle_path, decompressed)?; + /// Fast decompression and high compression ratios, amazing! + Kraken = 8, + /// Leviathan = Kraken's big brother with higher compression, slightly slower decompression. + Leviathan = 13, + /// Mermaid is between Kraken & Selkie - crazy fast, still decent compression. + Mermaid = 9, + /// Selkie is a super-fast relative of Mermaid. For maximum decode speed. + Selkie = 11, + /// Hydra, the many-headed beast = Leviathan, Kraken, Mermaid, or Selkie (see $OodleLZ_About_Hydra) + Hydra = 12, } - let mut hasher = Sha1::new(); - hasher.update(std::fs::read(&oodle_path)?); - let hash = hasher.finalize(); - (hash[..] 
== OODLE_HASH).then_some(()).ok_or_else(|| { - anyhow!( - "oodle hash mismatch expected: {} got: {} ", - hex::encode(OODLE_HASH), - hex::encode(hash) - ) - })?; + #[derive(Debug, Clone, Copy)] + #[repr(i32)] + pub enum CompressionLevel { + /// don't compress, just copy raw bytes + None = 0, + /// super fast mode, lower compression ratio + SuperFast = 1, + /// fastest LZ mode with still decent compression ratio + VeryFast = 2, + /// fast - good for daily use + Fast = 3, + /// standard medium speed LZ mode + Normal = 4, - Ok(oodle_path) + /// optimal parse level 1 (faster optimal encoder) + Optimal1 = 5, + /// optimal parse level 2 (recommended baseline optimal encoder) + Optimal2 = 6, + /// optimal parse level 3 (slower optimal encoder) + Optimal3 = 7, + /// optimal parse level 4 (very slow optimal encoder) + Optimal4 = 8, + /// optimal parse level 5 (don't care about encode speed, maximum compression) + Optimal5 = 9, + + /// faster than SuperFast, less compression + HyperFast1 = -1, + /// faster than HyperFast1, less compression + HyperFast2 = -2, + /// faster than HyperFast2, less compression + HyperFast3 = -3, + /// fastest, less compression + HyperFast4 = -4, + } + + pub type Compress = unsafe extern "system" fn( + compressor: Compressor, + rawBuf: *const u8, + rawLen: usize, + compBuf: *mut u8, + level: CompressionLevel, + pOptions: *const (), + dictionaryBase: *const (), + lrm: *const (), + scratchMem: *mut u8, + scratchSize: usize, + ) -> isize; + + pub type Decompress = unsafe extern "system" fn( + compBuf: *const u8, + compBufSize: usize, + rawBuf: *mut u8, + rawLen: usize, + fuzzSafe: u32, + checkCRC: u32, + verbosity: u32, + decBufBase: u64, + decBufSize: usize, + fpCallback: u64, + callbackUserData: u64, + decoderMemory: *mut u8, + decoderMemorySize: usize, + threadPhase: u32, + ) -> isize; + + pub type GetCompressedBufferSizeNeeded = + unsafe extern "system" fn(compressor: Compressor, rawSize: usize) -> usize; } -#[cfg(windows)] -mod windows_oodle { - use super::*; +static OODLE_VERSION: &str = "2.9.10"; +static OODLE_BASE_URL: &str = "https://github.com/WorkingRobot/OodleUE/raw/refs/heads/main/Engine/Source/Programs/Shared/EpicGames.Oodle/Sdk/"; - static DECOMPRESS: OnceLock<(OodleLZ_Decompress, libloading::Library)> = OnceLock::new(); - - pub fn decompress_wrapper_windows(comp_buf: &[u8], raw_buf: &mut [u8]) -> i32 { - let decompress = DECOMPRESS.get_or_init(|| { - let path = fetch_oodle().context("failed to fetch oodle").unwrap(); - - let lib = unsafe { libloading::Library::new(path) } - .context("failed to load oodle") - .unwrap(); - - (*unsafe { lib.get(b"OodleLZ_Decompress") }.unwrap(), lib) - }); - call_decompress(comp_buf, raw_buf, decompress.0) - } +struct OodlePlatform { + path: &'static str, + name: &'static str, + hash: &'static str, } #[cfg(unix)] -mod linux_oodle { - use super::*; +static OODLE_PLATFORM: OodlePlatform = OodlePlatform { + path: "linux/lib", + name: "liboo2corelinux64.so.9", + hash: "ed7e98f70be1254a80644efd3ae442ff61f854a2fe9debb0b978b95289884e9c", +}; - use object::pe::{ - ImageNtHeaders64, IMAGE_REL_BASED_DIR64, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, - IMAGE_SCN_MEM_WRITE, - }; - use object::read::pe::{ImageOptionalHeader, ImageThunkData, PeFile64}; +#[cfg(windows)] +static OODLE_PLATFORM: OodlePlatform = OodlePlatform { + path: "win/redist", + name: "oo2core_9_win64.dll", + hash: "6f5d41a7892ea6b2db420f2458dad2f84a63901c9a93ce9497337b16c195f457", +}; - use object::{LittleEndian as LE, Object, ObjectSection}; - use std::collections::HashMap; 
- use std::ffi::{c_void, CStr}; +fn url() -> String { + format!( + "{OODLE_BASE_URL}/{}/{}/{}", + OODLE_VERSION, OODLE_PLATFORM.path, OODLE_PLATFORM.name + ) +} - #[repr(C)] - struct ThreadInformationBlock { - exception_list: *const c_void, - stack_base: *const c_void, - stack_limit: *const c_void, - sub_system_tib: *const c_void, - fiber_data: *const c_void, - arbitrary_user_pointer: *const c_void, - teb: *const c_void, +fn check_hash(buffer: &[u8]) -> Result<()> { + use sha2::{Digest, Sha256}; + + let mut hasher = Sha256::new(); + hasher.update(buffer); + let hash = hex::encode(hasher.finalize()); + if hash != OODLE_PLATFORM.hash { + anyhow::bail!( + "Oodle library hash mismatch: expected {} got {}", + OODLE_PLATFORM.hash, + hash + ); } - const TIB: ThreadInformationBlock = ThreadInformationBlock { - exception_list: std::ptr::null(), - stack_base: std::ptr::null(), - stack_limit: std::ptr::null(), - sub_system_tib: std::ptr::null(), - fiber_data: std::ptr::null(), - arbitrary_user_pointer: std::ptr::null(), - teb: std::ptr::null(), - }; + Ok(()) +} - static DECOMPRESS: OnceLock = OnceLock::new(); +fn fetch_oodle() -> Result { + let oodle_path = std::env::current_exe()?.with_file_name(OODLE_PLATFORM.name); + if !oodle_path.exists() { + let mut buffer = vec![]; + ureq::get(&url()) + .call()? + .into_reader() + .read_to_end(&mut buffer)?; + check_hash(&buffer)?; + std::fs::write(&oodle_path, buffer)?; + } + check_hash(&std::fs::read(&oodle_path)?)?; + Ok(oodle_path) +} - fn decompress_wrapper(comp_buf: &[u8], raw_buf: &mut [u8]) -> i32 { +pub struct Oodle { + _library: libloading::Library, + compress: oodle_lz::Compress, + decompress: oodle_lz::Decompress, + get_compressed_buffer_size_needed: oodle_lz::GetCompressedBufferSizeNeeded, +} +impl Oodle { + pub fn compress( + &self, + input: &[u8], + mut output: S, + compressor: Compressor, + compression_level: CompressionLevel, + ) -> Result { unsafe { - // Set GS register in calling thread - const ARCH_SET_GS: i32 = 0x1001; - libc::syscall(libc::SYS_arch_prctl, ARCH_SET_GS, &TIB); + let buffer_size = self.get_compressed_buffer_size_needed(compressor, input.len()); + let mut buffer = vec![0; buffer_size]; - // Call actual decompress function - call_decompress(comp_buf, raw_buf, *DECOMPRESS.get().unwrap()) - } - } - - #[allow(non_snake_case)] - mod imports { - use super::*; - - pub unsafe extern "win64" fn OutputDebugStringA(string: *const std::ffi::c_char) { - print!("[OODLE] {}", CStr::from_ptr(string).to_string_lossy()); - } - pub unsafe extern "win64" fn GetProcessHeap() -> *const c_void { - 0x12345678 as *const c_void - } - pub unsafe extern "win64" fn HeapAlloc( - _heap: *const c_void, - flags: i32, - size: usize, - ) -> *const c_void { - assert_eq!(0, flags); - libc::malloc(size) - } - pub unsafe extern "win64" fn HeapFree( - _heap: *const c_void, - _flags: i32, - ptr: *mut c_void, - ) -> bool { - libc::free(ptr); - true - } - pub unsafe extern "win64" fn memset( - ptr: *mut c_void, - value: i32, - num: usize, - ) -> *const c_void { - libc::memset(ptr, value, num) - } - pub unsafe extern "win64" fn memmove( - destination: *mut c_void, - source: *const c_void, - num: usize, - ) -> *const c_void { - libc::memmove(destination, source, num) - } - pub unsafe extern "win64" fn memcpy( - destination: *mut c_void, - source: *const c_void, - num: usize, - ) -> *const c_void { - libc::memcpy(destination, source, num) - } - } - - // Create some unique function pointers to use for unimplemented imports - const DEBUG_FNS: [*const fn(); 100] = 
gen_debug_fns(); - static mut DEBUG_NAMES: [&str; 100] = [""; 100]; - const fn gen_debug_fns() -> [*const fn(); 100] { - fn log() { - unimplemented!("import {:?}", unsafe { DEBUG_NAMES[I] }); - } - let mut array = [std::ptr::null(); 100]; - seq_macro::seq!(N in 0..100 { - array[N] = log:: as *const fn(); - }); - array - } - - pub fn oodle_loader_linux() -> OodleDecompress { - DECOMPRESS.get_or_init(|| get_decompress_inner().unwrap()); - decompress_wrapper - } - - fn get_decompress_inner() -> Result { - fetch_oodle()?; - let oodle = std::env::current_exe() - .unwrap() - .with_file_name(OODLE_DLL_NAME); - let dll = std::fs::read(oodle)?; - - let obj_file = PeFile64::parse(&*dll)?; - - let size = obj_file.nt_headers().optional_header.size_of_image() as usize; - let header_size = obj_file.nt_headers().optional_header.size_of_headers() as usize; - - let image_base = obj_file.relative_address_base() as usize; - - // Create map - let mmap = unsafe { - std::slice::from_raw_parts_mut( - libc::mmap( - std::ptr::null_mut(), - size, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, - -1, - 0, - ) as *mut u8, - size, - ) - }; - - let map_base = mmap.as_ptr(); - - // Copy header to map - mmap[0..header_size].copy_from_slice(&dll[0..header_size]); - unsafe { - assert_eq!( + let len = (self.compress)( + compressor, + input.as_ptr(), + input.len(), + buffer.as_mut_ptr(), + compression_level, + std::ptr::null(), + std::ptr::null(), + std::ptr::null(), + std::ptr::null_mut(), 0, - libc::mprotect( - mmap.as_mut_ptr() as *mut c_void, - header_size, - libc::PROT_READ - ) ); - } - // Copy section data to map - for section in obj_file.sections() { - let address = section.address() as usize; - let data = section.data()?; - mmap[(address - image_base)..(address - image_base + data.len())] - .copy_from_slice(section.data()?); - } - - // Apply relocations - let sections = obj_file.section_table(); - let mut blocks = obj_file - .data_directories() - .relocation_blocks(&*dll, §ions)? - .unwrap(); - - while let Some(block) = blocks.next()? { - let block_address = block.virtual_address(); - let block_data = sections.pe_data_at(&*dll, block_address).map(object::Bytes); - for reloc in block { - let offset = (reloc.virtual_address - block_address) as usize; - match reloc.typ { - IMAGE_REL_BASED_DIR64 => { - let addend = block_data - .and_then(|data| data.read_at::>(offset).ok()) - .map(|addend| addend.get(LE)); - if let Some(addend) = addend { - mmap[reloc.virtual_address as usize - ..reloc.virtual_address as usize + 8] - .copy_from_slice(&u64::to_le_bytes( - addend - image_base as u64 + map_base as u64, - )); - } - } - _ => unimplemented!(), - } + if len == -1 { + bail!("Oodle compression failed"); } + let len = len as usize; + + output.write_all(&buffer[..len])?; + + Ok(len) } - - // Fix up imports - let import_table = obj_file.import_table()?.unwrap(); - let mut import_descs = import_table.descriptors()?; - - let mut i = 0; - while let Some(import_desc) = import_descs.next()? { - let mut thunks = import_table.thunks(import_desc.original_first_thunk.get(LE))?; - - let mut address = import_desc.first_thunk.get(LE) as usize; - while let Some(thunk) = thunks.next::()? 
{
-                let (_hint, name) = import_table.hint_name(thunk.address())?;
-                let name = String::from_utf8_lossy(name).to_string();
-
-                use imports::*;
-
-                let fn_addr = match name.as_str() {
-                    "OutputDebugStringA" => OutputDebugStringA as usize,
-                    "GetProcessHeap" => GetProcessHeap as usize,
-                    "HeapAlloc" => HeapAlloc as usize,
-                    "HeapFree" => HeapFree as usize,
-                    "memset" => memset as usize,
-                    "memcpy" => memcpy as usize,
-                    "memmove" => memmove as usize,
-                    _ => {
-                        unsafe { DEBUG_NAMES[i] = name.leak() }
-                        let a = DEBUG_FNS[i] as usize;
-                        i += 1;
-                        a
-                    }
-                };
-
-                mmap[address..address + 8].copy_from_slice(&usize::to_le_bytes(fn_addr));
-
-                address += 8;
-            }
+    }
+    pub fn decompress(&self, input: &[u8], output: &mut [u8]) -> isize {
+        unsafe {
+            (self.decompress)(
+                input.as_ptr(),
+                input.len(),
+                output.as_mut_ptr(),
+                output.len(),
+                1,
+                1,
+                0,
+                0,
+                0,
+                0,
+                0,
+                std::ptr::null_mut(),
+                0,
+                3,
+            )
+        }
+    }
+    fn get_compressed_buffer_size_needed(
+        &self,
+        compressor: oodle_lz::Compressor,
+        raw_buffer: usize,
+    ) -> usize {
+        unsafe { (self.get_compressed_buffer_size_needed)(compressor, raw_buffer) }
+    }
+}
-        // Build export table
-        let mut exports = HashMap::new();
-        for export in obj_file.exports()? {
-            let name = String::from_utf8_lossy(export.name());
-            let address = export.address() - image_base as u64 + map_base as u64;
-            exports.insert(name, address as *const c_void);
-        }
+static OODLE: OnceLock<Option<Oodle>> = OnceLock::new();
-        // Fix section permissions
-        for section in obj_file.sections() {
-            let address = section.address() as usize;
-            let data = section.data()?;
-            let size = data.len();
+fn load_oodle() -> Result<Oodle> {
+    let path = fetch_oodle()?;
+    unsafe {
+        let library = libloading::Library::new(path)?;
-            let mut permissions = 0;
-
-            let flags = match section.flags() {
-                object::SectionFlags::Coff { characteristics } => characteristics,
-                _ => unreachable!(),
-            };
-
-            if 0 != flags & IMAGE_SCN_MEM_READ {
-                permissions |= libc::PROT_READ;
-            }
-            if 0 != flags & IMAGE_SCN_MEM_WRITE {
-                permissions |= libc::PROT_WRITE;
-            }
-            if 0 != flags & IMAGE_SCN_MEM_EXECUTE {
-                permissions |= libc::PROT_EXEC;
-            }
-
-            unsafe {
-                assert_eq!(
-                    0,
-                    libc::mprotect(
-                        mmap.as_mut_ptr().add(address - image_base) as *mut c_void,
-                        size,
-                        permissions
-                    )
-                );
-            }
-        }
-
-        Ok(unsafe {
-            std::mem::transmute::<*const c_void, OodleLZ_Decompress>(exports["OodleLZ_Decompress"])
+        Ok(Oodle {
+            compress: *library.get(b"OodleLZ_Compress")?,
+            decompress: *library.get(b"OodleLZ_Decompress")?,
+            get_compressed_buffer_size_needed: *library
+                .get(b"OodleLZ_GetCompressedBufferSizeNeeded")?,
+            _library: library,
         })
     }
 }
+
+pub fn oodle() -> Result<&'static Oodle, Box<dyn std::error::Error>> {
+    let mut result = None;
+    let oodle = OODLE.get_or_init(|| match load_oodle() {
+        Err(err) => {
+            result = Some(Err(err));
+            None
+        }
+        Ok(oodle) => Some(oodle),
+    });
+    match (result, oodle) {
+        // oodle initialized so return
+        (_, Some(oodle)) => Ok(oodle),
+        // error during initialization
+        (Some(result), _) => result?,
+        // no error because initialization was tried and failed before
+        _ => Err(anyhow::anyhow!("oodle failed to initialize previously").into()),
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_oodle() {
+        let oodle = oodle().unwrap();
+
+        let data = b"In tools and when compressing large inputs in one call, consider using
+    $OodleXLZ_Compress_AsyncAndWait (in the Oodle2 Ext lib) instead to get parallelism. Alternatively,
+    chop the data into small fixed size chunks (we recommend at least 256KiB, i.e. 262144 bytes) and
+    call compress on each of them, which decreases compression ratio but makes for trivial parallel
+    compression and decompression.";
+
+        let mut buffer = vec![];
+        oodle
+            .compress(
+                data,
+                &mut buffer,
+                Compressor::Mermaid,
+                CompressionLevel::Optimal5,
+            )
+            .unwrap();
+
+        std::fs::write("comp.bin", &buffer).unwrap();
+        dbg!((data.len(), buffer.len()));
+
+        let mut uncomp = vec![0; data.len()];
+        oodle.decompress(&buffer, &mut uncomp);
+
+        assert_eq!(data[..], uncomp[..]);
+    }
+}
diff --git a/repak/src/entry.rs b/repak/src/entry.rs
index b5f25e7..a361eab 100644
--- a/repak/src/entry.rs
+++ b/repak/src/entry.rs
@@ -535,7 +535,7 @@ impl Entry {
                         .min(self.uncompressed as usize - compress_offset)
                 };
                 let buffer = &mut data[range];
-                let out = oodle(
+                let out = oodle.decompress(
                     buffer,
                     &mut decompressed[decompress_offset..decompress_offset + decomp],
                 );
diff --git a/repak/src/lib.rs b/repak/src/lib.rs
index ec4292f..458ea12 100644
--- a/repak/src/lib.rs
+++ b/repak/src/lib.rs
@@ -11,7 +11,7 @@ pub const MAGIC: u32 = 0x5A6F12E1;
 
 #[cfg(feature = "oodle")]
 mod oodle {
-    pub type OodleGetter = fn() -> Result<OodleDecompress, Box<dyn std::error::Error>>;
+    pub type OodleGetter = fn() -> Result<&'static oodle_loader::Oodle, Box<dyn std::error::Error>>;
     pub type OodleDecompress = fn(comp_buf: &[u8], raw_buf: &mut [u8]) -> i32;
 }
 
diff --git a/repak/src/pak.rs b/repak/src/pak.rs
index e3825d0..2fdc8d7 100644
--- a/repak/src/pak.rs
+++ b/repak/src/pak.rs
@@ -27,7 +27,7 @@ impl PakBuilder {
             #[cfg(not(feature = "oodle_implicit_dynamic"))]
             oodle: super::Oodle::None,
             #[cfg(feature = "oodle_implicit_dynamic")]
-            oodle: super::Oodle::Some(oodle_loader::decompress),
+            oodle: super::Oodle::Some(oodle_loader::oodle),
             allowed_compression: Default::default(),
         }
     }

From dbe16c9001fed3802d724f1aed3c8ad1d39da6ce Mon Sep 17 00:00:00 2001
From: Truman Kilen
Date: Fri, 17 Jan 2025 18:27:22 -0600
Subject: [PATCH 02/22] Replace anyhow with thiserror for oodle_loader

---
 Cargo.lock              |  8 +-------
 oodle_loader/Cargo.toml |  2 +-
 oodle_loader/src/lib.rs | 35 +++++++++++++++++++++++++----------
 repak/src/lib.rs        |  2 +-
 4 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index cc3d9d4..c246f27 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -68,12 +68,6 @@ dependencies = [
  "windows-sys 0.59.0",
 ]
 
-[[package]]
-name = "anyhow"
-version = "1.0.95"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
-
 [[package]]
 name = "assert_cmd"
 version = "2.0.16"
@@ -668,10 +662,10 @@ checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
 name = "oodle_loader"
 version = "0.2.2"
 dependencies = [
- "anyhow",
  "hex",
  "libloading",
  "sha2",
+ "thiserror",
  "ureq",
 ]
 
diff --git a/oodle_loader/Cargo.toml b/oodle_loader/Cargo.toml
index 4dbfff7..c5255eb 100644
--- a/oodle_loader/Cargo.toml
+++ b/oodle_loader/Cargo.toml
@@ -10,5 +10,5 @@ edition.workspace = true
 libloading = "0.8"
 ureq = "2.12"
 hex = { workspace = true }
-anyhow = "1.0.95"
 sha2 = "0.10.8"
+thiserror = "2.0.11"
diff --git a/oodle_loader/src/lib.rs b/oodle_loader/src/lib.rs
index 7066088..753aec8 100644
--- a/oodle_loader/src/lib.rs
+++ b/oodle_loader/src/lib.rs
@@ -1,10 +1,10 @@
-use anyhow::{bail, Result};
-
 use std::{
     io::{Read, Write},
     sync::OnceLock,
 };
 
+type Result<T, E = Error> = std::result::Result<T, E>;
+
 pub use oodle_lz::{CompressionLevel, Compressor};
 
 mod oodle_lz {
@@ -125,6 +125,22 @@ fn url() -> String {
     )
 }
 
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error("hash mismatch expected: {expected} got {found}")]
+    HashMismatch { expected: String, found: String },
+    #[error("Oodle compression failed")]
+    CompressionFailed,
+    #[error("Oodle initialization failed previously")]
+    InitializationFailed,
+    #[error("IO error {0:?}")]
+    Io(#[from] std::io::Error),
+    #[error("ureq error {0:?}")]
+    Ureq(#[from] ureq::Error),
+    #[error("libloading error {0:?}")]
+    LibLoading(#[from] libloading::Error),
+}
+
 fn check_hash(buffer: &[u8]) -> Result<()> {
     use sha2::{Digest, Sha256};
 
@@ -132,11 +148,10 @@ fn check_hash(buffer: &[u8]) -> Result<()> {
     hasher.update(buffer);
     let hash = hex::encode(hasher.finalize());
     if hash != OODLE_PLATFORM.hash {
-        anyhow::bail!(
-            "Oodle library hash mismatch: expected {} got {}",
-            OODLE_PLATFORM.hash,
-            hash
-        );
+        return Err(Error::HashMismatch {
+            expected: OODLE_PLATFORM.hash.into(),
+            found: hash,
+        });
     }
 
     Ok(())
@@ -189,7 +204,7 @@ impl Oodle {
             );
 
             if len == -1 {
-                bail!("Oodle compression failed");
+                return Err(Error::CompressionFailed);
             }
             let len = len as usize;
 
@@ -244,7 +259,7 @@ impl Oodle {
     }
 }
 
-pub fn oodle() -> Result<&'static Oodle, Box<dyn std::error::Error>> {
+pub fn oodle() -> Result<&'static Oodle> {
     let mut result = None;
     let oodle = OODLE.get_or_init(|| match load_oodle() {
         Err(err) => {
@@ -259,7 +274,7 @@ pub fn oodle() -> Result<&'static Oodle, Box<dyn std::error::Error>> {
         // error during initialization
         (Some(result), _) => result?,
         // no error because initialization was tried and failed before
-        _ => Err(anyhow::anyhow!("oodle failed to initialize previously").into()),
+        _ => Err(Error::InitializationFailed),
     }
 }
 
diff --git a/repak/src/lib.rs b/repak/src/lib.rs
index 458ea12..fd98618 100644
--- a/repak/src/lib.rs
+++ b/repak/src/lib.rs
@@ -11,7 +11,7 @@ pub const MAGIC: u32 = 0x5A6F12E1;
 
 #[cfg(feature = "oodle")]
 mod oodle {
-    pub type OodleGetter = fn() -> Result<&'static oodle_loader::Oodle, Box<dyn std::error::Error>>;
+    pub type OodleGetter = fn() -> Result<&'static oodle_loader::Oodle, oodle_loader::Error>;
     pub type OodleDecompress = fn(comp_buf: &[u8], raw_buf: &mut [u8]) -> i32;
 }
 

From c2b5461f25ac1ecf98fe36d3b49a2219c19f6601 Mon Sep 17 00:00:00 2001
From: Truman Kilen
Date: Sat, 18 Jan 2025 11:20:45 -0600
Subject: [PATCH 03/22] Add support for parallel file compression

---
 Cargo.lock            |  60 +++++++++++++
 repak/Cargo.toml      |   2 +
 repak/src/data.rs     | 190 ++++++++++++++++++++++++++++++++++++++++++
 repak/src/entry.rs    | 139 ++++--------------------------
 repak/src/footer.rs   |  11 ++-
 repak/src/lib.rs      |   1 +
 repak/src/pak.rs      | 101 ++++++++++++++++++++--
 repak/tests/test.rs   |  10 ++-
 repak_cli/src/main.rs |  21 +++--
 9 files changed, 389 insertions(+), 146 deletions(-)
 create mode 100644 repak/src/data.rs

diff --git a/Cargo.lock b/Cargo.lock
index c246f27..f6995a9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -232,6 +232,28 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "crossbeam"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
+dependencies = [
+ "crossbeam-channel",
+ "crossbeam-deque",
+ "crossbeam-epoch",
+ "crossbeam-queue",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471"
+dependencies = [
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "crossbeam-deque"
 version = "0.8.6"
@@ -251,6 +273,15 @@ dependencies = [
  "crossbeam-utils",
 ]
 
+[[package]]
+name = "crossbeam-queue"
+version
= "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -389,6 +420,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + [[package]] name = "hex" version = "0.4.3" @@ -646,6 +683,16 @@ dependencies = [ "adler2", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "number_prefix" version = "0.4.0" @@ -669,6 +716,17 @@ dependencies = [ "ureq", ] +[[package]] +name = "pariter" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "324a62b9e7b5f270c0acc92a2040f8028bb643f959f9c068f11a7864f327e3d9" +dependencies = [ + "crossbeam", + "crossbeam-channel", + "num_cpus", +] + [[package]] name = "paste" version = "1.0.15" @@ -784,7 +842,9 @@ dependencies = [ "base64", "byteorder", "flate2", + "hex", "oodle_loader", + "pariter", "paste", "sha1", "strum", diff --git a/repak/Cargo.toml b/repak/Cargo.toml index c9e412f..07fea7e 100644 --- a/repak/Cargo.toml +++ b/repak/Cargo.toml @@ -25,6 +25,8 @@ oodle_loader = { path = "../oodle_loader", optional = true} thiserror = "2.0" sha1 = { workspace = true } strum = { workspace = true } +pariter = "0.5.1" +hex.workspace = true [dev-dependencies] base64 = { workspace = true } diff --git a/repak/src/data.rs b/repak/src/data.rs new file mode 100644 index 0000000..d6a1cca --- /dev/null +++ b/repak/src/data.rs @@ -0,0 +1,190 @@ +use crate::{ + entry::{Block, Entry}, + Compression, Error, Hash, Version, VersionMajor, +}; + +type Result = std::result::Result; + +pub(crate) struct PartialEntry { + compression: Option, + compressed_size: u64, + uncompressed_size: u64, + compression_block_size: u32, + pub(crate) blocks: Vec, + hash: Hash, +} +pub(crate) struct PartialBlock { + uncompressed_size: usize, + pub(crate) data: Vec, +} + +fn get_compression_slot( + version: Version, + compression_slots: &mut Vec>, + compression: Compression, +) -> Result { + let slot = compression_slots + .iter() + .enumerate() + .find(|(_, s)| **s == Some(compression)); + Ok(if let Some((i, _)) = slot { + // existing found + i + } else { + if version.version_major() < VersionMajor::FNameBasedCompression { + return Err(Error::Other(format!( + "cannot use {compression:?} prior to FNameBasedCompression (pak version 8)" + ))); + } + + // find empty slot + if let Some((i, empty_slot)) = compression_slots + .iter_mut() + .enumerate() + .find(|(_, s)| s.is_none()) + { + // empty found, set it to used compression type + *empty_slot = Some(compression); + i + } else { + // no empty slot found, add a new one + compression_slots.push(Some(compression)); + compression_slots.len() - 1 + } + } as u32) +} + +impl PartialEntry { + pub(crate) fn into_entry( + &self, + version: Version, + compression_slots: &mut Vec>, + file_offset: u64, + ) -> Result { + let compression_slot = self + .compression + .map(|c| get_compression_slot(version, 
compression_slots, c)) + .transpose()?; + + let blocks = (!self.blocks.is_empty()).then(|| { + let entry_size = + Entry::get_serialized_size(version, compression_slot, self.blocks.len() as u32); + + let mut offset = entry_size; + if version.version_major() < VersionMajor::RelativeChunkOffsets { + offset += file_offset; + }; + + self.blocks + .iter() + .map(|block| { + let start = offset; + offset += block.data.len() as u64; + let end = offset; + Block { start, end } + }) + .collect() + }); + + Ok(Entry { + offset: file_offset, + compressed: self.compressed_size, + uncompressed: self.uncompressed_size, + compression_slot, + timestamp: None, + hash: Some(self.hash.clone()), + blocks, + flags: 0, + compression_block_size: self.compression_block_size, + }) + } +} + +pub(crate) fn build_partial_entry( + //version: Version, + allowed_compression: &[Compression], + data: &[u8], +) -> Result { + // TODO hash needs to be post-compression/encryption + use sha1::{Digest, Sha1}; + let mut hasher = Sha1::new(); + + // TODO possibly select best compression based on some criteria instead of picking first + let compression = allowed_compression.first().cloned(); + let uncompressed_size = data.len() as u64; + let compression_block_size; + + let (blocks, compressed_size) = match compression { + #[cfg(not(feature = "compression"))] + Some(_) => { + unreachable!("should not be able to reach this point without compression feature") + } + #[cfg(feature = "compression")] + Some(compression) => { + compression_block_size = 0x10000; + let mut compressed_size = 0; + let mut blocks = vec![]; + for chunk in data.chunks(compression_block_size as usize) { + let data = compress(compression, chunk)?; + compressed_size += data.len() as u64; + hasher.update(&data); + blocks.push(PartialBlock { + uncompressed_size: chunk.len(), + data, + }) + } + + (blocks, compressed_size) + } + None => { + compression_block_size = 0; + hasher.update(&data); + (vec![], uncompressed_size) + } + }; + + Ok(PartialEntry { + compression, + compressed_size, + uncompressed_size, + compression_block_size, + blocks, + hash: Hash(hasher.finalize().into()), + }) +} + +fn compress(compression: Compression, data: &[u8]) -> Result> { + use std::io::Write; + + let compressed = match compression { + Compression::Zlib => { + let mut compress = + flate2::write::ZlibEncoder::new(Vec::new(), flate2::Compression::fast()); + compress.write_all(data.as_ref())?; + compress.finish()? + } + Compression::Gzip => { + let mut compress = + flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::fast()); + compress.write_all(data.as_ref())?; + compress.finish()? 
+ } + Compression::Zstd => zstd::stream::encode_all(data, 0)?, + Compression::Oodle => { + let mut output = vec![]; + oodle_loader::oodle() + .unwrap() + .compress( + data.as_ref(), + &mut output, + oodle_loader::Compressor::Mermaid, + oodle_loader::CompressionLevel::Normal, + ) + .unwrap(); + output + //return Err(Error::Other("writing Oodle compression unsupported".into())) + } + }; + + Ok(compressed) +} diff --git a/repak/src/entry.rs b/repak/src/entry.rs index a361eab..6e353e4 100644 --- a/repak/src/entry.rs +++ b/repak/src/entry.rs @@ -1,7 +1,8 @@ -use crate::Error; +use crate::{data::build_partial_entry, Error, Hash}; use super::{ext::BoolExt, ext::ReadExt, Compression, Version, VersionMajor}; use byteorder::{ReadBytesExt, WriteBytesExt, LE}; +use oodle_loader::oodle; use std::io; #[derive(Debug, PartialEq, Clone, Copy)] @@ -10,7 +11,7 @@ pub(crate) enum EntryLocation { Index, } -#[derive(Debug)] +#[derive(Debug, Default, Clone)] pub(crate) struct Block { pub start: u64, pub end: u64, @@ -55,7 +56,7 @@ pub(crate) struct Entry { pub uncompressed: u64, pub compression_slot: Option, pub timestamp: Option, - pub hash: Option<[u8; 20]>, + pub hash: Option, pub blocks: Option>, pub flags: u8, pub compression_block_size: u32, @@ -103,127 +104,19 @@ impl Entry { version: Version, compression_slots: &mut Vec>, allowed_compression: &[Compression], - data: impl AsRef<[u8]>, - ) -> Result { - // TODO hash needs to be post-compression - use sha1::{Digest, Sha1}; - let mut hasher = Sha1::new(); - hasher.update(&data); - - let offset = writer.stream_position()?; - let len = data.as_ref().len() as u64; - - // TODO possibly select best compression based on some criteria instead of picking first - let compression = allowed_compression.first().cloned(); - - let compression_slot = if let Some(compression) = compression { - // find existing - let slot = compression_slots - .iter() - .enumerate() - .find(|(_, s)| **s == Some(compression)); - Some(if let Some((i, _)) = slot { - // existing found - i - } else { - if version.version_major() < VersionMajor::FNameBasedCompression { - return Err(Error::Other(format!( - "cannot use {compression:?} prior to FNameBasedCompression (pak version 8)" - ))); - } - - // find empty slot - if let Some((i, empty_slot)) = compression_slots - .iter_mut() - .enumerate() - .find(|(_, s)| s.is_none()) - { - // empty found, set it to used compression type - *empty_slot = Some(compression); - i - } else { - // no empty slot found, add a new one - compression_slots.push(Some(compression)); - compression_slots.len() - 1 - } - } as u32) + data: &[u8], + ) -> Result { + let partial_entry = build_partial_entry(allowed_compression, data)?; + let stream_position = writer.stream_position()?; + let entry = partial_entry.into_entry(version, compression_slots, stream_position)?; + entry.write(writer, version, crate::entry::EntryLocation::Data)?; + if partial_entry.blocks.is_empty() { + writer.write_all(&data)?; } else { - None - }; - - let (blocks, compressed) = match compression { - #[cfg(not(feature = "compression"))] - Some(_) => { - unreachable!("should not be able to reach this point without compression feature") + for block in partial_entry.blocks { + writer.write_all(&block.data)?; } - #[cfg(feature = "compression")] - Some(compression) => { - use std::io::Write; - - let entry_size = Entry::get_serialized_size(version, compression_slot, 1); - let data_offset = offset + entry_size; - - let compressed = match compression { - Compression::Zlib => { - let mut compress = 
flate2::write::ZlibEncoder::new( - Vec::new(), - flate2::Compression::fast(), - ); - compress.write_all(data.as_ref())?; - compress.finish()? - } - Compression::Gzip => { - let mut compress = - flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::fast()); - compress.write_all(data.as_ref())?; - compress.finish()? - } - Compression::Zstd => zstd::stream::encode_all(data.as_ref(), 0)?, - Compression::Oodle => { - return Err(Error::Other("writing Oodle compression unsupported".into())) - } - }; - - let compute_offset = |index: usize| -> u64 { - match version.version_major() >= VersionMajor::RelativeChunkOffsets { - true => index as u64 + (data_offset - offset), - false => index as u64 + data_offset, - } - }; - - let blocks = vec![Block { - start: compute_offset(0), - end: compute_offset(compressed.len()), - }]; - - (Some(blocks), Some(compressed)) - } - None => (None, None), - }; - - let entry = super::entry::Entry { - offset, - compressed: compressed - .as_ref() - .map(|c: &Vec| c.len() as u64) - .unwrap_or(len), - uncompressed: len, - compression_slot, - timestamp: None, - hash: Some(hasher.finalize().into()), - blocks, - flags: 0, - compression_block_size: compressed.as_ref().map(|_| len as u32).unwrap_or_default(), - }; - - entry.write(writer, version, EntryLocation::Data)?; - - if let Some(compressed) = compressed { - writer.write_all(&compressed)?; - } else { - writer.write_all(data.as_ref())?; } - Ok(entry) } @@ -243,7 +136,7 @@ impl Entry { n => Some(n - 1), }; let timestamp = (ver == VersionMajor::Initial).then_try(|| reader.read_u64::())?; - let hash = Some(reader.read_guid()?); + let hash = Some(Hash(reader.read_guid()?)); let blocks = (ver >= VersionMajor::CompressionEncryption && compression.is_some()) .then_try(|| reader.read_array(Block::read))?; let flags = (ver >= VersionMajor::CompressionEncryption) @@ -287,7 +180,7 @@ impl Entry { writer.write_u64::(self.timestamp.unwrap_or_default())?; } if let Some(hash) = self.hash { - writer.write_all(&hash)?; + writer.write_all(&hash.0)?; } else { panic!("hash missing"); } diff --git a/repak/src/footer.rs b/repak/src/footer.rs index 0bc35d5..7e3c1c4 100644 --- a/repak/src/footer.rs +++ b/repak/src/footer.rs @@ -1,4 +1,7 @@ -use crate::ext::{BoolExt, WriteExt}; +use crate::{ + ext::{BoolExt, WriteExt}, + Hash, +}; use super::{ext::ReadExt, Compression, Version, VersionMajor}; use byteorder::{ReadBytesExt, WriteBytesExt, LE}; @@ -13,7 +16,7 @@ pub struct Footer { pub version_major: VersionMajor, pub index_offset: u64, pub index_size: u64, - pub hash: [u8; 20], + pub hash: Hash, pub frozen: bool, pub compression: Vec>, } @@ -29,7 +32,7 @@ impl Footer { VersionMajor::from_repr(reader.read_u32::()?).unwrap_or(version.version_major()); let index_offset = reader.read_u64::()?; let index_size = reader.read_u64::()?; - let hash = reader.read_guid()?; + let hash = Hash(reader.read_guid()?); let frozen = version.version_major() == VersionMajor::FrozenIndex && reader.read_bool()?; let compression = { let mut compression = Vec::with_capacity(match version { @@ -91,7 +94,7 @@ impl Footer { writer.write_u32::(self.version_major as u32)?; writer.write_u64::(self.index_offset)?; writer.write_u64::(self.index_size)?; - writer.write_all(&self.hash)?; + writer.write_all(&self.hash.0)?; if self.version_major == VersionMajor::FrozenIndex { writer.write_bool(self.frozen)?; } diff --git a/repak/src/lib.rs b/repak/src/lib.rs index fd98618..5ff71a8 100644 --- a/repak/src/lib.rs +++ b/repak/src/lib.rs @@ -1,4 +1,5 @@ #![allow(dead_code)] +mod data; mod 
entry;
 mod error;
 mod ext;
diff --git a/repak/src/pak.rs b/repak/src/pak.rs
index 2fdc8d7..3b4d728 100644
--- a/repak/src/pak.rs
+++ b/repak/src/pak.rs
@@ -1,11 +1,21 @@
+use crate::data::build_partial_entry;
 use crate::entry::Entry;
-use crate::Compression;
+use crate::{Compression, Error};
 use super::ext::{ReadExt, WriteExt};
 use super::{Version, VersionMajor};
 use byteorder::{ReadBytesExt, WriteBytesExt, LE};
 use std::collections::BTreeMap;
 use std::io::{self, Read, Seek, Write};
+use std::sync::Arc;
+
+#[derive(Default, Clone, Copy)]
+pub(crate) struct Hash(pub(crate) [u8; 20]);
+impl std::fmt::Debug for Hash {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Hash({})", hex::encode(self.0))
+    }
+}
 
 #[derive(Debug)]
 pub struct PakBuilder {
@@ -89,6 +99,10 @@ pub struct PakWriter {
     allowed_compression: Vec<Compression>,
 }
 
+pub struct ParallelPakWriter {
+    tx: std::sync::mpsc::SyncSender<(String, Arc<Vec<u8>>)>,
+}
+
 #[derive(Debug)]
 pub(crate) struct Pak {
     version: Version,
@@ -281,19 +295,94 @@ impl PakWriter {
                 self.pak.version,
                 &mut self.pak.compression,
                 &self.allowed_compression,
-                data,
+                data.as_ref(),
             )?,
         );
 
         Ok(())
     }
 
+    pub fn parallel<F, E>(&mut self, mut f: F) -> Result<&mut Self, E>
+    where
+        F: Send + Sync + FnMut(&mut ParallelPakWriter) -> Result<(), E>,
+        E: From<Error> + Send,
+    {
+        {
+            use pariter::IteratorExt as _;
+
+            let (tx, rx) = std::sync::mpsc::sync_channel(0);
+
+            pariter::scope(|scope| -> Result<(), E> {
+                let handle = scope.spawn(|_| -> Result<(), E> {
+                    f(&mut ParallelPakWriter { tx })?;
+                    Ok(())
+                });
+
+                let result = rx
+                    .into_iter()
+                    .parallel_map_scoped(
+                        scope,
+                        |(path, data): (String, Arc<Vec<u8>>)| -> Result<_, Error> {
+                            let partial_entry =
+                                build_partial_entry(&self.allowed_compression, &data)?;
+                            let data = partial_entry.blocks.is_empty().then(|| Arc::new(data));
+                            Ok((path, data, partial_entry))
+                        },
+                    )
+                    .try_for_each(|message| -> Result<(), Error> {
+                        let stream_position = self.writer.stream_position()?;
+                        let (path, data, partial_entry) = message?;
+
+                        let entry = partial_entry.into_entry(
+                            self.pak.version,
+                            &mut self.pak.compression,
+                            stream_position,
+                        )?;
+
+                        entry.write(
+                            &mut self.writer,
+                            self.pak.version,
+                            crate::entry::EntryLocation::Data,
+                        )?;
+
+                        self.pak.index.add_entry(&path, entry);
+
+                        if let Some(data) = data {
+                            self.writer.write_all(&data)?;
+                        } else {
+                            for block in partial_entry.blocks {
+                                self.writer.write_all(&block.data)?;
+                            }
+                        }
+                        Ok(())
+                    });
+
+                if let Err(err) = handle.join().unwrap() {
+                    Err(err.into()) // prioritize error from user code
+                } else if let Err(err) = result {
+                    Err(err.into()) // user code was successful, check pak writer error
+                } else {
+                    Ok(()) // neither returned error so return success
+                }
+            })
+            .unwrap()?;
+        }
+        Ok(self)
+    }
+
     pub fn write_index(mut self) -> Result<W, super::Error> {
         self.pak.write(&mut self.writer, &self.key)?;
         Ok(self.writer)
     }
 }
 
+impl ParallelPakWriter {
+    pub fn write_file(&mut self, path: String, data: Vec<u8>) -> Result<(), Error> {
+        self.tx.send((path, Arc::new(data))).unwrap();
+        Ok(())
+    }
+}
+
 impl Pak {
     fn read<R: Read + Seek>(
         reader: &mut R,
@@ -541,12 +630,12 @@ impl Pak {
         index_writer.write_u32::<LE>(1)?; // we have path hash index
         index_writer.write_u64::<LE>(path_hash_index_offset)?;
         index_writer.write_u64::<LE>(phi_buf.len() as u64)?; // path hash index size
-        index_writer.write_all(&hash(&phi_buf))?;
+        index_writer.write_all(&hash(&phi_buf).0)?;
 
         index_writer.write_u32::<LE>(1)?; // we have full directory index
         index_writer.write_u64::<LE>(full_directory_index_offset)?;
         index_writer.write_u64::<LE>(fdi_buf.len() as u64)?; // full directory index size
-        index_writer.write_all(&hash(&fdi_buf))?;
+        index_writer.write_all(&hash(&fdi_buf).0)?;
 
         index_writer.write_u32::<LE>(encoded_entries.len() as u32)?;
         index_writer.write_all(&encoded_entries)?;
@@ -584,11 +673,11 @@ impl Pak {
         }
     }
 }
 
-fn hash(data: &[u8]) -> [u8; 20] {
+fn hash(data: &[u8]) -> Hash {
     use sha1::{Digest, Sha1};
     let mut hasher = Sha1::new();
     hasher.update(data);
-    hasher.finalize().into()
+    Hash(hasher.finalize().into())
 }
 
 fn generate_path_hash_index(
diff --git a/repak/tests/test.rs b/repak/tests/test.rs
index 5c63f8c..51e6d22 100644
--- a/repak/tests/test.rs
+++ b/repak/tests/test.rs
@@ -183,10 +183,12 @@ fn test_write(_version: repak::Version, _file_name: &str, bytes: &[u8]) {
         Some(0x205C5A7D),
     );
 
-    for path in pak_reader.files() {
-        let data = pak_reader.get(&path, &mut reader).unwrap();
-        pak_writer.write_file(&path, data).unwrap();
-    }
+    pak_writer.parallel(|writer| {
+        for path in pak_reader.files() {
+            let data = pak_reader.get(&path, &mut reader).unwrap();
+            writer.write_file(path, data).unwrap();
+        }
+    }).unwrap();
 
     assert!(pak_writer.write_index().unwrap().into_inner() == reader.into_inner());
 }
diff --git a/repak_cli/src/main.rs b/repak_cli/src/main.rs
index 1c13b51..b2ae887 100644
--- a/repak_cli/src/main.rs
+++ b/repak_cli/src/main.rs
@@ -498,16 +498,19 @@ fn pack(args: ActionPack) -> Result<(), repak::Error> {
         (Output::Stdout, itertools::Either::Right(iter))
     };
     let log = log.clone();
-    iter.try_for_each(|p| {
-        let rel = &p
-            .strip_prefix(input_path)
-            .expect("file not in input directory")
-            .to_slash()
-            .expect("failed to convert to slash path");
-        if args.verbose {
-            log.println(format!("packing {}", &rel));
+    pak.parallel(|writer| -> Result<(), repak::Error> {
+        for p in &mut iter {
+            let rel = &p
+                .strip_prefix(input_path)
+                .expect("file not in input directory")
+                .to_slash()
+                .expect("failed to convert to slash path");
+            if args.verbose {
+                log.println(format!("packing {}", &rel));
+            }
+            writer.write_file(rel.to_string(), std::fs::read(p)?)?;
         }
-        pak.write_file(rel, std::fs::read(p)?)
+        Ok(())
     })?;
 
     pak.write_index()?;

From 1d1ad7138d86d805be31fcb6af34c0d066ad3303 Mon Sep 17 00:00:00 2001
From: Truman Kilen
Date: Sat, 18 Jan 2025 14:18:21 -0600
Subject: [PATCH 04/22] Abandon other means of oodle for now

---
 oodle_loader/comp.bin   |  6 ++++++
 oodle_loader/src/lib.rs |  7 ++++++-
 repak/Cargo.toml        |  5 +----
 repak/src/data.rs       | 42 ++++++++++++++++++++++++-----------------
 repak/src/entry.rs      | 12 +++---------
 repak/src/error.rs      |  5 +++--
 repak/src/lib.rs        | 17 -----------------
 repak/src/pak.rs        | 26 ++++++------------------
 repak/tests/test.rs     | 10 ++++------
 repak_cli/Cargo.toml    |  2 +-
 10 files changed, 55 insertions(+), 77 deletions(-)
 create mode 100644 oodle_loader/comp.bin

diff --git a/oodle_loader/comp.bin b/oodle_loader/comp.bin
new file mode 100644
index 0000000..d43c5b4
--- /dev/null
+++ b/oodle_loader/comp.bin
@@ -0,0 +1,6 @@
+Ì
+In tools and when compressing large inputs in one call, consider using
+    $OodleXLZ_Compress_AsyncAndWait (in the Oodle2 Ext lib) instead to get parallelism. Alternatively,
+    chop the data into small fixed size chunks (we recommend at least 256KiB, i.e. 262144 bytes) and
+    call compress on each of them, which decreases compression ratio but makes for trivial parallel
+    compression and decompression.
\ No newline at end of file diff --git a/oodle_loader/src/lib.rs b/oodle_loader/src/lib.rs index 753aec8..7fe5f89 100644 --- a/oodle_loader/src/lib.rs +++ b/oodle_loader/src/lib.rs @@ -136,10 +136,15 @@ pub enum Error { #[error("IO error {0:?}")] Io(#[from] std::io::Error), #[error("ureq error {0:?}")] - Ureq(#[from] ureq::Error), + Ureq(Box), #[error("libloading error {0:?}")] LibLoading(#[from] libloading::Error), } +impl From for Error { + fn from(value: ureq::Error) -> Self { + Self::Ureq(value.into()) + } +} fn check_hash(buffer: &[u8]) -> Result<()> { use sha2::{Digest, Sha256}; diff --git a/repak/Cargo.toml b/repak/Cargo.toml index 07fea7e..bb3fcb9 100644 --- a/repak/Cargo.toml +++ b/repak/Cargo.toml @@ -10,10 +10,7 @@ keywords.workspace = true [features] default = ["compression", "encryption"] compression = ["dep:flate2", "dep:zstd"] -oodle = [] -oodle_loader = ["dep:oodle_loader"] -oodle_explicit = ["oodle"] -oodle_implicit_dynamic = ["dep:oodle_loader", "oodle"] +oodle = ["dep:oodle_loader"] encryption = ["dep:aes"] [dependencies] diff --git a/repak/src/data.rs b/repak/src/data.rs index d6a1cca..7832db7 100644 --- a/repak/src/data.rs +++ b/repak/src/data.rs @@ -18,6 +18,7 @@ pub(crate) struct PartialBlock { pub(crate) data: Vec, } +#[cfg(feature = "compression")] fn get_compression_slot( version: Version, compression_slots: &mut Vec>, @@ -55,16 +56,19 @@ fn get_compression_slot( } impl PartialEntry { - pub(crate) fn into_entry( + pub(crate) fn build_entry( &self, version: Version, - compression_slots: &mut Vec>, + #[allow(unused)] compression_slots: &mut Vec>, file_offset: u64, ) -> Result { + #[cfg(feature = "compression")] let compression_slot = self .compression .map(|c| get_compression_slot(version, compression_slots, c)) .transpose()?; + #[cfg(not(feature = "compression"))] + let compression_slot = None; let blocks = (!self.blocks.is_empty()).then(|| { let entry_size = @@ -92,7 +96,7 @@ impl PartialEntry { uncompressed: self.uncompressed_size, compression_slot, timestamp: None, - hash: Some(self.hash.clone()), + hash: Some(self.hash), blocks, flags: 0, compression_block_size: self.compression_block_size, @@ -101,7 +105,6 @@ impl PartialEntry { } pub(crate) fn build_partial_entry( - //version: Version, allowed_compression: &[Compression], data: &[u8], ) -> Result { @@ -138,7 +141,7 @@ pub(crate) fn build_partial_entry( } None => { compression_block_size = 0; - hasher.update(&data); + hasher.update(data); (vec![], uncompressed_size) } }; @@ -153,6 +156,7 @@ pub(crate) fn build_partial_entry( }) } +#[cfg(feature = "compression")] fn compress(compression: Compression, data: &[u8]) -> Result> { use std::io::Write; @@ -171,18 +175,22 @@ fn compress(compression: Compression, data: &[u8]) -> Result> { } Compression::Zstd => zstd::stream::encode_all(data, 0)?, Compression::Oodle => { - let mut output = vec![]; - oodle_loader::oodle() - .unwrap() - .compress( - data.as_ref(), - &mut output, - oodle_loader::Compressor::Mermaid, - oodle_loader::CompressionLevel::Normal, - ) - .unwrap(); - output - //return Err(Error::Other("writing Oodle compression unsupported".into())) + #[cfg(not(feature = "oodle"))] + return Err(super::Error::Oodle); + #[cfg(feature = "oodle")] + { + let mut output = vec![]; + oodle_loader::oodle() + .unwrap() + .compress( + data.as_ref(), + &mut output, + oodle_loader::Compressor::Mermaid, + oodle_loader::CompressionLevel::Normal, + ) + .unwrap(); + output + } } }; diff --git a/repak/src/entry.rs b/repak/src/entry.rs index 6e353e4..e0311a1 100644 --- 
a/repak/src/entry.rs +++ b/repak/src/entry.rs @@ -2,7 +2,6 @@ use crate::{data::build_partial_entry, Error, Hash}; use super::{ext::BoolExt, ext::ReadExt, Compression, Version, VersionMajor}; use byteorder::{ReadBytesExt, WriteBytesExt, LE}; -use oodle_loader::oodle; use std::io; #[derive(Debug, PartialEq, Clone, Copy)] @@ -108,10 +107,10 @@ impl Entry { ) -> Result { let partial_entry = build_partial_entry(allowed_compression, data)?; let stream_position = writer.stream_position()?; - let entry = partial_entry.into_entry(version, compression_slots, stream_position)?; + let entry = partial_entry.build_entry(version, compression_slots, stream_position)?; entry.write(writer, version, crate::entry::EntryLocation::Data)?; if partial_entry.blocks.is_empty() { - writer.write_all(&data)?; + writer.write_all(data)?; } else { for block in partial_entry.blocks { writer.write_all(&block.data)?; @@ -341,7 +340,6 @@ impl Entry { version: Version, compression: &[Option], #[allow(unused)] key: &super::Key, - #[allow(unused)] oodle: &super::Oodle, buf: &mut W, ) -> Result<(), super::Error> { reader.seek(io::SeekFrom::Start(self.offset))?; @@ -411,10 +409,6 @@ impl Entry { } #[cfg(feature = "oodle")] Some(Compression::Oodle) => { - let oodle = match oodle { - crate::Oodle::Some(getter) => getter().map_err(|_| super::Error::OodleFailed), - crate::Oodle::None => Err(super::Error::OodleFailed), - }?; let mut decompressed = vec![0; self.uncompressed as usize]; let mut compress_offset = 0; @@ -428,7 +422,7 @@ impl Entry { .min(self.uncompressed as usize - compress_offset) }; let buffer = &mut data[range]; - let out = oodle.decompress( + let out = oodle_loader::oodle()?.decompress( buffer, &mut decompressed[decompress_offset..decompress_offset + decomp], ); diff --git a/repak/src/error.rs b/repak/src/error.rs index b60bc92..363dc86 100644 --- a/repak/src/error.rs +++ b/repak/src/error.rs @@ -42,8 +42,9 @@ pub enum Error { #[error("found magic of {:#x} instead of {:#x}", .0, super::MAGIC)] Magic(u32), - #[error("pointer to OodleLZ_Decompress was not provided")] - OodleFailed, + #[cfg(feature = "oodle")] + #[error("Oodle loader error: {0}")] + OodleFailed(#[from] oodle_loader::Error), #[error("No entry found at {0}")] MissingEntry(String), diff --git a/repak/src/lib.rs b/repak/src/lib.rs index 5ff71a8..643381c 100644 --- a/repak/src/lib.rs +++ b/repak/src/lib.rs @@ -10,15 +10,6 @@ pub use {error::*, pak::*}; pub const MAGIC: u32 = 0x5A6F12E1; -#[cfg(feature = "oodle")] -mod oodle { - pub type OodleGetter = fn() -> Result<&'static oodle_loader::Oodle, oodle_loader::Error>; - pub type OodleDecompress = fn(comp_buf: &[u8], raw_buf: &mut [u8]) -> i32; -} - -#[cfg(feature = "oodle_loader")] -pub use oodle_loader; - #[derive( Clone, Copy, @@ -145,11 +136,3 @@ impl From for Key { Self::Some(value) } } - -#[derive(Debug, Default)] -pub(crate) enum Oodle { - #[cfg(feature = "oodle")] - Some(oodle::OodleGetter), - #[default] - None, -} diff --git a/repak/src/pak.rs b/repak/src/pak.rs index 3b4d728..043453f 100644 --- a/repak/src/pak.rs +++ b/repak/src/pak.rs @@ -20,7 +20,6 @@ impl std::fmt::Debug for Hash { #[derive(Debug)] pub struct PakBuilder { key: super::Key, - oodle: super::Oodle, allowed_compression: Vec, } @@ -34,10 +33,6 @@ impl PakBuilder { pub fn new() -> Self { Self { key: Default::default(), - #[cfg(not(feature = "oodle_implicit_dynamic"))] - oodle: super::Oodle::None, - #[cfg(feature = "oodle_implicit_dynamic")] - oodle: super::Oodle::Some(oodle_loader::oodle), allowed_compression: Default::default(), } } @@ 
-46,25 +41,20 @@ impl PakBuilder { self.key = super::Key::Some(key); self } - #[cfg(feature = "oodle_explicit")] - pub fn oodle(mut self, oodle_getter: super::oodle::OodleGetter) -> Self { - self.oodle = super::Oodle::Some(oodle_getter); - self - } #[cfg(feature = "compression")] pub fn compression(mut self, compression: impl IntoIterator) -> Self { self.allowed_compression = compression.into_iter().collect(); self } pub fn reader(self, reader: &mut R) -> Result { - PakReader::new_any_inner(reader, self.key, self.oodle) + PakReader::new_any_inner(reader, self.key) } pub fn reader_with_version( self, reader: &mut R, version: super::Version, ) -> Result { - PakReader::new_inner(reader, version, self.key, self.oodle) + PakReader::new_inner(reader, version, self.key) } pub fn writer( self, @@ -88,7 +78,6 @@ impl PakBuilder { pub struct PakReader { pak: Pak, key: super::Key, - oodle: super::Oodle, } #[derive(Debug)] @@ -180,14 +169,13 @@ impl PakReader { fn new_any_inner( reader: &mut R, key: super::Key, - oodle: super::Oodle, ) -> Result { use std::fmt::Write; let mut log = "\n".to_owned(); for ver in Version::iter() { match Pak::read(&mut *reader, ver, &key) { - Ok(pak) => return Ok(Self { pak, key, oodle }), + Ok(pak) => return Ok(Self { pak, key }), Err(err) => writeln!(log, "trying version {} failed: {}", ver, err)?, } } @@ -198,9 +186,8 @@ impl PakReader { reader: &mut R, version: super::Version, key: super::Key, - oodle: super::Oodle, ) -> Result { - Pak::read(reader, version, &key).map(|pak| Self { pak, key, oodle }) + Pak::read(reader, version, &key).map(|pak| Self { pak, key }) } pub fn version(&self) -> super::Version { @@ -241,7 +228,6 @@ impl PakReader { self.pak.version, &self.pak.compression, &self.key, - &self.oodle, writer, ), None => Err(super::Error::MissingEntry(path.to_owned())), @@ -333,7 +319,7 @@ impl PakWriter { let stream_position = self.writer.stream_position()?; let (path, data, partial_entry) = message?; - let entry = partial_entry.into_entry( + let entry = partial_entry.build_entry( self.pak.version, &mut self.pak.compression, stream_position, @@ -358,7 +344,7 @@ impl PakWriter { }); if let Err(err) = handle.join().unwrap() { - Err(err.into()) // prioritize error from user code + Err(err) // prioritize error from user code } else if let Err(err) = result { Err(err.into()) // user code was successful, check pak writer error } else { diff --git a/repak/tests/test.rs b/repak/tests/test.rs index 51e6d22..5c63f8c 100644 --- a/repak/tests/test.rs +++ b/repak/tests/test.rs @@ -183,12 +183,10 @@ fn test_write(_version: repak::Version, _file_name: &str, bytes: &[u8]) { Some(0x205C5A7D), ); - pak_writer.parallel(|writer| { - for path in pak_reader.files() { - let data = pak_reader.get(&path, &mut reader).unwrap(); - writer.write_file(path, data).unwrap(); - } - }).unwrap(); + for path in pak_reader.files() { + let data = pak_reader.get(&path, &mut reader).unwrap(); + pak_writer.write_file(&path, data).unwrap(); + } assert!(pak_writer.write_index().unwrap().into_inner() == reader.into_inner()); } diff --git a/repak_cli/Cargo.toml b/repak_cli/Cargo.toml index 7397c92..2de9afb 100644 --- a/repak_cli/Cargo.toml +++ b/repak_cli/Cargo.toml @@ -19,7 +19,7 @@ path = "src/main.rs" [features] default = ["oodle"] -oodle = ["repak/oodle_implicit_dynamic"] +oodle = ["repak/oodle"] [dependencies] repak = { path = "../repak" } From 057429fde13d716b95ba5144e3353cdc64bf5f93 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Sat, 18 Jan 2025 15:21:29 -0600 Subject: [PATCH 05/22] Add 
compression block size assert to ensure pak version compatibility --- repak/src/entry.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/repak/src/entry.rs b/repak/src/entry.rs index e0311a1..3c9fb76 100644 --- a/repak/src/entry.rs +++ b/repak/src/entry.rs @@ -275,10 +275,19 @@ impl Entry { } pub fn write_encoded(&self, writer: &mut W) -> Result<(), super::Error> { - let mut compression_block_size = (self.compression_block_size >> 11) & 0x3f; - if (compression_block_size << 11) != self.compression_block_size { - compression_block_size = 0x3f; + let compression_block_size = (self.compression_block_size >> 11) & 0x3f; + if (compression_block_size << 11) != self.compression_block_size + || compression_block_size == 0x3f + { + // https://github.com/EpicGames/UnrealEngine/commit/3aad0ff7976be1073005dca2c1282af548b45d89 + panic!( + "Unsupported compression block size: {}. Block size must fit into flags field or it may cause unreadable paks for earlier Unreal Engine versions.", + self.compression_block_size + ); } + //if (compression_block_size << 11) != self.compression_block_size { + // compression_block_size = 0x3f; + //} let compression_blocks_count = if self.compression_slot.is_some() { self.blocks.as_ref().unwrap().len() as u32 } else { @@ -298,9 +307,9 @@ impl Entry { writer.write_u32::(flags)?; - if compression_block_size == 0x3f { - writer.write_u32::(self.compression_block_size)?; - } + //if compression_block_size == 0x3f { + // writer.write_u32::(self.compression_block_size)?; + //} if is_offset_32_bit_safe { writer.write_u32::(self.offset as u32)?; From 1dadf3662416432887bdbe743c2f47e173858270 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Sat, 18 Jan 2025 15:31:15 -0600 Subject: [PATCH 06/22] Improve oodle loader errors --- oodle_loader/src/lib.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/oodle_loader/src/lib.rs b/oodle_loader/src/lib.rs index 7fe5f89..835a618 100644 --- a/oodle_loader/src/lib.rs +++ b/oodle_loader/src/lib.rs @@ -127,7 +127,7 @@ fn url() -> String { #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("hash mismatch expected: {expected} got {found}")] + #[error("Oodle lib hash mismatch expected: {expected} got {found}")] HashMismatch { expected: String, found: String }, #[error("Oodle compression failed")] CompressionFailed, @@ -137,7 +137,7 @@ pub enum Error { Io(#[from] std::io::Error), #[error("ureq error {0:?}")] Ureq(Box), - #[error("libloading error {0:?}")] + #[error("Oodle libloading error {0:?}")] LibLoading(#[from] libloading::Error), } impl From for Error { @@ -173,7 +173,8 @@ fn fetch_oodle() -> Result { check_hash(&buffer)?; std::fs::write(&oodle_path, buffer)?; } - check_hash(&std::fs::read(&oodle_path)?)?; + // don't check existing file to allow user to substitute other versions + // check_hash(&std::fs::read(&oodle_path)?)?; Ok(oodle_path) } From 9090a6984d8681271be4ffbc1c204a52f5575780 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Sat, 18 Jan 2025 16:09:50 -0600 Subject: [PATCH 07/22] Make parallel closure FnOnce --- repak/src/pak.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/repak/src/pak.rs b/repak/src/pak.rs index 043453f..71b3973 100644 --- a/repak/src/pak.rs +++ b/repak/src/pak.rs @@ -288,9 +288,9 @@ impl PakWriter { Ok(()) } - pub fn parallel(&mut self, mut f: F) -> Result<&mut Self, E> + pub fn parallel(&mut self, f: F) -> Result<&mut Self, E> where - F: Send + Sync + FnMut(&mut ParallelPakWriter) -> Result<(), E>, + F: Send + 
Sync + FnOnce(&mut ParallelPakWriter) -> Result<(), E>, E: From + Send, { { From 9764808fb30e588447e0d8fc278b6c14f480cf30 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Sat, 18 Jan 2025 20:13:49 -0600 Subject: [PATCH 08/22] Add LZ4 compression --- Cargo.lock | 26 ++++++++++++++ repak/Cargo.toml | 5 +-- repak/src/data.rs | 1 + repak/src/entry.rs | 84 +++++++++++++++++++++------------------------- repak/src/lib.rs | 1 + 5 files changed, 70 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f6995a9..8e6159a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -668,6 +668,15 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash", +] + [[package]] name = "memchr" version = "2.7.4" @@ -843,6 +852,7 @@ dependencies = [ "byteorder", "flate2", "hex", + "lz4_flex", "oodle_loader", "pariter", "paste", @@ -1016,6 +1026,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -1122,6 +1138,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + [[package]] name = "typenum" version = "1.17.0" diff --git a/repak/Cargo.toml b/repak/Cargo.toml index bb3fcb9..cdb7adf 100644 --- a/repak/Cargo.toml +++ b/repak/Cargo.toml @@ -9,8 +9,8 @@ keywords.workspace = true [features] default = ["compression", "encryption"] -compression = ["dep:flate2", "dep:zstd"] -oodle = ["dep:oodle_loader"] +compression = ["dep:flate2", "dep:zstd", "dep:lz4_flex"] +oodle = ["dep:oodle_loader", "compression"] encryption = ["dep:aes"] [dependencies] @@ -18,6 +18,7 @@ byteorder = "1.5" aes = { workspace = true, optional = true } flate2 = { version = "1.0", optional = true } zstd = { version = "0.13", optional = true } +lz4_flex = { version = "0.11.3", optional = true } oodle_loader = { path = "../oodle_loader", optional = true} thiserror = "2.0" sha1 = { workspace = true } diff --git a/repak/src/data.rs b/repak/src/data.rs index 7832db7..42951d5 100644 --- a/repak/src/data.rs +++ b/repak/src/data.rs @@ -174,6 +174,7 @@ fn compress(compression: Compression, data: &[u8]) -> Result> { compress.finish()? 
} Compression::Zstd => zstd::stream::encode_all(data, 0)?, + Compression::LZ4 => lz4_flex::block::compress(data), Compression::Oodle => { #[cfg(not(feature = "oodle"))] return Err(super::Error::Oodle); diff --git a/repak/src/entry.rs b/repak/src/entry.rs index 3c9fb76..abac48c 100644 --- a/repak/src/entry.rs +++ b/repak/src/entry.rs @@ -376,7 +376,7 @@ impl Entry { } } - #[cfg(any(feature = "compression", feature = "oodle"))] + #[cfg(feature = "compression")] let ranges = { let offset = |index: u64| -> usize { (match version.version_major() >= VersionMajor::RelativeChunkOffsets { @@ -406,52 +406,46 @@ impl Entry { match self.compression_slot.and_then(|c| compression[c as usize]) { None => buf.write_all(&data)?, - #[cfg(feature = "compression")] - Some(Compression::Zlib) => decompress!(flate2::read::ZlibDecoder<&[u8]>), - #[cfg(feature = "compression")] - Some(Compression::Gzip) => decompress!(flate2::read::GzDecoder<&[u8]>), - #[cfg(feature = "compression")] - Some(Compression::Zstd) => { - for range in ranges { - io::copy(&mut zstd::stream::read::Decoder::new(&data[range])?, buf)?; - } - } - #[cfg(feature = "oodle")] - Some(Compression::Oodle) => { - let mut decompressed = vec![0; self.uncompressed as usize]; - - let mut compress_offset = 0; - let mut decompress_offset = 0; - let block_count = ranges.len(); - for range in ranges { - let decomp = if block_count == 1 { - self.uncompressed as usize - } else { - (self.compression_block_size as usize) - .min(self.uncompressed as usize - compress_offset) - }; - let buffer = &mut data[range]; - let out = oodle_loader::oodle()?.decompress( - buffer, - &mut decompressed[decompress_offset..decompress_offset + decomp], - ); - if out == 0 { - return Err(super::Error::DecompressionFailed(Compression::Oodle)); - } - compress_offset += self.compression_block_size as usize; - decompress_offset += out as usize; - } - - debug_assert_eq!( - decompress_offset, self.uncompressed as usize, - "Oodle decompression length mismatch" - ); - buf.write_all(&decompressed)?; - } - #[cfg(not(feature = "oodle"))] - Some(Compression::Oodle) => return Err(super::Error::Oodle), #[cfg(not(feature = "compression"))] _ => return Err(super::Error::Compression), + #[cfg(feature = "compression")] + Some(comp) => match comp { + Compression::Zlib => decompress!(flate2::read::ZlibDecoder<&[u8]>), + Compression::Gzip => decompress!(flate2::read::GzDecoder<&[u8]>), + Compression::Zstd => { + for range in ranges { + io::copy(&mut zstd::stream::read::Decoder::new(&data[range])?, buf)?; + } + } + Compression::LZ4 => { + let mut decompressed = vec![0; self.uncompressed as usize]; + for (decomp_chunk, comp_range) in decompressed + .chunks_mut(self.compression_block_size as usize) + .zip(ranges) + { + lz4_flex::block::decompress_into(&data[comp_range], decomp_chunk) + .map_err(|_| Error::DecompressionFailed(Compression::LZ4))?; + } + buf.write_all(&decompressed)?; + } + #[cfg(feature = "oodle")] + Compression::Oodle => { + let mut decompressed = vec![0; self.uncompressed as usize]; + for (decomp_chunk, comp_range) in decompressed + .chunks_mut(self.compression_block_size as usize) + .zip(ranges) + { + let out = + oodle_loader::oodle()?.decompress(&data[comp_range], decomp_chunk); + if out == 0 { + return Err(Error::DecompressionFailed(Compression::Oodle)); + } + } + buf.write_all(&decompressed)?; + } + #[cfg(not(feature = "oodle"))] + Compression::Oodle => return Err(super::Error::Oodle), + }, } buf.flush()?; Ok(()) diff --git a/repak/src/lib.rs b/repak/src/lib.rs index 643381c..6445a97 
100644 --- a/repak/src/lib.rs +++ b/repak/src/lib.rs @@ -119,6 +119,7 @@ pub enum Compression { Gzip, Oodle, Zstd, + LZ4, } #[allow(clippy::large_enum_variant)] From 13c1c269a4d1514230b7ffb5b3e7b95c9eaf5e86 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Sat, 18 Jan 2025 20:16:02 -0600 Subject: [PATCH 09/22] Revert "Add compression block size assert to ensure pak version compatibility" --- repak/src/entry.rs | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/repak/src/entry.rs b/repak/src/entry.rs index abac48c..fd7d7dd 100644 --- a/repak/src/entry.rs +++ b/repak/src/entry.rs @@ -275,19 +275,10 @@ impl Entry { } pub fn write_encoded(&self, writer: &mut W) -> Result<(), super::Error> { - let compression_block_size = (self.compression_block_size >> 11) & 0x3f; - if (compression_block_size << 11) != self.compression_block_size - || compression_block_size == 0x3f - { - // https://github.com/EpicGames/UnrealEngine/commit/3aad0ff7976be1073005dca2c1282af548b45d89 - panic!( - "Unsupported compression block size: {}. Block size must fit into flags field or it may cause unreadable paks for earlier Unreal Engine versions.", - self.compression_block_size - ); + let mut compression_block_size = (self.compression_block_size >> 11) & 0x3f; + if (compression_block_size << 11) != self.compression_block_size { + compression_block_size = 0x3f; } - //if (compression_block_size << 11) != self.compression_block_size { - // compression_block_size = 0x3f; - //} let compression_blocks_count = if self.compression_slot.is_some() { self.blocks.as_ref().unwrap().len() as u32 } else { @@ -307,9 +298,9 @@ impl Entry { writer.write_u32::(flags)?; - //if compression_block_size == 0x3f { - // writer.write_u32::(self.compression_block_size)?; - //} + if compression_block_size == 0x3f { + writer.write_u32::(self.compression_block_size)?; + } if is_offset_32_bit_safe { writer.write_u32::(self.offset as u32)?; From d8b3d2f089bae82a89e6a1f24540221564270e90 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Sat, 18 Jan 2025 20:21:56 -0600 Subject: [PATCH 10/22] Don't assert compression block size as it breaks existing tests --- repak/src/data.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/repak/src/data.rs b/repak/src/data.rs index 42951d5..42095c0 100644 --- a/repak/src/data.rs +++ b/repak/src/data.rs @@ -124,6 +124,8 @@ pub(crate) fn build_partial_entry( } #[cfg(feature = "compression")] Some(compression) => { + // https://github.com/EpicGames/UnrealEngine/commit/3aad0ff7976be1073005dca2c1282af548b45d89 + // Block size must fit into flags field or it may cause unreadable paks for earlier Unreal Engine versions compression_block_size = 0x10000; let mut compressed_size = 0; let mut blocks = vec![]; From 3b78c00527f00941c654ec35da5bb953c1a4d829 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Sat, 18 Jan 2025 20:32:20 -0600 Subject: [PATCH 11/22] Allow disabling compression per file --- repak/src/pak.rs | 29 +++++++++++++++++++++-------- repak/tests/test.rs | 2 +- repak_cli/src/main.rs | 2 +- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/repak/src/pak.rs b/repak/src/pak.rs index 71b3973..55bff0b 100644 --- a/repak/src/pak.rs +++ b/repak/src/pak.rs @@ -89,7 +89,7 @@ pub struct PakWriter { } pub struct ParallelPakWriter { - tx: std::sync::mpsc::SyncSender<(String, Arc>)>, + tx: std::sync::mpsc::SyncSender<(String, bool, Arc>)>, } #[derive(Debug)] @@ -273,14 +273,23 @@ impl PakWriter { self.writer } - pub fn write_file(&mut self, path: &str, data: impl AsRef<[u8]>) -> 
Result<(), super::Error> { + pub fn write_file( + &mut self, + path: &str, + allow_compress: bool, + data: impl AsRef<[u8]>, + ) -> Result<(), super::Error> { self.pak.index.add_entry( path, Entry::write_file( &mut self.writer, self.pak.version, &mut self.pak.compression, - &self.allowed_compression, + if allow_compress { + &self.allowed_compression + } else { + &[] + }, data.as_ref(), )?, ); @@ -308,9 +317,13 @@ impl PakWriter { .into_iter() .parallel_map_scoped( scope, - |(path, data): (String, Arc>)| -> Result<_, Error> { - let partial_entry = - build_partial_entry(&self.allowed_compression, &data)?; + |(path, allow_compress, data): (String, bool, Arc>)| -> Result<_, Error> { + let allowed_compression = if allow_compress { + self.allowed_compression.as_slice() + } else { + &[] + }; + let partial_entry = build_partial_entry(allowed_compression, &data)?; let data = partial_entry.blocks.is_empty().then(|| Arc::new(data)); Ok((path, data, partial_entry)) }, @@ -363,8 +376,8 @@ impl PakWriter { } impl ParallelPakWriter { - pub fn write_file(&mut self, path: String, data: Vec) -> Result<(), Error> { - self.tx.send((path, Arc::new(data))).unwrap(); + pub fn write_file(&mut self, path: String, compress: bool, data: Vec) -> Result<(), Error> { + self.tx.send((path, compress, Arc::new(data))).unwrap(); Ok(()) } } diff --git a/repak/tests/test.rs b/repak/tests/test.rs index 5c63f8c..e951c02 100644 --- a/repak/tests/test.rs +++ b/repak/tests/test.rs @@ -185,7 +185,7 @@ fn test_write(_version: repak::Version, _file_name: &str, bytes: &[u8]) { for path in pak_reader.files() { let data = pak_reader.get(&path, &mut reader).unwrap(); - pak_writer.write_file(&path, data).unwrap(); + pak_writer.write_file(&path, false, data).unwrap(); } assert!(pak_writer.write_index().unwrap().into_inner() == reader.into_inner()); diff --git a/repak_cli/src/main.rs b/repak_cli/src/main.rs index b2ae887..e042868 100644 --- a/repak_cli/src/main.rs +++ b/repak_cli/src/main.rs @@ -508,7 +508,7 @@ fn pack(args: ActionPack) -> Result<(), repak::Error> { if args.verbose { log.println(format!("packing {}", &rel)); } - writer.write_file(rel.to_string(), std::fs::read(p)?)?; + writer.write_file(rel.to_string(), true, std::fs::read(p)?)?; } Ok(()) })?; From fb52732b64adf59ed14ba9955af9aad756b40aa7 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Sun, 19 Jan 2025 17:07:22 -0600 Subject: [PATCH 12/22] Don't require mut self for ParallelPakWriter::write_file --- repak/src/pak.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repak/src/pak.rs b/repak/src/pak.rs index 55bff0b..e14fba2 100644 --- a/repak/src/pak.rs +++ b/repak/src/pak.rs @@ -376,7 +376,7 @@ impl PakWriter { } impl ParallelPakWriter { - pub fn write_file(&mut self, path: String, compress: bool, data: Vec) -> Result<(), Error> { + pub fn write_file(&self, path: String, compress: bool, data: Vec) -> Result<(), Error> { self.tx.send((path, compress, Arc::new(data))).unwrap(); Ok(()) } From bdeb0df8c79939c6bdff9c65271eb5db51f68543 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Mon, 20 Jan 2025 02:12:48 -0600 Subject: [PATCH 13/22] Add oodle mac platform --- oodle_loader/src/lib.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/oodle_loader/src/lib.rs b/oodle_loader/src/lib.rs index 835a618..de027d0 100644 --- a/oodle_loader/src/lib.rs +++ b/oodle_loader/src/lib.rs @@ -104,13 +104,20 @@ struct OodlePlatform { hash: &'static str, } -#[cfg(unix)] +#[cfg(target_os = "linux")] static OODLE_PLATFORM: OodlePlatform = OodlePlatform { 
path: "linux/lib", name: "liboo2corelinux64.so.9", hash: "ed7e98f70be1254a80644efd3ae442ff61f854a2fe9debb0b978b95289884e9c", }; +#[cfg(target_os = "macos")] +static OODLE_PLATFORM: OodlePlatform = OodlePlatform { + path: "mac/lib", + name: "liboo2coremac64.2.9.10.dylib", + hash: "b09af35f6b84a61e2b6488495c7927e1cef789b969128fa1c845e51a475ec501", +}; + #[cfg(windows)] static OODLE_PLATFORM: OodlePlatform = OodlePlatform { path: "win/redist", From 194e800270a993917cbd5806f487e2f99f18e559 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Mon, 20 Jan 2025 17:39:58 -0600 Subject: [PATCH 14/22] Do not require moving data into parallel writer --- repak/src/data.rs | 89 +++++++++++++++++++---------- repak/src/entry.rs | 8 +-- repak/src/pak.rs | 135 +++++++++++++++++++++----------------------- repak/tests/test.rs | 27 +++++++++ 4 files changed, 150 insertions(+), 109 deletions(-) diff --git a/repak/src/data.rs b/repak/src/data.rs index 42095c0..c1390fa 100644 --- a/repak/src/data.rs +++ b/repak/src/data.rs @@ -1,3 +1,5 @@ +use std::io::Write; + use crate::{ entry::{Block, Entry}, Compression, Error, Hash, Version, VersionMajor, @@ -5,17 +7,21 @@ use crate::{ type Result = std::result::Result; -pub(crate) struct PartialEntry { +pub(crate) struct PartialEntry> { compression: Option, compressed_size: u64, uncompressed_size: u64, compression_block_size: u32, - pub(crate) blocks: Vec, + data: PartialEntryData, hash: Hash, } pub(crate) struct PartialBlock { uncompressed_size: usize, - pub(crate) data: Vec, + data: Vec, +} +pub(crate) enum PartialEntryData { + Slice(D), + Blocks(Vec), } #[cfg(feature = "compression")] @@ -55,7 +61,7 @@ fn get_compression_slot( } as u32) } -impl PartialEntry { +impl> PartialEntry { pub(crate) fn build_entry( &self, version: Version, @@ -70,25 +76,30 @@ impl PartialEntry { #[cfg(not(feature = "compression"))] let compression_slot = None; - let blocks = (!self.blocks.is_empty()).then(|| { - let entry_size = - Entry::get_serialized_size(version, compression_slot, self.blocks.len() as u32); + let blocks = match &self.data { + PartialEntryData::Slice(_) => None, + PartialEntryData::Blocks(blocks) => { + let entry_size = + Entry::get_serialized_size(version, compression_slot, blocks.len() as u32); - let mut offset = entry_size; - if version.version_major() < VersionMajor::RelativeChunkOffsets { - offset += file_offset; - }; + let mut offset = entry_size; + if version.version_major() < VersionMajor::RelativeChunkOffsets { + offset += file_offset; + }; - self.blocks - .iter() - .map(|block| { - let start = offset; - offset += block.data.len() as u64; - let end = offset; - Block { start, end } - }) - .collect() - }); + Some( + blocks + .iter() + .map(|block| { + let start = offset; + offset += block.data.len() as u64; + let end = offset; + Block { start, end } + }) + .collect(), + ) + } + }; Ok(Entry { offset: file_offset, @@ -102,22 +113,38 @@ impl PartialEntry { compression_block_size: self.compression_block_size, }) } + pub(crate) fn write_data(&self, stream: &mut S) -> Result<()> { + match &self.data { + PartialEntryData::Slice(data) => { + stream.write_all(data.as_ref())?; + } + PartialEntryData::Blocks(blocks) => { + for block in blocks { + stream.write_all(&block.data)?; + } + } + } + Ok(()) + } } -pub(crate) fn build_partial_entry( +pub(crate) fn build_partial_entry( allowed_compression: &[Compression], - data: &[u8], -) -> Result { + data: D, +) -> Result> +where + D: AsRef<[u8]>, +{ // TODO hash needs to be post-compression/encryption use sha1::{Digest, Sha1}; let mut 
hasher = Sha1::new(); // TODO possibly select best compression based on some criteria instead of picking first let compression = allowed_compression.first().cloned(); - let uncompressed_size = data.len() as u64; + let uncompressed_size = data.as_ref().len() as u64; let compression_block_size; - let (blocks, compressed_size) = match compression { + let (data, compressed_size) = match compression { #[cfg(not(feature = "compression"))] Some(_) => { unreachable!("should not be able to reach this point without compression feature") @@ -129,7 +156,7 @@ pub(crate) fn build_partial_entry( compression_block_size = 0x10000; let mut compressed_size = 0; let mut blocks = vec![]; - for chunk in data.chunks(compression_block_size as usize) { + for chunk in data.as_ref().chunks(compression_block_size as usize) { let data = compress(compression, chunk)?; compressed_size += data.len() as u64; hasher.update(&data); @@ -139,12 +166,12 @@ pub(crate) fn build_partial_entry( }) } - (blocks, compressed_size) + (PartialEntryData::Blocks(blocks), compressed_size) } None => { compression_block_size = 0; - hasher.update(data); - (vec![], uncompressed_size) + hasher.update(data.as_ref()); + (PartialEntryData::Slice(data), uncompressed_size) } }; @@ -153,7 +180,7 @@ pub(crate) fn build_partial_entry( compressed_size, uncompressed_size, compression_block_size, - blocks, + data, hash: Hash(hasher.finalize().into()), }) } diff --git a/repak/src/entry.rs b/repak/src/entry.rs index fd7d7dd..1a12469 100644 --- a/repak/src/entry.rs +++ b/repak/src/entry.rs @@ -109,13 +109,7 @@ impl Entry { let stream_position = writer.stream_position()?; let entry = partial_entry.build_entry(version, compression_slots, stream_position)?; entry.write(writer, version, crate::entry::EntryLocation::Data)?; - if partial_entry.blocks.is_empty() { - writer.write_all(data)?; - } else { - for block in partial_entry.blocks { - writer.write_all(&block.data)?; - } - } + partial_entry.write_data(writer)?; Ok(entry) } diff --git a/repak/src/pak.rs b/repak/src/pak.rs index e14fba2..030e8a9 100644 --- a/repak/src/pak.rs +++ b/repak/src/pak.rs @@ -7,7 +7,6 @@ use super::{Version, VersionMajor}; use byteorder::{ReadBytesExt, WriteBytesExt, LE}; use std::collections::BTreeMap; use std::io::{self, Read, Seek, Write}; -use std::sync::Arc; #[derive(Default, Clone, Copy)] pub(crate) struct Hash(pub(crate) [u8; 20]); @@ -88,10 +87,6 @@ pub struct PakWriter { allowed_compression: Vec, } -pub struct ParallelPakWriter { - tx: std::sync::mpsc::SyncSender<(String, bool, Arc>)>, -} - #[derive(Debug)] pub(crate) struct Pak { version: Version, @@ -147,8 +142,8 @@ impl Index { self.entries } - fn add_entry(&mut self, path: &str, entry: super::entry::Entry) { - self.entries.insert(path.to_string(), entry); + fn add_entry(&mut self, path: String, entry: super::entry::Entry) { + self.entries.insert(path, entry); } } @@ -280,7 +275,7 @@ impl PakWriter { data: impl AsRef<[u8]>, ) -> Result<(), super::Error> { self.pak.index.add_entry( - path, + path.to_string(), Entry::write_file( &mut self.writer, self.pak.version, @@ -297,75 +292,56 @@ impl PakWriter { Ok(()) } - pub fn parallel(&mut self, f: F) -> Result<&mut Self, E> + pub fn parallel<'scope, F, E>(&mut self, f: F) -> Result<&mut Self, E> where - F: Send + Sync + FnOnce(&mut ParallelPakWriter) -> Result<(), E>, + F: Send + Sync + FnOnce(&mut ParallelPakWriter<'scope>) -> Result<(), E>, E: From + Send, { - { - use pariter::IteratorExt as _; + use pariter::IteratorExt as _; + let allowed_compression = 
self.allowed_compression.as_slice(); + pariter::scope(|scope: &pariter::Scope<'_>| -> Result<(), E> { let (tx, rx) = std::sync::mpsc::sync_channel(0); - pariter::scope(|scope| -> Result<(), E> { - let handle = scope.spawn(|_| -> Result<(), E> { - f(&mut ParallelPakWriter { tx })?; + let handle = scope.spawn(|_| f(&mut ParallelPakWriter { tx })); + + let result = rx + .into_iter() + .parallel_map_scoped(scope, |(path, compress, data)| -> Result<_, Error> { + let compression = compress.then_some(allowed_compression).unwrap_or_default(); + let partial_entry = build_partial_entry(compression, data)?; + Ok((path, partial_entry)) + }) + .try_for_each(|message| -> Result<(), Error> { + let stream_position = self.writer.stream_position()?; + let (path, partial_entry) = message?; + + let entry = partial_entry.build_entry( + self.pak.version, + &mut self.pak.compression, + stream_position, + )?; + + entry.write( + &mut self.writer, + self.pak.version, + crate::entry::EntryLocation::Data, + )?; + + self.pak.index.add_entry(path, entry); + partial_entry.write_data(&mut self.writer)?; Ok(()) }); - let result = rx - .into_iter() - .parallel_map_scoped( - scope, - |(path, allow_compress, data): (String, bool, Arc>)| -> Result<_, Error> { - let allowed_compression = if allow_compress { - self.allowed_compression.as_slice() - } else { - &[] - }; - let partial_entry = build_partial_entry(allowed_compression, &data)?; - let data = partial_entry.blocks.is_empty().then(|| Arc::new(data)); - Ok((path, data, partial_entry)) - }, - ) - .try_for_each(|message| -> Result<(), Error> { - let stream_position = self.writer.stream_position()?; - let (path, data, partial_entry) = message?; - - let entry = partial_entry.build_entry( - self.pak.version, - &mut self.pak.compression, - stream_position, - )?; - - entry.write( - &mut self.writer, - self.pak.version, - crate::entry::EntryLocation::Data, - )?; - - self.pak.index.add_entry(&path, entry); - - if let Some(data) = data { - self.writer.write_all(&data)?; - } else { - for block in partial_entry.blocks { - self.writer.write_all(&block.data)?; - } - } - Ok(()) - }); - - if let Err(err) = handle.join().unwrap() { - Err(err) // prioritize error from user code - } else if let Err(err) = result { - Err(err.into()) // user code was successful, check pak writer error - } else { - Ok(()) // neither returned error so return success - } - }) - .unwrap()?; - } + if let Err(err) = handle.join().unwrap() { + Err(err) // prioritize error from user code + } else if let Err(err) = result { + Err(err.into()) // user code was successful, check pak writer error + } else { + Ok(()) // neither returned error so return success + } + }) + .unwrap()?; Ok(self) } @@ -375,13 +351,30 @@ impl PakWriter { } } -impl ParallelPakWriter { - pub fn write_file(&self, path: String, compress: bool, data: Vec) -> Result<(), Error> { - self.tx.send((path, compress, Arc::new(data))).unwrap(); +pub struct ParallelPakWriter<'scope> { + tx: std::sync::mpsc::SyncSender<(String, bool, Data<'scope>)>, +} +impl<'scope> ParallelPakWriter<'scope> { + pub fn write_file + Send + Sync + 'scope>( + &self, + path: String, + compress: bool, + data: D, + ) -> Result<(), Error> { + self.tx + .send((path, compress, Data(Box::new(data)))) + .unwrap(); Ok(()) } } +struct Data<'d>(Box + Send + Sync + 'd>); +impl AsRef<[u8]> for Data<'_> { + fn as_ref(&self) -> &[u8] { + self.0.as_ref().as_ref() + } +} + impl Pak { fn read( reader: &mut R, diff --git a/repak/tests/test.rs b/repak/tests/test.rs index e951c02..e817fdd 100644 --- 
a/repak/tests/test.rs +++ b/repak/tests/test.rs @@ -88,6 +88,33 @@ mod test { } } +#[test] +fn test_parallel_writer() -> Result<(), repak::Error> { + let mut cur = Cursor::new(vec![]); + let mut writer = repak::PakBuilder::new().writer( + &mut cur, + repak::Version::V11, + "../../../".to_string(), + Some(0x12345678), + ); + + let outside_scope1 = vec![1, 2, 3]; + let outside_scope2 = vec![4, 5, 6]; + + writer.parallel(|writer| -> Result<(), repak::Error> { + let inside_scope = vec![7, 8, 9]; + + writer.write_file("pass/takes/ownership".to_string(), true, outside_scope1)?; + writer.write_file("pass/outlives/scope".to_string(), true, &outside_scope2)?; + + writer.write_file("pass/takes/ownership".to_string(), true, inside_scope)?; + // writer.write_file("fail/doesnt/outlive/scope".to_string(), true, &inside_scope)?; + Ok(()) + })?; + + Ok(()) +} + static AES_KEY: &str = "lNJbw660IOC+kU7cnVQ1oeqrXyhk4J6UAZrCBbcnp94="; fn test_read(version: repak::Version, _file_name: &str, bytes: &[u8]) { From 5cfc8f52bd51f2f9129e813da351d0ac6c9bec34 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Mon, 20 Jan 2025 18:49:45 -0600 Subject: [PATCH 15/22] ParallelPakWriter can be moved into closure --- repak/src/pak.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/repak/src/pak.rs b/repak/src/pak.rs index 030e8a9..d39df21 100644 --- a/repak/src/pak.rs +++ b/repak/src/pak.rs @@ -294,7 +294,7 @@ impl PakWriter { pub fn parallel<'scope, F, E>(&mut self, f: F) -> Result<&mut Self, E> where - F: Send + Sync + FnOnce(&mut ParallelPakWriter<'scope>) -> Result<(), E>, + F: Send + Sync + FnOnce(ParallelPakWriter<'scope>) -> Result<(), E>, E: From + Send, { use pariter::IteratorExt as _; @@ -303,7 +303,7 @@ impl PakWriter { pariter::scope(|scope: &pariter::Scope<'_>| -> Result<(), E> { let (tx, rx) = std::sync::mpsc::sync_channel(0); - let handle = scope.spawn(|_| f(&mut ParallelPakWriter { tx })); + let handle = scope.spawn(|_| f(ParallelPakWriter { tx })); let result = rx .into_iter() From b2bc86683d9a4820a22d741462cbe9083d8b593d Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Tue, 21 Jan 2025 12:29:16 -0600 Subject: [PATCH 16/22] Allow compression to be driven by user --- repak/src/data.rs | 2 +- repak/src/lib.rs | 2 +- repak/src/pak.rs | 77 +++++++++++++++++++++++++++++++++-------------- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/repak/src/data.rs b/repak/src/data.rs index c1390fa..2705027 100644 --- a/repak/src/data.rs +++ b/repak/src/data.rs @@ -7,7 +7,7 @@ use crate::{ type Result = std::result::Result; -pub(crate) struct PartialEntry> { +pub struct PartialEntry> { compression: Option, compressed_size: u64, uncompressed_size: u64, diff --git a/repak/src/lib.rs b/repak/src/lib.rs index 6445a97..9453db6 100644 --- a/repak/src/lib.rs +++ b/repak/src/lib.rs @@ -6,7 +6,7 @@ mod ext; mod footer; mod pak; -pub use {error::*, pak::*}; +pub use {data::PartialEntry, error::*, pak::*}; pub const MAGIC: u32 = 0x5A6F12E1; diff --git a/repak/src/pak.rs b/repak/src/pak.rs index d39df21..0d20b05 100644 --- a/repak/src/pak.rs +++ b/repak/src/pak.rs @@ -1,6 +1,6 @@ use crate::data::build_partial_entry; use crate::entry::Entry; -use crate::{Compression, Error}; +use crate::{Compression, Error, PartialEntry}; use super::ext::{ReadExt, WriteExt}; use super::{Version, VersionMajor}; @@ -292,6 +292,37 @@ impl PakWriter { Ok(()) } + pub fn entry_builder(&self) -> EntryBuilder { + EntryBuilder { + allowed_compression: self.allowed_compression.clone(), + } + } + + pub fn write_entry>( + &mut 
self, + path: String, + partial_entry: PartialEntry, + ) -> Result<(), Error> { + let stream_position = self.writer.stream_position()?; + + let entry = partial_entry.build_entry( + self.pak.version, + &mut self.pak.compression, + stream_position, + )?; + + entry.write( + &mut self.writer, + self.pak.version, + crate::entry::EntryLocation::Data, + )?; + + self.pak.index.add_entry(path, entry); + partial_entry.write_data(&mut self.writer)?; + + Ok(()) + } + pub fn parallel<'scope, F, E>(&mut self, f: F) -> Result<&mut Self, E> where F: Send + Sync + FnOnce(ParallelPakWriter<'scope>) -> Result<(), E>, @@ -299,38 +330,20 @@ impl PakWriter { { use pariter::IteratorExt as _; - let allowed_compression = self.allowed_compression.as_slice(); pariter::scope(|scope: &pariter::Scope<'_>| -> Result<(), E> { let (tx, rx) = std::sync::mpsc::sync_channel(0); let handle = scope.spawn(|_| f(ParallelPakWriter { tx })); + let entry_builder = self.entry_builder(); let result = rx .into_iter() - .parallel_map_scoped(scope, |(path, compress, data)| -> Result<_, Error> { - let compression = compress.then_some(allowed_compression).unwrap_or_default(); - let partial_entry = build_partial_entry(compression, data)?; - Ok((path, partial_entry)) + .parallel_map_scoped(scope, move |(path, compress, data)| -> Result<_, Error> { + Ok((path, entry_builder.build_entry(compress, data)?)) }) .try_for_each(|message| -> Result<(), Error> { - let stream_position = self.writer.stream_position()?; let (path, partial_entry) = message?; - - let entry = partial_entry.build_entry( - self.pak.version, - &mut self.pak.compression, - stream_position, - )?; - - entry.write( - &mut self.writer, - self.pak.version, - crate::entry::EntryLocation::Data, - )?; - - self.pak.index.add_entry(path, entry); - partial_entry.write_data(&mut self.writer)?; - Ok(()) + self.write_entry(path, partial_entry) }); if let Err(err) = handle.join().unwrap() { @@ -375,6 +388,24 @@ impl AsRef<[u8]> for Data<'_> { } } +#[derive(Clone)] +pub struct EntryBuilder { + allowed_compression: Vec, +} +impl EntryBuilder { + /// Builds an entry in memory (compressed if requested) which must be written out later + pub fn build_entry + Send + Sync>( + &self, + compress: bool, + data: D, + ) -> Result, Error> { + let compression = compress + .then_some(self.allowed_compression.as_slice()) + .unwrap_or_default(); + build_partial_entry(compression, data) + } +} + impl Pak { fn read( reader: &mut R, From a33d5ef963638ea7c59562745a79c805b43ec09f Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Tue, 21 Jan 2025 13:06:59 -0600 Subject: [PATCH 17/22] Remove test file --- oodle_loader/comp.bin | 6 ------ oodle_loader/src/lib.rs | 1 - 2 files changed, 7 deletions(-) delete mode 100644 oodle_loader/comp.bin diff --git a/oodle_loader/comp.bin b/oodle_loader/comp.bin deleted file mode 100644 index d43c5b4..0000000 --- a/oodle_loader/comp.bin +++ /dev/null @@ -1,6 +0,0 @@ -Ì -In tools and when compressing large inputs in one call, consider using - $OodleXLZ_Compress_AsyncAndWait (in the Oodle2 Ext lib) instead to get parallelism. Alternatively, - chop the data into small fixed size chunks (we recommend at least 256KiB, i.e. 262144 bytes) and - call compress on each of them, which decreases compression ratio but makes for trivial parallel - compression and decompression. 
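The advice in the deleted artifact above mirrors the scheme repak itself uses: split the input into fixed-size blocks and compress each block independently, trading some compression ratio for trivially parallel compression and decompression. A minimal sketch of that round trip, assuming the compress/decompress signatures this series gives oodle_loader (illustrative only, not part of the patch):

    // Hedged sketch: chunked Oodle round trip in the style of build_partial_entry.
    // Assumes oodle_loader::oodle() -> Result<&'static Oodle, Error>, that
    // Oodle::compress returns the compressed block as a Vec<u8>, and that
    // Oodle::decompress fills a caller-provided buffer, returning 0 on failure.
    fn roundtrip_chunked(data: &[u8]) -> Result<Vec<u8>, oodle_loader::Error> {
        const BLOCK: usize = 0x10000; // block size used by build_partial_entry here
        let oodle = oodle_loader::oodle()?;
        let mut blocks = Vec::new();
        for chunk in data.chunks(BLOCK) {
            // each block compresses independently, so this loop could run in parallel
            blocks.push(oodle.compress(
                chunk,
                oodle_loader::Compressor::Mermaid,
                oodle_loader::CompressionLevel::Normal,
            )?);
        }
        let mut out = vec![0u8; data.len()];
        for (dst, block) in out.chunks_mut(BLOCK).zip(&blocks) {
            // mirror the `out == 0` failure check used in Entry::decompress
            assert!(oodle.decompress(block, dst) != 0, "decompression failed");
        }
        Ok(out)
    }
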
\ No newline at end of file diff --git a/oodle_loader/src/lib.rs b/oodle_loader/src/lib.rs index de027d0..62662f0 100644 --- a/oodle_loader/src/lib.rs +++ b/oodle_loader/src/lib.rs @@ -315,7 +315,6 @@ mod test { ) .unwrap(); - std::fs::write("comp.bin", &buffer).unwrap(); dbg!((data.len(), buffer.len())); let mut uncomp = vec![0; data.len()]; From 1a80db37d5c74b16f9d79653f14e8608c932633b Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Tue, 21 Jan 2025 13:09:38 -0600 Subject: [PATCH 18/22] Avoid unnecessary allocation --- oodle_loader/src/lib.rs | 26 +++++++------------------- repak/src/data.rs | 16 +++++----------- 2 files changed, 12 insertions(+), 30 deletions(-) diff --git a/oodle_loader/src/lib.rs b/oodle_loader/src/lib.rs index 62662f0..e5c87f1 100644 --- a/oodle_loader/src/lib.rs +++ b/oodle_loader/src/lib.rs @@ -1,7 +1,4 @@ -use std::{ - io::{Read, Write}, - sync::OnceLock, -}; +use std::{io::Read, sync::OnceLock}; type Result = std::result::Result; @@ -192,13 +189,12 @@ pub struct Oodle { get_compressed_buffer_size_needed: oodle_lz::GetCompressedBufferSizeNeeded, } impl Oodle { - pub fn compress( + pub fn compress( &self, input: &[u8], - mut output: S, compressor: Compressor, compression_level: CompressionLevel, - ) -> Result { + ) -> Result> { unsafe { let buffer_size = self.get_compressed_buffer_size_needed(compressor, input.len()); let mut buffer = vec![0; buffer_size]; @@ -219,11 +215,9 @@ impl Oodle { if len == -1 { return Err(Error::CompressionFailed); } - let len = len as usize; + buffer.truncate(len as usize); - output.write_all(&buffer[..len])?; - - Ok(len) + Ok(buffer) } } pub fn decompress(&self, input: &[u8], output: &mut [u8]) -> isize { @@ -305,14 +299,8 @@ mod test { call compress on each of them, which decreases compression ratio but makes for trivial parallel compression and decompression."; - let mut buffer = vec![]; - oodle - .compress( - data, - &mut buffer, - Compressor::Mermaid, - CompressionLevel::Optimal5, - ) + let buffer = oodle + .compress(data, Compressor::Mermaid, CompressionLevel::Optimal5) .unwrap(); dbg!((data.len(), buffer.len())); diff --git a/repak/src/data.rs b/repak/src/data.rs index 2705027..cb71aac 100644 --- a/repak/src/data.rs +++ b/repak/src/data.rs @@ -209,17 +209,11 @@ fn compress(compression: Compression, data: &[u8]) -> Result> { return Err(super::Error::Oodle); #[cfg(feature = "oodle")] { - let mut output = vec![]; - oodle_loader::oodle() - .unwrap() - .compress( - data.as_ref(), - &mut output, - oodle_loader::Compressor::Mermaid, - oodle_loader::CompressionLevel::Normal, - ) - .unwrap(); - output + oodle_loader::oodle().unwrap().compress( + data.as_ref(), + oodle_loader::Compressor::Mermaid, + oodle_loader::CompressionLevel::Normal, + )? 
} } }; From 29bf6e785952e3969fa3abe170b095ee7f5bf46a Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Tue, 21 Jan 2025 13:29:08 -0600 Subject: [PATCH 19/22] Remove non-functional internal ParallelPakWriter --- Cargo.lock | 59 ------------------------------------------- repak/Cargo.toml | 1 - repak/src/pak.rs | 53 -------------------------------------- repak/tests/test.rs | 27 -------------------- repak_cli/src/main.rs | 43 ++++++++++++++++++++++--------- 5 files changed, 31 insertions(+), 152 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8e6159a..bc2a637 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -232,28 +232,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -273,15 +251,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-queue" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -420,12 +389,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "hex" version = "0.4.3" @@ -692,16 +655,6 @@ dependencies = [ "adler2", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "number_prefix" version = "0.4.0" @@ -725,17 +678,6 @@ dependencies = [ "ureq", ] -[[package]] -name = "pariter" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "324a62b9e7b5f270c0acc92a2040f8028bb643f959f9c068f11a7864f327e3d9" -dependencies = [ - "crossbeam", - "crossbeam-channel", - "num_cpus", -] - [[package]] name = "paste" version = "1.0.15" @@ -854,7 +796,6 @@ dependencies = [ "hex", "lz4_flex", "oodle_loader", - "pariter", "paste", "sha1", "strum", diff --git a/repak/Cargo.toml b/repak/Cargo.toml index cdb7adf..cdcdfac 100644 --- a/repak/Cargo.toml +++ b/repak/Cargo.toml @@ -23,7 +23,6 @@ oodle_loader = { path = "../oodle_loader", optional = true} thiserror = "2.0" sha1 = { workspace = true } strum = { workspace = true } -pariter = "0.5.1" hex.workspace = true [dev-dependencies] diff --git a/repak/src/pak.rs b/repak/src/pak.rs index 0d20b05..0bad50c 100644 --- a/repak/src/pak.rs +++ b/repak/src/pak.rs @@ -322,65 +322,12 @@ impl PakWriter { Ok(()) } - - pub fn parallel<'scope, F, E>(&mut self, f: F) -> Result<&mut Self, E> - where - F: Send + Sync + FnOnce(ParallelPakWriter<'scope>) 
-> Result<(), E>, - E: From + Send, - { - use pariter::IteratorExt as _; - - pariter::scope(|scope: &pariter::Scope<'_>| -> Result<(), E> { - let (tx, rx) = std::sync::mpsc::sync_channel(0); - - let handle = scope.spawn(|_| f(ParallelPakWriter { tx })); - let entry_builder = self.entry_builder(); - - let result = rx - .into_iter() - .parallel_map_scoped(scope, move |(path, compress, data)| -> Result<_, Error> { - Ok((path, entry_builder.build_entry(compress, data)?)) - }) - .try_for_each(|message| -> Result<(), Error> { - let (path, partial_entry) = message?; - self.write_entry(path, partial_entry) - }); - - if let Err(err) = handle.join().unwrap() { - Err(err) // prioritize error from user code - } else if let Err(err) = result { - Err(err.into()) // user code was successful, check pak writer error - } else { - Ok(()) // neither returned error so return success - } - }) - .unwrap()?; - Ok(self) - } - pub fn write_index(mut self) -> Result { self.pak.write(&mut self.writer, &self.key)?; Ok(self.writer) } } -pub struct ParallelPakWriter<'scope> { - tx: std::sync::mpsc::SyncSender<(String, bool, Data<'scope>)>, -} -impl<'scope> ParallelPakWriter<'scope> { - pub fn write_file + Send + Sync + 'scope>( - &self, - path: String, - compress: bool, - data: D, - ) -> Result<(), Error> { - self.tx - .send((path, compress, Data(Box::new(data)))) - .unwrap(); - Ok(()) - } -} - struct Data<'d>(Box + Send + Sync + 'd>); impl AsRef<[u8]> for Data<'_> { fn as_ref(&self) -> &[u8] { diff --git a/repak/tests/test.rs b/repak/tests/test.rs index e817fdd..e951c02 100644 --- a/repak/tests/test.rs +++ b/repak/tests/test.rs @@ -88,33 +88,6 @@ mod test { } } -#[test] -fn test_parallel_writer() -> Result<(), repak::Error> { - let mut cur = Cursor::new(vec![]); - let mut writer = repak::PakBuilder::new().writer( - &mut cur, - repak::Version::V11, - "../../../".to_string(), - Some(0x12345678), - ); - - let outside_scope1 = vec![1, 2, 3]; - let outside_scope2 = vec![4, 5, 6]; - - writer.parallel(|writer| -> Result<(), repak::Error> { - let inside_scope = vec![7, 8, 9]; - - writer.write_file("pass/takes/ownership".to_string(), true, outside_scope1)?; - writer.write_file("pass/outlives/scope".to_string(), true, &outside_scope2)?; - - writer.write_file("pass/takes/ownership".to_string(), true, inside_scope)?; - // writer.write_file("fail/doesnt/outlive/scope".to_string(), true, &inside_scope)?; - Ok(()) - })?; - - Ok(()) -} - static AES_KEY: &str = "lNJbw660IOC+kU7cnVQ1oeqrXyhk4J6UAZrCBbcnp94="; fn test_read(version: repak::Version, _file_name: &str, bytes: &[u8]) { diff --git a/repak_cli/src/main.rs b/repak_cli/src/main.rs index e042868..7b83063 100644 --- a/repak_cli/src/main.rs +++ b/repak_cli/src/main.rs @@ -487,7 +487,7 @@ fn pack(args: ActionPack) -> Result<(), repak::Error> { use indicatif::ProgressIterator; let iter = paths.iter(); - let (log, mut iter) = if !args.quiet { + let (log, iter) = if !args.quiet { let iter = iter.progress_with_style(indicatif::ProgressStyle::with_template(STYLE).unwrap()); ( @@ -498,20 +498,39 @@ fn pack(args: ActionPack) -> Result<(), repak::Error> { (Output::Stdout, itertools::Either::Right(iter)) }; let log = log.clone(); - pak.parallel(|writer| -> Result<(), repak::Error> { - for p in &mut iter { - let rel = &p - .strip_prefix(input_path) - .expect("file not in input directory") - .to_slash() - .expect("failed to convert to slash path"); - if args.verbose { - log.println(format!("packing {}", &rel)); - } - writer.write_file(rel.to_string(), true, std::fs::read(p)?)?; + + let mut 
result = None; + let result_ref = &mut result; + rayon::in_place_scope(|scope| -> Result<(), repak::Error> { + let (tx, rx) = std::sync::mpsc::sync_channel(0); + let entry_builder = pak.entry_builder(); + + scope.spawn(move |_| { + *result_ref = Some( + iter.par_bridge() + .try_for_each(|p| -> Result<(), repak::Error> { + let rel = &p + .strip_prefix(input_path) + .expect("file not in input directory") + .to_slash() + .expect("failed to convert to slash path"); + if args.verbose { + log.println(format!("packing {}", &rel)); + } + let entry = entry_builder.build_entry(true, std::fs::read(p)?)?; + + tx.send((rel.to_string(), entry)).unwrap(); + Ok(()) + }), + ); + }); + + for (path, entry) in rx { + pak.write_entry(path, entry)?; } Ok(()) })?; + result.unwrap()?; pak.write_index()?; From cff54acadb20f5c0f86c4bfaf05bb66cd6adbb21 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Wed, 22 Jan 2025 11:55:31 -0600 Subject: [PATCH 20/22] Fix decompression block size --- repak/src/entry.rs | 74 +++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/repak/src/entry.rs b/repak/src/entry.rs index 1a12469..a5fd649 100644 --- a/repak/src/entry.rs +++ b/repak/src/entry.rs @@ -394,43 +394,49 @@ impl Entry { #[cfg(not(feature = "compression"))] _ => return Err(super::Error::Compression), #[cfg(feature = "compression")] - Some(comp) => match comp { - Compression::Zlib => decompress!(flate2::read::ZlibDecoder<&[u8]>), - Compression::Gzip => decompress!(flate2::read::GzDecoder<&[u8]>), - Compression::Zstd => { - for range in ranges { - io::copy(&mut zstd::stream::read::Decoder::new(&data[range])?, buf)?; - } - } - Compression::LZ4 => { - let mut decompressed = vec![0; self.uncompressed as usize]; - for (decomp_chunk, comp_range) in decompressed - .chunks_mut(self.compression_block_size as usize) - .zip(ranges) - { - lz4_flex::block::decompress_into(&data[comp_range], decomp_chunk) - .map_err(|_| Error::DecompressionFailed(Compression::LZ4))?; - } - buf.write_all(&decompressed)?; - } - #[cfg(feature = "oodle")] - Compression::Oodle => { - let mut decompressed = vec![0; self.uncompressed as usize]; - for (decomp_chunk, comp_range) in decompressed - .chunks_mut(self.compression_block_size as usize) - .zip(ranges) - { - let out = - oodle_loader::oodle()?.decompress(&data[comp_range], decomp_chunk); - if out == 0 { - return Err(Error::DecompressionFailed(Compression::Oodle)); + Some(comp) => { + let chunk_size = if ranges.len() == 1 { + self.uncompressed as usize + } else { + self.compression_block_size as usize + }; + + match comp { + Compression::Zlib => decompress!(flate2::read::ZlibDecoder<&[u8]>), + Compression::Gzip => decompress!(flate2::read::GzDecoder<&[u8]>), + Compression::Zstd => { + for range in ranges { + io::copy(&mut zstd::stream::read::Decoder::new(&data[range])?, buf)?; } } - buf.write_all(&decompressed)?; + Compression::LZ4 => { + let mut decompressed = vec![0; self.uncompressed as usize]; + for (decomp_chunk, comp_range) in + decompressed.chunks_mut(chunk_size).zip(ranges) + { + lz4_flex::block::decompress_into(&data[comp_range], decomp_chunk) + .map_err(|_| Error::DecompressionFailed(Compression::LZ4))?; + } + buf.write_all(&decompressed)?; + } + #[cfg(feature = "oodle")] + Compression::Oodle => { + let mut decompressed = vec![0; self.uncompressed as usize]; + for (decomp_chunk, comp_range) in + decompressed.chunks_mut(chunk_size).zip(ranges) + { + let out = + oodle_loader::oodle()?.decompress(&data[comp_range], decomp_chunk); + if out == 0 
{ + return Err(Error::DecompressionFailed(Compression::Oodle)); + } + } + buf.write_all(&decompressed)?; + } + #[cfg(not(feature = "oodle"))] + Compression::Oodle => return Err(super::Error::Oodle), } - #[cfg(not(feature = "oodle"))] - Compression::Oodle => return Err(super::Error::Oodle), - }, + } } buf.flush()?; Ok(()) From aaa16b780024e5e59119340f550bbe3f46791018 Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Thu, 30 Jan 2025 19:25:46 -0600 Subject: [PATCH 21/22] Silence oodle logging --- oodle_loader/src/lib.rs | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/oodle_loader/src/lib.rs b/oodle_loader/src/lib.rs index e5c87f1..0bfbca9 100644 --- a/oodle_loader/src/lib.rs +++ b/oodle_loader/src/lib.rs @@ -90,6 +90,8 @@ mod oodle_lz { pub type GetCompressedBufferSizeNeeded = unsafe extern "system" fn(compressor: Compressor, rawSize: usize) -> usize; + + pub type SetPrintf = unsafe extern "system" fn(printf: *const ()); } static OODLE_VERSION: &str = "2.9.10"; @@ -187,8 +189,23 @@ pub struct Oodle { compress: oodle_lz::Compress, decompress: oodle_lz::Decompress, get_compressed_buffer_size_needed: oodle_lz::GetCompressedBufferSizeNeeded, + set_printf: oodle_lz::SetPrintf, } impl Oodle { + fn new(lib: libloading::Library) -> Result { + unsafe { + let res = Oodle { + compress: *lib.get(b"OodleLZ_Compress")?, + decompress: *lib.get(b"OodleLZ_Decompress")?, + get_compressed_buffer_size_needed: *lib + .get(b"OodleLZ_GetCompressedBufferSizeNeeded")?, + set_printf: *lib.get(b"OodleCore_Plugins_SetPrintf")?, + _library: lib, + }; + (res.set_printf)(std::ptr::null()); // silence oodle logging + Ok(res) + } + } pub fn compress( &self, input: &[u8], @@ -255,14 +272,7 @@ fn load_oodle() -> Result { let path = fetch_oodle()?; unsafe { let library = libloading::Library::new(path)?; - - Ok(Oodle { - compress: *library.get(b"OodleLZ_Compress")?, - decompress: *library.get(b"OodleLZ_Decompress")?, - get_compressed_buffer_size_needed: *library - .get(b"OodleLZ_GetCompressedBufferSizeNeeded")?, - _library: library, - }) + Oodle::new(library) } } From 71d06644746134bd59ed1467bae1856704d2f8bb Mon Sep 17 00:00:00 2001 From: Truman Kilen Date: Thu, 30 Jan 2025 19:29:44 -0600 Subject: [PATCH 22/22] Increase compression block size --- repak/src/data.rs | 2 +- repak/src/entry.rs | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/repak/src/data.rs b/repak/src/data.rs index cb71aac..37a3cac 100644 --- a/repak/src/data.rs +++ b/repak/src/data.rs @@ -153,7 +153,7 @@ where Some(compression) => { // https://github.com/EpicGames/UnrealEngine/commit/3aad0ff7976be1073005dca2c1282af548b45d89 // Block size must fit into flags field or it may cause unreadable paks for earlier Unreal Engine versions - compression_block_size = 0x10000; + compression_block_size = 0x3e << 11; // max possible block size let mut compressed_size = 0; let mut blocks = vec![]; for chunk in data.as_ref().chunks(compression_block_size as usize) { diff --git a/repak/src/entry.rs b/repak/src/entry.rs index a5fd649..73b514e 100644 --- a/repak/src/entry.rs +++ b/repak/src/entry.rs @@ -282,6 +282,11 @@ impl Entry { let is_uncompressed_size_32_bit_safe = self.uncompressed <= u32::MAX as u64; let is_offset_32_bit_safe = self.offset <= u32::MAX as u64; + assert!( + compression_blocks_count < 0x10_000, + "compression blocks count fits in 16 bits" + ); + let flags = (compression_block_size) | (compression_blocks_count << 6) | ((self.is_encrypted() as u32) << 22)
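Why 0x3e << 11 is the ceiling: the encoded entry packs the block size into six bits of the flags word, in units of 0x800 (2 KiB), and the value 0x3f is the escape meaning an explicit u32 size follows, which earlier Unreal Engine versions may not read back (hence the UE commit referenced earlier in this series). A hedged sketch of the packing, with the field layout inferred from the shifts visible in write_encoded (pack_flags is illustrative, not repak API, and the real flags word carries further bits, such as the 32-bit-safe offset/size markers, beyond what this hunk shows):

    // Inferred layout of the encoded-entry flags word (illustrative only):
    //   bits 0..=5   compression block size in units of 0x800 (0x3f = explicit size follows)
    //   bits 6..=21  compression block count (16 bits, hence the assert above)
    //   bit 22       encrypted flag
    fn pack_flags(compression_block_size: u32, block_count: u32, encrypted: bool) -> u32 {
        let size_field = (compression_block_size >> 11) & 0x3f;
        debug_assert_eq!(size_field << 11, compression_block_size); // must be a 2 KiB multiple
        assert!(block_count < 0x10_000, "compression blocks count fits in 16 bits");
        size_field | (block_count << 6) | ((encrypted as u32) << 22)
    }

With the old block size the field is 0x10000 >> 11 = 0x20; the new default 0x3e << 11 = 0x1F000 (124 KiB) is the largest size that still encodes directly in the six-bit field.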