From 5417650b8527357a36390f92bad994153adee5ff Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 20 Jul 2025 14:57:25 -0700 Subject: [PATCH] work on writing compressed --- rust/pspp/src/sys/raw.rs | 128 +------------ rust/pspp/src/sys/raw/records.rs | 296 +++++++++++++++++++++++-------- rust/pspp/src/sys/write.rs | 200 +++++++++++++++++---- 3 files changed, 404 insertions(+), 220 deletions(-) diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 23dd35d94b..f37c5a7faf 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -23,7 +23,6 @@ use crate::{ data::{Case, Datum, RawStr, RawString}, dictionary::{VarType, VarWidth}, endian::{Endian, Parse, ToBytes}, - format::DisplayPlainF64, identifier::{Error as IdError, Identifier}, sys::{ encoding::{default_encoding, get_encoding, Error as EncodingError}, @@ -38,7 +37,8 @@ use crate::{ RawVariableSetRecord, RawVeryLongStringsRecord, ValueLabelRecord, ValueLabelWarning, VarDisplayRecord, VariableAttributesRecord, VariableDisplayWarning, VariableRecord, VariableSetRecord, VariableSetWarning, VariableWarning, VeryLongStringWarning, - VeryLongStringsRecord, ZHeader, ZTrailer, ZlibTrailerWarning, + VeryLongStringsRecord, ZHeader, ZHeaderError, ZTrailer, ZTrailerError, + ZlibTrailerWarning, }, }, }; @@ -235,123 +235,13 @@ pub enum ErrorDetails { n_chunks: usize, }, - /// Impossible ztrailer_offset {0:#x}. - #[error("Impossible ztrailer_offset {0:#x}.")] - ImpossibleZTrailerOffset( - /// `ztrailer_offset` - u64, - ), - - /// ZLIB header's zlib_offset is {actual:#x} instead of expected - /// {expected:#x}. - #[error("ZLIB header's zlib_offset is {actual:#x} instead of expected {expected:#x}.")] - UnexpectedZHeaderOffset { - /// Actual `zlib_offset`. - actual: u64, - /// Expected `zlib_offset`. - expected: u64, - }, - - /// Invalid ZLIB trailer length {0}. - #[error("Invalid ZLIB trailer length {0}.")] - InvalidZTrailerLength( - /// ZLIB trailer length. 
- u64, - ), - - /// ZLIB trailer bias {actual} is not {} as expected from file header bias. - #[ - error( - "ZLIB trailer bias {actual} is not {} as expected from file header bias.", - DisplayPlainF64(*expected) - )] - WrongZlibTrailerBias { - /// ZLIB trailer bias read from file. - actual: i64, - /// Expected ZLIB trailer bias. - expected: f64, - }, - - /// ZLIB trailer \"zero\" field has nonzero value {0}. - #[error("ZLIB trailer \"zero\" field has nonzero value {0}.")] - WrongZlibTrailerZero( - /// Actual value that should have been zero. - u64, - ), - - /// ZLIB trailer specifies unexpected {0}-byte block size. - #[error("ZLIB trailer specifies unexpected {0}-byte block size.")] - WrongZlibTrailerBlockSize( - /// Block size read from file. - u32, - ), - - /// Block count in ZLIB trailer differs from expected block count calculated - /// from trailer length. - #[error( - "Block count {n_blocks} in ZLIB trailer differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}." - )] - BadZlibTrailerNBlocks { - /// Number of blocks. - n_blocks: u32, - /// Expected number of blocks. - expected_n_blocks: u64, - /// ZLIB trailer length in bytes. - ztrailer_len: u64, - }, + /// Error reading a [ZHeader]. + #[error("Error reading ZLIB header: {0}")] + ZHeader(#[from] ZHeaderError), - /// ZLIB block descriptor reported uncompressed data offset different from - /// expected. - #[error( - "ZLIB block descriptor {index} reported uncompressed data offset {actual:#x}, when {expected:#x} was expected." - )] - ZlibTrailerBlockWrongUncmpOfs { - /// Block descriptor index. - index: usize, - /// Actual uncompressed data offset. - actual: u64, - /// Expected uncompressed data offset. - expected: u64, - }, - - /// ZLIB block descriptor {index} reported compressed data offset - /// {actual:#x}, when {expected:#x} was expected. - #[error( - "ZLIB block descriptor {index} reported compressed data offset {actual:#x}, when {expected:#x} was expected." 
- )] - ZlibTrailerBlockWrongCmpOfs { - /// Block descriptor index. - index: usize, - /// Actual compressed data offset. - actual: u64, - /// Expected compressed data offset. - expected: u64, - }, - - /// ZLIB block descriptor {index} reports compressed size {compressed_size} - /// and uncompressed size {uncompressed_size}. - #[error( - "ZLIB block descriptor {index} reports compressed size {compressed_size} and uncompressed size {uncompressed_size}." - )] - ZlibExpansion { - /// Block descriptor index. - index: usize, - /// Compressed size. - compressed_size: u32, - /// Uncompressed size. - uncompressed_size: u32, - }, - - /// ZLIB trailer at unexpected offset. - #[error( - "ZLIB trailer is at offset {actual:#x} but {expected:#x} would be expected from block descriptors." - )] - ZlibTrailerOffsetInconsistency { - /// Expected offset. - expected: u64, - /// Actual offset. - actual: u64, - }, + /// Error reading a [ZTrailer]. + #[error("Error reading ZLIB trailer: {0}")] + ZTrailer(#[from] ZTrailerError), /// File metadata says it contains {expected} cases, but {actual} cases were read. 
#[error("File metadata says it contains {expected} cases, but {actual} cases were read.")] @@ -1350,7 +1240,7 @@ where self.0.reader.as_mut().unwrap(), self.0.header.endian, self.0.header.bias, - zheader, + &zheader.inner, &mut self.0.warn, ) { Ok(None) => { diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index feddf3fa70..d0edb810fe 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -5,7 +5,7 @@ use std::{ borrow::Cow, collections::BTreeMap, - fmt::{Debug, Formatter}, + fmt::{Debug, Display, Formatter}, io::{Cursor, ErrorKind, Read, Seek, SeekFrom}, ops::Range, str::from_utf8, @@ -18,7 +18,7 @@ use crate::{ VarWidth, }, endian::{Endian, Parse}, - format::{Format, Type}, + format::{DisplayPlainF64, Format, Type}, identifier::{Error as IdError, Identifier}, sys::{ raw::{ @@ -29,7 +29,7 @@ use crate::{ }, }; -use binrw::{BinRead, BinWrite}; +use binrw::{binrw, BinRead, BinWrite, Error as BinError}; use clap::ValueEnum; use itertools::Itertools; use thiserror::Error as ThisError; @@ -42,6 +42,7 @@ pub enum Compression { /// [ZLIB] compression. /// /// [ZLIB]: https://www.zlib.net/ + #[value(name = "zlib", help = "ZLIB space-efficient compression")] ZLib, } @@ -2368,6 +2369,12 @@ pub struct ZHeader { /// File offset to the start of the record. pub offset: u64, + /// Raw header. + pub inner: RawZHeader, +} + +#[derive(Clone, Debug, BinRead, BinWrite)] +pub struct RawZHeader { /// File offset to the ZLIB data header. 
pub zheader_offset: u64, @@ -2385,37 +2392,71 @@ impl ZHeader { R: Read + Seek, { let offset = r.stream_position()?; - let zheader_offset: u64 = endian.parse(read_bytes(r)?); - let ztrailer_offset: u64 = endian.parse(read_bytes(r)?); - let ztrailer_len: u64 = endian.parse(read_bytes(r)?); + let inner = RawZHeader::read_options(r, endian, ()).map_err(|e| Error { + offsets: Some(offset..offset + 24), + details: ZHeaderError::from(e).into(), + })?; - if zheader_offset != offset { - Err(ErrorDetails::UnexpectedZHeaderOffset { - actual: zheader_offset, + if inner.zheader_offset != offset { + Err(ZHeaderError::UnexpectedZHeaderOffset { + actual: inner.zheader_offset, expected: offset, - }) - } else if ztrailer_offset < offset { - Err(ErrorDetails::ImpossibleZTrailerOffset(ztrailer_offset)) - } else if ztrailer_len < 24 || ztrailer_len % 24 != 0 { - Err(ErrorDetails::InvalidZTrailerLength(ztrailer_len)) + } + .into()) + } else if inner.ztrailer_offset < offset { + Err(ZHeaderError::ImpossibleZTrailerOffset(inner.ztrailer_offset).into()) + } else if inner.ztrailer_len < 24 || inner.ztrailer_len % 24 != 0 { + Err(ZHeaderError::InvalidZTrailerLength(inner.ztrailer_len).into()) } else { - Ok(ZHeader { - offset, - zheader_offset, - ztrailer_offset, - ztrailer_len, - }) + Ok(ZHeader { offset, inner }) } - .map_err(|details| Error::new(Some(offset..offset + 12), details)) + .map_err(|details| Error::new(Some(offset..offset + 24), details)) } } +/// Error reading a [ZHeader]. +#[derive(ThisError, Debug)] +pub enum ZHeaderError { + #[error("{}", DisplayBinError(&.0, "ZLIB header"))] + BinError(#[from] BinError), + + /// Impossible ztrailer_offset {0:#x}. + #[error("Impossible ztrailer_offset {0:#x}.")] + ImpossibleZTrailerOffset( + /// `ztrailer_offset` + u64, + ), + + /// ZLIB header's zlib_offset is {actual:#x} instead of expected + /// {expected:#x}. + #[error("ZLIB header's zlib_offset is {actual:#x} instead of expected {expected:#x}.")] + UnexpectedZHeaderOffset { + /// Actual `zlib_offset`. + actual: u64, + /// Expected `zlib_offset`. 
+ expected: u64, + }, + + /// Invalid ZLIB trailer length {0}. + #[error("Invalid ZLIB trailer length {0}.")] + InvalidZTrailerLength( + /// ZLIB trailer length. + u64, + ), +} + /// A ZLIB trailer in a system file. #[derive(Clone, Debug)] pub struct ZTrailer { /// File offset to the start of the record. pub offset: u64, + pub inner: RawZTrailer, +} + +#[binrw] +#[derive(Clone, Debug)] +pub struct RawZTrailer { /// Compression bias as a negative integer, e.g. -100. pub int_bias: i64, @@ -2426,10 +2467,21 @@ pub struct ZTrailer { /// `0x3ff000` has been observed so far. pub block_size: u32, + /// Number of blocks. + #[bw(calc(blocks.len() as u32))] + pub n_blocks: u32, + /// Block descriptors, always `(ztrailer_len - 24) / 24)` of them. + #[br(count = n_blocks)] pub blocks: Vec, } +impl RawZTrailer { + pub fn len(&self) -> usize { + 24 + self.blocks.len() * 24 + } +} + /// Warning for a ZLIB trailer record. #[derive(ThisError, Debug)] pub enum ZlibTrailerWarning { @@ -2461,7 +2513,7 @@ pub enum ZlibTrailerWarning { } /// A ZLIB block descriptor in a system file. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, BinRead, BinWrite)] pub struct ZBlock { /// Offset of block of data if simple compression were used. pub uncompressed_ofs: u64, @@ -2479,15 +2531,6 @@ pub struct ZBlock { } impl ZBlock { - fn read(r: &mut R, endian: Endian) -> Result { - Ok(ZBlock { - uncompressed_ofs: endian.parse(read_bytes(r)?), - compressed_ofs: endian.parse(read_bytes(r)?), - uncompressed_size: endian.parse(read_bytes(r)?), - compressed_size: endian.parse(read_bytes(r)?), - }) - } - /// Returns true if the uncompressed and compressed sizes are plausible. 
/// /// [zlib Technical Details] says that the maximum expansion from @@ -2502,6 +2545,119 @@ impl ZBlock { } } +struct DisplayBinError<'a>(&'a BinError, &'static str); + +impl<'a> Display for DisplayBinError<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if self.0.is_eof() { + write!(f, "Unexpected end-of-file reading {}", self.1) + } else { + write!(f, "Error reading {}: {}", self.1, self.0.root_cause()) + } + } +} + +/// Error reading a [ZTrailer]. +#[derive(ThisError, Debug)] +pub enum ZTrailerError { + #[error("{}", DisplayBinError(&.0, "ZLIB trailer"))] + BinError(#[from] BinError), + + /// ZLIB trailer bias {actual} is not {} as expected from file header bias. + #[ + error( + "ZLIB trailer bias {actual} is not {} as expected from file header bias.", + DisplayPlainF64(*expected) + )] + WrongZlibTrailerBias { + /// ZLIB trailer bias read from file. + actual: i64, + /// Expected ZLIB trailer bias. + expected: f64, + }, + + /// ZLIB trailer \"zero\" field has nonzero value {0}. + #[error("ZLIB trailer \"zero\" field has nonzero value {0}.")] + WrongZlibTrailerZero( + /// Actual value that should have been zero. + u64, + ), + + /// ZLIB trailer specifies unexpected {0}-byte block size. + #[error("ZLIB trailer specifies unexpected {0}-byte block size.")] + WrongZlibTrailerBlockSize( + /// Block size read from file. + u32, + ), + + /// Block count in ZLIB trailer differs from expected block count calculated + /// from trailer length. + #[error( + "Block count {n_blocks} in ZLIB trailer differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}." + )] + BadZlibTrailerNBlocks { + /// Number of blocks. + n_blocks: usize, + /// Expected number of blocks. + expected_n_blocks: u64, + /// ZLIB trailer length in bytes. + ztrailer_len: u64, + }, + + /// ZLIB block descriptor reported uncompressed data offset different from + /// expected. 
+ #[error( + "ZLIB block descriptor {index} reported uncompressed data offset {actual:#x}, when {expected:#x} was expected." + )] + ZlibTrailerBlockWrongUncmpOfs { + /// Block descriptor index. + index: usize, + /// Actual uncompressed data offset. + actual: u64, + /// Expected uncompressed data offset. + expected: u64, + }, + + /// ZLIB block descriptor {index} reported compressed data offset + /// {actual:#x}, when {expected:#x} was expected. + #[error( + "ZLIB block descriptor {index} reported compressed data offset {actual:#x}, when {expected:#x} was expected." + )] + ZlibTrailerBlockWrongCmpOfs { + /// Block descriptor index. + index: usize, + /// Actual compressed data offset. + actual: u64, + /// Expected compressed data offset. + expected: u64, + }, + + /// ZLIB block descriptor {index} reports compressed size {compressed_size} + /// and uncompressed size {uncompressed_size}. + #[error( + "ZLIB block descriptor {index} reports compressed size {compressed_size} and uncompressed size {uncompressed_size}." + )] + ZlibExpansion { + /// Block descriptor index. + index: usize, + /// Compressed size. + compressed_size: u32, + /// Uncompressed size. + uncompressed_size: u32, + }, + + /// ZLIB trailer at unexpected offset. + #[error( + "ZLIB trailer is at offset {actual:#x} but {expected:#x} would be expected from block descriptors." + )] + ZlibTrailerOffsetInconsistency { + /// Expected offset. + expected: u64, + /// Actual offset. + actual: u64, + }, +} + impl ZTrailer { /// Reads a ZLIB trailer from `reader` using `endian`. 
`bias` is the /// floating-point bias for confirmation against the trailer, and `zheader` @@ -2510,12 +2666,12 @@ impl ZTrailer { reader: &mut R, endian: Endian, bias: f64, - zheader: &ZHeader, + zheader: &RawZHeader, warn: &mut dyn FnMut(Warning), ) -> Result, Error> where R: Read + Seek, { let start_offset = reader.stream_position()?; if reader .seek(SeekFrom::Start(zheader.ztrailer_offset)) @@ -2523,84 +2680,85 @@ impl ZTrailer { { return Ok(None); } - let int_bias = endian.parse(read_bytes(reader)?); - let zero = endian.parse(read_bytes(reader)?); - let block_size = endian.parse(read_bytes(reader)?); - let n_blocks: u32 = endian.parse(read_bytes(reader)?); - if int_bias as f64 != -bias { - Err(ErrorDetails::WrongZlibTrailerBias { - actual: int_bias, + let inner = RawZTrailer::read_options(reader, endian, ()).map_err(|e| Error { + offsets: Some(zheader.ztrailer_offset..zheader.ztrailer_offset + zheader.ztrailer_len), + details: ZTrailerError::from(e).into(), + })?; + if inner.int_bias as f64 != -bias { + Err(ZTrailerError::WrongZlibTrailerBias { + actual: inner.int_bias, expected: -bias, - }) - } else if zero != 0 { - Err(ErrorDetails::WrongZlibTrailerZero(zero)) - } else if block_size != 0x3ff000 { - Err(ErrorDetails::WrongZlibTrailerBlockSize(block_size)) + } + .into()) + } else if inner.zero != 0 { + Err(ZTrailerError::WrongZlibTrailerZero(inner.zero).into()) + } else if inner.block_size != 0x3ff000 { + Err(ZTrailerError::WrongZlibTrailerBlockSize(inner.block_size).into()) } else if let expected_n_blocks = (zheader.ztrailer_len - 24) / 24 - && n_blocks as u64 != expected_n_blocks + && inner.blocks.len() as u64 != expected_n_blocks { - Err(ErrorDetails::BadZlibTrailerNBlocks { - n_blocks, + Err(ZTrailerError::BadZlibTrailerNBlocks { + n_blocks: inner.blocks.len(), expected_n_blocks, ztrailer_len: zheader.ztrailer_len, - }) + } + .into()) } else { Ok(()) } .map_err(|details| Error::new(Some(start_offset..start_offset + 24), details))?; - let blocks = 
(0..n_blocks) - .map(|_| ZBlock::read(reader, endian)) - .collect::, _>>()?; - let mut expected_uncmp_ofs = zheader.zheader_offset; let mut expected_cmp_ofs = zheader.zheader_offset + 24; - for (index, block) in blocks.iter().enumerate() { + for (index, block) in inner.blocks.iter().enumerate() { let block_start = start_offset + 24 + 24 * index as u64; let block_offsets = block_start..block_start + 24; if block.uncompressed_ofs != expected_uncmp_ofs { - Err(ErrorDetails::ZlibTrailerBlockWrongUncmpOfs { + Err(ZTrailerError::ZlibTrailerBlockWrongUncmpOfs { index, actual: block.uncompressed_ofs, - expected: expected_cmp_ofs, - }) + expected: expected_uncmp_ofs, + } + .into()) } else if block.compressed_ofs != expected_cmp_ofs { - Err(ErrorDetails::ZlibTrailerBlockWrongCmpOfs { + Err(ZTrailerError::ZlibTrailerBlockWrongCmpOfs { index, actual: block.compressed_ofs, expected: expected_cmp_ofs, - }) + } + .into()) } else if !block.has_plausible_sizes() { - Err(ErrorDetails::ZlibExpansion { + Err(ZTrailerError::ZlibExpansion { index, compressed_size: block.compressed_size, uncompressed_size: block.uncompressed_size, - }) + } + .into()) } else { Ok(()) } .map_err(|details| Error::new(Some(block_offsets.clone()), details))?; - if index < blocks.len() - 1 { - if block.uncompressed_size != block_size { + if index < inner.blocks.len() - 1 { + if block.uncompressed_size != inner.block_size { warn(Warning::new( Some(block_offsets), ZlibTrailerWarning::ZlibTrailerBlockWrongSize { index, actual: block.uncompressed_size, - expected: block_size, + expected: inner.block_size, }, )); } } else { - if block.uncompressed_size > block_size { + if block.uncompressed_size > inner.block_size { warn(Warning::new( Some(block_offsets), ZlibTrailerWarning::ZlibTrailerBlockTooBig { index, actual: block.uncompressed_size, - max_expected: block_size, + max_expected: inner.block_size, }, )); } @@ -2612,21 +2770,19 @@ impl ZTrailer { if expected_cmp_ofs != zheader.ztrailer_offset { return Err(Error::new( - 
Some(start_offset..start_offset + 24 + 24 * n_blocks as u64), - ErrorDetails::ZlibTrailerOffsetInconsistency { + Some(start_offset..start_offset + 24 + 24 * inner.blocks.len() as u64), + ZTrailerError::ZlibTrailerOffsetInconsistency { expected: expected_cmp_ofs, actual: zheader.ztrailer_offset, - }, + } + .into(), )); } reader.seek(SeekFrom::Start(start_offset))?; Ok(Some(ZTrailer { offset: zheader.ztrailer_offset, - int_bias, - zero, - block_size, - blocks, + inner, })) } } diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index 89901017de..0f3ba55896 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -5,7 +5,7 @@ use std::{ collections::HashMap, fmt::Write as _, fs::File, - io::{BufWriter, Cursor, Seek, Write}, + io::{BufWriter, Cursor, Error as IoError, ErrorKind, Seek, SeekFrom, Write}, iter::repeat_n, path::Path, }; @@ -13,6 +13,7 @@ use std::{ use binrw::{BinWrite, Endian, Error as BinError}; use chrono::Local; use encoding_rs::Encoding; +use flate2::{Compress, FlushCompress, Status}; use itertools::zip_eq; use smallvec::SmallVec; @@ -30,7 +31,7 @@ use crate::{ raw::{ records::{ Compression, FloatInfoRecord, RawFormat, RawHeader, RawIntegerInfoRecord, - RawVariableRecord, + RawVariableRecord, RawZHeader, RawZTrailer, ZBlock, }, Magic, }, @@ -93,23 +94,19 @@ impl WriteOptions { self, dictionary: &Dictionary, path: impl AsRef, - ) -> Result>, BinError> { + ) -> Result { self.write_writer(dictionary, BufWriter::new(File::create(path)?)) } /// Writes `dictionary` to `writer` in system file format. Returns a /// [Writer] that can be used for writing cases to the new file. 
- pub fn write_writer( - self, - dictionary: &Dictionary, - mut writer: W, - ) -> Result, BinError> + pub fn write_writer(self, dictionary: &Dictionary, mut writer: W) -> Result where - W: Write + Seek, + W: Write + Seek + 'static, { let mut dict_writer = DictionaryWriter::new(&self, &mut writer, dictionary); dict_writer.write()?; - Ok(Writer::new(self, dict_writer.case_vars, writer)) + Writer::new(self, dict_writer.case_vars, writer) } } @@ -800,22 +797,22 @@ impl CaseVar { } /// System file writer. -pub struct Writer { +pub struct Writer { compression: Option, case_vars: Vec, - inner: WriterInner, + inner: WriterInner, } -pub struct WriterInner { +pub struct WriterInner { opcodes: Vec, data: Vec, - inner: W, + inner: Box, } -impl WriterInner -where - W: Write + Seek, -{ +trait WriteSeek: Write + Seek {} +impl WriteSeek for T where T: Write + Seek {} + +impl WriterInner { fn flush_compressed(&mut self) -> Result<(), BinError> { if !self.opcodes.is_empty() { self.opcodes.resize(8, 0); @@ -835,29 +832,31 @@ where } } -impl Writer { - fn new(options: WriteOptions, case_vars: Vec, inner: W) -> Self { - Self { +impl Writer { + fn new(options: WriteOptions, case_vars: Vec, inner: W) -> Result + where + W: Write + Seek + 'static, + { + Ok(Self { compression: options.compression, case_vars, inner: WriterInner { opcodes: Vec::with_capacity(8), data: Vec::with_capacity(64), - inner, + inner: match options.compression { + Some(Compression::ZLib) => Box::new(ZlibWriter::new(inner)?), + _ => Box::new(inner), + }, }, - } + }) } } -impl Writer -where - W: Write + Seek, -{ +impl Writer { /// Writes `case` to the system file. 
pub fn write_case(&mut self, case: &Case) -> Result<(), BinError> { match self.compression { - Some(Compression::Simple) => self.write_case_compressed(case), - Some(Compression::ZLib) => todo!(), + Some(_) => self.write_case_compressed(case), None => self.write_case_uncompressed(case), } } @@ -874,7 +873,8 @@ where for segment in encoding { let data; (data, s) = s.split_at(segment.data_bytes); - (data, Pad::new(segment.padding_bytes, 0)).write_le(&mut self.inner.inner)?; + (data, Pad::new(segment.padding_bytes, 0)) + .write_le(&mut self.inner.inner)?; } } } @@ -895,7 +895,9 @@ where } else { self.inner.put_opcode(253)?; - number.write_le(&mut Cursor::new(&mut self.inner.data)).unwrap(); + number + .write_le(&mut Cursor::new(&mut self.inner.data)) + .unwrap(); } } }, @@ -934,3 +936,198 @@ where Ok(()) } }
+
+struct Block {
+    uncompressed_size: u64,
+    compressed_size: u64,
+}
+
+/// Compresses everything written to it into a sequence of ZLIB blocks, each
+/// holding at most [ZBLOCK_SIZE] bytes of uncompressed data.
+///
+/// A placeholder [RawZHeader] is written on creation.  Finishing the writer,
+/// either explicitly or as a best-effort fallback on drop, appends the
+/// [RawZTrailer] and rewrites the header with the trailer's offset and length.
+struct ZlibWriter<W>
+where
+    W: Write + Seek,
+{
+    /// Header as written on creation; its trailer fields are placeholders.
+    header: RawZHeader,
+
+    /// Trailer under construction, with one descriptor per completed block.
+    trailer: RawZTrailer,
+
+    /// Compressor for the current block, reset whenever a block is completed.
+    compress: Compress,
+
+    /// Scratch space for compressed output on its way to `inner`.
+    buf: Vec<u8>,
+
+    /// Underlying writer.
+    inner: W,
+
+    /// Set once finishing has been attempted, so that the `Drop` fallback
+    /// after an explicit `finish` does not write the trailer a second time.
+    finished: bool,
+}
+
+impl<W> ZlibWriter<W>
+where
+    W: Write + Seek,
+{
+    /// Starts a compressed section at the current position of `inner` by
+    /// writing a placeholder header there.
+    fn new(mut inner: W) -> Result<Self, BinError> {
+        let header = RawZHeader {
+            zheader_offset: inner.stream_position()?,
+            ztrailer_offset: 0,
+            ztrailer_len: 0,
+        };
+        header.write_le(&mut inner)?;
+        Ok(Self {
+            header,
+            trailer: RawZTrailer {
+                int_bias: -BIAS as i64,
+                zero: 0,
+                block_size: ZBLOCK_SIZE as u32,
+                blocks: Vec::new(),
+            },
+            // NOTE(review): `false` asks flate2 for raw DEFLATE output without
+            // a zlib header; confirm that this is what readers expect.
+            compress: Compress::new(flate2::Compression::new(5), false),
+            buf: Vec::with_capacity(4096),
+            inner,
+            finished: false,
+        })
+    }
+
+    /// Completes the current block, appends the trailer, and rewrites the
+    /// header to point to it.  Only the first call does anything.
+    fn try_finish(&mut self) -> Result<(), BinError> {
+        if self.finished {
+            return Ok(());
+        }
+        // Set the flag before doing any work: if finishing fails part way,
+        // retrying from `drop` would only write a second trailer.
+        self.finished = true;
+
+        self.flush()?;
+        let ztrailer_offset = self.inner.stream_position()?;
+        self.trailer.write_le(&mut self.inner)?;
+        let end_offset = self.inner.stream_position()?;
+
+        let header = RawZHeader {
+            zheader_offset: self.header.zheader_offset,
+            ztrailer_offset,
+            ztrailer_len: self.trailer.len() as u64,
+        };
+        self.inner.seek(SeekFrom::Start(header.zheader_offset))?;
+        header.write_le(&mut self.inner)?;
+
+        // Leave the stream positioned after the trailer instead of just past
+        // the header, in the middle of the compressed data.
+        self.inner.seek(SeekFrom::Start(end_offset))?;
+        self.inner.flush()?;
+        Ok(())
+    }
+
+    /// Finishes the compressed section and reports any error.  Dropping the
+    /// writer finishes it too, but has to discard errors.
+    fn finish(mut self) -> Result<(), BinError> {
+        self.try_finish()
+    }
+}
+
+impl<W> Drop for ZlibWriter<W>
+where
+    W: Write + Seek,
+{
+    fn drop(&mut self) {
+        // Best effort: there is no way to report an error from `drop`.
+        let _ = self.try_finish();
+    }
+}
+
+/// Maximum number of bytes of uncompressed data in a single ZLIB block.
+const ZBLOCK_SIZE: u64 = 0x3ff000;
+
+impl<W> Write for ZlibWriter<W>
+where
+    W: Write + Seek,
+{
+    /// Compresses all of `buf` into the current block, starting a new block
+    /// each time [ZBLOCK_SIZE] bytes of input have accumulated.
+    fn write(&mut self, mut buf: &[u8]) -> Result<usize, IoError> {
+        let n = buf.len();
+        while !buf.is_empty() {
+            if self.compress.total_in() >= ZBLOCK_SIZE {
+                self.flush()?;
+            }
+
+            // Never feed the compressor more than the current block can take.
+            let chunk = buf
+                .len()
+                .min((ZBLOCK_SIZE - self.compress.total_in()) as usize);
+            let in_before = self.compress.total_in();
+            self.buf.clear();
+            self.compress
+                .compress_vec(&buf[..chunk], &mut self.buf, FlushCompress::None)
+                .map_err(IoError::other)?;
+            let consumed = self.compress.total_in() - in_before;
+            self.inner.write_all(&self.buf)?;
+            buf = &buf[consumed as usize..];
+        }
+        Ok(n)
+    }
+
+    /// Completes the current block, if it contains any data, and records its
+    /// descriptor in the trailer.  Later writes start a new block.
+    fn flush(&mut self) -> std::io::Result<()> {
+        if self.compress.total_in() > 0 {
+            loop {
+                self.buf.clear();
+                let status = self
+                    .compress
+                    .compress_vec(&[], &mut self.buf, FlushCompress::Finish)
+                    .map_err(IoError::other)?;
+                self.inner.write_all(&self.buf)?;
+                match status {
+                    // `buf` filled up before the stream ended; go around again.
+                    Status::Ok => (),
+                    Status::StreamEnd => break,
+                    Status::BufError => {
+                        return Err(IoError::other(
+                            "compressor made no progress finishing ZLIB block",
+                        ));
+                    }
+                }
+            }
+
+            self.trailer.blocks.push(ZBlock {
+                uncompressed_size: self.compress.total_in() as u32,
+                compressed_size: self.compress.total_out() as u32,
+                uncompressed_ofs: match self.trailer.blocks.last() {
+                    Some(prev) => prev.uncompressed_ofs + prev.uncompressed_size as u64,
+                    None => self.header.zheader_offset,
+                },
+                compressed_ofs: match self.trailer.blocks.last() {
+                    Some(prev) => prev.compressed_ofs + prev.compressed_size as u64,
+                    None => self.header.zheader_offset + 24,
+                },
+            });
+            self.compress.reset();
+        }
+        Ok(())
+    }
+}
+
+impl<W> Seek for ZlibWriter<W>
+where
+    W: Write + Seek,
+{
+    /// Always fails: compressed output cannot be repositioned.
+    fn seek(&mut self, _pos: std::io::SeekFrom) -> Result<u64, IoError> {
+        Err(IoError::from(ErrorKind::NotSeekable))
+    }
+}
-- 
2.30.2