#![allow(unused_variables)]
use endian::{Endian, Parse, ToBytes};
+//use flate2::bufread::ZlibDecoder;
use num::Integer;
use num_derive::FromPrimitive;
use std::{
collections::VecDeque,
- io::{BufReader, Error as IoError, Read, Seek},
+ io::{BufReader, Error as IoError, Read, Seek, SeekFrom},
};
use thiserror::Error;
#[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
CompressedStringExpected { offset: u64, case_ofs: u64 },
+
+ #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
+ BadZlibTrailerNBlocks { offset: u64, n_blocks: u32, expected_n_blocks: u64, ztrailer_len: u64 }
}
#[derive(Error, Debug)]
VarIndexes(VarIndexes),
Extension(Extension),
EndOfHeaders,
+ ZHeader(ZHeader),
+ ZTrailer(ZTrailer),
Case(Vec<Value>),
}
Headers(Endian, Option<Compression>),
Data(Endian),
CompressedData(Endian, VecDeque<u8>),
+ ZHeader(Endian),
+ ZTrailer { endian: Endian, ztrailer_ofs: u64, ztrailer_len: u64 },
+ //ZData,
End,
}
Some(Compression::Simple) => {
ReaderState::CompressedData(endian, VecDeque::new())
}
- _ => ReaderState::End,
+ Some(Compression::ZLib) => ReaderState::ZHeader(endian),
};
return Ok(Some(Record::EndOfHeaders));
}
});
}
}
- 253 => break Value::from_raw(
- var_type,
- read_bytes(&mut self.r)?,
- endian,
- ),
+ 253 => {
+ break Value::from_raw(var_type, read_bytes(&mut self.r)?, endian)
+ }
254 => match var_type {
VarType::String => break Value::String(*b" "), // XXX EBCDIC
VarType::Number => {
VarType::String => {
return Err(Error::CompressedNumberExpected {
offset: case_start,
- case_ofs: self.r.stream_position()? - case_start,})
+ case_ofs: self.r.stream_position()? - case_start,
+ })
}
- }
+ },
}
};
values.push(value);
}
Ok(Some(Record::Case(values)))
}
+ ReaderState::ZHeader(endian) => {
+ let zheader = read_zheader(&mut self.r, endian)?;
+ self.state = ReaderState::ZTrailer {
+     endian,
+     ztrailer_ofs: zheader.ztrailer_offset,
+     ztrailer_len: zheader.ztrailer_len,
+ };
+ Ok(Some(Record::ZHeader(zheader)))
+ }
+ ReaderState::ZTrailer { endian, ztrailer_ofs, ztrailer_len } => {
+ //self.state = ReaderState::ZData;
+ match read_ztrailer(&mut self.r, endian, ztrailer_ofs, ztrailer_len)? {
+ Some(ztrailer) => Ok(Some(Record::ZTrailer(ztrailer))),
+ None => self._next(),
+ }
+ }
+/*
+ ReaderState::ZData(zlib_decoder) => {
+ let zlib_decoder = zlib_decoder.unwrap_or_else(
+ },
+*/
ReaderState::End => Ok(None),
}
}
self.state = ReaderState::End;
None
}
- Ok(Some(record)) => {
- Some(Ok(record))
- }
+ Ok(Some(record)) => Some(Ok(record)),
Err(error) => {
self.state = ReaderState::End;
Some(Err(error))
fn read_variable_record<R: Read + Seek>(
r: &mut BufReader<R>,
- e: Endian,
+ endian: Endian,
) -> Result<Variable, Error> {
let offset = r.stream_position()?;
- let width: i32 = e.parse(read_bytes(r)?);
- let has_variable_label: u32 = e.parse(read_bytes(r)?);
- let missing_value_code: i32 = e.parse(read_bytes(r)?);
- let print_format: u32 = e.parse(read_bytes(r)?);
- let write_format: u32 = e.parse(read_bytes(r)?);
+ let width: i32 = endian.parse(read_bytes(r)?);
+ let has_variable_label: u32 = endian.parse(read_bytes(r)?);
+ let missing_value_code: i32 = endian.parse(read_bytes(r)?);
+ let print_format: u32 = endian.parse(read_bytes(r)?);
+ let write_format: u32 = endian.parse(read_bytes(r)?);
let name: [u8; 8] = read_bytes(r)?;
let label = match has_variable_label {
0 => None,
1 => {
- let len: u32 = e.parse(read_bytes(r)?);
+ let len: u32 = endian.parse(read_bytes(r)?);
let read_len = len.min(65535) as usize;
let label = Some(read_vec(r, read_len)?);
fn read_value_label_record<R: Read + Seek>(
r: &mut BufReader<R>,
- e: Endian,
+ endian: Endian,
) -> Result<ValueLabel, Error> {
let offset = r.stream_position()?;
- let n: u32 = e.parse(read_bytes(r)?);
+ let n: u32 = endian.parse(read_bytes(r)?);
if n > ValueLabel::MAX {
return Err(Error::BadNumberOfValueLabels {
offset,
let mut labels = Vec::new();
for _ in 0..n {
let value: [u8; 8] = read_bytes(r)?;
- let label_len: u8 = e.parse(read_bytes(r)?);
+ let label_len: u8 = endian.parse(read_bytes(r)?);
let label_len = label_len as usize;
let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
fn read_var_indexes_record<R: Read + Seek>(
r: &mut BufReader<R>,
- e: Endian,
+ endian: Endian,
) -> Result<VarIndexes, Error> {
let offset = r.stream_position()?;
- let n: u32 = e.parse(read_bytes(r)?);
+ let n: u32 = endian.parse(read_bytes(r)?);
if n > VarIndexes::MAX {
return Err(Error::BadNumberOfVarIndexes {
offset,
}
let mut var_indexes = Vec::with_capacity(n as usize);
for _ in 0..n {
- var_indexes.push(e.parse(read_bytes(r)?));
+ var_indexes.push(endian.parse(read_bytes(r)?));
}
Ok(VarIndexes {
fn read_document_record<R: Read + Seek>(
r: &mut BufReader<R>,
- e: Endian,
+ endian: Endian,
) -> Result<Document, Error> {
let offset = r.stream_position()?;
- let n: u32 = e.parse(read_bytes(r)?);
+ let n: u32 = endian.parse(read_bytes(r)?);
match n {
0..=DOC_MAX_LINES => {
let pos = r.stream_position()?;
fn read_extension_record<R: Read + Seek>(
r: &mut BufReader<R>,
- e: Endian,
+ endian: Endian,
) -> Result<Extension, Error> {
- let subtype = e.parse(read_bytes(r)?);
+ let subtype = endian.parse(read_bytes(r)?);
let offset = r.stream_position()?;
- let size: u32 = e.parse(read_bytes(r)?);
- let count = e.parse(read_bytes(r)?);
+ let size: u32 = endian.parse(read_bytes(r)?);
+ let count = endian.parse(read_bytes(r)?);
let Some(product) = size.checked_mul(count) else {
return Err(Error::ExtensionRecordTooLarge {
offset,
})
}
-struct ZHeader {
+pub struct ZHeader {
/// File offset to the start of the record.
- offset: u64,
+ pub offset: u64,
/// File offset to the ZLIB data header.
- zheader_offset: u64,
+ pub zheader_offset: u64,
/// File offset to the ZLIB trailer.
- ztrailer_offset: u64,
+ pub ztrailer_offset: u64,
/// Length of the ZLIB trailer in bytes.
- ztrailer_len: u64,
+ pub ztrailer_len: u64,
}
-fn read_zheader<R: Read + Seek>(r: &mut BufReader<R>, e: Endian) -> Result<ZHeader, Error> {
+fn read_zheader<R: Read + Seek>(r: &mut BufReader<R>, endian: Endian) -> Result<ZHeader, Error> {
let offset = r.stream_position()?;
- let zheader_offset: u64 = e.parse(read_bytes(r)?);
- let ztrailer_offset: u64 = e.parse(read_bytes(r)?);
- let ztrailer_len: u64 = e.parse(read_bytes(r)?);
+ let zheader_offset: u64 = endian.parse(read_bytes(r)?);
+ let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
+ let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
Ok(ZHeader {
offset,
})
}
+pub struct ZTrailer {
+ /// File offset to the start of the record.
+ pub offset: u64,
+
+ /// Compression bias as a negative integer, e.g. -100.
+ pub int_bias: i64,
+
+ /// Always observed as zero.
+ pub zero: u64,
+
+ /// Uncompressed size of each block, except possibly the last. Only
+ /// `0x3ff000` has been observed so far.
+ pub block_size: u32,
+
+ /// Block descriptors. There are always `(ztrailer_len - 24) / 24` of them:
+ /// the trailer has 24 bytes of fixed header, then one 24-byte descriptor
+ /// per block.
+ pub blocks: Vec<ZBlock>,
+}
+
+pub struct ZBlock {
+ /// Offset of block of data if simple compression were used.
+ pub uncompressed_ofs: u64,
+
+ /// Actual offset within the file of the compressed data block.
+ pub compressed_ofs: u64,
+
+ /// The number of bytes in this data block after decompression. This is
+ /// `block_size` in every data block but the last, which may be smaller.
+ pub uncompressed_size: u32,
+
+ /// The number of bytes in this data block, as stored compressed in this
+ /// file.
+ pub compressed_size: u32,
+}
+
+fn read_ztrailer<R: Read + Seek>(
+    r: &mut BufReader<R>,
+    endian: Endian,
+    ztrailer_ofs: u64,
+    ztrailer_len: u64,
+) -> Result<Option<ZTrailer>, Error> {
+    let start_offset = r.stream_position()?;
+
+    // If the trailer offset cannot be reached, skip the trailer entirely.
+    if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
+        return Ok(None);
+    }
+
+    // Fixed 24-byte trailer header.
+    let int_bias = endian.parse(read_bytes(r)?);
+    let zero = endian.parse(read_bytes(r)?);
+    let block_size = endian.parse(read_bytes(r)?);
+    let n_blocks: u32 = endian.parse(read_bytes(r)?);
+    let expected_n_blocks = (ztrailer_len - 24) / 24;
+    if n_blocks as u64 != expected_n_blocks {
+        return Err(Error::BadZlibTrailerNBlocks {
+            offset: ztrailer_ofs,
+            n_blocks,
+            expected_n_blocks,
+            ztrailer_len,
+        });
+    }
+
+    // One 24-byte descriptor per block.
+    let mut blocks = Vec::with_capacity(n_blocks as usize);
+    for _ in 0..n_blocks {
+        let uncompressed_ofs = endian.parse(read_bytes(r)?);
+        let compressed_ofs = endian.parse(read_bytes(r)?);
+        let uncompressed_size = endian.parse(read_bytes(r)?);
+        let compressed_size = endian.parse(read_bytes(r)?);
+        blocks.push(ZBlock {
+            uncompressed_ofs,
+            compressed_ofs,
+            uncompressed_size,
+            compressed_size,
+        });
+    }
+
+    // Return to the caller's position so that record reading can continue.
+    r.seek(SeekFrom::Start(start_offset))?;
+    Ok(Some(ZTrailer { offset: ztrailer_ofs, int_bias, zero, block_size, blocks }))
+}
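+
+// Illustrative only: a minimal sketch of how the `ZBlock` descriptors returned
+// by `read_ztrailer` might later be used to inflate one compressed data block
+// with `flate2::bufread::ZlibDecoder` (see the commented-out `flate2` import
+// and `ZData` state above). It is not wired into the reader state machine, and
+// it assumes `flate2` is available as a dependency; the function name is a
+// placeholder.
+#[allow(dead_code)]
+fn read_zblock_sketch<R: Read + Seek>(
+    r: &mut BufReader<R>,
+    block: &ZBlock,
+) -> Result<Vec<u8>, IoError> {
+    // Seek to the compressed data and read exactly `compressed_size` bytes.
+    r.seek(SeekFrom::Start(block.compressed_ofs))?;
+    let mut compressed = vec![0; block.compressed_size as usize];
+    r.read_exact(&mut compressed)?;
+
+    // Inflate; the result should be `uncompressed_size` bytes long.
+    let mut decoder = flate2::bufread::ZlibDecoder::new(&compressed[..]);
+    let mut uncompressed = Vec::with_capacity(block.uncompressed_size as usize);
+    decoder.read_to_end(&mut uncompressed)?;
+    Ok(uncompressed)
+}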
+
fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
let mut buf = [0; N];
let n = r.read(&mut buf)?;