use crate::{
dictionary::{Attributes, Datum, VarWidth},
endian::{Endian, Parse, ToBytes},
- format::DisplayPlain,
+ format::{DisplayPlain, DisplayPlainF64},
identifier::{Error as IdError, Identifier},
sys::encoding::{default_encoding, get_encoding, Error as EncodingError},
};
},
#[error(
- "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
+ "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes and {n_chunks} compression chunks into a compressed case."
)]
- EofInCompressedCase { offset: u64, case_ofs: u64 },
+ EofInCompressedCase {
+ offset: u64,
+ case_ofs: u64,
+ n_chunks: usize,
+ },
#[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
PartialCompressedCase { offset: u64, case_ofs: u64 },
#[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
CompressedStringExpected { offset: u64, case_ofs: u64 },
+ #[error("Impossible ztrailer_offset {0:#x}.")]
+ ImpossibleZTrailerOffset(u64),
+
+ #[error("ZLIB header's zlib_offset is {actual:#x} instead of expected {expected:#x}.")]
+ UnexpectedZHeaderOffset { actual: u64, expected: u64 },
+
+ #[error("Invalid ZLIB trailer length {0}.")]
+ InvalidZTrailerLength(u64),
+
+ #[error(
+ "ZLIB trailer bias {actual} is not {} as expected from file header bias.",
+ DisplayPlainF64(*expected)
+ )]
+ WrongZlibTrailerBias { actual: i64, expected: f64 },
+
+ #[error("ZLIB trailer \"zero\" field has nonzero value {0}.")]
+ WrongZlibTrailerZero(u64),
+
+ #[error("ZLIB trailer specifies unexpected {0}-byte block size.")]
+ WrongZlibTrailerBlockSize(u32),
+
#[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
BadZlibTrailerNBlocks {
offset: u64,
ztrailer_len: u64,
},
+ #[error("ZLIB block descriptor {index} reported uncompressed data offset {actual:#x}, when {expected:#x} was expected.")]
+ ZlibTrailerBlockWrongUncmpOfs {
+ index: usize,
+ actual: u64,
+ expected: u64,
+ },
+
+ #[error("ZLIB block descriptor {index} reported compressed data offset {actual:#x}, when {expected:#x} was expected.")]
+ ZlibTrailerBlockWrongCmpOfs {
+ index: usize,
+ actual: u64,
+ expected: u64,
+ },
+
+ #[error("ZLIB block descriptor {index} reports compressed size {compressed_size} and uncompressed size {uncompressed_size}.")]
+ ZlibExpansion {
+ index: usize,
+ compressed_size: u32,
+ uncompressed_size: u32,
+ },
+
+ #[error("ZLIB trailer is at offset {zheader:#x} but {descriptors:#x} would be expected from block descriptors.")]
+ ZlibTrailerOffsetInconsistency { descriptors: u64, zheader: u64 },
+
#[error("File metadata says it contains {expected} cases, but {actual} cases were read.")]
WrongNumberOfCases { expected: u64, actual: u64 },
#[error("Compression bias is {0} instead of the usual values of 0 or 100.")]
UnexpectedBias(f64),
+ #[error("ZLIB block descriptor {index} reported block size {actual:#x}, when {expected:#x} was expected.")]
+ ZlibTrailerBlockWrongSize {
+ index: usize,
+ actual: u32,
+ expected: u32,
+ },
+
+ #[error("ZLIB block descriptor {index} reported block size {actual:#x}, when at most {max_expected:#x} was expected.")]
+ ZlibTrailerBlockTooBig {
+ index: usize,
+ actual: u32,
+ max_expected: u32,
+ },
+
#[error("Details TBD (raw)")]
TBD,
}
reader: &mut R,
case_vars: &[CaseVar],
case_start: u64,
+ n_chunks: usize,
) -> Result<Option<Vec<Datum>>, Error> {
let offset = reader.stream_position()?;
- if offset == case_start {
- Ok(None)
- } else {
- Err(Error::EofInCase {
- offset,
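+ // EOF is the normal end of data only if no compression chunks have been
+ // consumed for this case; otherwise the file was truncated mid-case.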
+ if n_chunks > 0 {
+ Err(Error::EofInCompressedCase {
case_ofs: offset - case_start,
- case_len: case_vars.iter().map(CaseVar::bytes).sum(),
+ n_chunks,
+ offset,
})
+ } else {
+ Ok(None)
}
}
let case_start = reader.stream_position()?;
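+ // Number of compression chunks consumed so far, for EOF diagnostics.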
+ let mut n_chunks = 0;
let mut values = Vec::with_capacity(case_vars.len());
for var in case_vars {
match var {
CaseVar::Numeric => {
let Some(raw) = Self::read_compressed_chunk(reader, codes, endian, bias)?
else {
- return eof(reader, case_vars, case_start);
+ return eof(reader, case_vars, case_start, n_chunks);
};
+ n_chunks += 1;
values.push(Datum::Number(endian.parse(raw)));
}
CaseVar::String { width, encoding } => {
let Some(raw) =
Self::read_compressed_chunk(reader, codes, endian, bias)?
else {
- return eof(reader, case_vars, case_start);
+ return eof(reader, case_vars, case_start, n_chunks);
};
let n_data = data_bytes.min(8);
datum.extend_from_slice(&raw[..n_data]);
data_bytes -= n_data;
padding_bytes -= 8 - n_data;
+ n_chunks += 1;
}
}
values.push(Datum::String(RawString(datum)));
Start,
Headers,
ZlibHeader,
- ZlibTrailer {
- ztrailer_offset: u64,
- ztrailer_len: u64,
- },
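+ // Carry the full ZHeader forward so the trailer can be validated against it.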
+ ZlibTrailer(ZHeader),
End,
}
Some(Ok(record))
}
ReaderState::ZlibHeader => {
- let zheader =
- match ZHeader::read(self.0.reader.as_mut().unwrap(), self.0.header.endian) {
- Ok(zheader) => zheader,
- Err(error) => return Some(Err(error)),
- };
- self.0.state = ReaderState::ZlibTrailer {
- ztrailer_offset: zheader.ztrailer_offset,
- ztrailer_len: zheader.ztrailer_len,
+ let zheader = match ZHeader::read(
+ self.0.reader.as_mut().unwrap(),
+ self.0.header.endian,
+ &mut self.0.warn,
+ ) {
+ Ok(zheader) => zheader,
+ Err(error) => return Some(Err(error)),
};
+ self.0.state = ReaderState::ZlibTrailer(zheader.clone());
Some(Ok(Record::ZHeader(zheader)))
}
- ReaderState::ZlibTrailer {
- ztrailer_offset,
- ztrailer_len,
- } => {
+ ReaderState::ZlibTrailer(ref zheader) => {
match ZTrailer::read(
self.0.reader.as_mut().unwrap(),
self.0.header.endian,
- ztrailer_offset,
- ztrailer_len,
+ self.0.header.bias,
+ zheader,
+ &mut self.0.warn,
) {
Ok(None) => {
self.cases();
}
impl ZHeader {
- fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
+ fn read<R: Read + Seek>(
+ r: &mut R,
+ endian: Endian,
+ warn: &mut dyn FnMut(Warning),
+ ) -> Result<ZHeader, Error> {
let offset = r.stream_position()?;
let zheader_offset: u64 = endian.parse(read_bytes(r)?);
let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
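+
+ // The ZLIB data header records its own offset, which must match the
+ // position it was actually read from.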
+ if zheader_offset != offset {
+ return Err(Error::UnexpectedZHeaderOffset {
+ actual: zheader_offset,
+ expected: offset,
+ });
+ }
+
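+ // The trailer cannot precede the ZLIB header.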
+ if ztrailer_offset < offset {
+ return Err(Error::ImpossibleZTrailerOffset(ztrailer_offset));
+ }
+
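+ // The trailer is a 24-byte fixed header plus one 24-byte block descriptor
+ // per block, so its length must be a multiple of 24 and at least 24.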
+ if ztrailer_len < 24 || ztrailer_len % 24 != 0 {
+ return Err(Error::InvalidZTrailerLength(ztrailer_len));
+ }
+
Ok(ZHeader {
offset,
zheader_offset,
fn read<R: Read + Seek>(
reader: &mut R,
endian: Endian,
- ztrailer_ofs: u64,
- ztrailer_len: u64,
+ bias: f64,
+ zheader: &ZHeader,
+ warn: &mut dyn FnMut(Warning),
) -> Result<Option<ZTrailer>, Error> {
let start_offset = reader.stream_position()?;
- if reader.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
+ if reader
+ .seek(SeekFrom::Start(zheader.ztrailer_offset))
+ .is_err()
+ {
return Ok(None);
}
let int_bias = endian.parse(read_bytes(reader)?);
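+ // The trailer repeats the compression bias from the file header, negated
+ // and stored as an integer.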
+ if int_bias as f64 != -bias {
+ return Err(Error::WrongZlibTrailerBias {
+ actual: int_bias,
+ expected: -bias,
+ });
+ }
let zero = endian.parse(read_bytes(reader)?);
+ if zero != 0 {
+ return Err(Error::WrongZlibTrailerZero(zero));
+ }
let block_size = endian.parse(read_bytes(reader)?);
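+ // Writers are only known to use 0x3ff000 as the uncompressed block size.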
+ if block_size != 0x3ff000 {
+ return Err(Error::WrongZlibTrailerBlockSize(block_size));
+ }
let n_blocks: u32 = endian.parse(read_bytes(reader)?);
- let expected_n_blocks = (ztrailer_len - 24) / 24;
+ let expected_n_blocks = (zheader.ztrailer_len - 24) / 24;
if n_blocks as u64 != expected_n_blocks {
return Err(Error::BadZlibTrailerNBlocks {
- offset: ztrailer_ofs,
+ offset: zheader.ztrailer_offset,
n_blocks,
expected_n_blocks,
- ztrailer_len,
+ ztrailer_len: zheader.ztrailer_len,
});
}
let blocks = (0..n_blocks)
.map(|_| ZBlock::read(reader, endian))
.collect::<Result<Vec<_>, _>>()?;
+
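+ // Each block descriptor records the offset its data would have if it were
+ // stored uncompressed (offsets start at the ZLIB header) and the actual
+ // offset of its compressed form (starting just past the 24-byte header).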
+ let mut expected_uncmp_ofs = zheader.zheader_offset;
+ let mut expected_cmp_ofs = zheader.zheader_offset + 24;
+ for (index, block) in blocks.iter().enumerate() {
+ if block.uncompressed_ofs != expected_uncmp_ofs {
+ return Err(Error::ZlibTrailerBlockWrongUncmpOfs {
+ index,
+ actual: block.uncompressed_ofs,
+ expected: expected_uncmp_ofs,
+ });
+ }
+ if block.compressed_ofs != expected_cmp_ofs {
+ return Err(Error::ZlibTrailerBlockWrongCmpOfs {
+ index,
+ actual: block.compressed_ofs,
+ expected: expected_cmp_ofs,
+ });
+ }
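+ // Every block except the last should decompress to exactly block_size
+ // bytes; the last block may be shorter, but never longer.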
+ if index < blocks.len() - 1 {
+ if block.uncompressed_size != block_size {
+ warn(Warning::ZlibTrailerBlockWrongSize {
+ index,
+ actual: block.uncompressed_size,
+ expected: block_size,
+ });
+ }
+ } else if block.uncompressed_size > block_size {
+ warn(Warning::ZlibTrailerBlockTooBig {
+ index,
+ actual: block.uncompressed_size,
+ max_expected: block_size,
+ });
+ }
+ // http://www.zlib.net/zlib_tech.html says that the maximum
+ // expansion from compression, with worst-case parameters, is 13.5%
+ // plus 11 bytes. This code checks for an expansion of more than
+ // 14.3% plus 11 bytes.
+ if block.compressed_size > block.uncompressed_size + block.uncompressed_size / 7 + 11 {
+ return Err(Error::ZlibExpansion {
+ index,
+ compressed_size: block.compressed_size,
+ uncompressed_size: block.uncompressed_size,
+ });
+ }
+
+ expected_cmp_ofs += block.compressed_size as u64;
+ expected_uncmp_ofs += block.uncompressed_size as u64;
+ }
+
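+ // The compressed data should end exactly where the trailer begins.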
+ if expected_cmp_ofs != zheader.ztrailer_offset {
+ return Err(Error::ZlibTrailerOffsetInconsistency {
+ descriptors: expected_cmp_ofs,
+ zheader: zheader.ztrailer_offset,
+ });
+ }
+
reader.seek(SeekFrom::Start(start_offset))?;
Ok(Some(ZTrailer {
- offset: ztrailer_ofs,
+ offset: zheader.ztrailer_offset,
int_bias,
zero,
block_size,