iter::repeat_n,
mem::take,
num::NonZeroU8,
+ ops::Range,
};
use thiserror::Error as ThisError;
/// An error encountered reading raw system file records.
///
/// Any error prevents reading further data from the system file.
+#[derive(Debug)]
+pub struct Error {
+ /// Range of file offsets where the error occurred.
+ ///
+ /// When this is `None` or an empty range, the error is displayed without
+ /// any location prefix.
+ offsets: Option<Range<u64>>,
+
+ /// Details of the error, displayed after the location prefix (if any).
+ details: ErrorDetails,
+}
+
+// `Display` is implemented by hand for this type and `Debug` is derived; no
+// trait methods are overridden here, so the trait's defaults apply.
+impl std::error::Error for Error {}
+
+impl Error {
+ /// Constructs an error with the given `details`, optionally located at
+ /// the range of file offsets `offsets`.
+ pub fn new(offsets: Option<Range<u64>>, details: ErrorDetails) -> Self {
+ Self { offsets, details }
+ }
+}
+
+impl Display for Error {
+    /// Writes the location prefix, if there is one, followed by the details.
+    ///
+    /// A missing or empty offset range produces no prefix.  A range that
+    /// covers exactly one offset is reported as a single offset; a longer
+    /// range is reported as its start and end values.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        match &self.offsets {
+            // `start < end` is exactly the "range is not empty" condition,
+            // so the subtraction below cannot underflow.
+            Some(Range { start, end }) if start < end => {
+                if *end - *start > 1 {
+                    write!(f, "Error at file offsets {:#x} to {:#x}: ", start, end)?;
+                } else {
+                    write!(f, "Error at file offset {:#x}: ", start)?;
+                }
+            }
+            _ => (),
+        }
+        write!(f, "{}", &self.details)
+    }
+}
+
+impl From<IoError> for Error {
+    /// Wraps an I/O error as an [Error] that has no associated file offsets.
+    fn from(value: IoError) -> Self {
+        let details: ErrorDetails = value.into();
+        Self {
+            offsets: None,
+            details,
+        }
+    }
+}
+
+/// Details of an [Error].
#[derive(ThisError, Debug)]
-pub enum Error {
+pub enum ErrorDetails {
#[error("Not an SPSS system file")]
NotASystemFile,
InvalidZsavCompression(u32),
#[error(
- "Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}."
+ "Document record has document line count ({n}) greater than the maximum number {max}."
)]
- BadDocumentLength { offset: u64, n: usize, max: usize },
+ BadDocumentLength { n: usize, max: usize },
- #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
- BadRecordType { offset: u64, rec_type: u32 },
+ #[error("Unrecognized record type {0}.")]
+ BadRecordType(u32),
- #[error(
- "In variable record starting at offset {start_offset:#x}, variable width is not in the valid range -1 to 255."
- )]
- BadVariableWidth { start_offset: u64, width: i32 },
+ #[error("Variable width {0} in variable record is not in the valid range -1 to 255.")]
+ BadVariableWidth(i32),
- #[error(
- "In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1."
- )]
- BadVariableLabelCode {
- start_offset: u64,
- code_offset: u64,
- code: u32,
- },
+ #[error("In variable record, variable label code {0} is not 0 or 1.")]
+ BadVariableLabelCode(u32),
- #[error("At offset {offset:#x}, missing value code ({code}) is not -3, -2, 0, 1, 2, or 3.")]
- BadMissingValueCode { offset: u64, code: i32 },
+ #[error("Missing value code ({0}) is not -3, -2, 0, 1, 2, or 3.")]
+ BadMissingValueCode(i32),
- #[error(
- "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
- )]
- BadNumericMissingValueCode { offset: u64, code: i32 },
+ #[error("Numeric missing value code ({0}) is not -3, -2, 0, 1, 2, or 3.")]
+ BadNumericMissingValueCode(i32),
- #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
- BadStringMissingValueCode { offset: u64, code: i32 },
+ #[error("String missing value code ({0}) is not 0, 1, 2, or 3.")]
+ BadStringMissingValueCode(i32),
- #[error(
- "At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}."
- )]
- BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
+ #[error("Number of value labels ({n}) is greater than the maximum number {max}.")]
+ BadNumberOfValueLabels { n: u32, max: u32 },
#[error(
- "At offset {offset:#x}, following value label record, found record type {rec_type} instead of expected type 4 for variable index record"
+ "Following value label record, found record type {0} instead of expected type 4 for variable index record"
)]
- ExpectedVarIndexRecord { offset: u64, rec_type: u32 },
+ ExpectedVarIndexRecord(u32),
#[error(
- "At offset {offset:#x}, number of variables indexes for value labels ({n}) is greater than the maximum number ({max})."
+ "Number of variables indexes for value labels ({n}) is greater than the maximum number ({max})."
)]
- TooManyVarIndexes { offset: u64, n: u32, max: u32 },
+ TooManyVarIndexes { n: u32, max: u32 },
#[error(
- "At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements."
+ "Record type 7 subtype {subtype} is too large with element size {size} and {count} elements."
)]
- ExtensionRecordTooLarge {
- offset: u64,
- subtype: u32,
- size: u32,
- count: u32,
- },
+ ExtensionRecordTooLarge { subtype: u32, size: u32, count: u32 },
- #[error(
- "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case."
- )]
- EofInCase {
- offset: u64,
- case_ofs: u64,
- case_len: usize,
- },
+ #[error("Unexpected end of file {case_ofs} bytes into a {case_len}-byte case.")]
+ EofInCase { case_ofs: u64, case_len: usize },
#[error(
- "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes and {n_chunks} compression chunks into a compressed case."
+ "Unexpected end of file {case_ofs} bytes and {n_chunks} compression chunks into a compressed case."
)]
- EofInCompressedCase {
- offset: u64,
- case_ofs: u64,
- n_chunks: usize,
- },
+ EofInCompressedCase { case_ofs: u64, n_chunks: usize },
- #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
- PartialCompressedCase { offset: u64, case_ofs: u64 },
+ #[error("Data ends {case_ofs} bytes into a compressed case.")]
+ PartialCompressedCase { case_ofs: u64 },
- #[error(
- "At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected."
- )]
- CompressedNumberExpected { offset: u64, case_ofs: u64 },
+ #[error("At {0} bytes into compressed case, a string was found where a number was expected.")]
+ CompressedNumberExpected(u64),
- #[error(
- "At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected."
- )]
- CompressedStringExpected { offset: u64, case_ofs: u64 },
+ #[error("At {0} bytes into compressed case, a number was found where a string was expected.")]
+ CompressedStringExpected(u64),
#[error("Impossible ztrailer_offset {0:#x}.")]
ImpossibleZTrailerOffset(u64),
WrongZlibTrailerBlockSize(u32),
#[error(
- "Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}."
+ "Block count {n_blocks} in ZLIB trailer differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}."
)]
BadZlibTrailerNBlocks {
- offset: u64,
n_blocks: u32,
expected_n_blocks: u64,
ztrailer_len: u64,
#[error("Missing value record with range not allowed for string variable")]
MissingValueStringRange,
- #[error("Missing value record at offset {0:#x} not allowed for long string continuation")]
- MissingValueContinuation(u64),
+ #[error("Missing value not allowed for long string continuation")]
+ MissingValueContinuation,
#[error("Invalid multiple dichotomy label type")]
InvalidMultipleDichotomyLabelType,
999 => Ok(Some(Record::EndOfHeaders(
endian.parse(read_bytes(reader)?),
))),
- _ => Err(Error::BadRecordType {
- offset: reader.stream_position()?,
- rec_type,
- }),
+ _ => Err(Error::new(
+ {
+ let offset = reader.stream_position()?;
+ Some(offset - 4..offset)
+ },
+ ErrorDetails::BadRecordType(rec_type),
+ )),
}
}
match get_encoding(encoding, character_code) {
Ok(encoding) => Ok(encoding),
- Err(err @ EncodingError::Ebcdic) => Err(Error::EncodingError(err)),
+ Err(err @ EncodingError::Ebcdic) => Err(Error::new(None, ErrorDetails::EncodingError(err))),
Err(err) => {
warn(Warning::EncodingError(err));
// Warn that we're using the default encoding.
}
impl TryFrom<[u8; 4]> for Magic {
- type Error = Error;
+ type Error = ErrorDetails;
+ /// Recognizes the 4-byte magic number `value`, failing with
+ /// [ErrorDetails::BadMagic] if it matches none of the known magic numbers.
fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
match value {
Magic::SAV => Ok(Magic::Sav),
Magic::ZSAV => Ok(Magic::Zsav),
Magic::EBCDIC => Ok(Magic::Ebcdic),
- _ => Err(Error::BadMagic(value)),
+ _ => Err(ErrorDetails::BadMagic(value)),
}
}
}
if offset == case_start {
Ok(None)
} else {
- Err(Error::EofInCase {
- offset,
- case_ofs: offset - case_start,
- case_len: case_vars.iter().map(CaseVar::bytes).sum(),
- })
+ Err(Error::new(
+ Some(case_start..offset),
+ ErrorDetails::EofInCase {
+ case_ofs: offset - case_start,
+ case_len: case_vars.iter().map(CaseVar::bytes).sum(),
+ },
+ ))
}
}
) -> Result<Option<Case>, Error> {
let offset = reader.stream_position()?;
if n_chunks > 0 {
- Err(Error::EofInCompressedCase {
- case_ofs: offset - case_start,
- n_chunks,
- offset,
- })
+ Err(Error::new(
+ Some(case_start..offset),
+ ErrorDetails::EofInCompressedCase {
+ case_ofs: offset - case_start,
+ n_chunks,
+ },
+ ))
} else {
Ok(None)
}
if let Some(expected_cases) = self.expected_cases
&& expected_cases != self.read_cases
{
- return Some(Err(Error::WrongNumberOfCases {
- expected: expected_cases,
- actual: self.read_cases,
- }));
+ return Some(Err(Error::new(
+ None,
+ ErrorDetails::WrongNumberOfCases {
+ expected: expected_cases,
+ actual: self.read_cases,
+ },
+ )));
} else {
return None;
}
borrow::Cow,
collections::BTreeMap,
fmt::{Debug, Formatter},
- io::{Read, Seek, SeekFrom},
+ io::{Cursor, ErrorKind, Read, Seek, SeekFrom},
ops::Range,
str::from_utf8,
};
endian::{Endian, Parse},
identifier::{Error as IdError, Identifier},
sys::raw::{
- read_bytes, read_string, read_vec, DecodedRecord, Decoder, Error, Magic, RawDatum,
- RawStrArray, RawWidth, Record, VarTypes, Warning,
+ read_bytes, read_string, read_vec, DecodedRecord, Decoder, Error, ErrorDetails, Magic,
+ RawDatum, RawStrArray, RawWidth, Record, UntypedDatum, VarTypes, Warning,
},
};
+use binrw::BinRead;
+
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Compression {
Simple,
where
S: Debug,
{
- /// Offset in file.
- pub offsets: Range<u64>,
-
/// Magic number.
pub magic: Magic,
impl HeaderRecord<RawString> {
+ /// Reads 176 bytes of file header from `r` and parses them.
+ ///
+ /// # Errors
+ ///
+ /// If reading the 176 bytes fails with an unexpected end of file, the
+ /// error is [ErrorDetails::NotASystemFile]; any other I/O error is
+ /// converted into error details directly.  Neither carries file offsets.
+ /// Errors found while parsing the header bytes are reported for the
+ /// offset range `0..176`.
pub fn read<R: Read + Seek>(r: &mut R, warn: &mut dyn FnMut(Warning)) -> Result<Self, Error> {
- let start = r.stream_position()?;
+ let header_bytes = read_vec(r, 176).map_err(|e| {
+ Error::new(
+ None,
+ if e.kind() == ErrorKind::UnexpectedEof {
+ ErrorDetails::NotASystemFile
+ } else {
+ e.into()
+ },
+ )
+ })?;
+ // NOTE(review): the fixed range `0..176` assumes the header starts at
+ // file offset 0 -- confirm against callers.
+ Self::read_inner(&header_bytes, warn).map_err(|details| Error::new(Some(0..176), details))
+ }
+
+ fn read_inner(
+ header_bytes: &[u8],
+ warn: &mut dyn FnMut(Warning),
+ ) -> Result<Self, ErrorDetails> {
+ #[derive(BinRead)]
+ struct RawHeader {
+ magic: [u8; 4],
+ eye_catcher: [u8; 60],
+ layout_code: u32,
+ nominal_case_size: u32,
+ compression_code: u32,
+ weight_index: u32,
+ n_cases: u32,
+ bias: f64,
+ creation_date: [u8; 9],
+ creation_time: [u8; 8],
+ file_label: [u8; 64],
+ _padding: [u8; 3],
+ }
+
+ let be_header = RawHeader::read_be(&mut Cursor::new(&header_bytes)).unwrap();
+ let le_header = RawHeader::read_le(&mut Cursor::new(&header_bytes)).unwrap();
- let magic: [u8; 4] = read_bytes(r)?;
- let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
+ let magic: Magic = be_header
+ .magic
+ .try_into()
+ .map_err(|_| ErrorDetails::NotASystemFile)?;
- let eye_catcher = RawString(read_vec(r, 60)?);
- let layout_code: [u8; 4] = read_bytes(r)?;
- let endian = Endian::identify_u32(2, layout_code)
- .or_else(|| Endian::identify_u32(2, layout_code))
- .ok_or(Error::NotASystemFile)?;
- let layout_code = endian.parse(layout_code);
+ let (endian, header) = if be_header.layout_code == 2 {
+ (Endian::Big, &be_header)
+ } else if le_header.layout_code == 2 {
+ (Endian::Little, &le_header)
+ } else {
+ return Err(ErrorDetails::NotASystemFile);
+ };
- let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
- let nominal_case_size = (1..i32::MAX as u32 / 16)
- .contains(&nominal_case_size)
- .then_some(nominal_case_size);
+ let nominal_case_size = (1..i32::MAX.cast_unsigned() / 16)
+ .contains(&header.nominal_case_size)
+ .then_some(header.nominal_case_size);
- let compression_code: u32 = endian.parse(read_bytes(r)?);
- let compression = match (magic, compression_code) {
+ let compression = match (magic, header.compression_code) {
(Magic::Zsav, 2) => Some(Compression::ZLib),
- (Magic::Zsav, code) => return Err(Error::InvalidZsavCompression(code)),
+ (Magic::Zsav, code) => return Err(ErrorDetails::InvalidZsavCompression(code)),
(_, 0) => None,
(_, 1) => Some(Compression::Simple),
- (_, code) => return Err(Error::InvalidSavCompression(code)),
+ (_, code) => return Err(ErrorDetails::InvalidSavCompression(code)),
};
- let weight_index: u32 = endian.parse(read_bytes(r)?);
- let weight_index = (weight_index > 0).then_some(weight_index);
+ let weight_index = (header.weight_index > 0).then_some(header.weight_index);
- let n_cases: u32 = endian.parse(read_bytes(r)?);
- let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
+ let n_cases = (header.n_cases < i32::MAX as u32 / 2).then_some(header.n_cases);
- let bias: f64 = endian.parse(read_bytes(r)?);
- if bias != 100.0 && bias != 0.0 {
- warn(Warning::UnexpectedBias(bias));
+ if header.bias != 100.0 && header.bias != 0.0 {
+ warn(Warning::UnexpectedBias(header.bias));
}
- let creation_date = RawString(read_vec(r, 9)?);
- let creation_time = RawString(read_vec(r, 8)?);
- let file_label = RawString(read_vec(r, 64)?);
- let _: [u8; 3] = read_bytes(r)?;
+ let creation_date = RawString(header.creation_date.into());
+ let creation_time = RawString(header.creation_time.into());
+ let file_label = RawString(header.file_label.into());
Ok(HeaderRecord {
- offsets: start..r.stream_position()?,
magic,
- layout_code,
+ layout_code: header.layout_code,
nominal_case_size,
compression,
weight_index,
n_cases,
- bias,
+ bias: header.bias,
creation_date,
creation_time,
- eye_catcher,
+ eye_catcher: RawString(header.eye_catcher.into()),
file_label,
endian,
})
weight_index: self.weight_index,
n_cases: self.n_cases,
file_label,
- offsets: self.offsets.clone(),
magic: self.magic,
layout_code: self.layout_code,
nominal_case_size: self.nominal_case_size,
}
impl MissingValues {
+ /// Reads missing values from `r`, as selected by `code`:
+ ///
+ /// - 0: nothing is read and the default (empty) value is returned.
+ /// - 1 to 3: that many individual values.
+ /// - -2: a range only.
+ /// - -3: a range plus one individual value.
+ ///
+ /// # Errors
+ ///
+ /// Any other `code` fails with [ErrorDetails::BadMissingValueCode],
+ /// located at `offsets`.  Errors from reading the values themselves are
+ /// located at `offsets` with its end extended by 8 bytes per value, where
+ /// a range counts as two values.
- pub fn read<R: Read + Seek>(
+ pub fn read<R>(
r: &mut R,
- offset: u64,
+ offsets: Range<u64>,
raw_width: RawWidth,
code: i32,
endian: Endian,
warn: &mut dyn FnMut(Warning),
- ) -> Result<Self, Error> {
+ ) -> Result<Self, Error>
+ where
+ R: Read + Seek,
+ {
let (individual_values, has_range) = match code {
0 => return Ok(Self::default()),
1..=3 => (code as usize, false),
-2 => (0, true),
-3 => (1, true),
- _ => return Err(Error::BadMissingValueCode { offset, code }),
+ _ => {
+ return Err(Error::new(
+ Some(offsets),
+ ErrorDetails::BadMissingValueCode(code),
+ ))
+ }
};
+ Self::read_inner(r, raw_width, individual_values, has_range, endian, warn).map_err(
+ |details| {
+ Error::new(
+ {
+ // A range contributes two 8-byte values (see `8 * n` below).
+ let n = individual_values + if has_range { 2 } else { 0 };
+ Some(offsets.start..offsets.end + 8 * n as u64)
+ },
+ details,
+ )
+ },
+ )
+ }
+
+ fn read_inner<R>(
+ r: &mut R,
+ raw_width: RawWidth,
+ individual_values: usize,
+ has_range: bool,
+ endian: Endian,
+ warn: &mut dyn FnMut(Warning),
+ ) -> Result<Self, ErrorDetails>
+ where
+ R: Read + Seek,
+ {
let mut values = Vec::with_capacity(individual_values);
let range = if has_range {
let low = read_bytes::<8, _>(r)?;
.collect();
return Ok(Self::new(values, None).unwrap());
}
- Err(()) => warn(Warning::MissingValueContinuation(offset)),
+ Err(()) => warn(Warning::MissingValueContinuation),
}
Ok(Self::default())
}
endian: Endian,
warn: &mut dyn FnMut(Warning),
) -> Result<Record, Error> {
+ #[derive(BinRead)]
+ struct RawVariableRecord {
+ width: i32,
+ has_variable_label: u32,
+ missing_value_code: i32,
+ print_format: u32,
+ write_format: u32,
+ name: [u8; 8],
+ }
+
let start_offset = r.stream_position()?;
- let width: i32 = endian.parse(read_bytes(r)?);
- let width: RawWidth = width.try_into().map_err(|_| Error::BadVariableWidth {
- start_offset,
- width,
+ let offsets = start_offset..start_offset + 28;
+ let raw_record =
+ read_vec(r, 28).map_err(|e| Error::new(Some(offsets.clone()), e.into()))?;
+ let raw_record =
+ RawVariableRecord::read_options(&mut Cursor::new(&raw_record), endian, ()).unwrap();
+
+ let width: RawWidth = raw_record.width.try_into().map_err(|_| {
+ Error::new(
+ Some(offsets.clone()),
+ ErrorDetails::BadVariableWidth(raw_record.width),
+ )
})?;
- let code_offset = r.stream_position()?;
- let has_variable_label: u32 = endian.parse(read_bytes(r)?);
- let missing_value_code: i32 = endian.parse(read_bytes(r)?);
- let print_format = RawFormat(endian.parse(read_bytes(r)?));
- let write_format = RawFormat(endian.parse(read_bytes(r)?));
- let name = RawString(read_vec(r, 8)?);
-
- let label = match has_variable_label {
+
+ let label = match raw_record.has_variable_label {
0 => None,
1 => {
let len: u32 = endian.parse(read_bytes(r)?);
Some(label)
}
_ => {
- return Err(Error::BadVariableLabelCode {
- start_offset,
- code_offset,
- code: has_variable_label,
- });
+ return Err(Error::new(
+ Some(offsets),
+ ErrorDetails::BadVariableLabelCode(raw_record.has_variable_label),
+ ));
}
};
- let missing_values =
- MissingValues::read(r, start_offset, width, missing_value_code, endian, warn)?;
+ let missing_values = MissingValues::read(
+ r,
+ offsets,
+ width,
+ raw_record.missing_value_code,
+ endian,
+ warn,
+ )?;
let end_offset = r.stream_position()?;
Ok(Record::Variable(VariableRecord {
offsets: start_offset..end_offset,
width,
- name,
- print_format,
- write_format,
+ name: RawString(raw_record.name.into()),
+ print_format: RawFormat(raw_record.print_format),
+ write_format: RawFormat(raw_record.write_format),
missing_values,
label,
}))
let label_offset = r.stream_position()?;
let n: u32 = endian.parse(read_bytes(r)?);
if n > Self::MAX_LABELS {
- return Err(Error::BadNumberOfValueLabels {
- offset: label_offset,
- n,
- max: Self::MAX_LABELS,
- });
+ return Err(Error::new(
+ Some(label_offset..label_offset + 4),
+ ErrorDetails::BadNumberOfValueLabels {
+ n,
+ max: Self::MAX_LABELS,
+ },
+ ));
}
let mut labels = Vec::new();
for _ in 0..n {
- let value = super::UntypedDatum(read_bytes(r)?);
+ let value = UntypedDatum(read_bytes(r)?);
let label_len: u8 = endian.parse(read_bytes(r)?);
let label_len = label_len as usize;
let padded_len = (label_len + 1).next_multiple_of(8);
let index_offset = r.stream_position()?;
let rec_type: u32 = endian.parse(read_bytes(r)?);
if rec_type != 4 {
- return Err(Error::ExpectedVarIndexRecord {
- offset: index_offset,
- rec_type,
- });
+ return Err(Error::new(
+ Some(index_offset..index_offset + 4),
+ ErrorDetails::ExpectedVarIndexRecord(rec_type),
+ ));
}
let n: u32 = endian.parse(read_bytes(r)?);
if n > Self::MAX_INDEXES {
- return Err(Error::TooManyVarIndexes {
- offset: index_offset,
- n,
- max: Self::MAX_INDEXES,
- });
+ return Err(Error::new(
+ Some(index_offset + 4..index_offset + 8),
+ ErrorDetails::TooManyVarIndexes {
+ n,
+ max: Self::MAX_INDEXES,
+ },
+ ));
} else if n == 0 {
- dbg!();
warn(Warning::NoVarIndexes {
offset: index_offset,
});
let n: u32 = endian.parse(read_bytes(r)?);
let n = n as usize;
if n > Self::MAX_LINES {
- Err(Error::BadDocumentLength {
- offset: start_offset,
- n,
- max: Self::MAX_LINES,
- })
+ Err(Error::new(
+ Some(start_offset..start_offset + 4),
+ ErrorDetails::BadDocumentLength {
+ n,
+ max: Self::MAX_LINES,
+ },
+ ))
} else {
+ let offsets = start_offset..start_offset.saturating_add((n * DOC_LINE_LEN) as u64);
let mut lines = Vec::with_capacity(n);
for _ in 0..n {
- lines.push(RawStrArray(read_bytes(r)?));
+ lines.push(RawStrArray(
+ read_bytes(r).map_err(|e| Error::new(Some(offsets.clone()), e.into()))?,
+ ));
}
- let end_offset = r.stream_position()?;
- Ok(Record::Document(DocumentRecord {
- offsets: start_offset..end_offset,
- lines,
- }))
+ Ok(Record::Document(DocumentRecord { offsets, lines }))
}
}
let mut missing_value_set = Vec::new();
while !input.is_empty() {
let var_name = read_string(&mut input, endian)?;
- dbg!(&var_name);
let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
let value_len: u32 = endian.parse(read_bytes(&mut input)?);
if value_len != 8 {
offset,
value_len,
});
- read_vec(
- &mut input,
- dbg!(value_len as usize * n_missing_values as usize),
- )?;
+ read_vec(&mut input, value_len as usize * n_missing_values as usize)?;
continue;
}
let mut missing_values = Vec::new();
impl LongName {
fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
let Some((short_name, long_name)) = input.split_once('=') else {
- return Err(dbg!(Warning::LongNameMissingEquals));
+ return Err(Warning::LongNameMissingEquals);
};
let short_name = decoder
.new_identifier(short_name)
let size: u32 = endian.parse(read_bytes(r)?);
let count = endian.parse(read_bytes(r)?);
let Some(product) = size.checked_mul(count) else {
- return Err(Error::ExtensionRecordTooLarge {
- offset: header_offset,
- subtype,
- size,
- count,
- });
+ return Err(Error::new(
+ Some(header_offset..header_offset + 8),
+ ErrorDetails::ExtensionRecordTooLarge {
+ subtype,
+ size,
+ count,
+ },
+ ));
};
let start_offset = r.stream_position()?;
let data = read_vec(r, product as usize)?;
let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
if zheader_offset != offset {
- return Err(Error::UnexpectedZHeaderOffset {
+ Err(ErrorDetails::UnexpectedZHeaderOffset {
actual: zheader_offset,
expected: offset,
- });
- }
-
- if ztrailer_offset < offset {
- return Err(Error::ImpossibleZTrailerOffset(ztrailer_offset));
- }
-
- if ztrailer_len < 24 || ztrailer_len % 24 != 0 {
- return Err(Error::InvalidZTrailerLength(ztrailer_len));
+ })
+ } else if ztrailer_offset < offset {
+ Err(ErrorDetails::ImpossibleZTrailerOffset(ztrailer_offset))
+ } else if ztrailer_len < 24 || ztrailer_len % 24 != 0 {
+ Err(ErrorDetails::InvalidZTrailerLength(ztrailer_len))
+ } else {
+ Ok(ZHeader {
+ offset,
+ zheader_offset,
+ ztrailer_offset,
+ ztrailer_len,
+ })
}
-
- Ok(ZHeader {
- offset,
- zheader_offset,
- ztrailer_offset,
- ztrailer_len,
- })
+ .map_err(|details| Error::new(Some(offset..offset + 12), details))
}
}
compressed_size: endian.parse(read_bytes(r)?),
})
}
+
+    /// Returns true if the uncompressed and compressed sizes are plausible.
+    ///
+    /// [zlib Technical Details] says that the maximum expansion from
+    /// compression, with worst-case parameters, is 13.5% plus 11 bytes.  This
+    /// code checks for an expansion of more than 14.3% (one seventh) plus 11
+    /// bytes.
+    ///
+    /// If the limit itself is too large for the size type, it saturates at
+    /// the type's maximum.  Every representable compressed size is then within
+    /// the limit, so such a block is accepted rather than misreported as
+    /// excessive expansion.
+    ///
+    /// [zlib Technical Details]: http://www.zlib.net/zlib_tech.html
+    fn has_plausible_sizes(&self) -> bool {
+        let slack = self.uncompressed_size / 7 + 11;
+        self.compressed_size <= self.uncompressed_size.saturating_add(slack)
+    }
}
impl ZTrailer {
return Ok(None);
}
let int_bias = endian.parse(read_bytes(reader)?);
- if int_bias as f64 != -bias {
- return Err(Error::WrongZlibTrailerBias {
- actual: int_bias,
- expected: -bias,
- });
- }
let zero = endian.parse(read_bytes(reader)?);
- if zero != 0 {
- return Err(Error::WrongZlibTrailerZero(zero));
- }
let block_size = endian.parse(read_bytes(reader)?);
- if block_size != 0x3ff000 {
- return Err(Error::WrongZlibTrailerBlockSize(block_size));
- }
let n_blocks: u32 = endian.parse(read_bytes(reader)?);
- let expected_n_blocks = (zheader.ztrailer_len - 24) / 24;
- if n_blocks as u64 != expected_n_blocks {
- return Err(Error::BadZlibTrailerNBlocks {
- offset: zheader.ztrailer_offset,
+ if int_bias as f64 != -bias {
+ Err(ErrorDetails::WrongZlibTrailerBias {
+ actual: int_bias,
+ expected: -bias,
+ })
+ } else if zero != 0 {
+ Err(ErrorDetails::WrongZlibTrailerZero(zero))
+ } else if block_size != 0x3ff000 {
+ Err(ErrorDetails::WrongZlibTrailerBlockSize(block_size))
+ } else if let expected_n_blocks = (zheader.ztrailer_len - 24) / 24
+ && n_blocks as u64 != expected_n_blocks
+ {
+ Err(ErrorDetails::BadZlibTrailerNBlocks {
n_blocks,
expected_n_blocks,
ztrailer_len: zheader.ztrailer_len,
- });
+ })
+ } else {
+ Ok(())
}
+ .map_err(|details| Error::new(Some(start_offset..start_offset + 24), details))?;
+
let blocks = (0..n_blocks)
.map(|_| ZBlock::read(reader, endian))
.collect::<Result<Vec<_>, _>>()?;
let mut expected_cmp_ofs = zheader.zheader_offset + 24;
for (index, block) in blocks.iter().enumerate() {
if block.uncompressed_ofs != expected_uncmp_ofs {
- return Err(Error::ZlibTrailerBlockWrongUncmpOfs {
+ Err(ErrorDetails::ZlibTrailerBlockWrongUncmpOfs {
index,
actual: block.uncompressed_ofs,
expected: expected_cmp_ofs,
- });
- }
- if block.compressed_ofs != expected_cmp_ofs {
- return Err(Error::ZlibTrailerBlockWrongCmpOfs {
+ })
+ } else if block.compressed_ofs != expected_cmp_ofs {
+ Err(ErrorDetails::ZlibTrailerBlockWrongCmpOfs {
index,
actual: block.compressed_ofs,
expected: expected_cmp_ofs,
- });
+ })
+ } else if !block.has_plausible_sizes() {
+ Err(ErrorDetails::ZlibExpansion {
+ index,
+ compressed_size: block.compressed_size,
+ uncompressed_size: block.uncompressed_size,
+ })
+ } else {
+ Ok(())
}
+ .map_err(|details| {
+ Error::new(
+ {
+ let block_start = start_offset + 24 + 24 * index as u64;
+ Some(block_start..block_start + 24)
+ },
+ details,
+ )
+ })?;
+
if index < blocks.len() - 1 {
if block.uncompressed_size != block_size {
warn(Warning::ZlibTrailerBlockWrongSize {
});
}
}
- // http://www.zlib.net/zlib_tech.html says that the maximum
- // expansion from compression, with worst-case parameters, is 13.5%
- // plus 11 bytes. This code checks for an expansion of more than
- // 14.3% plus 11 bytes.
- if block.compressed_size > block.uncompressed_size + block.uncompressed_size / 7 + 11 {
- return Err(Error::ZlibExpansion {
- index,
- compressed_size: block.compressed_size,
- uncompressed_size: block.uncompressed_size,
- });
- }
expected_cmp_ofs += block.compressed_size as u64;
expected_uncmp_ofs += block.uncompressed_size as u64;
}
if expected_cmp_ofs != zheader.ztrailer_offset {
- return Err(Error::ZlibTrailerOffsetInconsistency {
- descriptors: expected_cmp_ofs,
- zheader: zheader.ztrailer_offset,
- });
+ return Err(Error::new(
+ Some(start_offset..start_offset + 24 + 24 * n_blocks as u64),
+ ErrorDetails::ZlibTrailerOffsetInconsistency {
+ descriptors: expected_cmp_ofs,
+ zheader: zheader.ztrailer_offset,
+ },
+ ));
}
reader.seek(SeekFrom::Start(start_offset))?;