From: Ben Pfaff Date: Mon, 31 Jul 2023 16:50:46 +0000 (-0700) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2f5387c960ae55adfd09d698566430947c28615e;p=pspp work --- diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 8bf72fe02c..6dc13b586a 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,18 +1,11 @@ -use endian::{Endian, Parse, ToBytes}; -use flate2::read::ZlibDecoder; -use num::Integer; -use num_derive::FromPrimitive; -use std::{ - collections::VecDeque, - io::{Error as IoError, Read, Seek, SeekFrom}, - iter::FusedIterator, -}; -use thiserror::Error; +use std::io::Error as IoError; +use thiserror::Error as ThisError; pub mod endian; +pub mod raw; pub mod sack; -#[derive(Error, Debug)] +#[derive(ThisError, Debug)] pub enum Error { #[error("Not an SPSS system file")] NotASystemFile, @@ -32,9 +25,6 @@ pub enum Error { #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")] BadVariableWidth { offset: u64, width: i32 }, - #[error("Misplaced type 4 record near offset {0:#x}.")] - MisplacedType4Record(u64), - #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")] BadDocumentLength { offset: u64, n: u32, max: u32 }, @@ -55,9 +45,6 @@ pub enum Error { #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")] BadNumberOfValueLabels { offset: u64, n: u32, max: u32 }, - #[error("At offset {offset:#x}, variable index record (type 4) does not immediately follow value label record (type 3) as it should.")] - MissingVariableIndexRecord { offset: u64 }, - #[error("At offset {offset:#x}, number of variables indexes ({n}) is greater than the maximum number ({max}).")] BadNumberOfVarIndexes { offset: u64, n: u32, max: u32 }, @@ -69,15 +56,6 @@ pub enum Error { count: u32, }, - #[error("Wrong ZLIB data header offset {zheader_offset:#x} (expected {offset:#x}).")] - BadZlibHeaderOffset { offset: u64, zheader_offset: u64 }, - - #[error("At offset {offset:#x}, impossible ZLIB trailer offset {ztrailer_offset:#x}.")] - BadZlibTrailerOffset { offset: u64, ztrailer_offset: u64 }, - - #[error("At offset {offset:#x}, impossible ZLIB trailer length {ztrailer_len}.")] - BadZlibTrailerLen { offset: u64, ztrailer_len: u64 }, - #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")] EofInCase { offset: u64, @@ -107,918 +85,3 @@ pub enum Error { ztrailer_len: u64, }, } - -#[derive(Error, Debug)] -pub enum Warning { - #[error("Unexpected floating-point bias {0} or unrecognized floating-point format.")] - UnexpectedBias(f64), - - #[error("Duplicate type 6 (document) record.")] - DuplicateDocumentRecord, -} - -#[derive(Copy, Clone, Debug)] -pub enum Compression { - Simple, - ZLib, -} - -pub enum Record { - Header(Header), - Document(Document), - Variable(Variable), - ValueLabel(ValueLabel), - VarIndexes(VarIndexes), - Extension(Extension), - EndOfHeaders, - ZHeader(ZHeader), - ZTrailer(ZTrailer), - Case(Vec), -} - -pub struct Header { - /// Magic number. - pub magic: Magic, - - /// Eye-catcher string, product name, in the file's encoding. Padded - /// on the right with spaces. - pub eye_catcher: [u8; 60], - - /// Layout code, normally either 2 or 3. - pub layout_code: u32, - - /// Number of variable positions, or `None` if the value in the file is - /// questionably trustworthy. 
- pub nominal_case_size: Option, - - /// Compression type, if any, - pub compression: Option, - - /// 0-based variable index of the weight variable, or `None` if the file is - /// unweighted. - pub weight_index: Option, - - /// Claimed number of cases, if known. - pub n_cases: Option, - - /// Compression bias, usually 100.0. - pub bias: f64, - - /// `dd mmm yy` in the file's encoding. - pub creation_date: [u8; 9], - - /// `HH:MM:SS` in the file's encoding. - pub creation_time: [u8; 8], - - /// File label, in the file's encoding. Padded on the right with spaces. - pub file_label: [u8; 64], - - /// Endianness of the data in the file header. - pub endian: Endian, -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash)] -pub struct Magic([u8; 4]); - -impl Magic { - /// Magic number for a regular system file. - pub const SAV: Magic = Magic(*b"$FL2"); - - /// Magic number for a system file that contains zlib-compressed data. - pub const ZSAV: Magic = Magic(*b"$FL3"); - - /// Magic number for an EBDIC-encoded system file. This is `$FL2` encoded - /// in EBCDIC. - pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]); -} - -impl TryFrom<[u8; 4]> for Magic { - type Error = Error; - - fn try_from(value: [u8; 4]) -> Result { - let magic = Magic(value); - match magic { - Magic::SAV | Magic::ZSAV | Magic::EBCDIC => Ok(magic), - _ => Err(Error::BadMagic(value)), - } - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash)] -pub enum VarType { - Number, - String, -} - -impl VarType { - fn from_width(width: i32) -> VarType { - match width { - 0 => VarType::Number, - _ => VarType::String, - } - } -} - -trait State { - #[allow(clippy::type_complexity)] - fn read(self: Box) -> Result)>, Error>; -} - -struct Start { - reader: R, -} - -struct CommonState { - reader: R, - endian: Endian, - bias: f64, - compression: Option, - var_types: Vec, -} - -impl State for Start { - fn read(mut self: Box) -> Result)>, Error> { - let header = read_header(&mut self.reader)?; - let next_state = Headers(CommonState { - reader: self.reader, - endian: header.endian, - bias: header.bias, - compression: header.compression, - var_types: Vec::new(), - }); - Ok(Some((Record::Header(header), Box::new(next_state)))) - } -} - -struct Headers(CommonState); - -impl State for Headers { - fn read(mut self: Box) -> Result)>, Error> { - let endian = self.0.endian; - let rec_type: u32 = endian.parse(read_bytes(&mut self.0.reader)?); - let record = match rec_type { - 2 => { - let variable = read_variable_record(&mut self.0.reader, endian)?; - self.0.var_types.push(VarType::from_width(variable.width)); - Record::Variable(variable) - } - 3 => Record::ValueLabel(read_value_label_record(&mut self.0.reader, endian)?), - 4 => Record::VarIndexes(read_var_indexes_record(&mut self.0.reader, endian)?), - 6 => Record::Document(read_document_record(&mut self.0.reader, endian)?), - 7 => Record::Extension(read_extension_record(&mut self.0.reader, endian)?), - 999 => { - let _: [u8; 4] = read_bytes(&mut self.0.reader)?; - let next_state: Box = match self.0.compression { - None => Box::new(Data(self.0)), - Some(Compression::Simple) => Box::new(CompressedData::new(self.0)), - Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)), - }; - return Ok(Some((Record::EndOfHeaders, next_state))); - } - _ => { - return Err(Error::BadRecordType { - offset: self.0.reader.stream_position()?, - rec_type, - }) - } - }; - Ok(Some((record, self))) - } -} - -struct ZlibHeader(CommonState); - -impl State for ZlibHeader { - fn read(mut self: Box) -> Result)>, Error> { - let zheader = 
read_zheader(&mut self.0.reader, self.0.endian)?; - Ok(Some((Record::ZHeader(zheader), self))) - } -} - -struct ZlibTrailer(CommonState, ZHeader); - -impl State for ZlibTrailer { - fn read(mut self: Box) -> Result)>, Error> { - let retval = read_ztrailer(&mut self.0.reader, self.0.endian, self.1.ztrailer_offset, self.1.ztrailer_len)?; - let next_state = Box::new(CompressedData::new(CommonState { - reader: ZlibDecodeMultiple::new(self.0.reader), - endian: self.0.endian, - bias: self.0.bias, - compression: self.0.compression, - var_types: self.0.var_types - })); - match retval { - None => next_state.read(), - Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state))) - } - } -} - -struct Data(CommonState); - -impl State for Data { - fn read(mut self: Box) -> Result)>, Error> { - let case_start = self.0.reader.stream_position()?; - let mut values = Vec::with_capacity(self.0.var_types.len()); - for (i, &var_type) in self.0.var_types.iter().enumerate() { - let Some(raw) = try_read_bytes(&mut self.0.reader)? else { - if i == 0 { - return Ok(None); - } else { - let offset = self.0.reader.stream_position()?; - return Err(Error::EofInCase { - offset, - case_ofs: offset - case_start, - case_len: self.0.var_types.len() * 8, - }); - } - }; - values.push(Value::from_raw(var_type, raw, self.0.endian)); - } - Ok(Some((Record::Case(values), self))) - } -} - -struct CompressedData { - common: CommonState, - codes: VecDeque, -} - -impl CompressedData { - fn new(common: CommonState) -> CompressedData { - CompressedData { common, codes: VecDeque::new() } - } -} - -impl State for CompressedData { - fn read(mut self: Box) -> Result)>, Error> { - let case_start = self.common.reader.stream_position()?; - let mut values = Vec::with_capacity(self.common.var_types.len()); - for (i, &var_type) in self.common.var_types.iter().enumerate() { - let value = loop { - let Some(code) = self.codes.pop_front() else { - let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.common.reader)? - else { - if i == 0 { - return Ok(None); - } else { - let offset = self.common.reader.stream_position()?; - return Err(Error::EofInCompressedCase { - offset, - case_ofs: offset - case_start, - }); - } - }; - self.codes.extend(new_codes.into_iter()); - continue; - }; - match code { - 0 => (), - 1..=251 => match var_type { - VarType::Number => break Value::Number(Some(code as f64 - self.common.bias)), - VarType::String => { - break Value::String(self.common.endian.to_bytes(code as f64 - self.common.bias)) - } - }, - 252 => { - if i == 0 { - return Ok(None); - } else { - let offset = self.common.reader.stream_position()?; - return Err(Error::PartialCompressedCase { - offset, - case_ofs: offset - case_start, - }); - } - } - 253 => { - break Value::from_raw( - var_type, - read_bytes(&mut self.common.reader)?, - self.common.endian, - ) - } - 254 => match var_type { - VarType::String => break Value::String(*b" "), // XXX EBCDIC - VarType::Number => { - return Err(Error::CompressedStringExpected { - offset: case_start, - case_ofs: self.common.reader.stream_position()? - case_start, - }) - } - }, - 255 => match var_type { - VarType::Number => break Value::Number(None), - VarType::String => { - return Err(Error::CompressedNumberExpected { - offset: case_start, - case_ofs: self.common.reader.stream_position()? 
- case_start, - }) - } - }, - } - }; - values.push(value); - } - Ok(Some((Record::Case(values), self))) - } -} - -struct ZlibDecodeMultiple -where - R: Read + Seek, -{ - reader: Option>, -} - -impl ZlibDecodeMultiple -where - R: Read + Seek, -{ - fn new(reader: R) -> ZlibDecodeMultiple { - ZlibDecodeMultiple { - reader: Some(ZlibDecoder::new(reader)), - } - } -} - -impl Read for ZlibDecodeMultiple -where - R: Read + Seek, -{ - fn read(&mut self, buf: &mut [u8]) -> Result { - loop { - match self.reader.as_mut().unwrap().read(buf)? { - 0 => { - let inner = self.reader.take().unwrap().into_inner(); - self.reader = Some(ZlibDecoder::new(inner)); - } - n => return Ok(n), - }; - } - } -} - -impl Seek for ZlibDecodeMultiple -where - R: Read + Seek, -{ - fn seek(&mut self, pos: SeekFrom) -> Result { - self.reader.as_mut().unwrap().get_mut().seek(pos) - } -} - -#[derive(Copy, Clone)] -pub enum Value { - Number(Option), - String([u8; 8]), -} - -impl Value { - pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value { - match var_type { - VarType::String => Value::String(raw), - VarType::Number => { - let number: f64 = endian.parse(raw); - Value::Number((number != -f64::MAX).then_some(number)) - } - } - } -} - -pub struct Reader { - state: Option>, -} - -impl Reader { - pub fn new(reader: R) -> Result { - Ok(Reader { - state: Some(Box::new(Start { reader })), - }) - } -} - -impl Iterator for Reader { - type Item = Result; - - fn next(&mut self) -> Option { - match self.state.take()?.read() { - Ok(Some((record, next_state))) => { - self.state = Some(next_state); - Some(Ok(record)) - } - Ok(None) => None, - Err(error) => Some(Err(error)), - } - } -} - -impl FusedIterator for Reader {} - -fn read_header(r: &mut R) -> Result { - let magic: [u8; 4] = read_bytes(r)?; - let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?; - - let eye_catcher: [u8; 60] = read_bytes(r)?; - let layout_code: [u8; 4] = read_bytes(r)?; - let endian = Endian::identify_u32(2, layout_code) - .or_else(|| Endian::identify_u32(2, layout_code)) - .ok_or_else(|| Error::NotASystemFile)?; - let layout_code = endian.parse(layout_code); - - let nominal_case_size: u32 = endian.parse(read_bytes(r)?); - let nominal_case_size = - (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size); - - let compression_code: u32 = endian.parse(read_bytes(r)?); - let compression = match (magic, compression_code) { - (Magic::ZSAV, 2) => Some(Compression::ZLib), - (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)), - (_, 0) => None, - (_, 1) => Some(Compression::Simple), - (_, code) => return Err(Error::InvalidSavCompression(code)), - }; - - let weight_index: u32 = endian.parse(read_bytes(r)?); - let weight_index = (weight_index > 0).then_some(weight_index - 1); - - let n_cases: u32 = endian.parse(read_bytes(r)?); - let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases); - - let bias: f64 = endian.parse(read_bytes(r)?); - - let creation_date: [u8; 9] = read_bytes(r)?; - let creation_time: [u8; 8] = read_bytes(r)?; - let file_label: [u8; 64] = read_bytes(r)?; - let _: [u8; 3] = read_bytes(r)?; - - Ok(Header { - magic, - layout_code, - nominal_case_size, - compression, - weight_index, - n_cases, - bias, - creation_date, - creation_time, - eye_catcher, - file_label, - endian, - }) -} - -pub struct Variable { - /// Offset from the start of the file to the start of the record. - pub offset: u64, - - /// Variable width, in the range -1..=255. 
- pub width: i32, - - /// Variable name, padded on the right with spaces. - pub name: [u8; 8], - - /// Print format. - pub print_format: u32, - - /// Write format. - pub write_format: u32, - - /// Missing value code, one of -3, -2, 0, 1, 2, or 3. - pub missing_value_code: i32, - - /// Raw missing values, up to 3 of them. - pub missing: Vec<[u8; 8]>, - - /// Optional variable label. - pub label: Option>, -} - -fn read_variable_record(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; - let width: i32 = endian.parse(read_bytes(r)?); - let has_variable_label: u32 = endian.parse(read_bytes(r)?); - let missing_value_code: i32 = endian.parse(read_bytes(r)?); - let print_format: u32 = endian.parse(read_bytes(r)?); - let write_format: u32 = endian.parse(read_bytes(r)?); - let name: [u8; 8] = read_bytes(r)?; - - let label = match has_variable_label { - 0 => None, - 1 => { - let len: u32 = endian.parse(read_bytes(r)?); - let read_len = len.min(65535) as usize; - let label = Some(read_vec(r, read_len)?); - - let padding_bytes = Integer::next_multiple_of(&len, &4) - len; - let _ = read_vec(r, padding_bytes as usize)?; - - label - } - _ => { - return Err(Error::BadVariableLabelCode { - offset, - code: has_variable_label, - }) - } - }; - - let mut missing = Vec::new(); - if missing_value_code != 0 { - match (width, missing_value_code) { - (0, -3 | -2 | 1 | 2 | 3) => (), - (0, _) => { - return Err(Error::BadNumericMissingValueCode { - offset, - code: missing_value_code, - }) - } - (_, 0..=3) => (), - (_, _) => { - return Err(Error::BadStringMissingValueCode { - offset, - code: missing_value_code, - }) - } - } - - for _ in 0..missing_value_code.abs() { - missing.push(read_bytes(r)?); - } - } - - Ok(Variable { - offset, - width, - name, - print_format, - write_format, - missing_value_code, - missing, - label, - }) -} - -pub struct ValueLabel { - /// Offset from the start of the file to the start of the record. - pub offset: u64, - - /// The labels. - pub labels: Vec<([u8; 8], Vec)>, -} - -impl ValueLabel { - /// Maximum number of value labels in a record. - pub const MAX: u32 = u32::MAX / 8; -} - -fn read_value_label_record(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; - let n: u32 = endian.parse(read_bytes(r)?); - if n > ValueLabel::MAX { - return Err(Error::BadNumberOfValueLabels { - offset, - n, - max: ValueLabel::MAX, - }); - } - - let mut labels = Vec::new(); - for _ in 0..n { - let value: [u8; 8] = read_bytes(r)?; - let label_len: u8 = endian.parse(read_bytes(r)?); - let label_len = label_len as usize; - let padded_len = Integer::next_multiple_of(&(label_len + 1), &8); - - let mut label = read_vec(r, padded_len)?; - label.truncate(label_len); - labels.push((value, label)); - } - Ok(ValueLabel { offset, labels }) -} - -pub struct VarIndexes { - /// Offset from the start of the file to the start of the record. - pub offset: u64, - - /// The 0-based indexes of the variable indexes. - pub var_indexes: Vec, -} - -impl VarIndexes { - /// Maximum number of variable indexes in a record. 
- pub const MAX: u32 = u32::MAX / 8; -} - -fn read_var_indexes_record(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; - let n: u32 = endian.parse(read_bytes(r)?); - if n > VarIndexes::MAX { - return Err(Error::BadNumberOfVarIndexes { - offset, - n, - max: VarIndexes::MAX, - }); - } - let mut var_indexes = Vec::with_capacity(n as usize); - for _ in 0..n { - var_indexes.push(endian.parse(read_bytes(r)?)); - } - - Ok(VarIndexes { - offset, - var_indexes, - }) -} - -pub const DOC_LINE_LEN: u32 = 80; -pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN; - -pub struct Document { - /// Offset from the start of the file to the start of the record. - pub pos: u64, - - /// The document, as an array of 80-byte lines. - pub lines: Vec<[u8; DOC_LINE_LEN as usize]>, -} - -fn read_document_record(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; - let n: u32 = endian.parse(read_bytes(r)?); - match n { - 0..=DOC_MAX_LINES => { - let pos = r.stream_position()?; - let mut lines = Vec::with_capacity(n as usize); - for _ in 0..n { - let line: [u8; 80] = read_bytes(r)?; - lines.push(line); - } - Ok(Document { pos, lines }) - } - _ => Err(Error::BadDocumentLength { - offset, - n, - max: DOC_MAX_LINES, - }), - } -} - -#[derive(FromPrimitive)] -enum ExtensionType { - /// Machine integer info. - Integer = 3, - /// Machine floating-point info. - Float = 4, - /// Variable sets. - VarSets = 5, - /// DATE. - Date = 6, - /// Multiple response sets. - Mrsets = 7, - /// SPSS Data Entry. - DataEntry = 8, - /// Extra product info text. - ProductInfo = 10, - /// Variable display parameters. - Display = 11, - /// Long variable names. - LongNames = 13, - /// Long strings. - LongStrings = 14, - /// Extended number of cases. - Ncases = 16, - /// Data file attributes. - FileAttrs = 17, - /// Variable attributes. - VarAttrs = 18, - /// Multiple response sets (extended). - Mrsets2 = 19, - /// Character encoding. - Encoding = 20, - /// Value labels for long strings. - LongLabels = 21, - /// Missing values for long strings. - LongMissing = 22, - /// "Format properties in dataview table". - Dataview = 24, -} - -pub struct Extension { - /// Offset from the start of the file to the start of the record. - pub offset: u64, - - /// Record subtype. - pub subtype: u32, - - /// Size of each data element. - pub size: u32, - - /// Number of data elements. - pub count: u32, - - /// `size * count` bytes of data. - pub data: Vec, -} - -fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) { - match extension { - /* Implemented record types. */ - ExtensionType::Integer => (4, 8), - ExtensionType::Float => (8, 3), - ExtensionType::VarSets => (1, 0), - ExtensionType::Mrsets => (1, 0), - ExtensionType::ProductInfo => (1, 0), - ExtensionType::Display => (4, 0), - ExtensionType::LongNames => (1, 0), - ExtensionType::LongStrings => (1, 0), - ExtensionType::Ncases => (8, 2), - ExtensionType::FileAttrs => (1, 0), - ExtensionType::VarAttrs => (1, 0), - ExtensionType::Mrsets2 => (1, 0), - ExtensionType::Encoding => (1, 0), - ExtensionType::LongLabels => (1, 0), - ExtensionType::LongMissing => (1, 0), - - /* Ignored record types. 
*/ - ExtensionType::Date => (0, 0), - ExtensionType::DataEntry => (0, 0), - ExtensionType::Dataview => (0, 0), - } -} - -fn read_extension_record(r: &mut R, endian: Endian) -> Result { - let subtype = endian.parse(read_bytes(r)?); - let offset = r.stream_position()?; - let size: u32 = endian.parse(read_bytes(r)?); - let count = endian.parse(read_bytes(r)?); - let Some(product) = size.checked_mul(count) else { - return Err(Error::ExtensionRecordTooLarge { - offset, - subtype, - size, - count, - }); - }; - let offset = r.stream_position()?; - let data = read_vec(r, product as usize)?; - Ok(Extension { - offset, - subtype, - size, - count, - data, - }) -} - -pub struct ZHeader { - /// File offset to the start of the record. - pub offset: u64, - - /// File offset to the ZLIB data header. - pub zheader_offset: u64, - - /// File offset to the ZLIB trailer. - pub ztrailer_offset: u64, - - /// Length of the ZLIB trailer in bytes. - pub ztrailer_len: u64, -} - -fn read_zheader(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; - let zheader_offset: u64 = endian.parse(read_bytes(r)?); - let ztrailer_offset: u64 = endian.parse(read_bytes(r)?); - let ztrailer_len: u64 = endian.parse(read_bytes(r)?); - - Ok(ZHeader { - offset, - zheader_offset, - ztrailer_offset, - ztrailer_len, - }) -} - -pub struct ZTrailer { - /// File offset to the start of the record. - pub offset: u64, - - /// Compression bias as a negative integer, e.g. -100. - pub int_bias: i64, - - /// Always observed as zero. - pub zero: u64, - - /// Uncompressed size of each block, except possibly the last. Only - /// `0x3ff000` has been observed so far. - pub block_size: u32, - - /// Block descriptors, always `(ztrailer_len - 24) / 24)` of them. - pub blocks: Vec, -} - -pub struct ZBlock { - /// Offset of block of data if simple compression were used. - pub uncompressed_ofs: u64, - - /// Actual offset within the file of the compressed data block. - pub compressed_ofs: u64, - - /// The number of bytes in this data block after decompression. This is - /// `block_size` in every data block but the last, which may be smaller. - pub uncompressed_size: u32, - - /// The number of bytes in this data block, as stored compressed in this - /// file. 
- pub compressed_size: u32, -} - -fn read_ztrailer( - r: &mut R, - endian: Endian, - ztrailer_ofs: u64, - ztrailer_len: u64, -) -> Result, Error> { - let start_offset = r.stream_position()?; - if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() { - return Ok(None); - } - let int_bias = endian.parse(read_bytes(r)?); - let zero = endian.parse(read_bytes(r)?); - let block_size = endian.parse(read_bytes(r)?); - let n_blocks: u32 = endian.parse(read_bytes(r)?); - let expected_n_blocks = (ztrailer_len - 24) / 24; - if n_blocks as u64 != expected_n_blocks { - return Err(Error::BadZlibTrailerNBlocks { - offset: ztrailer_ofs, - n_blocks, - expected_n_blocks, - ztrailer_len, - }); - } - let mut blocks = Vec::with_capacity(n_blocks as usize); - for _ in 0..n_blocks { - let uncompressed_ofs = endian.parse(read_bytes(r)?); - let compressed_ofs = endian.parse(read_bytes(r)?); - let uncompressed_size = endian.parse(read_bytes(r)?); - let compressed_size = endian.parse(read_bytes(r)?); - blocks.push(ZBlock { - uncompressed_ofs, - compressed_ofs, - uncompressed_size, - compressed_size, - }); - } - r.seek(SeekFrom::Start(start_offset))?; - Ok(Some(ZTrailer { - offset: ztrailer_ofs, - int_bias, - zero, - block_size, - blocks, - })) -} - -fn try_read_bytes(r: &mut R) -> Result, IoError> { - let mut buf = [0; N]; - let n = r.read(&mut buf)?; - if n > 0 { - if n < N { - r.read_exact(&mut buf[n..])?; - } - Ok(Some(buf)) - } else { - Ok(None) - } -} - -fn read_bytes(r: &mut R) -> Result<[u8; N], IoError> { - let mut buf = [0; N]; - r.read_exact(&mut buf)?; - Ok(buf) -} - -fn read_vec(r: &mut R, n: usize) -> Result, IoError> { - let mut vec = vec![0; n]; - r.read_exact(&mut vec)?; - Ok(vec) -} - -/* -fn trim_end(mut s: Vec, c: u8) -> Vec { - while s.last() == Some(&c) { - s.pop(); - } - s -} - -fn skip_bytes(r: &mut R, mut n: u64) -> Result<(), IoError> { - let mut buf = [0; 1024]; - while n > 0 { - let chunk = u64::min(n, buf.len() as u64); - r.read_exact(&mut buf[0..chunk as usize])?; - n -= chunk; - } - Ok(()) -} - -*/ diff --git a/rust/src/raw.rs b/rust/src/raw.rs new file mode 100644 index 0000000000..f1e362376b --- /dev/null +++ b/rust/src/raw.rs @@ -0,0 +1,899 @@ +use crate::endian::{Endian, Parse, ToBytes}; +use crate::Error; + +use flate2::read::ZlibDecoder; +use num::Integer; +use num_derive::FromPrimitive; +use std::{ + collections::VecDeque, + io::{Error as IoError, Read, Seek, SeekFrom}, + iter::FusedIterator, +}; + +#[derive(Copy, Clone, Debug)] +pub enum Compression { + Simple, + ZLib, +} + +pub enum Record { + Header(Header), + Document(Document), + Variable(Variable), + ValueLabel(ValueLabel), + VarIndexes(VarIndexes), + Extension(Extension), + EndOfHeaders, + ZHeader(ZHeader), + ZTrailer(ZTrailer), + Case(Vec), +} + +pub struct Header { + /// Magic number. + pub magic: Magic, + + /// Eye-catcher string, product name, in the file's encoding. Padded + /// on the right with spaces. + pub eye_catcher: [u8; 60], + + /// Layout code, normally either 2 or 3. + pub layout_code: u32, + + /// Number of variable positions, or `None` if the value in the file is + /// questionably trustworthy. + pub nominal_case_size: Option, + + /// Compression type, if any, + pub compression: Option, + + /// 0-based variable index of the weight variable, or `None` if the file is + /// unweighted. + pub weight_index: Option, + + /// Claimed number of cases, if known. + pub n_cases: Option, + + /// Compression bias, usually 100.0. + pub bias: f64, + + /// `dd mmm yy` in the file's encoding. 
+ pub creation_date: [u8; 9], + + /// `HH:MM:SS` in the file's encoding. + pub creation_time: [u8; 8], + + /// File label, in the file's encoding. Padded on the right with spaces. + pub file_label: [u8; 64], + + /// Endianness of the data in the file header. + pub endian: Endian, +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Magic([u8; 4]); + +impl Magic { + /// Magic number for a regular system file. + pub const SAV: Magic = Magic(*b"$FL2"); + + /// Magic number for a system file that contains zlib-compressed data. + pub const ZSAV: Magic = Magic(*b"$FL3"); + + /// Magic number for an EBDIC-encoded system file. This is `$FL2` encoded + /// in EBCDIC. + pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]); +} + +impl TryFrom<[u8; 4]> for Magic { + type Error = Error; + + fn try_from(value: [u8; 4]) -> Result { + let magic = Magic(value); + match magic { + Magic::SAV | Magic::ZSAV | Magic::EBCDIC => Ok(magic), + _ => Err(Error::BadMagic(value)), + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub enum VarType { + Number, + String, +} + +impl VarType { + fn from_width(width: i32) -> VarType { + match width { + 0 => VarType::Number, + _ => VarType::String, + } + } +} + +trait State { + #[allow(clippy::type_complexity)] + fn read(self: Box) -> Result)>, Error>; +} + +struct Start { + reader: R, +} + +struct CommonState { + reader: R, + endian: Endian, + bias: f64, + compression: Option, + var_types: Vec, +} + +impl State for Start { + fn read(mut self: Box) -> Result)>, Error> { + let header = read_header(&mut self.reader)?; + let next_state = Headers(CommonState { + reader: self.reader, + endian: header.endian, + bias: header.bias, + compression: header.compression, + var_types: Vec::new(), + }); + Ok(Some((Record::Header(header), Box::new(next_state)))) + } +} + +struct Headers(CommonState); + +impl State for Headers { + fn read(mut self: Box) -> Result)>, Error> { + let endian = self.0.endian; + let rec_type: u32 = endian.parse(read_bytes(&mut self.0.reader)?); + let record = match rec_type { + 2 => { + let variable = read_variable_record(&mut self.0.reader, endian)?; + self.0.var_types.push(VarType::from_width(variable.width)); + Record::Variable(variable) + } + 3 => Record::ValueLabel(read_value_label_record(&mut self.0.reader, endian)?), + 4 => Record::VarIndexes(read_var_indexes_record(&mut self.0.reader, endian)?), + 6 => Record::Document(read_document_record(&mut self.0.reader, endian)?), + 7 => Record::Extension(read_extension_record(&mut self.0.reader, endian)?), + 999 => { + let _: [u8; 4] = read_bytes(&mut self.0.reader)?; + let next_state: Box = match self.0.compression { + None => Box::new(Data(self.0)), + Some(Compression::Simple) => Box::new(CompressedData::new(self.0)), + Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)), + }; + return Ok(Some((Record::EndOfHeaders, next_state))); + } + _ => { + return Err(Error::BadRecordType { + offset: self.0.reader.stream_position()?, + rec_type, + }) + } + }; + Ok(Some((record, self))) + } +} + +struct ZlibHeader(CommonState); + +impl State for ZlibHeader { + fn read(mut self: Box) -> Result)>, Error> { + let zheader = read_zheader(&mut self.0.reader, self.0.endian)?; + Ok(Some((Record::ZHeader(zheader), self))) + } +} + +struct ZlibTrailer(CommonState, ZHeader); + +impl State for ZlibTrailer { + fn read(mut self: Box) -> Result)>, Error> { + let retval = read_ztrailer(&mut self.0.reader, self.0.endian, self.1.ztrailer_offset, self.1.ztrailer_len)?; + let next_state = 
Box::new(CompressedData::new(CommonState { + reader: ZlibDecodeMultiple::new(self.0.reader), + endian: self.0.endian, + bias: self.0.bias, + compression: self.0.compression, + var_types: self.0.var_types + })); + match retval { + None => next_state.read(), + Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state))) + } + } +} + +struct Data(CommonState); + +impl State for Data { + fn read(mut self: Box) -> Result)>, Error> { + let case_start = self.0.reader.stream_position()?; + let mut values = Vec::with_capacity(self.0.var_types.len()); + for (i, &var_type) in self.0.var_types.iter().enumerate() { + let Some(raw) = try_read_bytes(&mut self.0.reader)? else { + if i == 0 { + return Ok(None); + } else { + let offset = self.0.reader.stream_position()?; + return Err(Error::EofInCase { + offset, + case_ofs: offset - case_start, + case_len: self.0.var_types.len() * 8, + }); + } + }; + values.push(Value::from_raw(var_type, raw, self.0.endian)); + } + Ok(Some((Record::Case(values), self))) + } +} + +struct CompressedData { + common: CommonState, + codes: VecDeque, +} + +impl CompressedData { + fn new(common: CommonState) -> CompressedData { + CompressedData { common, codes: VecDeque::new() } + } +} + +impl State for CompressedData { + fn read(mut self: Box) -> Result)>, Error> { + let case_start = self.common.reader.stream_position()?; + let mut values = Vec::with_capacity(self.common.var_types.len()); + for (i, &var_type) in self.common.var_types.iter().enumerate() { + let value = loop { + let Some(code) = self.codes.pop_front() else { + let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.common.reader)? + else { + if i == 0 { + return Ok(None); + } else { + let offset = self.common.reader.stream_position()?; + return Err(Error::EofInCompressedCase { + offset, + case_ofs: offset - case_start, + }); + } + }; + self.codes.extend(new_codes.into_iter()); + continue; + }; + match code { + 0 => (), + 1..=251 => match var_type { + VarType::Number => break Value::Number(Some(code as f64 - self.common.bias)), + VarType::String => { + break Value::String(self.common.endian.to_bytes(code as f64 - self.common.bias)) + } + }, + 252 => { + if i == 0 { + return Ok(None); + } else { + let offset = self.common.reader.stream_position()?; + return Err(Error::PartialCompressedCase { + offset, + case_ofs: offset - case_start, + }); + } + } + 253 => { + break Value::from_raw( + var_type, + read_bytes(&mut self.common.reader)?, + self.common.endian, + ) + } + 254 => match var_type { + VarType::String => break Value::String(*b" "), // XXX EBCDIC + VarType::Number => { + return Err(Error::CompressedStringExpected { + offset: case_start, + case_ofs: self.common.reader.stream_position()? - case_start, + }) + } + }, + 255 => match var_type { + VarType::Number => break Value::Number(None), + VarType::String => { + return Err(Error::CompressedNumberExpected { + offset: case_start, + case_ofs: self.common.reader.stream_position()? - case_start, + }) + } + }, + } + }; + values.push(value); + } + Ok(Some((Record::Case(values), self))) + } +} + +struct ZlibDecodeMultiple +where + R: Read + Seek, +{ + reader: Option>, +} + +impl ZlibDecodeMultiple +where + R: Read + Seek, +{ + fn new(reader: R) -> ZlibDecodeMultiple { + ZlibDecodeMultiple { + reader: Some(ZlibDecoder::new(reader)), + } + } +} + +impl Read for ZlibDecodeMultiple +where + R: Read + Seek, +{ + fn read(&mut self, buf: &mut [u8]) -> Result { + loop { + match self.reader.as_mut().unwrap().read(buf)? 
{ + 0 => { + let inner = self.reader.take().unwrap().into_inner(); + self.reader = Some(ZlibDecoder::new(inner)); + } + n => return Ok(n), + }; + } + } +} + +impl Seek for ZlibDecodeMultiple +where + R: Read + Seek, +{ + fn seek(&mut self, pos: SeekFrom) -> Result { + self.reader.as_mut().unwrap().get_mut().seek(pos) + } +} + +#[derive(Copy, Clone)] +pub enum Value { + Number(Option), + String([u8; 8]), +} + +impl Value { + pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value { + match var_type { + VarType::String => Value::String(raw), + VarType::Number => { + let number: f64 = endian.parse(raw); + Value::Number((number != -f64::MAX).then_some(number)) + } + } + } +} + +pub struct Reader { + state: Option>, +} + +impl Reader { + pub fn new(reader: R) -> Result { + Ok(Reader { + state: Some(Box::new(Start { reader })), + }) + } +} + +impl Iterator for Reader { + type Item = Result; + + fn next(&mut self) -> Option { + match self.state.take()?.read() { + Ok(Some((record, next_state))) => { + self.state = Some(next_state); + Some(Ok(record)) + } + Ok(None) => None, + Err(error) => Some(Err(error)), + } + } +} + +impl FusedIterator for Reader {} + +fn read_header(r: &mut R) -> Result { + let magic: [u8; 4] = read_bytes(r)?; + let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?; + + let eye_catcher: [u8; 60] = read_bytes(r)?; + let layout_code: [u8; 4] = read_bytes(r)?; + let endian = Endian::identify_u32(2, layout_code) + .or_else(|| Endian::identify_u32(2, layout_code)) + .ok_or_else(|| Error::NotASystemFile)?; + let layout_code = endian.parse(layout_code); + + let nominal_case_size: u32 = endian.parse(read_bytes(r)?); + let nominal_case_size = + (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size); + + let compression_code: u32 = endian.parse(read_bytes(r)?); + let compression = match (magic, compression_code) { + (Magic::ZSAV, 2) => Some(Compression::ZLib), + (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)), + (_, 0) => None, + (_, 1) => Some(Compression::Simple), + (_, code) => return Err(Error::InvalidSavCompression(code)), + }; + + let weight_index: u32 = endian.parse(read_bytes(r)?); + let weight_index = (weight_index > 0).then_some(weight_index - 1); + + let n_cases: u32 = endian.parse(read_bytes(r)?); + let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases); + + let bias: f64 = endian.parse(read_bytes(r)?); + + let creation_date: [u8; 9] = read_bytes(r)?; + let creation_time: [u8; 8] = read_bytes(r)?; + let file_label: [u8; 64] = read_bytes(r)?; + let _: [u8; 3] = read_bytes(r)?; + + Ok(Header { + magic, + layout_code, + nominal_case_size, + compression, + weight_index, + n_cases, + bias, + creation_date, + creation_time, + eye_catcher, + file_label, + endian, + }) +} + +pub struct Variable { + /// Offset from the start of the file to the start of the record. + pub offset: u64, + + /// Variable width, in the range -1..=255. + pub width: i32, + + /// Variable name, padded on the right with spaces. + pub name: [u8; 8], + + /// Print format. + pub print_format: u32, + + /// Write format. + pub write_format: u32, + + /// Missing value code, one of -3, -2, 0, 1, 2, or 3. + pub missing_value_code: i32, + + /// Raw missing values, up to 3 of them. + pub missing: Vec<[u8; 8]>, + + /// Optional variable label. 
+ pub label: Option>, +} + +fn read_variable_record(r: &mut R, endian: Endian) -> Result { + let offset = r.stream_position()?; + let width: i32 = endian.parse(read_bytes(r)?); + let has_variable_label: u32 = endian.parse(read_bytes(r)?); + let missing_value_code: i32 = endian.parse(read_bytes(r)?); + let print_format: u32 = endian.parse(read_bytes(r)?); + let write_format: u32 = endian.parse(read_bytes(r)?); + let name: [u8; 8] = read_bytes(r)?; + + let label = match has_variable_label { + 0 => None, + 1 => { + let len: u32 = endian.parse(read_bytes(r)?); + let read_len = len.min(65535) as usize; + let label = Some(read_vec(r, read_len)?); + + let padding_bytes = Integer::next_multiple_of(&len, &4) - len; + let _ = read_vec(r, padding_bytes as usize)?; + + label + } + _ => { + return Err(Error::BadVariableLabelCode { + offset, + code: has_variable_label, + }) + } + }; + + let mut missing = Vec::new(); + if missing_value_code != 0 { + match (width, missing_value_code) { + (0, -3 | -2 | 1 | 2 | 3) => (), + (0, _) => { + return Err(Error::BadNumericMissingValueCode { + offset, + code: missing_value_code, + }) + } + (_, 0..=3) => (), + (_, _) => { + return Err(Error::BadStringMissingValueCode { + offset, + code: missing_value_code, + }) + } + } + + for _ in 0..missing_value_code.abs() { + missing.push(read_bytes(r)?); + } + } + + Ok(Variable { + offset, + width, + name, + print_format, + write_format, + missing_value_code, + missing, + label, + }) +} + +pub struct ValueLabel { + /// Offset from the start of the file to the start of the record. + pub offset: u64, + + /// The labels. + pub labels: Vec<([u8; 8], Vec)>, +} + +impl ValueLabel { + /// Maximum number of value labels in a record. + pub const MAX: u32 = u32::MAX / 8; +} + +fn read_value_label_record(r: &mut R, endian: Endian) -> Result { + let offset = r.stream_position()?; + let n: u32 = endian.parse(read_bytes(r)?); + if n > ValueLabel::MAX { + return Err(Error::BadNumberOfValueLabels { + offset, + n, + max: ValueLabel::MAX, + }); + } + + let mut labels = Vec::new(); + for _ in 0..n { + let value: [u8; 8] = read_bytes(r)?; + let label_len: u8 = endian.parse(read_bytes(r)?); + let label_len = label_len as usize; + let padded_len = Integer::next_multiple_of(&(label_len + 1), &8); + + let mut label = read_vec(r, padded_len)?; + label.truncate(label_len); + labels.push((value, label)); + } + Ok(ValueLabel { offset, labels }) +} + +pub struct VarIndexes { + /// Offset from the start of the file to the start of the record. + pub offset: u64, + + /// The 0-based indexes of the variable indexes. + pub var_indexes: Vec, +} + +impl VarIndexes { + /// Maximum number of variable indexes in a record. + pub const MAX: u32 = u32::MAX / 8; +} + +fn read_var_indexes_record(r: &mut R, endian: Endian) -> Result { + let offset = r.stream_position()?; + let n: u32 = endian.parse(read_bytes(r)?); + if n > VarIndexes::MAX { + return Err(Error::BadNumberOfVarIndexes { + offset, + n, + max: VarIndexes::MAX, + }); + } + let mut var_indexes = Vec::with_capacity(n as usize); + for _ in 0..n { + var_indexes.push(endian.parse(read_bytes(r)?)); + } + + Ok(VarIndexes { + offset, + var_indexes, + }) +} + +pub const DOC_LINE_LEN: u32 = 80; +pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN; + +pub struct Document { + /// Offset from the start of the file to the start of the record. + pub pos: u64, + + /// The document, as an array of 80-byte lines. 
+ pub lines: Vec<[u8; DOC_LINE_LEN as usize]>, +} + +fn read_document_record(r: &mut R, endian: Endian) -> Result { + let offset = r.stream_position()?; + let n: u32 = endian.parse(read_bytes(r)?); + match n { + 0..=DOC_MAX_LINES => { + let pos = r.stream_position()?; + let mut lines = Vec::with_capacity(n as usize); + for _ in 0..n { + let line: [u8; 80] = read_bytes(r)?; + lines.push(line); + } + Ok(Document { pos, lines }) + } + _ => Err(Error::BadDocumentLength { + offset, + n, + max: DOC_MAX_LINES, + }), + } +} + +#[derive(FromPrimitive)] +enum ExtensionType { + /// Machine integer info. + Integer = 3, + /// Machine floating-point info. + Float = 4, + /// Variable sets. + VarSets = 5, + /// DATE. + Date = 6, + /// Multiple response sets. + Mrsets = 7, + /// SPSS Data Entry. + DataEntry = 8, + /// Extra product info text. + ProductInfo = 10, + /// Variable display parameters. + Display = 11, + /// Long variable names. + LongNames = 13, + /// Long strings. + LongStrings = 14, + /// Extended number of cases. + Ncases = 16, + /// Data file attributes. + FileAttrs = 17, + /// Variable attributes. + VarAttrs = 18, + /// Multiple response sets (extended). + Mrsets2 = 19, + /// Character encoding. + Encoding = 20, + /// Value labels for long strings. + LongLabels = 21, + /// Missing values for long strings. + LongMissing = 22, + /// "Format properties in dataview table". + Dataview = 24, +} + +pub struct Extension { + /// Offset from the start of the file to the start of the record. + pub offset: u64, + + /// Record subtype. + pub subtype: u32, + + /// Size of each data element. + pub size: u32, + + /// Number of data elements. + pub count: u32, + + /// `size * count` bytes of data. + pub data: Vec, +} + +/* +fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) { + match extension { + /* Implemented record types. */ + ExtensionType::Integer => (4, 8), + ExtensionType::Float => (8, 3), + ExtensionType::VarSets => (1, 0), + ExtensionType::Mrsets => (1, 0), + ExtensionType::ProductInfo => (1, 0), + ExtensionType::Display => (4, 0), + ExtensionType::LongNames => (1, 0), + ExtensionType::LongStrings => (1, 0), + ExtensionType::Ncases => (8, 2), + ExtensionType::FileAttrs => (1, 0), + ExtensionType::VarAttrs => (1, 0), + ExtensionType::Mrsets2 => (1, 0), + ExtensionType::Encoding => (1, 0), + ExtensionType::LongLabels => (1, 0), + ExtensionType::LongMissing => (1, 0), + + /* Ignored record types. */ + ExtensionType::Date => (0, 0), + ExtensionType::DataEntry => (0, 0), + ExtensionType::Dataview => (0, 0), + } +} + */ + +fn read_extension_record(r: &mut R, endian: Endian) -> Result { + let subtype = endian.parse(read_bytes(r)?); + let offset = r.stream_position()?; + let size: u32 = endian.parse(read_bytes(r)?); + let count = endian.parse(read_bytes(r)?); + let Some(product) = size.checked_mul(count) else { + return Err(Error::ExtensionRecordTooLarge { + offset, + subtype, + size, + count, + }); + }; + let offset = r.stream_position()?; + let data = read_vec(r, product as usize)?; + Ok(Extension { + offset, + subtype, + size, + count, + data, + }) +} + +pub struct ZHeader { + /// File offset to the start of the record. + pub offset: u64, + + /// File offset to the ZLIB data header. + pub zheader_offset: u64, + + /// File offset to the ZLIB trailer. + pub ztrailer_offset: u64, + + /// Length of the ZLIB trailer in bytes. 
+ pub ztrailer_len: u64, +} + +fn read_zheader(r: &mut R, endian: Endian) -> Result { + let offset = r.stream_position()?; + let zheader_offset: u64 = endian.parse(read_bytes(r)?); + let ztrailer_offset: u64 = endian.parse(read_bytes(r)?); + let ztrailer_len: u64 = endian.parse(read_bytes(r)?); + + Ok(ZHeader { + offset, + zheader_offset, + ztrailer_offset, + ztrailer_len, + }) +} + +pub struct ZTrailer { + /// File offset to the start of the record. + pub offset: u64, + + /// Compression bias as a negative integer, e.g. -100. + pub int_bias: i64, + + /// Always observed as zero. + pub zero: u64, + + /// Uncompressed size of each block, except possibly the last. Only + /// `0x3ff000` has been observed so far. + pub block_size: u32, + + /// Block descriptors, always `(ztrailer_len - 24) / 24)` of them. + pub blocks: Vec, +} + +pub struct ZBlock { + /// Offset of block of data if simple compression were used. + pub uncompressed_ofs: u64, + + /// Actual offset within the file of the compressed data block. + pub compressed_ofs: u64, + + /// The number of bytes in this data block after decompression. This is + /// `block_size` in every data block but the last, which may be smaller. + pub uncompressed_size: u32, + + /// The number of bytes in this data block, as stored compressed in this + /// file. + pub compressed_size: u32, +} + +fn read_ztrailer( + r: &mut R, + endian: Endian, + ztrailer_ofs: u64, + ztrailer_len: u64, +) -> Result, Error> { + let start_offset = r.stream_position()?; + if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() { + return Ok(None); + } + let int_bias = endian.parse(read_bytes(r)?); + let zero = endian.parse(read_bytes(r)?); + let block_size = endian.parse(read_bytes(r)?); + let n_blocks: u32 = endian.parse(read_bytes(r)?); + let expected_n_blocks = (ztrailer_len - 24) / 24; + if n_blocks as u64 != expected_n_blocks { + return Err(Error::BadZlibTrailerNBlocks { + offset: ztrailer_ofs, + n_blocks, + expected_n_blocks, + ztrailer_len, + }); + } + let mut blocks = Vec::with_capacity(n_blocks as usize); + for _ in 0..n_blocks { + let uncompressed_ofs = endian.parse(read_bytes(r)?); + let compressed_ofs = endian.parse(read_bytes(r)?); + let uncompressed_size = endian.parse(read_bytes(r)?); + let compressed_size = endian.parse(read_bytes(r)?); + blocks.push(ZBlock { + uncompressed_ofs, + compressed_ofs, + uncompressed_size, + compressed_size, + }); + } + r.seek(SeekFrom::Start(start_offset))?; + Ok(Some(ZTrailer { + offset: ztrailer_ofs, + int_bias, + zero, + block_size, + blocks, + })) +} + +fn try_read_bytes(r: &mut R) -> Result, IoError> { + let mut buf = [0; N]; + let n = r.read(&mut buf)?; + if n > 0 { + if n < N { + r.read_exact(&mut buf[n..])?; + } + Ok(Some(buf)) + } else { + Ok(None) + } +} + +fn read_bytes(r: &mut R) -> Result<[u8; N], IoError> { + let mut buf = [0; N]; + r.read_exact(&mut buf)?; + Ok(buf) +} + +fn read_vec(r: &mut R, n: usize) -> Result, IoError> { + let mut vec = vec![0; n]; + r.read_exact(&mut vec)?; + Ok(vec) +}
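
A minimal driver for the new raw module might look like the sketch below. It assumes the crate is named pspp and that a system file example.sav exists; both names are illustrative, not taken from the patch. raw::Reader is an iterator over Result<Record, Error>, so a plain for loop pulls records until end of data or the first error.

    use std::fs::File;
    use std::io::BufReader;

    // Crate path assumed; adjust to the actual package name.
    use pspp::raw::{Reader, Record};

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // `Reader` needs `Read + Seek`; `BufReader<File>` provides both.
        let file = BufReader::new(File::open("example.sav")?);
        for record in Reader::new(file)? {
            // Each item is a `Result<Record, Error>`; stop on the first error.
            match record? {
                Record::Header(h) => println!("header, layout code {}", h.layout_code),
                Record::Variable(v) => println!("variable of width {}", v.width),
                Record::Case(values) => println!("case with {} values", values.len()),
                _ => (),
            }
        }
        Ok(())
    }

Because the state machine behind the iterator owns the reader and swaps itself out as the header records give way to plain, simple-compressed, or ZLIB-compressed data, a caller written this way never deals with record framing or decompression directly.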