From eb8cd5c6597e10e02bc0321ac5893ec7463797fa Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 25 Jul 2023 20:38:21 -0700 Subject: [PATCH] work --- rust/src/lib.rs | 114 +++++++++++++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 45 deletions(-) diff --git a/rust/src/lib.rs b/rust/src/lib.rs index b51f1cf209..8e8ee6cab4 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -27,6 +27,9 @@ pub enum Error { #[error("Invalid ZSAV compression code {0}")] InvalidZsavCompression(u32), + #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")] + BadVariableWidth { offset: u64, width: i32 }, + #[error("Misplaced type 4 record near offset {0:#x}.")] MisplacedType4Record(u64), @@ -103,29 +106,41 @@ pub struct Header { /// Magic number. pub magic: Magic, - /// Endianness of the data in the file header. - pub endianness: Endian, + /// Eye-catcher string, product name, in the file's encoding. Padded + /// on the right with spaces. + pub eye_catcher: [u8; 60], - /// 0-based variable index of the weight variable, or `None` if the file is - /// unweighted. - pub weight_index: Option, + /// Layout code, normally either 2 or 3. + pub layout_code: u32, /// Number of variable positions, or `None` if the value in the file is /// questionably trustworthy. pub nominal_case_size: Option, + /// Compression type, if any, + pub compression: Option, + + /// 0-based variable index of the weight variable, or `None` if the file is + /// unweighted. + pub weight_index: Option, + + /// Claimed number of cases, if known. + pub n_cases: Option, + + /// Compression bias, usually 100.0. + pub bias: f64, + /// `dd mmm yy` in the file's encoding. pub creation_date: [u8; 9], /// `HH:MM:SS` in the file's encoding. pub creation_time: [u8; 8], - /// Eye-catcher string, then product name, in the file's encoding. Padded - /// on the right with spaces. - pub eye_catcher: [u8; 60], - /// File label, in the file's encoding. Padded on the right with spaces. pub file_label: [u8; 64], + + /// Endianness of the data in the file header. + pub endianness: Endian, } #[derive(Copy, Clone, PartialEq, Eq, Hash)] @@ -155,15 +170,30 @@ impl TryFrom<[u8; 4]> for Magic { } } +enum VarType { + Number, + String, +} + +impl VarType { + fn from_width(width: i32) -> VarType { + match width { + 0 => VarType::Number, + _ => VarType::String, + } + } +} + pub struct Reader { r: BufReader, + var_types: Vec, state: ReaderState, - endianness: Option, } enum ReaderState { Start, - Headers(Endian), + Headers(Endian, Option), + Data(Endian), End, } @@ -171,29 +201,37 @@ impl Reader { pub fn new(r: R) -> Result, Error> { Ok(Reader { r: BufReader::new(r), + var_types: Vec::new(), state: ReaderState::Start, - endianness: None, }) } fn _next(&mut self) -> Result, Error> { match self.state { ReaderState::Start => { let header = read_header(&mut self.r)?; - let endianness = header.endianness; - Ok(Some((Record::Header(header), ReaderState::Headers(endianness)))) + let next_state = ReaderState::Headers(header.endianness, header.compression); + Ok(Some((Record::Header(header), next_state))) } - ReaderState::Headers(e) => { - let rec_type: u32 = e.parse(read_bytes(&mut self.r)?); + ReaderState::Headers(endian, compression) => { + let rec_type: u32 = endian.parse(read_bytes(&mut self.r)?); let record = match rec_type { - 2 => Record::Variable(read_variable_record(&mut self.r, e)?), - 3 => Record::ValueLabel(read_value_label_record(&mut self.r, e)?), - 4 => Record::VarIndexes(read_var_indexes_record(&mut self.r, e)?), - 6 => Record::Document(read_document_record(&mut self.r, e)?), - 7 => Record::Extension(read_extension_record(&mut self.r, e)?), + 2 => { + let variable = read_variable_record(&mut self.r, endian)?; + self.var_types.push(VarType::from_width(variable.width)); + Record::Variable(variable) + } + 3 => Record::ValueLabel(read_value_label_record(&mut self.r, endian)?), + 4 => Record::VarIndexes(read_var_indexes_record(&mut self.r, endian)?), + 6 => Record::Document(read_document_record(&mut self.r, endian)?), + 7 => Record::Extension(read_extension_record(&mut self.r, endian)?), 999 => { let _: [u8; 4] = read_bytes(&mut self.r)?; - return Ok(Some((Record::EndOfHeaders, ReaderState::End))) - }, + let next_state = match compression { + None => ReaderState::Data(endian), + _ => ReaderState::End, + }; + return Ok(Some((Record::EndOfHeaders, next_state))); + } _ => { return Err(Error::BadRecordType { offset: self.r.stream_position()?, @@ -201,7 +239,7 @@ impl Reader { }) } }; - Ok(Some((record, ReaderState::Headers(e)))) + Ok(Some((record, ReaderState::Headers(endian, compression)))) } ReaderState::End => Ok(None), } @@ -239,6 +277,7 @@ fn read_header(r: &mut R) -> Result { let endianness = Endian::identify_u32(2, layout_code) .or_else(|| Endian::identify_u32(2, layout_code)) .ok_or_else(|| Error::NotASystemFile)?; + let layout_code = endianness.parse(layout_code); let nominal_case_size: u32 = endianness.parse(read_bytes(r)?); let nominal_case_size = @@ -268,13 +307,17 @@ fn read_header(r: &mut R) -> Result { Ok(Header { magic, - endianness, - weight_index, + layout_code, nominal_case_size, + compression, + weight_index, + n_cases, + bias, creation_date, creation_time, eye_catcher, file_label, + endianness, }) } @@ -615,25 +658,6 @@ fn read_zheader(r: &mut BufReader, e: Endian) -> Result