From: Ben Pfaff Date: Sun, 23 Jul 2023 23:00:04 +0000 (-0700) Subject: more library work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7434003a421667a79280c63277ee8c7e8b0e9352;p=pspp more library work --- diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 401a6ecb94..26db62dc11 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -23,23 +23,34 @@ pub enum Error { #[error("Invalid ZSAV compression code {0}")] InvalidZsavCompression(u32), - #[error("Misplaced type 4 record.")] - MisplacedType4Record, + #[error("Misplaced type 4 record near offset {0:#x}.")] + MisplacedType4Record(u64), - #[error("Number of document lines ({n}) must be greater than 0 and less than {max}.")] - BadDocumentLength { n: u32, max: u32 }, + #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")] + BadDocumentLength { offset: u64, n: u32, max: u32 }, - #[error("Unrecognized record type {0}.")] - BadRecordType(u32), + #[error("At offset {offset:#x}, Unrecognized record type {rec_type}.")] + BadRecordType { offset: u64, rec_type: u32 }, - #[error("Variable label indicator ({0}) is not 0 or 1.")] - BadVariableLabelIndicator(u32), + #[error("At offset {offset:#x}, variable label code ({code}) is not 0 or 1.")] + BadVariableLabelCode { offset: u64, code: u32 }, - #[error("Numeric missing value indicator ({0}) is not -3, -2, 0, 1, 2, or 3.")] - BadNumericMissingValueIndicator(i32), + #[error( + "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3." + )] + BadNumericMissingValueCode { offset: u64, code: i32 }, - #[error("String missing value indicator ({0}) is not 0, 1, 2, or 3.")] - BadStringMissingValueIndicator(i32), + #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")] + BadStringMissingValueCode { offset: u64, code: i32 }, + + #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")] + BadNumberOfValueLabels { offset: u64, n: u32, max: u32 }, + + #[error("At offset {offset:#x}, variable index record (type 4) does not immediately follow value label record (type 3) as it should.")] + MissingVariableIndexRecord { offset: u64 }, + + #[error("At offset {offset:#x}, number of variables associated with a value label ({n}) is not between 1 and the number of variables ({max}).")] + BadNumberOfValueLabelVariables { offset: u64, n: u32, max: u32 }, } #[derive(Error, Debug)] @@ -61,6 +72,10 @@ pub struct Reader { r: BufReader, document_record: Option, + + variables: Vec, + + value_labels: Vec, } /// Magic number for a regular system file. @@ -120,16 +135,16 @@ impl Reader { let e = header.endianness; let mut document_record = None; let mut variables = Vec::new(); + let mut value_labels = Vec::new(); loop { + let offset = r.stream_position()?; let rec_type: u32 = e.parse(read_bytes(&mut r)?); match rec_type { 2 => variables.push(read_variable_record(&mut r, e)?), - /* - 3 => d.read_value_label_record()?, - */ + 3 => value_labels.push(read_value_label_record(&mut r, e, variables.len())?), // A Type 4 record is always immediately after a type 3 record, // the code for type 3 records reads the type 4 record too. - 4 => return Err(Error::MisplacedType4Record), + 4 => return Err(Error::MisplacedType4Record(offset)), 6 => { let d = read_document_record(&mut r, e)?; @@ -143,11 +158,16 @@ impl Reader { 7 => d.read_extension_record()?, */ 999 => break, - _ => return Err(Error::BadRecordType(rec_type)), + _ => return Err(Error::BadRecordType { offset, rec_type }), } } - Ok(Reader { r, document_record }) + Ok(Reader { + r, + document_record, + variables, + value_labels, + }) } } @@ -211,7 +231,7 @@ fn read_header(r: &mut R, warn: impl Fn(Warning)) -> Result( r: &mut BufReader, e: Endian, ) -> Result { - let pos = r.stream_position()?; + let offset = r.stream_position()?; let width: i32 = e.parse(read_bytes(r)?); let has_variable_label: u32 = e.parse(read_bytes(r)?); let missing_value_code: i32 = e.parse(read_bytes(r)?); @@ -259,16 +279,31 @@ fn read_variable_record( label } - _ => return Err(Error::BadVariableLabelIndicator(has_variable_label)), + _ => { + return Err(Error::BadVariableLabelCode { + offset, + code: has_variable_label, + }) + } }; let mut missing = Vec::new(); if missing_value_code != 0 { match (width, missing_value_code) { (0, -3 | -2 | 1 | 2 | 3) => (), - (0, _) => return Err(Error::BadNumericMissingValueIndicator(missing_value_code)), + (0, _) => { + return Err(Error::BadNumericMissingValueCode { + offset, + code: missing_value_code, + }) + } (_, 0..=3) => (), - (_, _) => return Err(Error::BadStringMissingValueIndicator(missing_value_code)), + (_, _) => { + return Err(Error::BadStringMissingValueCode { + offset, + code: missing_value_code, + }) + } } for _ in 0..missing_value_code.abs() { @@ -277,7 +312,7 @@ fn read_variable_record( } Ok(VariableRecord { - pos, + offset, width, name, print_format, @@ -288,6 +323,73 @@ fn read_variable_record( }) } +pub struct ValueLabelRecord { + /// Offset from the start of the file to the start of the record. + pub offset: u64, + + /// The labels. + pub labels: Vec<([u8; 8], Vec)>, + + /// The 0-based indexes of the variables to which the labels are assigned. + pub var_indexes: Vec, +} + +pub const MAX_VALUE_LABELS: u32 = u32::MAX / 8; + +fn read_value_label_record( + r: &mut BufReader, + e: Endian, + n_var_records: usize, +) -> Result { + let offset = r.stream_position()?; + let n: u32 = e.parse(read_bytes(r)?); + if n > MAX_VALUE_LABELS { + return Err(Error::BadNumberOfValueLabels { + offset, + n, + max: MAX_VALUE_LABELS, + }); + } + + let mut labels = Vec::new(); + for _ in 0..n { + let value: [u8; 8] = read_bytes(r)?; + let label_len: u8 = e.parse(read_bytes(r)?); + let label_len = label_len as usize; + let padded_len = Integer::next_multiple_of(&(label_len + 1), &8); + + let mut label = read_vec(r, padded_len)?; + label.truncate(label_len); + labels.push((value, label)); + } + + let rec_type: u32 = e.parse(read_bytes(r)?); + if rec_type != 4 { + return Err(Error::MissingVariableIndexRecord { + offset: r.stream_position()?, + }); + } + + let n_vars: u32 = e.parse(read_bytes(r)?); + if n_vars < 1 || n_vars as usize > n_var_records { + return Err(Error::BadNumberOfValueLabelVariables { + offset: r.stream_position()?, + n: n_vars, + max: n_var_records as u32, + }); + } + let mut var_indexes = Vec::with_capacity(n_vars as usize); + for _ in 0..n_vars { + var_indexes.push(e.parse(read_bytes(r)?)); + } + + Ok(ValueLabelRecord { + offset, + labels, + var_indexes, + }) +} + pub struct DocumentRecord { /// Offset from the start of the file to the start of the record. pub pos: u64, @@ -300,11 +402,13 @@ fn read_document_record( r: &mut BufReader, e: Endian, ) -> Result, Error> { + let offset = r.stream_position()?; let n: u32 = e.parse(read_bytes(r)?); if n == 0 { Ok(None) } else if n > DOC_MAX_LINES { Err(Error::BadDocumentLength { + offset, n, max: DOC_MAX_LINES, })