From: Ben Pfaff Date: Mon, 16 Jun 2025 15:20:06 +0000 (-0700) Subject: fixed test X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9d6a76671206fff1a484315e1d143c8939ab8537;p=pspp fixed test --- diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index e6154fb4e1..876284d6b0 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -5,7 +5,7 @@ use std::{ borrow::Cow, cmp::Ordering, collections::{BTreeMap, BTreeSet, HashMap, HashSet}, - fmt::{Debug, Formatter, Result as FmtResult}, + fmt::{Debug, Display, Formatter, Result as FmtResult}, hash::Hash, ops::{Bound, RangeBounds, RangeInclusive}, str::FromStr, @@ -155,6 +155,10 @@ impl VarWidth { self.as_string_width().unwrap() - segment_idx * Self::EFFECTIVE_VLS_CHUNK } } + + pub fn display_adjective(&self) -> VarWidthAdjective { + VarWidthAdjective(*self) + } } impl From for VarType { @@ -166,6 +170,17 @@ impl From for VarType { } } +pub struct VarWidthAdjective(VarWidth); + +impl Display for VarWidthAdjective { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + match self.0 { + VarWidth::Numeric => write!(f, "numeric"), + VarWidth::String(width) => write!(f, "{width}-byte string"), + } + } +} + #[derive(Clone)] pub enum Datum { Number(Option), diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index d1ee09ebff..0121133528 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -192,6 +192,15 @@ pub enum Error { #[error("File header claims {expected} variable positions but {actual} were read from file.")] WrongVariablePositions { actual: usize, expected: usize }, + #[error("Unknown variable name \"{name}\" in long string missing value record.")] + LongStringMissingValueUnknownVariable { name: Identifier }, + + #[error("Invalid long string missing value for {} variable {name}.", width.display_adjective())] + LongStringMissingValueBadWdith { name: Identifier, width: VarWidth }, + + #[error("Long string missing values record says variable {name} has {count} missing values, but only 1 to 3 missing values are allowed.")] + LongStringMissingValueInvalidCount { name: Identifier, count: usize }, + #[error("Details TBD (cooked)")] TBD, } @@ -833,15 +842,31 @@ pub fn decode( } } - for record in headers + for mut record in headers .long_string_missing_values .drain(..) .flat_map(|record| record.0.into_iter()) { let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else { - warn(dbg!(Error::TBD)); + warn(Error::LongStringMissingValueUnknownVariable { + name: record.var_name.clone(), + }); continue; }; + if !variable.width.is_long_string() { + warn(Error::LongStringMissingValueBadWdith { + name: record.var_name.clone(), + width: variable.width, + }); + continue; + } + if record.missing_values.len() > 3 { + warn(Error::LongStringMissingValueInvalidCount { + name: record.var_name.clone(), + count: record.missing_values.len(), + }); + record.missing_values.truncate(3); + } let values = record .missing_values .into_iter() @@ -851,12 +876,12 @@ pub fn decode( Datum::String(value) }) .collect::>(); - dbg!(&values); match MissingValues::new(values, None) { Ok(missing_values) => variable.missing_values = missing_values, - Err(MissingValuesError::TooMany) => warn(Error::TBD), Err(MissingValuesError::TooWide) => warn(Error::TBD), - Err(MissingValuesError::MixedTypes) => unreachable!(), + Err(MissingValuesError::TooMany) | Err(MissingValuesError::MixedTypes) => { + unreachable!() + } } } diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 7b8970a1af..2d10d651ae 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -2511,35 +2511,47 @@ static LONG_STRING_MISSING_VALUE_RECORD: ExtensionRecord = ExtensionRecord { }; impl LongStringMissingValueRecord { - fn parse(ext: &Extension, endian: Endian) -> Result { + fn parse( + ext: &Extension, + endian: Endian, + warn: &mut dyn FnMut(Warning), + ) -> Result { ext.check_size(&LONG_STRING_MISSING_VALUE_RECORD)?; let mut input = &ext.data[..]; let mut missing_value_set = Vec::new(); while !input.is_empty() { let var_name = read_string(&mut input, endian)?; + dbg!(&var_name); let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?); let value_len: u32 = endian.parse(read_bytes(&mut input)?); if value_len != 8 { let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start; - return Err(Warning::BadLongMissingValueLength { + warn(Warning::BadLongMissingValueLength { record_offset: ext.offsets.start, offset, value_len, }); + read_vec( + &mut input, + dbg!(value_len as usize * n_missing_values as usize), + )?; + continue; } let mut missing_values = Vec::new(); for i in 0..n_missing_values { - let value: [u8; 8] = read_bytes(&mut input)?; - let numeric_value: u64 = endian.parse(value); - let value = if i > 0 && numeric_value == 8 { + if i > 0 { // Tolerate files written by old, buggy versions of PSPP // where we believed that the value_length was repeated // before each missing value. - read_bytes(&mut input)? - } else { - value - }; + let mut peek = input; + let number: u32 = endian.parse(read_bytes(&mut peek)?); + if number == 8 { + input = peek; + } + } + + let value: [u8; 8] = read_bytes(&mut input)?; missing_values.push(RawStrArray(value)); } missing_value_set.push(LongStringMissingValues { @@ -3059,7 +3071,7 @@ impl Extension { 11 => VarDisplayRecord::parse(&extension, var_types, endian, warn), 7 | 19 => MultipleResponseRecord::parse(&extension, endian), 21 => LongStringValueLabelRecord::parse(&extension, endian), - 22 => LongStringMissingValueRecord::parse(&extension, endian), + 22 => LongStringMissingValueRecord::parse(&extension, endian, warn), 20 => EncodingRecord::parse(&extension, endian), 16 => NumberOfCasesRecord::parse(&extension, endian), 5 => RawVariableSetRecord::parse(extension), diff --git a/rust/pspp/src/sys/testdata/invalid_long_string_missing_values.expected b/rust/pspp/src/sys/testdata/invalid_long_string_missing_values.expected index e69de29bb2..95e4106d27 100644 --- a/rust/pspp/src/sys/testdata/invalid_long_string_missing_values.expected +++ b/rust/pspp/src/sys/testdata/invalid_long_string_missing_values.expected @@ -0,0 +1,36 @@ +In long string missing values record starting at offset 0x238, value length at offset 0x2a8 is 12 instead of the expected 8. + +File header claims 8 variable positions but 9 were read from file. + +Long string missing values record says variable STR2 has 4 missing values, but only 1 to 3 missing values are allowed. + +Unknown variable name "Nonexistent" in long string missing value record. + +Invalid long string missing value for numeric variable NUM1. + +Invalid long string missing value for 7-byte string variable STR4. + +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────────┤ +│ Compression │None │ +│ Number of Cases│ 1│ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬──────────────────────────────╮ +│Label │PSPP synthetic test file: ôõöø│ +│Variables│ 5│ +╰─────────┴──────────────────────────────╯ + +╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────────────────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│ Missing Values │ +├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────────────────────────┤ +│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│str1│ 2│ │Nominal │Input│ 9│Left │A9 │A9 │ │ +│str2│ 3│ │Nominal │Input│ 10│Left │A10 │A10 │"abcdefgh"; "ijklmnop"; "qrstuvwx"│ +│str3│ 4│ │Nominal │Input│ 11│Left │A11 │A11 │"ABCDEFGH"; "IJKLMNOP" │ +│str4│ 5│ │Nominal │Input│ 7│Left │A7 │A7 │ │ +╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────────────────────────╯ diff --git a/rust/pspp/src/sys/testdata/invalid_long_string_missing_values.sack b/rust/pspp/src/sys/testdata/invalid_long_string_missing_values.sack index d2db3d8070..3b4ef337d5 100644 --- a/rust/pspp/src/sys/testdata/invalid_long_string_missing_values.sack +++ b/rust/pspp/src/sys/testdata/invalid_long_string_missing_values.sack @@ -1,7 +1,7 @@ # File header. "$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; 2; # Layout code -7; # Nominal case size +8; # Nominal case size 0; # Not compressed 0; # Not weighted 1; # 1 case. @@ -21,6 +21,8 @@ i8 0 *3; 2; -1; 0; 0; 0; 0; s8 ""; 2; 11; 0; 0; 0x010b00 *2; s8 "STR3"; 2; -1; 0; 0; 0; 0; s8 ""; +2; 7; 0; 0; 0x010700 *2; s8 "STR4"; +2; -1; 0; 0; 0; 0; s8 ""; # Machine integer info record. 7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; @@ -49,6 +51,9 @@ COUNT("STR3"); i8 1; >>COUNT("abcdefghijkl")<<; # Buggy way that this was written in old PSPP, with a length # before each missing value instead of just once. COUNT("STR3"); i8 2; 8; "ABCDEFGH"; >>8<<; "IJKLMNOP"; + +# Invalid 8-byte missing value for 7-byte string. +COUNT("STR4"); i8 1; 8; "ABCDEFGH"; ); # Character encoding record. diff --git a/rust/pspp/src/sys/testdata/variable_labels_and_missing_values.expected b/rust/pspp/src/sys/testdata/variable_labels_and_missing_values.expected index f7e5a27ad2..a448646811 100644 --- a/rust/pspp/src/sys/testdata/variable_labels_and_missing_values.expected +++ b/rust/pspp/src/sys/testdata/variable_labels_and_missing_values.expected @@ -13,28 +13,28 @@ │Variables│ 21│ ╰─────────┴──────────────────────────────╯ -╭────────────────────────────────┬────────┬────────────────────────────────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬───────────────────────────────────────────╮ -│ │Position│ Label │Measurement Level│ Role│Width│Alignment│Print Format│Write Format│ Missing Values │ -├────────────────────────────────┼────────┼────────────────────────────────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼───────────────────────────────────────────┤ -│num1 │ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ -│Numeric variable 2's label (ùúû)│ 2│Numeric variable 2's label (ùúû)│ │Input│ 8│Right │F8.0 │F8.0 │ │ -│num3 │ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │1 │ -│Another numeric variable label │ 4│Another numeric variable label │ │Input│ 8│Right │F8.0 │F8.0 │1 │ -│num5 │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │1; 2 │ -│num6 │ 6│ │ │Input│ 8│Right │F8.0 │F8.0 │1; 2; 3 │ -│num7 │ 7│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU 3 │ -│num8 │ 8│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU 3; 5 │ -│num9 │ 9│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU HIGH; -5 │ -│numàèìñò │ 10│ │ │Input│ 8│Right │F8.0 │F8.0 │LOW THRU 1; 5 │ -│str1 │ 11│ │Nominal │Input│ 4│Left │A4 │A4 │ │ -│String variable 2's label │ 12│String variable 2's label │Nominal │Input│ 4│Left │A4 │A4 │ │ -│str3 │ 13│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS" │ -│Another string variable label │ 14│Another string variable label │Nominal │Input│ 4│Left │A4 │A4 │"OTHR" │ -│str5 │ 15│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS"; "OTHR" │ -│str6 │ 16│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS"; "OTHR"; "MORE" │ -│str7 │ 17│ │Nominal │Input│ 11│Left │A11 │A11 │"first8by" │ -│str8 │ 18│ │Nominal │Input│ 9│Left │A9 │A9 │"abcdefgh " │ -│str9 │ 19│ │Nominal │Input│ 10│Left │A10 │A10 │"abcdefgh "; "01234567 " │ -│str10 │ 20│ │Nominal │Input│ 11│Left │A11 │A11 │"abcdefgh "; "01234567 "; "0 "│ -│25-byte string │ 21│25-byte string │Nominal │Input│ 25│Left │A25 │A25 │ │ -╰────────────────────────────────┴────────┴────────────────────────────────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴───────────────────────────────────────────╯ +╭────────────────────────────────┬────────┬────────────────────────────────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬───────────────────────────╮ +│ │Position│ Label │Measurement Level│ Role│Width│Alignment│Print Format│Write Format│ Missing Values │ +├────────────────────────────────┼────────┼────────────────────────────────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼───────────────────────────┤ +│num1 │ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│Numeric variable 2's label (ùúû)│ 2│Numeric variable 2's label (ùúû)│ │Input│ 8│Right │F8.0 │F8.0 │ │ +│num3 │ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │1 │ +│Another numeric variable label │ 4│Another numeric variable label │ │Input│ 8│Right │F8.0 │F8.0 │1 │ +│num5 │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │1; 2 │ +│num6 │ 6│ │ │Input│ 8│Right │F8.0 │F8.0 │1; 2; 3 │ +│num7 │ 7│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU 3 │ +│num8 │ 8│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU 3; 5 │ +│num9 │ 9│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU HIGH; -5 │ +│numàèìñò │ 10│ │ │Input│ 8│Right │F8.0 │F8.0 │LOW THRU 1; 5 │ +│str1 │ 11│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│String variable 2's label │ 12│String variable 2's label │Nominal │Input│ 4│Left │A4 │A4 │ │ +│str3 │ 13│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS" │ +│Another string variable label │ 14│Another string variable label │Nominal │Input│ 4│Left │A4 │A4 │"OTHR" │ +│str5 │ 15│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS"; "OTHR" │ +│str6 │ 16│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS"; "OTHR"; "MORE" │ +│str7 │ 17│ │Nominal │Input│ 11│Left │A11 │A11 │"first8by" │ +│str8 │ 18│ │Nominal │Input│ 9│Left │A9 │A9 │"abcdefgh" │ +│str9 │ 19│ │Nominal │Input│ 10│Left │A10 │A10 │"abcdefgh"; "01234567" │ +│str10 │ 20│ │Nominal │Input│ 11│Left │A11 │A11 │"abcdefgh"; "01234567"; "0"│ +│25-byte string │ 21│25-byte string │Nominal │Input│ 25│Left │A25 │A25 │ │ +╰────────────────────────────────┴────────┴────────────────────────────────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴───────────────────────────╯