From 309f7b71eb23c56c8738b352934742b9c9870c63 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 19 Jun 2025 13:37:40 -0700 Subject: [PATCH] more tests --- rust/pspp/src/identifier.rs | 3 + rust/pspp/src/sys/cooked.rs | 17 +++++- rust/pspp/src/sys/test.rs | 10 ++++ .../multiple_response_sets_bad_name.expected | 53 ++++++++++++++++++ .../multiple_response_sets_bad_name.sack | 56 +++++++++++++++++++ .../variable_sets_unknown_variable.expected | 46 +++++++++++++++ .../variable_sets_unknown_variable.sack | 40 +++++++++++++ 7 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 rust/pspp/src/sys/testdata/multiple_response_sets_bad_name.expected create mode 100644 rust/pspp/src/sys/testdata/multiple_response_sets_bad_name.sack create mode 100644 rust/pspp/src/sys/testdata/variable_sets_unknown_variable.expected create mode 100644 rust/pspp/src/sys/testdata/variable_sets_unknown_variable.sack diff --git a/rust/pspp/src/identifier.rs b/rust/pspp/src/identifier.rs index c6b1fc54c0..e891347f9d 100644 --- a/rust/pspp/src/identifier.rs +++ b/rust/pspp/src/identifier.rs @@ -124,6 +124,9 @@ pub enum Error { encoding: &'static str, c: char, }, + + #[error("Multiple response set name \"{0}\" does not begin with required \"$\".")] + MissingAt(Identifier), } pub enum ReservedWord { diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index fc0a28988e..dcac4af6bd 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -236,6 +236,12 @@ pub enum Error { name: &'static str, }, + #[error("Variable set \"{variable_set}\" includes unknown variable {variable}.")] + UnknownVariableSetVariable { + variable_set: String, + variable: Identifier, + }, + #[error("Details TBD (cooked)")] TBD, } @@ -971,7 +977,10 @@ pub fn decode( let mut variables = Vec::with_capacity(record.variable_names.len()); for variable_name in record.variable_names { let Some((dict_index, _)) = dictionary.variables.get_full_mut2(&variable_name.0) else { - warn(dbg!(Error::TBD)); + warn(Error::UnknownVariableSetVariable { + variable_set: record.name.clone(), + variable: variable_name.clone(), + }); continue; }; variables.push(dict_index); @@ -1003,6 +1012,12 @@ impl MultipleResponseSet { warn: &mut impl FnMut(Error), ) -> Result { let mr_set_name = input.name.clone(); + if !mr_set_name.0.starts_with("$") { + return Err(Error::InvalidMrSetName(IdError::MissingAt( + mr_set_name.clone(), + ))); + } + let mut variables = Vec::with_capacity(input.short_names.len()); for short_name in input.short_names.iter() { let Some(dict_index) = dictionary.variables.get_index_of(&short_name.0) else { diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index bebe3b67ad..93ed921471 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -227,6 +227,16 @@ fn wrong_special_floats() { test_sysfile("wrong_special_floats"); } +#[test] +fn variable_sets_unknown_variable() { + test_sysfile("variable_sets_unknown_variable"); +} + +#[test] +fn multiple_response_sets_bad_name() { + test_sysfile("multiple_response_sets_bad_name"); +} + /// Duplicate variable name handling negative test. /// /// SPSS-generated system file can contain duplicate variable names (see bug diff --git a/rust/pspp/src/sys/testdata/multiple_response_sets_bad_name.expected b/rust/pspp/src/sys/testdata/multiple_response_sets_bad_name.expected new file mode 100644 index 0000000000..6793e1b229 --- /dev/null +++ b/rust/pspp/src/sys/testdata/multiple_response_sets_bad_name.expected @@ -0,0 +1,53 @@ +Invalid multiple response set name. Multiple response set name "b" does not begin with required "$". + +Invalid multiple response set name. Multiple response set name "e" does not begin with required "$". + +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────────┤ +│ Compression │None │ +│ Number of Cases│ 0│ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬────────────────────────╮ +│Label │PSPP synthetic test file│ +│Variables│ 16│ +╰─────────┴────────────────────────╯ + +╭──┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├──┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│あ│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│b │ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│c │ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│d │ 4│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│e │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│f │ 6│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│g │ 7│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│h │ 8│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│i │ 9│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│j │ 10│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│k │ 11│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│l │ 12│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│m │ 13│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│n │ 14│ │Nominal │Input│ 6│Left │A6 │A6 │ │ +│o │ 15│ │Nominal │Input│ 6│Left │A6 │A6 │ │ +│p │ 16│ │Nominal │Input│ 6│Left │A6 │A6 │ │ +╰──┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭────┬─────────────┬───────────┬─────────────┬────────────────╮ +│Name│ Label │ Encoding │Counted Value│Member Variables│ +├────┼─────────────┼───────────┼─────────────┼────────────────┤ +│$a │my mcgroup │Categories │ │あ │ +│ │ │ │ │b │ +│ │ │ │ │c │ +│$c │mdgroup #2 │Dichotomies│はい │h │ +│ │ │ │ │i │ +│ │ │ │ │j │ +│$d │third mdgroup│Dichotomies│ 34.00│k │ +│ │ │ │ │l │ +│ │ │ │ │m │ +╰────┴─────────────┴───────────┴─────────────┴────────────────╯ diff --git a/rust/pspp/src/sys/testdata/multiple_response_sets_bad_name.sack b/rust/pspp/src/sys/testdata/multiple_response_sets_bad_name.sack new file mode 100644 index 0000000000..e804999f53 --- /dev/null +++ b/rust/pspp/src/sys/testdata/multiple_response_sets_bad_name.sack @@ -0,0 +1,56 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; # Layout code +16; # Nominal case size +0; # Not compressed +0; # Not weighted +0; # No cases. +100.0; # Bias. +"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +# $a +2; 0; 0; 0; 0x050800 *2; i8 0x82; i8 0xa0; s6 ""; +2; 0; 0; 0; 0x050800 *2; s8 "B"; +2; 0; 0; 0; 0x050800 *2; s8 "C"; + +# $b +2; 0; 0; 0; 0x050800 *2; s8 "D"; +2; 0; 0; 0; 0x050800 *2; s8 "E"; +2; 0; 0; 0; 0x050800 *2; s8 "F"; +2; 0; 0; 0; 0x050800 *2; s8 "G"; + +# $c +2; 4; 0; 0; 0x010400 *2; s8 "H"; +2; 4; 0; 0; 0x010400 *2; s8 "I"; +2; 4; 0; 0; 0x010400 *2; s8 "J"; + +# $d +2; 0; 0; 0; 0x050800 *2; s8 "K"; +2; 0; 0; 0; 0x050800 *2; s8 "L"; +2; 0; 0; 0; 0x050800 *2; s8 "M"; + +# $e +2; 6; 0; 0; 0x010600 *2; s8 "N"; +2; 6; 0; 0; 0x010600 *2; s8 "O"; +2; 6; 0; 0; 0x010600 *2; s8 "P"; + +# Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 932; + +7; 7; 1; +COUNT( + "$a=C 10 my mcgroup "; i8 0x82; i8 0xa0; " b c"; i8 10; + "b=D2 55 0 g e f d"; i8 10; + "$c=D4 "; i8 0x82; i8 0xcd; i8 0x82; i8 0xa2; " 10 mdgroup #2 h i j"; i8 10); + +7; 19; 1; +COUNT( + "$d=E 1 2 34 13 third mdgroup k l m"; i8 10; + "e=E 11 6 choice 0 n o p"; i8 10); + +# Character encoding record. +7; 20; 1; 9; "shift_jis"; + +# Dictionary termination record. +999; 0; diff --git a/rust/pspp/src/sys/testdata/variable_sets_unknown_variable.expected b/rust/pspp/src/sys/testdata/variable_sets_unknown_variable.expected new file mode 100644 index 0000000000..bf63c319e6 --- /dev/null +++ b/rust/pspp/src/sys/testdata/variable_sets_unknown_variable.expected @@ -0,0 +1,46 @@ +Variable set "Variable Set 1" includes unknown variable xyzzy. + +Variable set "vs2" includes unknown variable foo. + +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────────┤ +│ Compression │None │ +│ Number of Cases│ 0│ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬────────────────────────╮ +│Label │PSPP synthetic test file│ +│Variables│ 10│ +╰─────────┴────────────────────────╯ + +╭──┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├──┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│あ│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│b │ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│c │ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│d │ 4│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│e │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│f │ 6│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│g │ 7│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│h │ 8│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│i │ 9│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│j │ 10│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +╰──┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭───────────────────────────┬────────╮ +│Variable Set and Position │Variable│ +├───────────────────────────┼────────┤ +│Variable Set 1 1│あ │ +│ 2│b │ +│ 3│c │ +├───────────────────────────┼────────┤ +│vs2 1│d │ +│ 2│e │ +│ 3│f │ +│ 4│g │ +╰───────────────────────────┴────────╯ diff --git a/rust/pspp/src/sys/testdata/variable_sets_unknown_variable.sack b/rust/pspp/src/sys/testdata/variable_sets_unknown_variable.sack new file mode 100644 index 0000000000..32628ed810 --- /dev/null +++ b/rust/pspp/src/sys/testdata/variable_sets_unknown_variable.sack @@ -0,0 +1,40 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; # Layout code +10; # Nominal case size +0; # Not compressed +0; # Not weighted +0; # No cases. +100.0; # Bias. +"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +# Variable Set 1 +2; 0; 0; 0; 0x050800 *2; i8 0x82; i8 0xa0; s6 ""; +2; 0; 0; 0; 0x050800 *2; s8 "B"; +2; 0; 0; 0; 0x050800 *2; s8 "C"; + +# vs2 +2; 0; 0; 0; 0x050800 *2; s8 "D"; +2; 0; 0; 0; 0x050800 *2; s8 "E"; +2; 0; 0; 0; 0x050800 *2; s8 "F"; +2; 0; 0; 0; 0x050800 *2; s8 "G"; + +# c +2; 4; 0; 0; 0x010400 *2; s8 "H"; +2; 4; 0; 0; 0x010400 *2; s8 "I"; +2; 4; 0; 0; 0x010400 *2; s8 "J"; + +# Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 932; + +7; 5; 1; +COUNT( + "Variable Set 1= "; i8 0x82; i8 0xa0; " "; >>"xyzzy"<<; " b c"; i8 10; + "vs2=d "; >>"foo"<<; " e f g"; i8 10;); + +# Character encoding record. +7; 20; 1; 9; "shift_jis"; + +# Dictionary termination record. +999; 0; -- 2.30.2