From aed004cd688a06e8e6593c72d8146980445830ab Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 8 Jun 2025 10:30:03 -0700 Subject: [PATCH] work on sysfile tests --- rust/pspp/src/dictionary.rs | 71 ++++++++++++++----- rust/pspp/src/sys/cooked.rs | 14 ++-- rust/pspp/src/sys/raw.rs | 12 ++-- rust/pspp/src/sys/test.rs | 21 ++++-- .../testdata/empty_document_record.expected | 20 ++++++ .../sys/testdata/empty_document_record.sack | 28 ++++++++ .../src/sys/testdata/variable_sets.expected | 54 ++++++++++++++ rust/pspp/src/sys/testdata/variable_sets.sack | 43 +++++++++++ 8 files changed, 227 insertions(+), 36 deletions(-) create mode 100644 rust/pspp/src/sys/testdata/empty_document_record.expected create mode 100644 rust/pspp/src/sys/testdata/empty_document_record.sack create mode 100644 rust/pspp/src/sys/testdata/variable_sets.expected create mode 100644 rust/pspp/src/sys/testdata/variable_sets.sack diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 38a22b1ae5..116491b740 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -319,7 +319,7 @@ pub struct Dictionary { /// Variable sets. /// /// Only the GUI makes use of variable sets. - pub variable_sets: HashSet>, + pub variable_sets: Vec, /// Character encoding for the dictionary and the data. pub encoding: &'static Encoding, @@ -351,7 +351,7 @@ impl Dictionary { vectors: HashSet::new(), attributes: Attributes::new(), mrsets: HashSet::new(), - variable_sets: HashSet::new(), + variable_sets: Vec::new(), encoding, } } @@ -507,13 +507,8 @@ impl Dictionary { .collect(); self.variable_sets = self .variable_sets - .drain() - .filter_map(|var_set_by_id| { - var_set_by_id - .0 - .with_updated_dict_indexes(f) - .map(ByIdentifier::new) - }) + .drain(..) + .filter_map(|var_set| var_set.with_updated_dict_indexes(f)) .collect(); } @@ -549,6 +544,10 @@ impl Dictionary { OutputValueLabels::new(self) } + pub fn output_variable_sets(&self) -> OutputVariableSets { + OutputVariableSets::new(self) + } + pub fn to_pivot_rows(&self) -> (Group, Vec) { let mut group = Group::new("Dictionary Information"); let mut values = Vec::new(); @@ -664,7 +663,7 @@ impl<'a> OutputValueLabels<'a> { fn new(dictionary: &'a Dictionary) -> Self { Self { dictionary } } - fn any_value_labels(&self) -> bool { + pub fn any_value_labels(&self) -> bool { self.dictionary .variables .iter() @@ -741,6 +740,50 @@ impl VariableField { } } +pub struct OutputVariableSets<'a> { + dictionary: &'a Dictionary, +} + +impl<'a> OutputVariableSets<'a> { + fn new(dictionary: &'a Dictionary) -> Self { + Self { dictionary } + } + pub fn any_variable_sets(&self) -> bool { + !self.dictionary.variable_sets.is_empty() + } + pub fn to_pivot_table(&self) -> Option { + if !self.any_variable_sets() { + return None; + } + + let mut variable_sets = Group::new("Variable Set and Position").with_label_shown(); + let mut data = Vec::new(); + for vs in &self.dictionary.variable_sets { + let mut group = Group::new(vs.name.as_str()); + for (variable, index) in vs.variables.iter().copied().zip(1usize..) { + group.push(Value::new_integer(Some(index as f64))); + data.push(Value::new_variable(&self.dictionary.variables[variable])); + } + if vs.variables.is_empty() { + group.push(Value::new_text("n/a")); + data.push(Value::new_text("(empty)")); + } + variable_sets.push(group); + } + let mut pt = PivotTable::new([ + (Axis3::Y, Dimension::new(variable_sets)), + ( + Axis3::X, + Dimension::new(Group::new("Attributes").with("Variable")), + ), + ]); + for (row, datum) in data.into_iter().enumerate() { + pt.insert(&[row, 0], datum); + } + Some(pt) + } +} + fn update_dict_index_vec(dict_indexes: &mut Vec, f: F) where F: Fn(DictIndex) -> Option, @@ -1056,7 +1099,7 @@ pub enum MultipleResponseType { #[derive(Clone, Debug)] pub struct VariableSet { - pub name: Identifier, + pub name: String, pub variables: Vec, } @@ -1070,12 +1113,6 @@ impl VariableSet { } } -impl HasIdentifier for VariableSet { - fn identifier(&self) -> &UniCase { - &self.name.0 - } -} - #[cfg(test)] mod test { use std::collections::HashSet; diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 1fbbbab2e2..b49d21f67b 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -851,15 +851,11 @@ pub fn decode( }; variables.push(dict_index); } - if !variables.is_empty() { - let variable_set = VariableSet { - name: record.name, - variables, - }; - dictionary - .variable_sets - .insert(ByIdentifier::new(variable_set)); - } + let variable_set = VariableSet { + name: record.name, + variables, + }; + dictionary.variable_sets.push(variable_set); } let metadata = Metadata::decode(&headers, warn); diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 50fbe9930b..53f80a1b90 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -193,6 +193,9 @@ pub enum Warning { #[error("Invalid variable name in variable set record. {0}")] InvalidVariableSetName(IdError), + #[error("Variable set missing name delimiter.")] + VariableSetMissingEquals, + #[error("Invalid multiple response set name. {0}")] InvalidMrSetName(IdError), @@ -2761,14 +2764,15 @@ impl ProductInfoRecord { } #[derive(Clone, Debug)] pub struct VariableSet { - pub name: Identifier, + pub name: String, pub variable_names: Vec, } impl VariableSet { fn parse(input: &str, decoder: &Decoder) -> Result { - let (name, input) = input.split_once('=').ok_or(Warning::TBD)?; - let name = decoder.new_identifier(name).map_err(|_| Warning::TBD)?; + let (name, input) = input + .split_once('=') + .ok_or(Warning::VariableSetMissingEquals)?; let mut vars = Vec::new(); for var in input.split_ascii_whitespace() { if let Some(identifier) = decoder @@ -2781,7 +2785,7 @@ impl VariableSet { } } Ok(VariableSet { - name, + name: name.to_string(), variable_names: vars, }) } diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index ce107c1cc3..f94b147db0 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -3,20 +3,16 @@ use std::{io::Cursor, path::Path, sync::Arc}; use crate::{ endian::Endian, output::{ - pivot::{ - test::{assert_lines_eq, assert_rendering}, - Axis3, Dimension, PivotTable, - }, + pivot::{test::assert_lines_eq, Axis3, Dimension, PivotTable}, Details, Item, Text, }, sys::{ - cooked::{decode, Headers, Metadata}, + cooked::{decode, Headers}, raw::{encoding_from_headers, Decoder, Reader, Record}, sack::sack, }, }; -use chrono::{NaiveDate, NaiveTime}; use enum_iterator::all; #[test] @@ -44,6 +40,16 @@ fn documents() { test_sysfile("documents"); } +#[test] +fn empty_document_record() { + test_sysfile("empty_document_record"); +} + +#[test] +fn variable_sets() { + test_sysfile("variable_sets"); +} + fn test_sysfile(name: &str) { let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) .join("src/sys/testdata") @@ -95,6 +101,9 @@ fn test_sysfile(name: &str) { if let Some(pt) = dictionary.output_value_labels().to_pivot_table() { output.push(Arc::new(pt.into())); } + if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() { + output.push(Arc::new(pt.into())); + } let output = Item::new(Details::Group(output)); assert_lines_eq( diff --git a/rust/pspp/src/sys/testdata/empty_document_record.expected b/rust/pspp/src/sys/testdata/empty_document_record.expected new file mode 100644 index 0000000000..4489a0b363 --- /dev/null +++ b/rust/pspp/src/sys/testdata/empty_document_record.expected @@ -0,0 +1,20 @@ +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────────┤ +│ Compression │None │ +│ Number of Cases│ 1│ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬────────────────────────╮ +│Label │PSPP synthetic test file│ +│Variables│ 1│ +╰─────────┴────────────────────────╯ + +╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ diff --git a/rust/pspp/src/sys/testdata/empty_document_record.sack b/rust/pspp/src/sys/testdata/empty_document_record.sack new file mode 100644 index 0000000000..1b0cf10d87 --- /dev/null +++ b/rust/pspp/src/sys/testdata/empty_document_record.sack @@ -0,0 +1,28 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; # Layout code +1; # Nominal case size +0; # Not compressed +0; # Not weighted +1; # 1 case. +100.0; # Bias. +"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +# Numeric variable, no label or missing values. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; + +# Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; + +# Document record. +6; 0; + +# Character encoding record. +7; 20; 1; 12; "windows-1252"; + +# Dictionary termination record. +999; 0; + +# Data. +1.0; diff --git a/rust/pspp/src/sys/testdata/variable_sets.expected b/rust/pspp/src/sys/testdata/variable_sets.expected new file mode 100644 index 0000000000..982db894ea --- /dev/null +++ b/rust/pspp/src/sys/testdata/variable_sets.expected @@ -0,0 +1,54 @@ +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────────┤ +│ Compression │None │ +│ Number of Cases│ 0│ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬────────────────────────╮ +│Label │PSPP synthetic test file│ +│Variables│ 10│ +╰─────────┴────────────────────────╯ + +╭──┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├──┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│あ│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│b │ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│c │ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│d │ 4│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│e │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│f │ 6│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│g │ 7│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│h │ 8│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│i │ 9│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│j │ 10│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +╰──┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭─────────────────────────────┬────────╮ +│Variable Set and Position │Variable│ +├─────────────────────────────┼────────┤ +│Variable Set 1 1 │あ │ +│ 2 │b │ +│ 3 │c │ +├─────────────────────────────┼────────┤ +│vs2 1 │d │ +│ 2 │e │ +│ 3 │f │ +│ 4 │g │ +├─────────────────────────────┼────────┤ +│c 1 │h │ +│ 2 │i │ +│ 3 │j │ +├─────────────────────────────┼────────┤ +│d 1 │e │ +│ 2 │g │ +│ 3 │i │ +│ 4 │b │ +│ 5 │f │ +├─────────────────────────────┼────────┤ +│Empty Variable Set n/a│(empty) │ +╰─────────────────────────────┴────────╯ diff --git a/rust/pspp/src/sys/testdata/variable_sets.sack b/rust/pspp/src/sys/testdata/variable_sets.sack new file mode 100644 index 0000000000..3735e107f6 --- /dev/null +++ b/rust/pspp/src/sys/testdata/variable_sets.sack @@ -0,0 +1,43 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; # Layout code +10; # Nominal case size +0; # Not compressed +0; # Not weighted +0; # No cases. +100.0; # Bias. +"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +# Variable Set 1 +2; 0; 0; 0; 0x050800 *2; i8 0x82; i8 0xa0; s6 ""; +2; 0; 0; 0; 0x050800 *2; s8 "B"; +2; 0; 0; 0; 0x050800 *2; s8 "C"; + +# vs2 +2; 0; 0; 0; 0x050800 *2; s8 "D"; +2; 0; 0; 0; 0x050800 *2; s8 "E"; +2; 0; 0; 0; 0x050800 *2; s8 "F"; +2; 0; 0; 0; 0x050800 *2; s8 "G"; + +# c +2; 4; 0; 0; 0x010400 *2; s8 "H"; +2; 4; 0; 0; 0x010400 *2; s8 "I"; +2; 4; 0; 0; 0x010400 *2; s8 "J"; + +# Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 932; + +7; 5; 1; +COUNT( + "Variable Set 1= "; i8 0x82; i8 0xa0; " b c"; i8 10; + "vs2=d e f g"; i8 10; + "c=h i j"; i8 13; i8 10; + "d= e g i b f"; i8 10; + "Empty Variable Set= "; i8 10); + +# Character encoding record. +7; 20; 1; 9; "shift_jis"; + +# Dictionary termination record. +999; 0; -- 2.30.2