work on sysfile tests
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 8 Jun 2025 17:30:03 +0000 (10:30 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 8 Jun 2025 17:30:03 +0000 (10:30 -0700)
rust/pspp/src/dictionary.rs
rust/pspp/src/sys/cooked.rs
rust/pspp/src/sys/raw.rs
rust/pspp/src/sys/test.rs
rust/pspp/src/sys/testdata/empty_document_record.expected [new file with mode: 0644]
rust/pspp/src/sys/testdata/empty_document_record.sack [new file with mode: 0644]
rust/pspp/src/sys/testdata/variable_sets.expected [new file with mode: 0644]
rust/pspp/src/sys/testdata/variable_sets.sack [new file with mode: 0644]

index 38a22b1ae5ee5adf76ccf71ba60cea4cb2eaceff..116491b740049022b9535373935b1627fddbb8db 100644 (file)
@@ -319,7 +319,7 @@ pub struct Dictionary {
     /// Variable sets.
     ///
     /// Only the GUI makes use of variable sets.
-    pub variable_sets: HashSet<ByIdentifier<VariableSet>>,
+    pub variable_sets: Vec<VariableSet>,
 
     /// Character encoding for the dictionary and the data.
     pub encoding: &'static Encoding,
@@ -351,7 +351,7 @@ impl Dictionary {
             vectors: HashSet::new(),
             attributes: Attributes::new(),
             mrsets: HashSet::new(),
-            variable_sets: HashSet::new(),
+            variable_sets: Vec::new(),
             encoding,
         }
     }
@@ -507,13 +507,8 @@ impl Dictionary {
             .collect();
         self.variable_sets = self
             .variable_sets
-            .drain()
-            .filter_map(|var_set_by_id| {
-                var_set_by_id
-                    .0
-                    .with_updated_dict_indexes(f)
-                    .map(ByIdentifier::new)
-            })
+            .drain(..)
+            .filter_map(|var_set| var_set.with_updated_dict_indexes(f))
             .collect();
     }
 
@@ -549,6 +544,10 @@ impl Dictionary {
         OutputValueLabels::new(self)
     }
 
+    pub fn output_variable_sets(&self) -> OutputVariableSets {
+        OutputVariableSets::new(self)
+    }
+
     pub fn to_pivot_rows(&self) -> (Group, Vec<Value>) {
         let mut group = Group::new("Dictionary Information");
         let mut values = Vec::new();
@@ -664,7 +663,7 @@ impl<'a> OutputValueLabels<'a> {
     fn new(dictionary: &'a Dictionary) -> Self {
         Self { dictionary }
     }
-    fn any_value_labels(&self) -> bool {
+    pub fn any_value_labels(&self) -> bool {
         self.dictionary
             .variables
             .iter()
@@ -741,6 +740,50 @@ impl VariableField {
     }
 }
 
+pub struct OutputVariableSets<'a> {
+    dictionary: &'a Dictionary,
+}
+
+impl<'a> OutputVariableSets<'a> {
+    fn new(dictionary: &'a Dictionary) -> Self {
+        Self { dictionary }
+    }
+    pub fn any_variable_sets(&self) -> bool {
+        !self.dictionary.variable_sets.is_empty()
+    }
+    pub fn to_pivot_table(&self) -> Option<PivotTable> {
+        if !self.any_variable_sets() {
+            return None;
+        }
+
+        let mut variable_sets = Group::new("Variable Set and Position").with_label_shown();
+        let mut data = Vec::new();
+        for vs in &self.dictionary.variable_sets {
+            let mut group = Group::new(vs.name.as_str());
+            for (variable, index) in vs.variables.iter().copied().zip(1usize..) {
+                group.push(Value::new_integer(Some(index as f64)));
+                data.push(Value::new_variable(&self.dictionary.variables[variable]));
+            }
+            if vs.variables.is_empty() {
+                group.push(Value::new_text("n/a"));
+                data.push(Value::new_text("(empty)"));
+            }
+            variable_sets.push(group);
+        }
+        let mut pt = PivotTable::new([
+            (Axis3::Y, Dimension::new(variable_sets)),
+            (
+                Axis3::X,
+                Dimension::new(Group::new("Attributes").with("Variable")),
+            ),
+        ]);
+        for (row, datum) in data.into_iter().enumerate() {
+            pt.insert(&[row, 0], datum);
+        }
+        Some(pt)
+    }
+}
+
 fn update_dict_index_vec<F>(dict_indexes: &mut Vec<DictIndex>, f: F)
 where
     F: Fn(DictIndex) -> Option<DictIndex>,
@@ -1056,7 +1099,7 @@ pub enum MultipleResponseType {
 
 #[derive(Clone, Debug)]
 pub struct VariableSet {
-    pub name: Identifier,
+    pub name: String,
     pub variables: Vec<DictIndex>,
 }
 
@@ -1070,12 +1113,6 @@ impl VariableSet {
     }
 }
 
-impl HasIdentifier for VariableSet {
-    fn identifier(&self) -> &UniCase<String> {
-        &self.name.0
-    }
-}
-
 #[cfg(test)]
 mod test {
     use std::collections::HashSet;
index 1fbbbab2e2299814dc468d8d46e2442ac1122086..b49d21f67b92f34a033a2f162bed1bb4ffa6f804 100644 (file)
@@ -851,15 +851,11 @@ pub fn decode(
             };
             variables.push(dict_index);
         }
-        if !variables.is_empty() {
-            let variable_set = VariableSet {
-                name: record.name,
-                variables,
-            };
-            dictionary
-                .variable_sets
-                .insert(ByIdentifier::new(variable_set));
-        }
+        let variable_set = VariableSet {
+            name: record.name,
+            variables,
+        };
+        dictionary.variable_sets.push(variable_set);
     }
 
     let metadata = Metadata::decode(&headers, warn);
index 50fbe9930bb0095250467d103601d0811e533728..53f80a1b905f2506df716e093775cce22b999a7f 100644 (file)
@@ -193,6 +193,9 @@ pub enum Warning {
     #[error("Invalid variable name in variable set record.  {0}")]
     InvalidVariableSetName(IdError),
 
+    #[error("Variable set missing name delimiter.")]
+    VariableSetMissingEquals,
+
     #[error("Invalid multiple response set name.  {0}")]
     InvalidMrSetName(IdError),
 
@@ -2761,14 +2764,15 @@ impl ProductInfoRecord {
 }
 #[derive(Clone, Debug)]
 pub struct VariableSet {
-    pub name: Identifier,
+    pub name: String,
     pub variable_names: Vec<Identifier>,
 }
 
 impl VariableSet {
     fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
-        let (name, input) = input.split_once('=').ok_or(Warning::TBD)?;
-        let name = decoder.new_identifier(name).map_err(|_| Warning::TBD)?;
+        let (name, input) = input
+            .split_once('=')
+            .ok_or(Warning::VariableSetMissingEquals)?;
         let mut vars = Vec::new();
         for var in input.split_ascii_whitespace() {
             if let Some(identifier) = decoder
@@ -2781,7 +2785,7 @@ impl VariableSet {
             }
         }
         Ok(VariableSet {
-            name,
+            name: name.to_string(),
             variable_names: vars,
         })
     }
index ce107c1cc3080373f160f49d6f1ab1f59f91eac9..f94b147db0c34c9dbea5f53889c680753472fd4c 100644 (file)
@@ -3,20 +3,16 @@ use std::{io::Cursor, path::Path, sync::Arc};
 use crate::{
     endian::Endian,
     output::{
-        pivot::{
-            test::{assert_lines_eq, assert_rendering},
-            Axis3, Dimension, PivotTable,
-        },
+        pivot::{test::assert_lines_eq, Axis3, Dimension, PivotTable},
         Details, Item, Text,
     },
     sys::{
-        cooked::{decode, Headers, Metadata},
+        cooked::{decode, Headers},
         raw::{encoding_from_headers, Decoder, Reader, Record},
         sack::sack,
     },
 };
 
-use chrono::{NaiveDate, NaiveTime};
 use enum_iterator::all;
 
 #[test]
@@ -44,6 +40,16 @@ fn documents() {
     test_sysfile("documents");
 }
 
+#[test]
+fn empty_document_record() {
+    test_sysfile("empty_document_record");
+}
+
+#[test]
+fn variable_sets() {
+    test_sysfile("variable_sets");
+}
+
 fn test_sysfile(name: &str) {
     let input_filename = Path::new(env!("CARGO_MANIFEST_DIR"))
         .join("src/sys/testdata")
@@ -95,6 +101,9 @@ fn test_sysfile(name: &str) {
         if let Some(pt) = dictionary.output_value_labels().to_pivot_table() {
             output.push(Arc::new(pt.into()));
         }
+        if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() {
+            output.push(Arc::new(pt.into()));
+        }
         let output = Item::new(Details::Group(output));
 
         assert_lines_eq(
diff --git a/rust/pspp/src/sys/testdata/empty_document_record.expected b/rust/pspp/src/sys/testdata/empty_document_record.expected
new file mode 100644 (file)
index 0000000..4489a0b
--- /dev/null
@@ -0,0 +1,20 @@
+╭──────────────────────┬────────────────────────╮
+│       Created        │    01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product        │PSPP synthetic test file│
+│       Version        │1.2.3                   │
+├──────────────────────┼────────────────────────┤
+│       Compression    │None                    │
+│       Number of Cases│                       1│
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬────────────────────────╮
+│Label    │PSPP synthetic test file│
+│Variables│                       1│
+╰─────────┴────────────────────────╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│    │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│num1│       1│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
diff --git a/rust/pspp/src/sys/testdata/empty_document_record.sack b/rust/pspp/src/sys/testdata/empty_document_record.sack
new file mode 100644 (file)
index 0000000..1b0cf10
--- /dev/null
@@ -0,0 +1,28 @@
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; # Layout code
+1; # Nominal case size
+0; # Not compressed
+0; # Not weighted
+1; # 1 case.
+100.0; # Bias.
+"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file";
+i8 0 *3;
+
+# Numeric variable, no label or missing values.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# Machine integer info record.
+7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252;
+
+# Document record.
+6; 0;
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# Dictionary termination record.
+999; 0;
+
+# Data.
+1.0;
diff --git a/rust/pspp/src/sys/testdata/variable_sets.expected b/rust/pspp/src/sys/testdata/variable_sets.expected
new file mode 100644 (file)
index 0000000..982db89
--- /dev/null
@@ -0,0 +1,54 @@
+╭──────────────────────┬────────────────────────╮
+│       Created        │    01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product        │PSPP synthetic test file│
+│       Version        │1.2.3                   │
+├──────────────────────┼────────────────────────┤
+│       Compression    │None                    │
+│       Number of Cases│                       0│
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬────────────────────────╮
+│Label    │PSPP synthetic test file│
+│Variables│                      10│
+╰─────────┴────────────────────────╯
+
+╭──┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│  │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├──┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│あ│       1│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│b │       2│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│c │       3│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│d │       4│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│e │       5│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│f │       6│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│g │       7│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│h │       8│     │Nominal          │Input│    4│Left     │A4          │A4          │              │
+│i │       9│     │Nominal          │Input│    4│Left     │A4          │A4          │              │
+│j │      10│     │Nominal          │Input│    4│Left     │A4          │A4          │              │
+╰──┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+╭─────────────────────────────┬────────╮
+│Variable Set and Position    │Variable│
+├─────────────────────────────┼────────┤
+│Variable Set 1            1  │あ      │
+│                          2  │b       │
+│                          3  │c       │
+├─────────────────────────────┼────────┤
+│vs2                       1  │d       │
+│                          2  │e       │
+│                          3  │f       │
+│                          4  │g       │
+├─────────────────────────────┼────────┤
+│c                         1  │h       │
+│                          2  │i       │
+│                          3  │j       │
+├─────────────────────────────┼────────┤
+│d                         1  │e       │
+│                          2  │g       │
+│                          3  │i       │
+│                          4  │b       │
+│                          5  │f       │
+├─────────────────────────────┼────────┤
+│Empty Variable Set        n/a│(empty) │
+╰─────────────────────────────┴────────╯
diff --git a/rust/pspp/src/sys/testdata/variable_sets.sack b/rust/pspp/src/sys/testdata/variable_sets.sack
new file mode 100644 (file)
index 0000000..3735e10
--- /dev/null
@@ -0,0 +1,43 @@
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; # Layout code
+10; # Nominal case size
+0; # Not compressed
+0; # Not weighted
+0; # No cases.
+100.0; # Bias.
+"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file";
+i8 0 *3;
+
+# Variable Set 1
+2; 0; 0; 0; 0x050800 *2; i8 0x82; i8 0xa0; s6 "";
+2; 0; 0; 0; 0x050800 *2; s8 "B";
+2; 0; 0; 0; 0x050800 *2; s8 "C";
+
+# vs2
+2; 0; 0; 0; 0x050800 *2; s8 "D";
+2; 0; 0; 0; 0x050800 *2; s8 "E";
+2; 0; 0; 0; 0x050800 *2; s8 "F";
+2; 0; 0; 0; 0x050800 *2; s8 "G";
+
+# c
+2; 4; 0; 0; 0x010400 *2; s8 "H";
+2; 4; 0; 0; 0x010400 *2; s8 "I";
+2; 4; 0; 0; 0x010400 *2; s8 "J";
+
+# Machine integer info record.
+7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 932;
+
+7; 5; 1;
+COUNT(
+  "Variable Set 1= "; i8 0x82; i8 0xa0; " b c"; i8 10;
+  "vs2=d e f g"; i8 10;
+  "c=h i j"; i8 13; i8 10;
+  "d= e g i b f"; i8 10;
+  "Empty Variable Set= "; i8 10);
+
+# Character encoding record.
+7; 20; 1; 9; "shift_jis";
+
+# Dictionary termination record.
+999; 0;