test writing multiple response sets
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 4 Aug 2025 01:39:02 +0000 (18:39 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 4 Aug 2025 01:39:02 +0000 (18:39 -0700)
rust/pspp/src/dictionary.rs
rust/pspp/src/sys/write.rs

index a07db8dd3964a0108fd57bad08a98af34d7cab40..3ad1bf5033751d4d0e9cfc7fa7e1b5e96d3f19da 100644 (file)
@@ -1668,11 +1668,18 @@ impl<'a> Serialize for Vectors<'a> {
     }
 }
 
+#[derive(Copy, Clone, Debug)] // both fields are shared references, which is what allows `Copy` to be derived
 pub struct VariableSet<'a> {
     dictionary: &'a Dictionary, // borrowed; not consulted by the `PartialEq` impl for this type
     variable_set: &'a DictIndexVariableSet, // borrowed; `name()` returns a reference to its `name` field
 }
 
+impl<'a> PartialEq for VariableSet<'a> { // written by hand so that the `dictionary` field is left out of equality
+    fn eq(&self, other: &Self) -> bool {
+        self.variable_set == other.variable_set // `==` on references compares the pointed-to `DictIndexVariableSet` values
+    }
+}
+
 impl<'a> VariableSet<'a> {
     pub fn name(&self) -> &'a String {
         &self.variable_set.name
@@ -1957,7 +1964,7 @@ pub enum CategoryLabels {
     CountedValues { use_var_label_as_mrset_label: bool },
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq)]
 pub struct DictIndexVariableSet {
     pub name: String,
     pub variables: Vec<DictIndex>,
index cb05364e3c487b28945a2cead2110dddca16e27f..e61456d9e6389852d8f377ddbd03580376a4d963 100644 (file)
@@ -221,7 +221,7 @@ where
         self.write_documents()?;
         self.write_integer_record()?;
         self.write_float_record()?;
-        self.write_var_sets()?;
+        self.write_variable_sets()?;
         self.write_mrsets(true)?;
         self.write_variable_display_parameters()?;
         self.write_long_variable_names()?;
@@ -481,7 +481,7 @@ where
             .write_le(self.writer)
     }
 
-    fn write_var_sets(&mut self) -> Result<(), BinError> {
+    fn write_variable_sets(&mut self) -> Result<(), BinError> {
         let mut s = String::new();
         for set in &self.dictionary.variable_sets() {
             write!(&mut s, "{}= ", set.name()).unwrap();
@@ -512,10 +512,10 @@ where
                         CategoryLabels::VarLabels => b"D".as_slice(),
                         CategoryLabels::CountedValues {
                             use_var_label_as_mrset_label: true,
-                        } => b"E 11".as_slice(),
+                        } => b"E 11 ".as_slice(),
                         CategoryLabels::CountedValues {
                             use_var_label_as_mrset_label: false,
-                        } => b"E 1".as_slice(),
+                        } => b"E 1 ".as_slice(),
                     };
                     output.extend_from_slice(leader);
 
@@ -528,8 +528,9 @@ where
                     };
                     write!(&mut output, "{} ", value.len()).unwrap();
                     output.append(&mut value);
+                    output.push(b' ');
                 }
-                MultipleResponseType::MultipleCategory => write!(&mut output, "C").unwrap(),
+                MultipleResponseType::MultipleCategory => write!(&mut output, "C ").unwrap(),
             }
 
             let label = if set.mr_type.label_from_var_label() {
@@ -1210,10 +1211,16 @@ mod tests {
 
     use crate::{
         data::{ByteString, Datum},
-        dictionary::{Dictionary, MissingValueRange, VarWidth, Variable},
-        identifier::Identifier,
+        dictionary::{
+            CategoryLabels, DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary,
+            MissingValueRange, MultipleResponseType, VarWidth, Variable,
+        },
+        identifier::{ByIdentifier, Identifier},
         sys::{
-            raw::records::{RawHeader, RawVariableRecord, VariableRecord},
+            raw::{
+                records::{DocumentRecord, RawHeader, RawVariableRecord, VariableRecord},
+                Decoder,
+            },
             write::DictionaryWriter,
             ReadOptions, WriteOptions,
         },
@@ -1495,21 +1502,21 @@ mod tests {
                 VarWidth::String(8),
                 vec![(
                     Datum::String(ByteString::from("abcdefgh")),
-                    "Longer variable label",
+                    "Longer value label",
                 )],
             ),
             (
                 VarWidth::String(9),
                 vec![(
                     Datum::String(ByteString::from("abcdefghi")),
-                    "Variable label for 9-byte value",
+                    "value label for 9-byte value",
                 )],
             ),
             (
                 VarWidth::String(300),
                 vec![(
                     Datum::String(ByteString::from(vec![b'x'; 300])),
-                    "Variable label for 300-byte value",
+                    "value label for 300-byte value",
                 )],
             ),
         ];
@@ -1553,4 +1560,186 @@ mod tests {
             }
         }
     }
+
+    #[test] // round-trips `Dictionary::documents` through `write_documents()` and `DocumentRecord::read`
+    fn documents() {
+        let expected = vec![String::from("Line one"), String::from("Line two")];
+        let mut dictionary = Dictionary::new(UTF_8);
+        dictionary.documents = expected.clone();
+
+        let mut raw = Vec::new(); // receives the bytes produced by the writer below
+        DictionaryWriter::new(
+            &WriteOptions::reproducible(None),
+            &mut Cursor::new(&mut raw),
+            &dictionary,
+        )
+        .write_documents() // only the document record is written, not a whole system file
+        .unwrap();
+
+        let actual = DocumentRecord::read(&mut Cursor::new(&raw[4..]), Endian::Little) // skips 4 bytes: presumably the record type code; TODO confirm
+            .unwrap()
+            .decode(&mut Decoder::new(UTF_8, |_| panic!())) // the callback panics if it is ever invoked; presumably it receives decoder warnings; confirm
+            .lines
+            .into_iter()
+            .map(|mut s| {
+                s.truncate(s.trim_end().len()); // strip trailing whitespace in place; presumably padding added by the record format; confirm
+                s
+            })
+            .collect::<Vec<_>>();
+        assert_eq!(&actual, &expected);
+    }
+
+    #[test] // writes a dictionary with variable sets to memory, reads it back, and compares the sets
+    fn variable_sets() {
+        let mut expected = Dictionary::new(UTF_8);
+        for index in 0..10 { // ten numeric variables named var0 through var9
+            expected
+                .add_var(Variable::new(
+                    Identifier::new(format!("var{index}")).unwrap(),
+                    VarWidth::Numeric,
+                    UTF_8,
+                ))
+                .unwrap();
+        }
+
+        for (index, variables) in [vec![0], vec![1, 2], vec![3, 4, 5], vec![6, 7, 8, 9]] // four sets of sizes 1 to 4 covering indexes 0..=9
+            .into_iter()
+            .enumerate()
+        {
+            expected.add_variable_set(DictIndexVariableSet {
+                name: format!("Variable Set {index}"), // names contain spaces
+                variables,
+            });
+        }
+
+        let raw = WriteOptions::new()
+            .write_writer(&expected, Cursor::new(Vec::new())) // output goes to an in-memory buffer
+            .unwrap()
+            .finish()
+            .unwrap() // NOTE(review): two unwraps follow `finish()`; presumably it returns a nested Result/Option; confirm
+            .unwrap()
+            .into_inner();
+        let actual = ReadOptions::new(|_| panic!()) // the callback panics if it is ever invoked; presumably it receives reader warnings; confirm
+            .open_reader(Cursor::new(raw))
+            .unwrap()
+            .dictionary;
+
+        assert!(actual // element-wise `Iterator::eq`; presumably over `VariableSet`, whose `PartialEq` ignores the dictionary; confirm
+            .variable_sets()
+            .iter()
+            .eq(expected.variable_sets().iter()),);
+    }
+
+    /// Test writing multiple response sets.
+    ///
+    /// This is the example given in the documentation for the system file
+    /// format.
+    #[test]
+    fn mrsets() {
+        let mut dictionary = Dictionary::new(UTF_8);
+        for (variables, width) in [ // sixteen variables `a`..`p`, added in order, so `a` is index 0 and `p` is index 15
+            ('a'..='g', VarWidth::Numeric),
+            ('h'..='j', VarWidth::String(3)),
+            ('k'..='m', VarWidth::Numeric),
+            ('n'..='p', VarWidth::String(6)),
+        ] {
+            for variable in variables {
+                dictionary
+                    .add_var(Variable::new(
+                        Identifier::new(variable.to_string()).unwrap(),
+                        width,
+                        UTF_8,
+                    ))
+                    .unwrap();
+            }
+        }
+        dictionary
+            .mrsets
+            .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+                name: Identifier::new("$a").unwrap(),
+                label: String::from("my mcgroup"),
+                width: VarWidth::Numeric..=VarWidth::Numeric,
+                mr_type: MultipleResponseType::MultipleCategory,
+                variables: vec![0, 1, 2], // a, b, c
+            }));
+        dictionary
+            .mrsets
+            .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+                name: Identifier::new("$b").unwrap(),
+                label: String::new(), // empty label; the expected output below shows it as length 0
+                width: VarWidth::Numeric..=VarWidth::Numeric,
+                mr_type: MultipleResponseType::MultipleDichotomy {
+                    datum: Datum::Number(Some(55.0)),
+                    labels: CategoryLabels::VarLabels,
+                },
+                variables: vec![6, 4, 5, 3], // g, e, f, d; the expected output keeps this order
+            }));
+        dictionary
+            .mrsets
+            .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+                name: Identifier::new("$c").unwrap(),
+                label: String::from("mdgroup #2"),
+                width: VarWidth::String(3)..=VarWidth::String(3),
+                mr_type: MultipleResponseType::MultipleDichotomy {
+                    datum: Datum::String("Yes".into()),
+                    labels: CategoryLabels::VarLabels,
+                },
+                variables: vec![7, 8, 9], // h, i, j
+            }));
+        dictionary
+            .mrsets
+            .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+                name: Identifier::new("$d").unwrap(),
+                label: String::from("third mdgroup"),
+                width: VarWidth::Numeric..=VarWidth::Numeric,
+                mr_type: MultipleResponseType::MultipleDichotomy {
+                    datum: Datum::Number(Some(34.0)),
+                    labels: CategoryLabels::CountedValues {
+                        use_var_label_as_mrset_label: false,
+                    },
+                },
+                variables: vec![10, 11, 12], // k, l, m
+            }));
+        dictionary
+            .mrsets
+            .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+                name: Identifier::new("$e").unwrap(),
+                label: String::new(),
+                width: VarWidth::String(6)..=VarWidth::String(6),
+                mr_type: MultipleResponseType::MultipleDichotomy {
+                    datum: Datum::String("choice".into()),
+                    labels: CategoryLabels::CountedValues {
+                        use_var_label_as_mrset_label: true,
+                    },
+                },
+                variables: vec![13, 14, 15], // n, o, p
+            }));
+
+        fn get_mrsets(dictionary: &Dictionary, pre_v14: bool) -> String { // helper: writes one mrsets record and returns its text payload
+            let mut raw = Vec::new();
+            DictionaryWriter::new(
+                &WriteOptions::reproducible(None),
+                &mut Cursor::new(&mut raw),
+                dictionary,
+            )
+            .write_mrsets(pre_v14)
+            .unwrap();
+
+            str::from_utf8(&raw[16..]).unwrap().into() // skips 16 bytes: presumably the extension record header; TODO confirm
+        }
+
+        assert_eq!( // with `pre_v14 == true` only the `C` and `D` sets ($a, $b, $c) are expected
+            &get_mrsets(&dictionary, true),
+            "$a=C 10 my mcgroup a b c
$b=D2 55 0  g e f d
$c=D3 Yes 10 mdgroup #2 h i j
"
+        );
+        assert_eq!( // with `pre_v14 == false` only the `E` sets ($d, $e) are expected
+            &get_mrsets(&dictionary, false),
+            "$d=E 1 2 34 13 third mdgroup k l m
+$e=E 11 6 choice 0  n o p
+"
+        );
+    }
 }