From 958701877583e12b911cce21d8eaf26101aba14d Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 3 Aug 2025 18:39:02 -0700 Subject: [PATCH] test writing multiple response sets --- rust/pspp/src/dictionary.rs | 9 +- rust/pspp/src/sys/write.rs | 211 ++++++++++++++++++++++++++++++++++-- 2 files changed, 208 insertions(+), 12 deletions(-) diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index a07db8dd39..3ad1bf5033 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -1668,11 +1668,18 @@ impl<'a> Serialize for Vectors<'a> { } } +#[derive(Copy, Clone, Debug)] pub struct VariableSet<'a> { dictionary: &'a Dictionary, variable_set: &'a DictIndexVariableSet, } +impl<'a> PartialEq for VariableSet<'a> { + fn eq(&self, other: &Self) -> bool { + self.variable_set == other.variable_set + } +} + impl<'a> VariableSet<'a> { pub fn name(&self) -> &'a String { &self.variable_set.name @@ -1957,7 +1964,7 @@ pub enum CategoryLabels { CountedValues { use_var_label_as_mrset_label: bool }, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct DictIndexVariableSet { pub name: String, pub variables: Vec, diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index cb05364e3c..e61456d9e6 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -221,7 +221,7 @@ where self.write_documents()?; self.write_integer_record()?; self.write_float_record()?; - self.write_var_sets()?; + self.write_variable_sets()?; self.write_mrsets(true)?; self.write_variable_display_parameters()?; self.write_long_variable_names()?; @@ -481,7 +481,7 @@ where .write_le(self.writer) } - fn write_var_sets(&mut self) -> Result<(), BinError> { + fn write_variable_sets(&mut self) -> Result<(), BinError> { let mut s = String::new(); for set in &self.dictionary.variable_sets() { write!(&mut s, "{}= ", set.name()).unwrap(); @@ -512,10 +512,10 @@ where CategoryLabels::VarLabels => b"D".as_slice(), CategoryLabels::CountedValues { use_var_label_as_mrset_label: true, - } => b"E 11".as_slice(), + } => b"E 11 ".as_slice(), CategoryLabels::CountedValues { use_var_label_as_mrset_label: false, - } => b"E 1".as_slice(), + } => b"E 1 ".as_slice(), }; output.extend_from_slice(leader); @@ -528,8 +528,9 @@ where }; write!(&mut output, "{} ", value.len()).unwrap(); output.append(&mut value); + output.push(b' '); } - MultipleResponseType::MultipleCategory => write!(&mut output, "C").unwrap(), + MultipleResponseType::MultipleCategory => write!(&mut output, "C ").unwrap(), } let label = if set.mr_type.label_from_var_label() { @@ -1210,10 +1211,16 @@ mod tests { use crate::{ data::{ByteString, Datum}, - dictionary::{Dictionary, MissingValueRange, VarWidth, Variable}, - identifier::Identifier, + dictionary::{ + CategoryLabels, DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, + MissingValueRange, MultipleResponseType, VarWidth, Variable, + }, + identifier::{ByIdentifier, Identifier}, sys::{ - raw::records::{RawHeader, RawVariableRecord, VariableRecord}, + raw::{ + records::{DocumentRecord, RawHeader, RawVariableRecord, VariableRecord}, + Decoder, + }, write::DictionaryWriter, ReadOptions, WriteOptions, }, @@ -1495,21 +1502,21 @@ mod tests { VarWidth::String(8), vec![( Datum::String(ByteString::from("abcdefgh")), - "Longer variable label", + "Longer value label", )], ), ( VarWidth::String(9), vec![( Datum::String(ByteString::from("abcdefghi")), - "Variable label for 9-byte value", + "value label for 9-byte value", )], ), ( VarWidth::String(300), vec![( Datum::String(ByteString::from(vec![b'x'; 300])), - "Variable label for 300-byte value", + "value label for 300-byte value", )], ), ]; @@ -1553,4 +1560,186 @@ mod tests { } } } + + #[test] + fn documents() { + let expected = vec![String::from("Line one"), String::from("Line two")]; + let mut dictionary = Dictionary::new(UTF_8); + dictionary.documents = expected.clone(); + + let mut raw = Vec::new(); + DictionaryWriter::new( + &WriteOptions::reproducible(None), + &mut Cursor::new(&mut raw), + &dictionary, + ) + .write_documents() + .unwrap(); + + let actual = DocumentRecord::read(&mut Cursor::new(&raw[4..]), Endian::Little) + .unwrap() + .decode(&mut Decoder::new(UTF_8, |_| panic!())) + .lines + .into_iter() + .map(|mut s| { + s.truncate(s.trim_end().len()); + s + }) + .collect::>(); + assert_eq!(&actual, &expected); + } + + #[test] + fn variable_sets() { + let mut expected = Dictionary::new(UTF_8); + for index in 0..10 { + expected + .add_var(Variable::new( + Identifier::new(format!("var{index}")).unwrap(), + VarWidth::Numeric, + UTF_8, + )) + .unwrap(); + } + + for (index, variables) in [vec![0], vec![1, 2], vec![3, 4, 5], vec![6, 7, 8, 9]] + .into_iter() + .enumerate() + { + expected.add_variable_set(DictIndexVariableSet { + name: format!("Variable Set {index}"), + variables, + }); + } + + let raw = WriteOptions::new() + .write_writer(&expected, Cursor::new(Vec::new())) + .unwrap() + .finish() + .unwrap() + .unwrap() + .into_inner(); + let actual = ReadOptions::new(|_| panic!()) + .open_reader(Cursor::new(raw)) + .unwrap() + .dictionary; + + assert!(actual + .variable_sets() + .iter() + .eq(expected.variable_sets().iter()),); + } + + /// Test writing multiple response sets. + /// + /// This is the example given in the documentation for the system file + /// format. + #[test] + fn mrsets() { + let mut dictionary = Dictionary::new(UTF_8); + for (variables, width) in [ + ('a'..='g', VarWidth::Numeric), + ('h'..='j', VarWidth::String(3)), + ('k'..='m', VarWidth::Numeric), + ('n'..='p', VarWidth::String(6)), + ] { + for variable in variables { + dictionary + .add_var(Variable::new( + Identifier::new(variable.to_string()).unwrap(), + width, + UTF_8, + )) + .unwrap(); + } + } + dictionary + .mrsets + .insert(ByIdentifier::new(DictIndexMultipleResponseSet { + name: Identifier::new("$a").unwrap(), + label: String::from("my mcgroup"), + width: VarWidth::Numeric..=VarWidth::Numeric, + mr_type: MultipleResponseType::MultipleCategory, + variables: vec![0, 1, 2], + })); + dictionary + .mrsets + .insert(ByIdentifier::new(DictIndexMultipleResponseSet { + name: Identifier::new("$b").unwrap(), + label: String::new(), + width: VarWidth::Numeric..=VarWidth::Numeric, + mr_type: MultipleResponseType::MultipleDichotomy { + datum: Datum::Number(Some(55.0)), + labels: CategoryLabels::VarLabels, + }, + variables: vec![6, 4, 5, 3], + })); + dictionary + .mrsets + .insert(ByIdentifier::new(DictIndexMultipleResponseSet { + name: Identifier::new("$c").unwrap(), + label: String::from("mdgroup #2"), + width: VarWidth::String(3)..=VarWidth::String(3), + mr_type: MultipleResponseType::MultipleDichotomy { + datum: Datum::String("Yes".into()), + labels: CategoryLabels::VarLabels, + }, + variables: vec![7, 8, 9], + })); + dictionary + .mrsets + .insert(ByIdentifier::new(DictIndexMultipleResponseSet { + name: Identifier::new("$d").unwrap(), + label: String::from("third mdgroup"), + width: VarWidth::Numeric..=VarWidth::Numeric, + mr_type: MultipleResponseType::MultipleDichotomy { + datum: Datum::Number(Some(34.0)), + labels: CategoryLabels::CountedValues { + use_var_label_as_mrset_label: false, + }, + }, + variables: vec![10, 11, 12], + })); + dictionary + .mrsets + .insert(ByIdentifier::new(DictIndexMultipleResponseSet { + name: Identifier::new("$e").unwrap(), + label: String::new(), + width: VarWidth::String(6)..=VarWidth::String(6), + mr_type: MultipleResponseType::MultipleDichotomy { + datum: Datum::String("choice".into()), + labels: CategoryLabels::CountedValues { + use_var_label_as_mrset_label: true, + }, + }, + variables: vec![13, 14, 15], + })); + + fn get_mrsets(dictionary: &Dictionary, pre_v14: bool) -> String { + let mut raw = Vec::new(); + DictionaryWriter::new( + &WriteOptions::reproducible(None), + &mut Cursor::new(&mut raw), + dictionary, + ) + .write_mrsets(pre_v14) + .unwrap(); + + str::from_utf8(&raw[16..]).unwrap().into() + } + + assert_eq!( + &get_mrsets(&dictionary, true), + "$a=C 10 my mcgroup a b c +$b=D2 55 0 g e f d +$c=D3 Yes 10 mdgroup #2 h i j +" + ); + assert_eq!( + &get_mrsets(&dictionary, false), + "$d=E 1 2 34 13 third mdgroup k l m +$e=E 11 6 choice 0 n o p +" + ); + } } -- 2.30.2