From 7c63f6e84d9ae8a6ef053c69c0fa0ce1125c9cb2 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 8 Jun 2025 12:36:02 -0700 Subject: [PATCH] mrsets work --- rust/doc/src/system-file.md | 99 +++++++++---------- rust/pspp/src/dictionary.rs | 70 +++++++++++-- rust/pspp/src/output/pivot/mod.rs | 13 ++- rust/pspp/src/sys/raw.rs | 9 +- rust/pspp/src/sys/test.rs | 3 + .../testdata/multiple_response_sets.expected | 56 +++++++++++ 6 files changed, 190 insertions(+), 60 deletions(-) diff --git a/rust/doc/src/system-file.md b/rust/doc/src/system-file.md index d04d165f7d..91b6f33e88 100644 --- a/rust/doc/src/system-file.md +++ b/rust/doc/src/system-file.md @@ -930,35 +930,35 @@ is used for multiple dichotomy sets that use the - One line feed (byte 0x0a). Sometimes multiple, even hundreds, of line feeds are present. -Example: Given appropriate variable definitions, consider the -following MRSETS command: - -``` -MRSETS /MCGROUP NAME=$a LABEL='my mcgroup' VARIABLES=a b c - /MDGROUP NAME=$b VARIABLES=g e f d VALUE=55 - /MDGROUP NAME=$c LABEL='mdgroup #2' VARIABLES=h i j VALUE='Yes' - /MDGROUP NAME=$d LABEL='third mdgroup' CATEGORYLABELS=COUNTEDVALUES - VARIABLES=k l m VALUE=34 - /MDGROUP NAME=$e CATEGORYLABELS=COUNTEDVALUES LABELSOURCE=VARLABEL - VARIABLES=n o p VALUE='choice'. -``` - -The above would generate the following multiple response set record -of subtype 7: - -``` -$a=C 10 my mcgroup a b c -$b=D2 55 0 g e f d -$c=D3 Yes 10 mdgroup #2 h i j -``` - -It would also generate the following multiple response set record -with subtype 19: - -``` -$d=E 1 2 34 13 third mdgroup k l m -$e=E 11 6 choice 0 n o p -``` +> Example: Given appropriate variable definitions, consider the +> following MRSETS command: +> +> ``` +> MRSETS /MCGROUP NAME=$a LABEL='my mcgroup' VARIABLES=a b c +> /MDGROUP NAME=$b VARIABLES=g e f d VALUE=55 +> /MDGROUP NAME=$c LABEL='mdgroup #2' VARIABLES=h i j VALUE='Yes' +> /MDGROUP NAME=$d LABEL='third mdgroup' CATEGORYLABELS=COUNTEDVALUES +> VARIABLES=k l m VALUE=34 +> /MDGROUP NAME=$e CATEGORYLABELS=COUNTEDVALUES LABELSOURCE=VARLABEL +> VARIABLES=n o p VALUE='choice'. +> ``` +> +> The above would generate the following multiple response set record +> of subtype 7: +> +> ``` +> $a=C 10 my mcgroup a b c +> $b=D2 55 0 g e f d +> $c=D3 Yes 10 mdgroup #2 h i j +> ``` +> +> It would also generate the following multiple response set record +> with subtype 19: +> +> ``` +> $d=E 1 2 34 13 third mdgroup k l m +> $e=E 11 6 choice 0 n o p +> ``` [^note]: This part of the format may not be fully understood, because only a single example of each possibility has been examined. @@ -1626,24 +1626,22 @@ ATTRIBUTE` commands, respectively. The total length is `count` bytes. -### Example - -A system file produced with the following VARIABLE ATTRIBUTE commands in -effect: - -``` -VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=fred[1]('23') fred[2]('34'). -VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=bert('123'). -``` - -will contain a variable attribute record with the following contents: - -``` -0000 07 00 00 00 12 00 00 00 01 00 00 00 22 00 00 00 |............"...| -0010 64 75 6d 6d 79 3a 66 72 65 64 28 27 32 33 27 0a |dummy:fred('23'.| -0020 27 33 34 27 0a 29 62 65 72 74 28 27 31 32 33 27 |'34'.)bert('123'| -0030 0a 29 |.) | -``` +> Example: A system file produced with the following `VARIABLE +> ATTRIBUTE` commands in effect: +> +> ``` +> VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=fred[1]('23') fred[2]('34'). +> VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=bert('123'). +> ``` +> +> will contain a variable attribute record with the following contents: +> +> ``` +> 0000 07 00 00 00 12 00 00 00 01 00 00 00 22 00 00 00 |............"...| +> 0010 64 75 6d 6d 79 3a 66 72 65 64 28 27 32 33 27 0a |dummy:fred('23'.| +> 0020 27 33 34 27 0a 29 62 65 72 74 28 27 31 32 33 27 |'34'.)bert('123'| +> 0030 0a 29 |.) | +> ``` ### Variable Roles @@ -1777,9 +1775,10 @@ case. The format of the data record varies depending on the value of - 1 through 251 A number with value `code - bias`, where `code` is the value of - the compression code and `bias` comes from the file header. For - example, code 105 with bias 100.0 (the normal value) indicates a - numeric variable of value 5. + the compression code and `bias` comes from the file header. + + > Example: Code 105 with bias 100.0 (the normal value) indicates a + > numeric variable of value 5. A code of 0 (after subtracting the bias) in a string field encodes null bytes. This is unusual, since a string field normally diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 116491b740..fd27bb9723 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -4,7 +4,7 @@ use core::str; use std::{ borrow::Cow, cmp::Ordering, - collections::{HashMap, HashSet}, + collections::{BTreeSet, HashMap, HashSet}, fmt::{Debug, Formatter, Result as FmtResult}, hash::Hash, ops::{Bound, RangeBounds, RangeInclusive}, @@ -314,7 +314,7 @@ pub struct Dictionary { pub attributes: Attributes, /// Multiple response sets. - pub mrsets: HashSet>, + pub mrsets: BTreeSet>, /// Variable sets. /// @@ -350,7 +350,7 @@ impl Dictionary { documents: Vec::new(), vectors: HashSet::new(), attributes: Attributes::new(), - mrsets: HashSet::new(), + mrsets: BTreeSet::new(), variable_sets: Vec::new(), encoding, } @@ -495,9 +495,8 @@ impl Dictionary { .map(ByIdentifier::new) }) .collect(); - self.mrsets = self - .mrsets - .drain() + self.mrsets = std::mem::take(&mut self.mrsets) + .into_iter() .filter_map(|mrset_by_id| { mrset_by_id .0 @@ -548,6 +547,10 @@ impl Dictionary { OutputVariableSets::new(self) } + pub fn output_mrsets(&self) -> OutputMrsets { + OutputMrsets::new(self) + } + pub fn to_pivot_rows(&self) -> (Group, Vec) { let mut group = Group::new("Dictionary Information"); let mut values = Vec::new(); @@ -784,6 +787,61 @@ impl<'a> OutputVariableSets<'a> { } } +pub struct OutputMrsets<'a> { + dictionary: &'a Dictionary, +} + +impl<'a> OutputMrsets<'a> { + fn new(dictionary: &'a Dictionary) -> Self { + Self { dictionary } + } + pub fn any_mrsets(&self) -> bool { + !self.dictionary.mrsets.is_empty() + } + pub fn to_pivot_table(&self) -> Option { + if !self.any_mrsets() { + return None; + } + + let attributes = Group::new("Attributes") + .with("Label") + .with("Encoding") + .with("Counted Value") + .with("Member Variables"); + + let mut mrsets = Group::new("Name").with_label_shown(); + for mrset in &self.dictionary.mrsets { + mrsets.push(mrset.name.as_str()); + } + let mut pt = PivotTable::new([ + (Axis3::Y, Dimension::new(mrsets)), + (Axis3::X, Dimension::new(attributes)), + ]); + for (row, mrset) in self.dictionary.mrsets.iter().enumerate() { + pt.insert(&[row, 0], mrset.label.as_str()); + + let mr_type_name = match &mrset.mr_type { + MultipleResponseType::MultipleDichotomy { datum, .. } => { + pt.insert(&[row, 2], Value::new_datum(datum, self.dictionary.encoding)); + "Dichotomies" + } + MultipleResponseType::MultipleCategory => "Categories", + }; + + pt.insert(&[row, 1], Value::new_text(mr_type_name)); + pt.insert( + &[row, 3], + mrset + .variables + .iter() + .flat_map(|index| [self.dictionary.variables[*index].name.as_str(), "\n"]) + .collect::(), + ); + } + Some(pt) + } +} + fn update_dict_index_vec(dict_indexes: &mut Vec, f: F) where F: Fn(DictIndex) -> Option, diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index ea97ad5add..35c38244db 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -1477,8 +1477,9 @@ impl PivotTable { cell_index(data_indexes, self.dimensions.iter().map(|d| d.len())) } - pub fn insert(&mut self, data_indexes: &[usize], value: Value) { - self.cells.insert(self.cell_index(data_indexes), value); + pub fn insert(&mut self, data_indexes: &[usize], value: impl Into) { + self.cells + .insert(self.cell_index(data_indexes), value.into()); } pub fn get(&self, data_indexes: &[usize]) -> Option<&Value> { @@ -1760,7 +1761,7 @@ impl Value { variable_label: variable.label.clone(), })) } - pub fn new_value(value: &Datum, encoding: &'static Encoding) -> Self { + pub fn new_datum(value: &Datum, encoding: &'static Encoding) -> Self { match value { Datum::Number(number) => Self::new_number(*number), Datum::String(string) => Self::new_user_text(string.decode(encoding).into_owned()), @@ -1869,6 +1870,12 @@ impl From<&str> for Value { } } +impl From for Value { + fn from(value: String) -> Self { + Self::new_text(value) + } +} + impl From<&Variable> for Value { fn from(variable: &Variable) -> Self { Self::new_variable(variable) diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 53f80a1b90..eb3e0f4a0d 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -2109,6 +2109,7 @@ impl MultipleResponseSet { return Err(Warning::MultipleResponseSyntaxError); }; let (name, input) = input.split_at(equals); + let input = input.strip_prefix(b"=").unwrap(); let (mr_type, input) = MultipleResponseType::parse(input)?; let Some(input) = input.strip_prefix(b" ") else { return Err(Warning::MultipleResponseSyntaxError); @@ -2186,7 +2187,13 @@ impl ExtensionRecord for MultipleResponseRecord { let mut input = &ext.data[..]; let mut sets = Vec::new(); - while !input.is_empty() { + loop { + while let Some(suffix) = input.strip_prefix(b"\n") { + input = suffix; + } + if input.is_empty() { + break; + } let (set, rest) = MultipleResponseSet::parse(input)?; sets.push(set); input = rest; diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index a489ddf514..7a7801e27b 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -106,6 +106,9 @@ fn test_sysfile(name: &str) { if let Some(pt) = dictionary.output_value_labels().to_pivot_table() { output.push(Arc::new(pt.into())); } + if let Some(pt) = dictionary.output_mrsets().to_pivot_table() { + output.push(Arc::new(pt.into())); + } if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() { output.push(Arc::new(pt.into())); } diff --git a/rust/pspp/src/sys/testdata/multiple_response_sets.expected b/rust/pspp/src/sys/testdata/multiple_response_sets.expected index e69de29bb2..47c8a9b46c 100644 --- a/rust/pspp/src/sys/testdata/multiple_response_sets.expected +++ b/rust/pspp/src/sys/testdata/multiple_response_sets.expected @@ -0,0 +1,56 @@ +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────────┤ +│ Compression │None │ +│ Number of Cases│ 0│ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬────────────────────────╮ +│Label │PSPP synthetic test file│ +│Variables│ 16│ +╰─────────┴────────────────────────╯ + +╭──┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├──┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│あ│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│b │ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│c │ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│d │ 4│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│e │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│f │ 6│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│g │ 7│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│h │ 8│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│i │ 9│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│j │ 10│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│k │ 11│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│l │ 12│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│m │ 13│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│n │ 14│ │Nominal │Input│ 6│Left │A6 │A6 │ │ +│o │ 15│ │Nominal │Input│ 6│Left │A6 │A6 │ │ +│p │ 16│ │Nominal │Input│ 6│Left │A6 │A6 │ │ +╰──┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭────┬─────────────┬───────────┬─────────────┬────────────────╮ +│Name│ Label │ Encoding │Counted Value│Member Variables│ +├────┼─────────────┼───────────┼─────────────┼────────────────┤ +│$a │my mcgroup │Categories │ │あ │ +│ │ │ │ │b │ +│ │ │ │ │c │ +│$b │ │Dichotomies│ 55.00│g │ +│ │ │ │ │e │ +│ │ │ │ │f │ +│ │ │ │ │d │ +│$c │mdgroup #2 │Dichotomies│はい │h │ +│ │ │ │ │i │ +│ │ │ │ │j │ +│$d │third mdgroup│Dichotomies│ 34.00│k │ +│ │ │ │ │l │ +│ │ │ │ │m │ +│$e │ │Dichotomies│choice │n │ +│ │ │ │ │o │ +│ │ │ │ │p │ +╰────┴─────────────┴───────────┴─────────────┴────────────────╯ -- 2.30.2