From 3088c14720ef0d5ed44f60df4afce020b027f7ec Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 9 Jun 2025 08:13:51 -0700 Subject: [PATCH] attributes in system files --- rust/pspp/src/dictionary.rs | 98 ++++++++++++++++++- rust/pspp/src/identifier.rs | 2 +- rust/pspp/src/sys/raw.rs | 4 +- rust/pspp/src/sys/test.rs | 8 ++ .../pspp/src/sys/testdata/attributes.expected | 40 ++++++++ rust/pspp/src/sys/testdata/attributes.sack | 52 ++++++++++ 6 files changed, 197 insertions(+), 7 deletions(-) create mode 100644 rust/pspp/src/sys/testdata/attributes.expected create mode 100644 rust/pspp/src/sys/testdata/attributes.sack diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index caaba5402e..7b33658ad4 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -4,7 +4,7 @@ use core::str; use std::{ borrow::Cow, cmp::Ordering, - collections::{BTreeSet, HashMap, HashSet}, + collections::{BTreeMap, BTreeSet, HashMap, HashSet}, fmt::{Debug, Formatter, Result as FmtResult}, hash::Hash, ops::{Bound, RangeBounds, RangeInclusive}, @@ -577,6 +577,10 @@ impl Dictionary { OutputMrsets::new(self) } + pub fn output_attributes(&self) -> OutputAttributes { + OutputAttributes::new(self) + } + pub fn to_pivot_rows(&self) -> (Group, Vec) { let mut group = Group::new("Dictionary Information"); let mut values = Vec::new(); @@ -868,6 +872,78 @@ impl<'a> OutputMrsets<'a> { } } +pub struct OutputAttributes<'a> { + dictionary: &'a Dictionary, + + /// Include attributes whose names begin with `@`? + include_at: bool, +} + +impl<'a> OutputAttributes<'a> { + fn new(dictionary: &'a Dictionary) -> Self { + Self { + dictionary, + include_at: true, + } + } + pub fn without_at(self) -> Self { + Self { + include_at: false, + ..self + } + } + pub fn any_attributes(&self) -> bool { + self.attribute_sets().next().is_some() + } + fn attribute_sets(&self) -> impl Iterator, &Attributes)> { + std::iter::once((None, &self.dictionary.attributes)) + .chain( + self.dictionary + .variables + .iter() + .map(|var| (Some(&**var), &var.attributes)), + ) + .filter(|(_name, attributes)| attributes.has_any(self.include_at)) + } + pub fn to_pivot_table(&self) -> Option { + if !self.any_attributes() { + return None; + } + + let mut variables = Group::new("Variable and Name").with_label_shown(); + let mut data = Vec::new(); + for (variable, attributes) in self.attribute_sets() { + let group_name = match variable { + Some(variable) => Value::new_variable(variable), + None => Value::new_text("(dataset)"), + }; + let mut group = Group::new(group_name); + for (name, values) in &attributes.0 { + if values.len() == 1 { + group.push(Value::new_user_text(name.as_str())); + } else { + for index in 1..=values.len() { + group.push(Value::new_user_text(format!("{name}[{index}]"))); + } + } + for value in values { + data.push(Value::new_user_text(value)); + } + } + variables.push(group); + } + let values = Group::new("Value").with("Value"); + let mut pt = PivotTable::new([ + (Axis3::X, Dimension::new(values)), + (Axis3::Y, Dimension::new(variables)), + ]); + for (row, datum) in data.into_iter().enumerate() { + pt.insert(&[0, row], datum); + } + Some(pt) + } +} + fn update_dict_index_vec(dict_indexes: &mut Vec, f: F) where F: Fn(DictIndex) -> Option, @@ -943,20 +1019,34 @@ impl TryFrom for Role { } #[derive(Clone, Debug, Default, PartialEq, Eq)] -pub struct Attributes(pub HashMap>); +pub struct Attributes(pub BTreeMap>); impl Attributes { pub fn new() -> Self { - Self(HashMap::new()) + Self(BTreeMap::new()) } pub fn append(&mut self, other: &mut Self) { - self.0.extend(other.0.drain()) + self.0.append(&mut other.0) } pub fn role(&self) -> Result, InvalidRole> { self.try_into() } + + pub fn iter(&self, include_at: bool) -> impl Iterator { + self.0.iter().filter_map(move |(name, values)| { + if include_at || !name.0.starts_with('@') { + Some((name, values.as_slice())) + } else { + None + } + }) + } + + pub fn has_any(&self, include_at: bool) -> bool { + self.iter(include_at).next().is_some() + } } #[derive(Clone, Debug, ThisError, PartialEq, Eq)] diff --git a/rust/pspp/src/identifier.rs b/rust/pspp/src/identifier.rs index 50b4892f3c..c6b1fc54c0 100644 --- a/rust/pspp/src/identifier.rs +++ b/rust/pspp/src/identifier.rs @@ -345,7 +345,7 @@ pub fn id_match_n_nonstatic(keyword: &str, token: &str, n: usize) -> bool { impl Display for Identifier { fn fmt(&self, f: &mut Formatter) -> FmtResult { - write!(f, "{:?}", self.0) + write!(f, "{}", self.0) } } diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 09b19cd7a7..014134c305 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -12,7 +12,7 @@ use num::Integer; use std::{ borrow::{Borrow, Cow}, cell::RefCell, - collections::{HashMap, VecDeque}, + collections::{BTreeMap, VecDeque}, fmt::{Debug, Display, Formatter, Result as FmtResult}, io::{Error as IoError, Read, Seek, SeekFrom}, mem::take, @@ -2664,7 +2664,7 @@ impl Attributes { mut input: &'a str, sentinel: Option, ) -> Result<(Attributes, &'a str), Warning> { - let mut attributes = HashMap::new(); + let mut attributes = BTreeMap::new(); let rest = loop { match input.chars().next() { None => break input, diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index e2ab17a50c..69e1af3e1c 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -82,6 +82,11 @@ fn very_long_strings() { test_sysfile("very_long_strings"); } +#[test] +fn attributes() { + test_sysfile("attributes"); +} + fn test_sysfile(name: &str) { let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) .join("src/sys/testdata") @@ -136,6 +141,9 @@ fn test_sysfile(name: &str) { if let Some(pt) = dictionary.output_mrsets().to_pivot_table() { output.push(Arc::new(pt.into())); } + if let Some(pt) = dictionary.output_attributes().to_pivot_table() { + output.push(Arc::new(pt.into())); + } if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() { output.push(Arc::new(pt.into())); } diff --git a/rust/pspp/src/sys/testdata/attributes.expected b/rust/pspp/src/sys/testdata/attributes.expected new file mode 100644 index 0000000000..f0daafed95 --- /dev/null +++ b/rust/pspp/src/sys/testdata/attributes.expected @@ -0,0 +1,40 @@ +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────────┤ +│ Compression │None │ +│ Number of Cases│ 0│ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬────────────────────────╮ +│Label │PSPP synthetic test file│ +│Variables│ 3│ +╰─────────┴────────────────────────╯ + +╭──────────────┬────────┬─────┬─────────────────┬──────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role │Width│Alignment│Print Format│Write Format│Missing Values│ +├──────────────┼────────┼─────┼─────────────────┼──────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│FirstVariable │ 1│ │ │Target│ 8│Right │F8.0 │F8.0 │ │ +│SécondVariable│ 2│ │ │Input │ 8│Right │F8.0 │F8.0 │ │ +│ThirdVariable │ 3│ │ │Input │ 8│Right │F8.0 │F8.0 │ │ +╰──────────────┴────────┴─────┴─────────────────┴──────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭───────────────────────────────┬─────────────╮ +│Variable and Name │ Value │ +├───────────────────────────────┼─────────────┤ +│(dataset) Attr1[1] │Value1 │ +│ Attr1[2] │'déclaration'│ +│ SécondAttr[1]│123 │ +│ SécondAttr[2]│456 │ +├───────────────────────────────┼─────────────┤ +│FirstVariable $@Role │1 │ +│ adèle[1] │23 │ +│ adèle[2] │34 │ +│ bert │123 │ +├───────────────────────────────┼─────────────┤ +│SécondVariable xyzzy │quux │ +├───────────────────────────────┼─────────────┤ +│ThirdVariable fizz │buzz │ +╰───────────────────────────────┴─────────────╯ diff --git a/rust/pspp/src/sys/testdata/attributes.sack b/rust/pspp/src/sys/testdata/attributes.sack new file mode 100644 index 0000000000..b11d9cc7e3 --- /dev/null +++ b/rust/pspp/src/sys/testdata/attributes.sack @@ -0,0 +1,52 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; # Layout code +3; # Nominal case size +0; # Not compressed +0; # Not weighted +0; # 1 case. +100.0; # Bias. +"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +# Variables. +2; 0; 0; 0; 0x050800 *2; s8 "FIRSTVAR"; +2; 0; 0; 0; 0x050800 *2; s8 "SECONDVA"; +2; 0; 0; 0; 0x050800 *2; s8 "THIRDVAR"; + +# Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; + +# Long variable names. +7; 13; 1; COUNT ( +"FIRSTVAR=FirstVariable"; i8 9; +"SECONDVA=S"; i8 233; "condVariable"; i8 9; +"THIRDVAR=ThirdVariable"; i8 9 +); + +# Data file attributes record. +7; 17; 1; COUNT ( +"Attr1('Value1'"; i8 10; "''d"; i8 233; "claration''"; i8 10; ")"; +"S"; i8 233; "condAttr('123'"; i8 10; "'456'"; i8 10; ")"; +); + +# Variable attributes record. +7; 18; 1; COUNT ( +"FirstVariable:"; + "ad"; i8 232; "le('23'"; i8 10; "'34'"; i8 10; ")"; + "bert('123'"; i8 10; ")"; + "$@Role('1'"; i8 10; ")"; +"/S"; i8 233; "condVariable:"; + "xyzzy('quux'"; i8 10; ")"; +); + +# Another variable attributes record. +# Only system files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin" +# include multiple variable attributes records. +7; 18; 1; COUNT ("ThirdVariable:fizz('buzz'"; i8 10; ")";); + +# Character encoding record. +7; 20; 1; 12; "windows-1252"; + +# Dictionary termination record. +999; 0; -- 2.30.2