From 3642b03ce164235d6333b6ec1df86a0b8fb2b52b Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 23 May 2025 08:29:39 -0700 Subject: [PATCH] work on tests --- [Review notes: dictionary.rs drops get_field_value; cooked.rs adds Metadata::to_pivot_rows and reflows one for-pattern; test.rs factors the expected metadata into test_metadata() and adds two tests. Generic arguments that had been stripped from this patch text are restored, and wrong_variable_positions_but_v13 now uses the 05 Jan 11 header date and expects version 13.2.3 to match its own input.] rust/pspp/src/dictionary.rs | 28 ------ rust/pspp/src/sys/cooked.rs | 53 ++++++++++- rust/pspp/src/sys/test.rs | 184 +++++++++++++++++++++++++++++++----- 3 files changed, 211 insertions(+), 54 deletions(-) diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 9653f05778..99f91e3dbb 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -678,34 +678,6 @@ impl<'a> OutputValueLabels<'a> { } Some(pt) } - - fn get_field_value(index: usize, variable: &Variable, field: VariableField) -> Option<Value> { - match field { - VariableField::Position => Some(Value::new_integer(Some(index as f64 + 1.0))), - VariableField::Label => variable.label().map(|label| Value::new_user_text(label)), - VariableField::Measure => variable - .measure - .map(|measure| Value::new_text(measure.as_str())), - VariableField::Role => Some(Value::new_text(variable.role.as_str())), - VariableField::Width => Some(Value::new_integer(Some(variable.display_width as f64))), - VariableField::Alignment => Some(Value::new_text(variable.alignment.as_str())), - VariableField::PrintFormat => { - Some(Value::new_user_text(variable.print_format.to_string())) - } - VariableField::WriteFormat => { - Some(Value::new_user_text(variable.write_format.to_string())) - } - VariableField::MissingValues if !variable.missing_values.is_empty() => { - Some(Value::new_user_text( - variable - .missing_values - .display(variable.encoding) - .to_string(), - )) - } - VariableField::MissingValues => None, - } - } } #[derive(Copy, Clone, Debug, Enum)] diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 139a191682..17548623e6 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -3,12 +3,13 @@ use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc}; use crate::{ dictionary::{ - 
Dictionary, InvalidRole, MultipleResponseSet, MultipleResponseType, Datum, VarWidth, + Datum, Dictionary, InvalidRole, MultipleResponseSet, MultipleResponseType, VarWidth, Variable, VariableSet, }, endian::Endian, format::{Error as FormatError, Format, UncheckedFormat}, identifier::{ByIdentifier, Error as IdError, Identifier}, + output::pivot::{Group, Value}, sys::{ encoding::Error as EncodingError, raw::{ @@ -377,6 +378,50 @@ pub struct Metadata { } impl Metadata { + fn to_pivot_rows(&self) -> (Group, Vec<Value>) { + let mut group = Group::new("File Metadata"); + let mut values = Vec::new(); + + group.push("Created"); + // XXX use datetime format + values.push(Value::new_text( + self.creation.format("%e %b %y %H:%M:%S").to_string(), + )); + + group.push("Product"); + if let Some(product_ext) = &self.product_ext { + values.push(Value::new_text(format!( + "{} ({})", + self.product, product_ext + ))); + } else { + values.push(Value::new_text(&self.product)); + }; + + if let Some(version) = &self.version { + group.push("Product Version"); + values.push(Value::new_text(format!( + "{}.{}.{}", + version.0, version.1, version.2 + ))); + } + + group.push("Compression"); + values.push(Value::new_text(match self.compression { + Some(Compression::Simple) => "SAV", + Some(Compression::ZLib) => "ZSAV", + None => "None", + })); + + group.push("Number of Cases"); + values.push(match self.n_cases { + Some(n_cases) => Value::new_integer(Some(n_cases as f64)), + None => Value::new_text("Unknown"), + }); + + (group, values) + } + fn decode(headers: &Headers, mut warn: impl FnMut(Error)) -> Self { let header = &headers.header; let creation_date = NaiveDate::parse_from_str(&header.creation_date, "%e %b %y") @@ -600,7 +645,11 @@ pub fn decode( for dict_index in dict_indexes { let variable = dictionary.variables.get_index_mut2(dict_index).unwrap(); - for ValueLabel { datum: value, label } in record.labels.iter().cloned() { + for ValueLabel { + datum: value, + label, + } in 
record.labels.iter().cloned() + { let value = value.decode(variable.width); variable.value_labels.insert(value, label); } diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index a25e2370dd..76e4f476ea 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -153,15 +153,8 @@ s16 "23456789abc"; s32 "defghijklmnopqstuvwxyzABC"; assert_eq!( metadata, Metadata { - creation: NaiveDate::from_ymd_opt(2011, 1, 5) - .unwrap() - .and_time(NaiveTime::from_hms_opt(20, 53, 52).unwrap()), - endian, - compression: None, - n_cases: Some(1), product: "$(#) SPSS DATA FILE PSPP synthetic test file".into(), - product_ext: None, - version: Some((1, 2, 3)), + ..test_metadata(endian) } ); assert_eq!( @@ -203,6 +196,163 @@ s16 "23456789abc"; s32 "defghijklmnopqstuvwxyzABC"; } } +fn test_metadata(endian: Endian) -> Metadata { + Metadata { + creation: NaiveDate::from_ymd_opt(2011, 1, 5) + .unwrap() + .and_time(NaiveTime::from_hms_opt(20, 53, 52).unwrap()), + endian, + compression: None, + n_cases: Some(1), + product: "$(#) SPSS DATA FILE PSPP synthetic test file".into(), + product_ext: None, + version: Some((1, 2, 3)), + } +} + +#[test] +fn unspecified_number_of_variable_positions() { + for endian in all::<Endian>() { + let input = r#" +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; # Layout code +-1; # Nominal case size (unspecified) +0; # Not compressed +0; # Not weighted +1; # 1 case. +100.0; # Bias. +"05 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +# Numeric variable, no label or missing values. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; + +# Numeric variable, variable label. +2; 0; 1; 0; 0x050800 *2; s8 "NUM2"; +26; "Numeric variable 2's label"; i8 0 *2; + +# Character encoding record. +7; 20; 1; 12; "windows-1252"; + +# Dictionary termination record. +999; 0; + +# Data. 
+1.0; 2.0; +"#; + let sysfile = sack(input, None, endian).unwrap(); + let cursor = Cursor::new(sysfile); + let reader = Reader::new(cursor, |warning| println!("{warning}")).unwrap(); + let headers: Vec<_> = reader.collect::<Result<Vec<_>, _>>().unwrap(); + let encoding = encoding_from_headers(&headers, &|e| eprintln!("{e}")).unwrap(); + let decoder = Decoder::new(encoding, |e| eprintln!("{e}")); + let mut decoded_records = Vec::new(); + for header in headers { + decoded_records.push(header.decode(&decoder).unwrap()); + } + + let mut errors = Vec::new(); + let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap(); + let (dictionary, metadata) = decode(headers, encoding, |e| errors.push(e)).unwrap(); + assert_eq!(errors, vec![]); + assert_eq!( + metadata, + Metadata { + version: None, + ..test_metadata(endian) + } + ); + assert_eq!( + dictionary.file_label.as_ref().map(|s| s.as_str()), + Some("PSPP synthetic test file") + ); + assert!(dictionary.output_value_labels().to_pivot_table().is_none()); + let pt = dictionary.output_variables().to_pivot_table(); + assert_rendering("value_labels_dictionary", &pt, "\ +╭──────────────────────────┬────────┬──────────────────────────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│ Label │Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├──────────────────────────┼────────┼──────────────────────────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│num1 │ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│Numeric variable 2's label│ 2│Numeric variable 2's label│ │Input│ 8│Right │F8.0 │F8.0 │ │ +╰──────────────────────────┴────────┴──────────────────────────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ +"); + } +} + +#[test] +fn wrong_variable_positions_but_v13() { + for endian in all::<Endian>() { + let input = r#" +# File header. 
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; # Layout code +-1; # Nominal case size (unspecified) +0; # Not compressed +0; # Not weighted +1; # 1 case. +100.0; # Bias. +"05 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +# Numeric variable, no label or missing values. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; + +# Numeric variable, variable label. +2; 0; 1; 0; 0x050800 *2; s8 "NUM2"; +26; "Numeric variable 2's label"; i8 0 *2; + +# Machine integer info record (SPSS 13). +7; 3; 4; 8; 13; 2; 3; -1; 1; 1; ENDIAN; 1252; + +# Character encoding record. +7; 20; 1; 12; "windows-1252"; + +# Dictionary termination record. +999; 0; + +# Data. +1.0; 2.0; +"#; + let sysfile = sack(input, None, endian).unwrap(); + let cursor = Cursor::new(sysfile); + let reader = Reader::new(cursor, |warning| println!("{warning}")).unwrap(); + let headers: Vec<_> = reader.collect::<Result<Vec<_>, _>>().unwrap(); + let encoding = encoding_from_headers(&headers, &|e| eprintln!("{e}")).unwrap(); + let decoder = Decoder::new(encoding, |e| eprintln!("{e}")); + let mut decoded_records = Vec::new(); + for header in headers { + decoded_records.push(header.decode(&decoder).unwrap()); + } + + let mut errors = Vec::new(); + let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap(); + let (dictionary, metadata) = decode(headers, encoding, |e| errors.push(e)).unwrap(); + assert_eq!(errors, vec![]); + assert_eq!( + metadata, + Metadata { + version: Some((13, 2, 3)), // taken from the machine integer info record in the input above + ..test_metadata(endian) + } + ); + assert_eq!( + dictionary.file_label.as_ref().map(|s| s.as_str()), + Some("PSPP synthetic test file") + ); + assert!(dictionary.output_value_labels().to_pivot_table().is_none()); + let pt = dictionary.output_variables().to_pivot_table(); + assert_rendering("value_labels_dictionary", &pt, "\ 
+╭──────────────────────────┬────────┬──────────────────────────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│ Label │Measurement Level│ 
Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├──────────────────────────┼────────┼──────────────────────────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│num1 │ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│Numeric variable 2's label│ 2│Numeric variable 2's label│ │Input│ 8│Right │F8.0 │F8.0 │ │ +╰──────────────────────────┴────────┴──────────────────────────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ +"); + } +} + #[test] fn value_labels() { for endian in all::<Endian>() { @@ -332,28 +482,14 @@ COUNT("abcdefghijklmnopq"); COUNT("value label for `abcdefghijklmnopq'"); let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap(); let (dictionary, metadata) = decode(headers, encoding, |e| errors.push(e)).unwrap(); assert_eq!(errors, vec![]); - assert_eq!( - metadata, - Metadata { - creation: NaiveDate::from_ymd_opt(2011, 1, 5) - .unwrap() - .and_time(NaiveTime::from_hms_opt(20, 53, 52).unwrap()), - endian, - compression: None, - n_cases: Some(1), - product: "$(#) SPSS DATA FILE PSPP synthetic test file".into(), - product_ext: None, - version: Some((1, 2, 3)), - } - ); + assert_eq!(metadata, test_metadata(endian)); assert_eq!( dictionary.file_label.as_ref().map(|s| s.as_str()), Some("PSPP synthetic test file") ); - let pt = dictionary.output_value_labels().to_pivot_table().unwrap(); assert_rendering( "value_labels_value_labels", - &pt, + &dictionary.output_value_labels().to_pivot_table().unwrap(), "\ ╭────────────────────────────────┬───────────────────────────────────────────────────────────────╮ │Variable Value │ │ -- 2.30.2