From: Ben Pfaff Date: Wed, 21 May 2025 16:19:54 +0000 (-0700) Subject: value labels test X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5078360b9a2823308fe9253a16e97befd1b00c22;p=pspp value labels test --- diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 12af9e6549..9653f05778 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -2,6 +2,7 @@ use core::str; use std::{ + borrow::Cow, cmp::Ordering, collections::{HashMap, HashSet}, fmt::{Debug, Formatter, Result as FmtResult}, @@ -645,19 +646,37 @@ impl<'a> OutputValueLabels<'a> { let mut values = Group::new("Variable Value").with_label_shown(); let mut footnotes = Footnotes::new(); let missing_footnote = footnotes.push(Footnote::new("User-missing value")); - for variable in &self.dictionary.variables { + let mut data = Vec::new(); + for variable in self + .dictionary + .variables + .iter() + .filter(|var| !var.value_labels.is_empty()) + { let mut group = Group::new(&**variable); - let mut values = variable.value_labels.iter().collect::>(); - values.sort(); - for (value, label) in values { - let value = Value::new_variable_value(variable, value) + let mut sorted_value_labels = variable.value_labels.iter().collect::>(); + sorted_value_labels.sort(); + for (datum, label) in sorted_value_labels { + let mut value = Value::new_variable_value(variable, datum) .with_show_value_label(Some(Show::Value)); - //group.push(); - todo!() + if variable.missing_values.contains(datum) { + value.add_footnote(&missing_footnote); + } + group.push(value); + + data.push( + Value::new_variable_value(variable, datum) + .with_show_value_label(Some(Show::Label)) + .with_value_label(Some(escape_value_label(label.as_str()).into())), + ); } + values.push(group); } - - todo!() + let mut pt = PivotTable::new(vec![(Axis3::Y, Dimension::new(values))]); + for (row, datum) in data.into_iter().enumerate() { + pt.insert(&[row], datum); + } + Some(pt) } fn get_field_value(index: usize, variable: &Variable, field: VariableField) -> Option { @@ -898,6 +917,22 @@ pub struct Variable { pub encoding: &'static Encoding, } +pub fn escape_value_label(unescaped: &str) -> Cow<'_, str> { + if unescaped.contains("\n") { + unescaped.replace("\n", "\\n").into() + } else { + unescaped.into() + } +} + +pub fn unescape_value_label(escaped: &str) -> Cow<'_, str> { + if escaped.contains("\\n") { + escaped.replace("\\n", "\n").into() + } else { + escaped.into() + } +} + impl Variable { pub fn new(name: Identifier, width: VarWidth, encoding: &'static Encoding) -> Self { let var_type = VarType::from(width); diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index 9973422221..cd9f0b12f2 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -1798,12 +1798,11 @@ impl Value { footnotes.sort_by_key(|f| f.index); } pub fn with_show_value_label(mut self, show: Option) -> Self { + let new_show = show; match &mut self.inner { - ValueInner::Number(number_value) => { - number_value.show = show; - } - ValueInner::String(string_value) => { - string_value.show = show; + ValueInner::Number(NumberValue { show, .. }) + | ValueInner::String(StringValue { show, .. }) => { + *show = new_show; } _ => (), } @@ -1815,6 +1814,14 @@ impl Value { } self } + pub fn with_value_label(mut self, label: Option) -> Self { + match &mut self.inner { + ValueInner::Number(NumberValue { value_label, .. }) + | ValueInner::String(StringValue { value_label, .. }) => *value_label = label.clone(), + _ => (), + } + self + } pub const fn empty() -> Self { Value { inner: ValueInner::Empty, diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index 800ba97b44..a25e2370dd 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -168,6 +168,7 @@ s16 "23456789abc"; s32 "defghijklmnopqstuvwxyzABC"; dictionary.file_label.as_ref().map(|s| s.as_str()), Some("PSPP synthetic test file: ôõöø") ); + assert!(dictionary.output_value_labels().to_pivot_table().is_none()); let pt = dictionary.output_variables().to_pivot_table(); assert_rendering( "variable_labels_and_missing_values", @@ -201,3 +202,240 @@ s16 "23456789abc"; s32 "defghijklmnopqstuvwxyzABC"; ); } } + +#[test] +fn value_labels() { + for endian in all::() { + let input = r#" +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; # Layout code +22; # Nominal case size +0; # Not compressed +0; # Not weighted +1; # 1 case. +100.0; # Bias. +"05 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +# Numeric variables. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; +2; 0; 0; 0; 0x050800 *2; s8 "NUM2"; +2; 0; 0; 0; 0x050800 *2; s8 "NUM3"; +2; 0; 0; 0; 0x050800 *2; s8 "NUM4"; +2; 0; 0; 0; 0x050800 *2; s8 "NUM5"; + +# String variables. +2; 1; 0; 0; 0x010100 *2; s8 "STR1"; # index 6 +2; 2; 0; 0; 0x010200 *2; s8 "STR2"; # index 7 +2; 3; 0; 0; 0x010300 *2; s8 "STR3"; # index 8 +2; 4; 0; 0; 0x010400 *2; s8 "STR4"; # index 9 +2; 4; 0; 0; 0x010400 *2; s8 "STR5"; # index 10 +2; 6; 0; 0; 0x010600 *2; s8 "STR6"; # index 11 +2; 7; 0; 0; 0x010700 *2; s8 "STR7"; # index 12 +2; 8; 0; 0; 0x010800 *2; s8 "STR8"; # index 13 +2; 9; 0; 0; 0x010900 *2; "STR9"; i8 230; s3 ""; # index 14 +2; -1; 0; 0; 0; 0; s8 ""; +2; 12; 0; 0; 0x010c00 *2; s8 "STR12"; # index 16 +2; -1; 0; 0; 0; 0; s8 ""; +2; 16; 0; 0; 0x011000 *2; s8 "STR16"; # index 18 +2; -1; 0; 0; 0; 0; s8 ""; +2; 17; 0; 0; 0x011100 *2; s8 "STR17"; # index 20 +( 2; -1; 0; 0; 0; 0; s8 ""; ) * 2; + +# One value label for NUM1. +3; 1; 1.0; i8 17; i8 238; i8 228; i8 232; i8 237; s19 " (in Russian)"; 4; 1; 1; + +# Two value labels for NUM2, as a single pair of type 3 and type 4 records. +3; 2; 1.0; i8 3; s7 "one"; 2.0; i8 3; s7 "two"; 4; 1; 2; + +# Two value labels for NUM3, as two pairs of type 3 and type 4 records. +3; 1; 3.0; i8 5; s7 "three"; 4; 1; 3; +3; 1; 4.0; i8 4; s7 "four"; 4; 1; 3; + +# Two common value labels for NUM4 and NUM5, plus two different ones for each. +3; 1; 5.0; i8 4; s7 "five"; 4; 1; 4; +3; 1; 6.0; i8 3; s7 "six"; 4; 1; 5; +3; 2; 7.0; i8 5; s7 "seven"; 8.0; i8 5; s7 "eight"; 4; 2; 4; 5; +3; 1; 9.0; i8 4; s7 "nine"; 4; 1; 4; +3; 1; 10.0; i8 3; s7 "ten"; 4; 1; 5; + +# One value label for STR1. +3; 1; s8 "a"; i8 19; s23 "value label for `a'"; 4; 1; 6; + +# Two value labels for STR2, as a single pair of type 3 and type 4 records. +3; 2; +s8 "bc"; i8 20; s23 "value label for `bc'"; +s8 "de"; i8 20; s23 "value label for `de'"; +4; 1; 7; + +# Two value labels for STR3, as two pairs of type 3 and type 4 records. +3; 1; s8 "fgh"; i8 21; s23 "value label for `fgh'"; 4; 1; 8; +3; 1; s8 "ijk"; i8 21; s23 "value label for `ijk'"; 4; 1; 8; + +# Two common value labels for STR4 and STR5, plus two different ones for each. +3; 1; s8 "lmno"; i8 22; s23 "value label for `lmno'"; 4; 1; 9; +3; 1; s8 "pqrs"; i8 22; s23 "value label for `pqrs'"; 4; 1; 10; +3; 2; +s8 "tuvw"; i8 22; s23 "value label for `tuvw'"; +s8 "xyzA"; i8 22; s23 "value label for `xyzA'"; +4; 2; 9; 10; +3; 1; s8 "BCDE"; i8 22; s23 "value label for `BCDE'"; 4; 1; 9; +3; 1; s8 "FGHI"; i8 22; s23 "value label for `FGHI'"; 4; 1; 10; + +# One value label for STR6, STR7, STR8. +3; 1; s8 "JKLMNO"; i8 24; s31 "value label for `JKLMNO'"; 4; 1; 11; +3; 1; s8 "JKLMNOP"; i8 25; s31 "value label for `JKLMNOP'"; 4; 1; 12; +3; 1; s8 "JKLMNOPQ"; i8 26; s31 "value label for `JKLMNOPQ'"; 4; 1; 13; + +# Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1251; + +# Character encoding record. +7; 20; 1; 12; "windows-1251"; + +7; 21; 1; COUNT ( +# One value label for STR9ж, +COUNT("STR9"; i8 230); 9; 1; COUNT("RSTUVWXYZ"); COUNT("value label for `RSTUVWXYZ'"); + +# Two value labels for STR12. +COUNT("STR12"); 12; 2; +COUNT("0123456789ab"); COUNT("value label for `0123456789ab'"); +COUNT("cdefghijklmn"); COUNT("value label for `cdefghijklmn'"); + +# Three value labels for STR16. +COUNT("STR16"); 16; 3; +COUNT("opqrstuvwxyzABCD"); COUNT("value label for `opqrstuvwxyzABCD'"); +COUNT("EFGHIJKLMNOPQRST"); COUNT("value label for `EFGHIJKLMNOPQRST'"); +COUNT("UVWXYZ0123456789"); COUNT("value label for `UVWXYZ0123456789' with Cyrillic letters: `"; i8 244; i8 245; i8 246; "'"); + +# One value label for STR17. +COUNT("STR17"); 17; 1; +COUNT("abcdefghijklmnopq"); COUNT("value label for `abcdefghijklmnopq'"); +); + +# Dictionary termination record. +999; 0; +"#; + let sysfile = sack(input, None, endian).unwrap(); + let cursor = Cursor::new(sysfile); + let reader = Reader::new(cursor, |warning| println!("{warning}")).unwrap(); + let headers: Vec = reader.collect::, _>>().unwrap(); + let encoding = encoding_from_headers(&headers, &|e| eprintln!("{e}")).unwrap(); + let decoder = Decoder::new(encoding, |e| eprintln!("{e}")); + let mut decoded_records = Vec::new(); + for header in headers { + decoded_records.push(header.decode(&decoder).unwrap()); + } + + let mut errors = Vec::new(); + let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap(); + let (dictionary, metadata) = decode(headers, encoding, |e| errors.push(e)).unwrap(); + assert_eq!(errors, vec![]); + assert_eq!( + metadata, + Metadata { + creation: NaiveDate::from_ymd_opt(2011, 1, 5) + .unwrap() + .and_time(NaiveTime::from_hms_opt(20, 53, 52).unwrap()), + endian, + compression: None, + n_cases: Some(1), + product: "$(#) SPSS DATA FILE PSPP synthetic test file".into(), + product_ext: None, + version: Some((1, 2, 3)), + } + ); + assert_eq!( + dictionary.file_label.as_ref().map(|s| s.as_str()), + Some("PSPP synthetic test file") + ); + let pt = dictionary.output_value_labels().to_pivot_table().unwrap(); + assert_rendering( + "value_labels_value_labels", + &pt, + "\ +╭────────────────────────────────┬───────────────────────────────────────────────────────────────╮ +│Variable Value │ │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│num1 1 │один (in Russian) │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│num2 1 │one │ +│ 2 │two │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│num3 3 │three │ +│ 4 │four │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│num4 5 │five │ +│ 7 │seven │ +│ 8 │eight │ +│ 9 │nine │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│num5 6 │six │ +│ 7 │seven │ +│ 8 │eight │ +│ 10 │ten │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str1 a │value label for `a' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str2 bc │value label for `bc' │ +│ de │value label for `de' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str3 fgh │value label for `fgh' │ +│ ijk │value label for `ijk' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str4 BCDE │value label for `BCDE' │ +│ lmno │value label for `lmno' │ +│ tuvw │value label for `tuvw' │ +│ xyzA │value label for `xyzA' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str5 FGHI │value label for `FGHI' │ +│ pqrs │value label for `pqrs' │ +│ tuvw │value label for `tuvw' │ +│ xyzA │value label for `xyzA' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str6 JKLMNO │value label for `JKLMNO' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str7 JKLMNOP │value label for `JKLMNOP' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str8 JKLMNOPQ │value label for `JKLMNOPQ' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str9ж RSTUVWXYZ │value label for `RSTUVWXYZ' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str12 0123456789ab │value label for `0123456789ab' │ +│ cdefghijklmn │value label for `cdefghijklmn' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str16 EFGHIJKLMNOPQRST │value label for `EFGHIJKLMNOPQRST' │ +│ UVWXYZ0123456789 │value label for `UVWXYZ0123456789' with Cyrillic letters: `фхц'│ +│ opqrstuvwxyzABCD │value label for `opqrstuvwxyzABCD' │ +├────────────────────────────────┼───────────────────────────────────────────────────────────────┤ +│str17 abcdefghijklmnopq│value label for `abcdefghijklmnopq' │ +╰────────────────────────────────┴───────────────────────────────────────────────────────────────╯ +", + ); + let pt = dictionary.output_variables().to_pivot_table(); + assert_rendering("value_labels_dictionary", &pt, "\ +╭─────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├─────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│num1 │ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│num2 │ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│num3 │ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│num4 │ 4│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│num5 │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│str1 │ 6│ │Nominal │Input│ 1│Left │A1 │A1 │ │ +│str2 │ 7│ │Nominal │Input│ 2│Left │A2 │A2 │ │ +│str3 │ 8│ │Nominal │Input│ 3│Left │A3 │A3 │ │ +│str4 │ 9│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│str5 │ 10│ │Nominal │Input│ 4│Left │A4 │A4 │ │ +│str6 │ 11│ │Nominal │Input│ 6│Left │A6 │A6 │ │ +│str7 │ 12│ │Nominal │Input│ 7│Left │A7 │A7 │ │ +│str8 │ 13│ │Nominal │Input│ 8│Left │A8 │A8 │ │ +│str9ж│ 14│ │Nominal │Input│ 9│Left │A9 │A9 │ │ +│str12│ 15│ │Nominal │Input│ 12│Left │A12 │A12 │ │ +│str16│ 16│ │Nominal │Input│ 16│Left │A16 │A16 │ │ +│str17│ 17│ │Nominal │Input│ 17│Left │A17 │A17 │ │ +╰─────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ +"); + } +}