add some spv legacy tests rust
authorBen Pfaff <blp@cs.stanford.edu>
Sun, 21 Dec 2025 22:55:26 +0000 (14:55 -0800)
committerBen Pfaff <blp@cs.stanford.edu>
Sun, 21 Dec 2025 22:55:26 +0000 (14:55 -0800)
rust/pspp/src/spv/read.rs
rust/pspp/src/spv/read/legacy_xml.rs
rust/pspp/src/spv/read/tests.rs [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy1.expected [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy1.spv [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy2.expected [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy2.spv [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy3.expected [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy3.spv [new file with mode: 0644]

index 485f859ef51219e8b5b132168f600c2c83775c3e..3450e352e1858a28cf265c3eddc1b154d9d3bbe9 100644 (file)
@@ -50,6 +50,8 @@ pub mod html;
 pub mod legacy_bin;
 mod legacy_xml;
 mod light;
+#[cfg(test)]
+mod tests;
 
 /// A warning encountered reading an SPV file.
 #[derive(Clone, Debug)]
@@ -828,16 +830,3 @@ struct TableStructure {
     #[serde(rename = "csvPath")]
     _csv_path: Option<String>,
 }
-
-#[cfg(test)]
-#[test]
-fn test_spv() {
-    let items = ReadOptions::new(|e| println!("{e}"))
-        .open_file("/home/blp/pspp/rust/tests/utilities/regress.spv")
-        .unwrap()
-        .into_items();
-    for item in items {
-        println!("{item}");
-    }
-    todo!()
-}
index f343a00a1dc67f4dbd09d14515122b3ee31b6651..4bbc608860928559ad740301d30f5f233ba4b3e8 100644 (file)
@@ -27,7 +27,6 @@ use std::{
 use chrono::{NaiveDateTime, NaiveTime};
 use enum_map::{Enum, EnumMap};
 use hashbrown::HashSet;
-use itertools::Itertools;
 use ordered_float::OrderedFloat;
 use serde::Deserialize;
 
@@ -286,12 +285,18 @@ impl Visualization {
                     let entry = footnote_builder
                         .entry(uses_reference.get() - 1)
                         .or_default();
-                    if index % 2 == 0 {
+                    if index % 2 == 1 {
                         entry.content = text.text.strip_suffix('\n').unwrap_or(&text.text).into();
                     } else {
-                        entry.marker =
-                            Some(text.text.strip_suffix('.').unwrap_or(&text.text).into());
+                        entry.marker = Some(
+                            text.text
+                                .trim_end()
+                                .strip_suffix('.')
+                                .unwrap_or(&text.text)
+                                .into(),
+                        );
                     }
+                    dbg!(entry);
                 }
             }
         }
@@ -305,6 +310,7 @@ impl Visualization {
                     .with_marker(footnote.marker.map(|s| Value::new_user_text(s))),
             );
         }
+        dbg!(&footnotes);
         let footnotes = pivot::Footnotes::from_iter(footnotes);
 
         for (purpose, area) in [
@@ -317,8 +323,8 @@ impl Visualization {
                 label.decode_style(&mut look.areas[area], &styles);
             }
         }
-        let title = LabelFrame::decode_label(&labels[Purpose::Title]);
-        let caption = LabelFrame::decode_label(&labels[Purpose::SubTitle]);
+        let title = LabelFrame::decode_label(&labels[Purpose::Title], &footnotes);
+        let caption = LabelFrame::decode_label(&labels[Purpose::SubTitle], &footnotes);
         if let Some(style) = &graph.interval.labeling.style
             && let Some(style) = styles.get(style.references.as_str())
         {
@@ -473,13 +479,16 @@ impl Visualization {
                 let mut coordinate_to_index = HashMap::new();
                 let mut next_cats = Vec::with_capacity(cats.len());
                 let mut start = 0;
+                dbg!(&variable.name, &variable.values);
                 for end in 1..=cats.len() {
                     let dv1 = &variable.values[cats[start].index];
                     if end < cats.len() && &variable.values[cats[end].index].value == &dv1.value {
                         println!("categories {start}..={end} have same value {dv1:?}");
                     } else {
+                        dbg!(start..end);
                         let name = variable.map.lookup(dv1);
                         if name.is_number_or(|s| !s.is_empty()) {
+                            dbg!();
                             let name = variable.new_name(dv1, footnotes);
                             let mut group = Group::new(name);
                             for i in start..end {
@@ -496,6 +505,7 @@ impl Visualization {
                             next_cats.push(next_cat);
                         } else {
                             // XXX coordinate_to_index?
+                            dbg!();
                             for cat in &cats[start..end] {
                                 next_cats.push(cat.clone());
                             }
@@ -505,6 +515,7 @@ impl Visualization {
                 }
                 *variable.coordinate_to_index.borrow_mut() = coordinate_to_index;
                 cats = next_cats;
+                dbg!(&cats);
             }
 
             let dimension = Dimension::new(
@@ -696,11 +707,13 @@ impl Visualization {
                 // XXX indexing
                 let dv = &cell_footnotes.values[i];
                 if let Some(s) = dv.value.as_string() {
+                    dbg!(dv);
                     for part in s.split(',') {
                         if let Ok(index) = part.parse::<usize>()
                             && let Some(index) = index.checked_sub(1)
                             && let Some(footnote) = footnotes.get(index)
                         {
+                            dbg!(footnote);
                             value = value.with_footnote(footnote);
                         }
                     }
@@ -1002,6 +1015,7 @@ impl Visualization {
             .collect::<Vec<_>>();
         let mut pivot_table = PivotTable::new(dimensions)
             .with_look(Arc::new(look))
+            .with_footnotes(footnotes)
             .with_data(data);
         if let Some(title) = title {
             pivot_table = pivot_table.with_title(title);
@@ -2622,20 +2636,28 @@ struct LabelFrame {
 }
 
 impl LabelFrame {
-    fn decode_label(labels: &[&Label]) -> Option<Value> {
+    fn decode_label(labels: &[&Label], footnotes: &pivot::Footnotes) -> Option<Value> {
         if !labels.is_empty() {
             let mut s = String::new();
+            let mut f = Vec::new();
             for t in labels {
                 if let LabelChild::Text(text) = &t.child {
                     for t in text {
-                        if let Some(_defines_reference) = t.defines_reference {
-                            // XXX footnote
+                        if let Some(defines_reference) = t.defines_reference
+                            && let Some(footnote) = footnotes.get(defines_reference.get() - 1)
+                        {
+                            f.push(footnote);
+                        } else {
+                            s += &t.text;
                         }
-                        s += &t.text;
                     }
                 }
             }
-            Some(Value::new_user_text(s))
+            let mut value = Value::new_user_text(s);
+            for footnote in f {
+                value = value.with_footnote(footnote);
+            }
+            Some(value)
         } else {
             None
         }
@@ -2656,6 +2678,7 @@ struct Container {
     extensions: Option<ContainerExtension>,
     #[serde(default)]
     locations: Vec<Location>,
+    #[serde(rename = "labelFrame")]
     #[serde(default)]
     label_frames: Vec<LabelFrame>,
 }
diff --git a/rust/pspp/src/spv/read/tests.rs b/rust/pspp/src/spv/read/tests.rs
new file mode 100644 (file)
index 0000000..d7f68d2
--- /dev/null
@@ -0,0 +1,70 @@
+use std::{
+    fs::File,
+    io::{BufRead, BufReader, Seek},
+    path::Path,
+};
+
+use crate::{
+    output::{Text, pivot::tests::assert_lines_eq},
+    spv::ReadOptions,
+};
+
+/// Checks that `legacy1.spv` renders to the text stored in
+/// `legacy1.expected`.
+#[test]
+fn legacy1() {
+    const STEM: &str = "legacy1";
+    test_raw_spvfile(STEM);
+}
+
+/// Checks that `legacy2.spv` renders to the text stored in
+/// `legacy2.expected`.
+#[test]
+fn legacy2() {
+    const STEM: &str = "legacy2";
+    test_raw_spvfile(STEM);
+}
+
+/// Checks that `legacy3.spv` renders to the text stored in
+/// `legacy3.expected`.
+#[test]
+fn legacy3() {
+    const STEM: &str = "legacy3";
+    test_raw_spvfile(STEM);
+}
+
+/// Runs the SPV reader regression test for the test case called `name`.
+///
+/// The input is `src/spv/testdata/<name>.spv` and the expected text output is
+/// `src/spv/testdata/<name>.expected`.  Both paths are relative, so they are
+/// resolved against the directory the test runs in.  The actual reading and
+/// comparison are delegated to `test_spvfile`.
+///
+/// # Panics
+///
+/// Panics with a message naming the offending file if the input cannot be
+/// opened or if the expected output cannot be read as UTF-8 text.
+fn test_raw_spvfile(name: &str) {
+    let input_filename = Path::new("src/spv/testdata")
+        .join(name)
+        .with_extension("spv");
+    // Include the path in the panic message: a bare `unwrap()` reports only
+    // the OS error, which does not say which fixture is missing.
+    let spvfile = File::open(&input_filename)
+        .map(BufReader::new)
+        .unwrap_or_else(|error| panic!("{}: {error}", input_filename.display()));
+    let expected_filename = input_filename.with_extension("expected");
+    // `read_to_string` reads the file and validates it as UTF-8 in one step.
+    let expected = std::fs::read_to_string(&expected_filename)
+        .unwrap_or_else(|error| panic!("{}: {error}", expected_filename.display()));
+    test_spvfile(spvfile, &expected, &expected_filename);
+}
+
+/// Reads `spvfile` as an SPV file, renders its contents as text, and compares
+/// the result with `expected`.
+///
+/// `expected_filename` is the file that `expected` came from.  It labels the
+/// expected side of the comparison.  If the output differs and the
+/// `PSPP_REFRESH_EXPECTED` environment variable is set, that file is
+/// overwritten with the actual output and the test panics to report the
+/// refresh.  If the variable is not set, a hint about it is printed to stderr
+/// before the comparison runs.
+///
+/// If the SPV file cannot be opened, the error message, wrapped in a log text
+/// item, stands in for the rendered output.
+fn test_spvfile<R>(spvfile: R, expected: &str, expected_filename: &Path)
+where
+    R: BufRead + Seek + 'static,
+{
+    // TODO: warnings are pushed into this vector, but the closure takes
+    // ownership of it, so they never reach the output.  Eventually they
+    // should be emitted as log text items ahead of the file's own items.
+    let mut warnings = Vec::new();
+    let output = match ReadOptions::new(move |warning| warnings.push(warning)).open_reader(spvfile)
+    {
+        Err(error) => Text::new_log(error.to_string()).into_item(),
+        Ok(spv_file) => {
+            // TODO: the page setup is read but not checked yet.
+            let (items, _page_setup) = spv_file.into_contents();
+            items.into_iter().collect()
+        }
+    };
+
+    let actual = output.to_string();
+    if actual != expected {
+        if std::env::var("PSPP_REFRESH_EXPECTED").is_err() {
+            eprintln!("note: rerun with PSPP_REFRESH_EXPECTED=1 to refresh expected output");
+        } else {
+            std::fs::write(expected_filename, actual.as_bytes()).unwrap();
+            panic!("{}: refreshed output", expected_filename.display());
+        }
+    }
+    assert_lines_eq(&expected, expected_filename.display(), &actual, "actual");
+}
diff --git a/rust/pspp/src/spv/testdata/legacy1.expected b/rust/pspp/src/spv/testdata/legacy1.expected
new file mode 100644 (file)
index 0000000..6a72b49
--- /dev/null
@@ -0,0 +1,16 @@
+                               Testcase 1
+╭───────────────────┬─────────┬───────┬─────────────┬──────────────────╮
+│                   │Frequency│Percent│Valid Percent│Cumulative Percent│
+├───────────────────┼─────────┼───────┼─────────────┼──────────────────┤
+│Valid   >2 years   │        1│     .5│           .5│                .5│
+│        2-5 years  │       17│    7.9│          8.0│               8.5│
+│        6-10 years │      122│   57.0│         57.3│              65.7│
+│        11-15 years│       45│   21.0│         21.1│              86.9│
+│        16-20 years│       18│    8.4│          8.5│              95.3│
+│        >20 years  │       10│    4.7│          4.7│             100.0│
+│        Total      │      213│   99.5│        100.0│                  │
+├───────────────────┼─────────┼───────┼─────────────┼──────────────────┤
+│Missing System     │        1│     .5│             │                  │
+├───────────────────┼─────────┼───────┼─────────────┼──────────────────┤
+│Total              │      214│  100.0│             │                  │
+╰───────────────────┴─────────┴───────┴─────────────┴──────────────────╯
diff --git a/rust/pspp/src/spv/testdata/legacy1.spv b/rust/pspp/src/spv/testdata/legacy1.spv
new file mode 100644 (file)
index 0000000..2726003
Binary files /dev/null and b/rust/pspp/src/spv/testdata/legacy1.spv differ
diff --git a/rust/pspp/src/spv/testdata/legacy2.expected b/rust/pspp/src/spv/testdata/legacy2.expected
new file mode 100644 (file)
index 0000000..660bb30
--- /dev/null
@@ -0,0 +1,19 @@
+T-Test
+
+                 One-Sample Statistics
+╭─────────────┬──┬─────┬──────────────┬───────────────╮
+│             │ N│ Mean│Std. Deviation│Std. Error Mean│
+├─────────────┼──┼─────┼──────────────┼───────────────┤
+│Variable Name│10│47.30│         2.669│           .844│
+╰─────────────┴──┴─────┴──────────────┴───────────────╯
+
+                                           One-Sample Test
+╭─────────────┬────────────────────────────────────────────────────────────────────────────────────╮
+│             │                Test Value = 50                                                     │
+│             ├──────┬──┬───────────────┬───────────────┬──────────────────────────────────────────┤
+│             │      │  │               │               │ 95% Confidence Interval of the Difference│
+│             │      │  │               │               ├─────────────────────┬────────────────────┤
+│             │   t  │df│Sig. (2-tailed)│Mean Difference│        Lower        │        Upper       │
+├─────────────┼──────┼──┼───────────────┼───────────────┼─────────────────────┼────────────────────┤
+│Variable Name│-3.199│ 9│           .011│         -2.700│                -4.61│                -.79│
+╰─────────────┴──────┴──┴───────────────┴───────────────┴─────────────────────┴────────────────────╯
diff --git a/rust/pspp/src/spv/testdata/legacy2.spv b/rust/pspp/src/spv/testdata/legacy2.spv
new file mode 100644 (file)
index 0000000..b8c07a4
Binary files /dev/null and b/rust/pspp/src/spv/testdata/legacy2.spv differ
diff --git a/rust/pspp/src/spv/testdata/legacy3.expected b/rust/pspp/src/spv/testdata/legacy3.expected
new file mode 100644 (file)
index 0000000..c295ec4
--- /dev/null
@@ -0,0 +1,27 @@
+                         Excluded Variables[d]
+╭───┬────────┬──────┬────┬───────────────────┬───────────────────────╮
+│   │        │      │    │                   │Collinearity Statistics│
+│   │        │      │    │                   ├───────────────────────┤
+│   │ Beta In│   t  │Sig.│Partial Correlation│       Tolerance       │
+├───┼────────┼──────┼────┼───────────────────┼───────────────────────┤
+│1 A│-.304[a]│-2.216│.032│              -.317│                   .987│
+│  B│ .611[a]│ 5.532│.000│               .641│                   .999│
+│  C│ .394[a]│ 2.964│.005│               .408│                   .975│
+│  D│ .535[a]│ 4.415│.000│               .554│                   .976│
+│  E│-.239[a]│-1.704│.095│              -.249│                   .982│
+│  F│ .615[a]│ 5.558│.000│               .642│                   .990│
+│  G│ .531[a]│ 4.390│.000│               .552│                   .981│
+├───┼────────┼──────┼────┼───────────────────┼───────────────────────┤
+│2 D│ .399[b]│ 1.908│.063│               .286│                   .270│
+│  E│ .762[b]│ 1.598│.118│               .242│                   .053│
+│  F│ .512[b]│ 1.713│.094│               .258│                   .135│
+│  G│ .648[b]│ 2.154│.037│               .319│                   .128│
+├───┼────────┼──────┼────┼───────────────────┼───────────────────────┤
+│3 E│ .598[c]│ 1.250│.219│               .194│                   .051│
+│  F│-.053[c]│ -.070│.945│              -.011│                   .021│
+│  G│ .784[c]│  .964│.341│               .151│                   .018│
+╰───┴────────┴──────┴────┴───────────────────┴───────────────────────╯
+a. Footnote content a
+b. Footnote content b
+c. Footnote content c
+d. Footnote content d
diff --git a/rust/pspp/src/spv/testdata/legacy3.spv b/rust/pspp/src/spv/testdata/legacy3.spv
new file mode 100644 (file)
index 0000000..f7368ef
Binary files /dev/null and b/rust/pspp/src/spv/testdata/legacy3.spv differ