legacy table subscripts
author Ben Pfaff <blp@cs.stanford.edu>
Fri, 2 Jan 2026 23:55:02 +0000 (15:55 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Fri, 2 Jan 2026 23:55:02 +0000 (15:55 -0800)
rust/pspp/src/spv/read/legacy_bin.rs
rust/pspp/src/spv/read/legacy_xml.rs
rust/pspp/src/spv/read/tests.rs
rust/pspp/src/spv/testdata/legacy16.expected [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy16.spv [new file with mode: 0644]

index 813dd9d4db717c6308670512e011d1c3889f84ce..acd43b751bd50eb7ec885cd7748964a02ccfb948 100644 (file)
@@ -202,8 +202,10 @@ impl DataValue {
             && let Ok(time) = NaiveTime::parse_from_str(s.as_str(), "%H:%M:%S%.3f")
         {
             Value::new_time(time)
-        } else {
+        } else if !self.value.is_sysmis() {
             Value::new_datum(&self.value)
+        } else {
+            Value::new_empty()
         }
         .with_format(format)
     }
index f5f39a3449a4639fa04e4cb21e16264bf9be288b..325afa44adf63fd340fbc7a372d08b4c8eb6fc5e 100644 (file)
@@ -256,6 +256,7 @@ impl Visualization {
                         .chain(derived_variables.iter().map(|dv| dv.id.clone()))
                         .collect(),
                 ));
+                break;
             }
         }
         series
@@ -378,6 +379,21 @@ impl Visualization {
             return HashMap::default();
         };
 
+        let markers = if let Some(markers) = graph.interval.footnotes(false)
+            && let Some(series) = series.get(markers.variable.as_str())
+        {
+            Some((
+                series,
+                markers
+                    .mappings
+                    .iter()
+                    .map(|m| (m.from.get(), m.to.as_str()))
+                    .collect::<HashMap<_, _>>(),
+            ))
+        } else {
+            None
+        };
+
         let mut data = HashMap::new();
         let mut coords = Vec::with_capacity(dims.len());
         let (cell_formats, format_map) = graph.interval.labeling.decode_format_map(&series);
@@ -420,13 +436,15 @@ impl Visualization {
                     }
                 }
             }
-
-            if let Some(datum) = value.datum()
-                && datum.is_sysmis()
-                && value.footnotes().is_empty()
+            if let Some((series, mappings)) = &markers
+                && let Some(dv) = series.values.get(i)
+                && let Some(category) = dv.category()
+                && let Some(marker) = mappings.get(&category)
             {
-                // A system-missing value without a footnote represents an empty cell.
-            } else {
+                value.add_subscript(*marker);
+            }
+
+            if !value.is_empty() {
                 data.insert(coords.clone(), value);
             }
         }
@@ -674,7 +692,7 @@ impl Visualization {
 
         let cell_footnotes = graph
             .interval
-            .footnotes()
+            .footnotes(true)
             .and_then(|footnotes| series.get(footnotes.variable.as_str()));
         let mut data = self.decode_data(graph, &footnotes, cell_footnotes, &dims, &series, warn);
 
@@ -1829,14 +1847,17 @@ struct Interval {
 }
 
 impl Interval {
-    fn footnotes(&self) -> Option<&Footnotes> {
-        if let Some(footnotes) = &self.footnotes {
+    fn footnotes(&self, superscript: bool) -> Option<&Footnotes> {
+        if let Some(footnotes) = &self.footnotes
+            && footnotes.superscript == superscript
+        {
             Some(footnotes)
         } else {
             self.labeling
                 .children
                 .iter()
-                .find_map(|child| child.as_footnotes())
+                .flat_map(|child| child.as_footnotes())
+                .find(|child| child.superscript == superscript)
         }
     }
 }
@@ -1918,6 +1939,9 @@ struct Footnotes {
     #[serde(rename = "@variable")]
     variable: String,
 
+    #[serde(default, rename = "@superscript")]
+    superscript: bool,
+
     #[serde(default, rename = "footnoteMapping")]
     mappings: Vec<FootnoteMapping>,
 }
index c1860153ec867ddd75630cb0578593f1ac8250da..65de04f68a79355149bf365548bbc7b403683807 100644 (file)
@@ -102,6 +102,12 @@ fn legacy15() {
     test_raw_spvfile("legacy15");
 }
 
+/// Subscript support.
+#[test]
+fn legacy16() {
+    test_raw_spvfile("legacy16");
+}
+
 fn test_raw_spvfile(name: &str) {
     let input_filename = Path::new("src/spv/testdata")
         .join(name)
diff --git a/rust/pspp/src/spv/testdata/legacy16.expected b/rust/pspp/src/spv/testdata/legacy16.expected
new file mode 100644 (file)
index 0000000..b8cf789
--- /dev/null
@@ -0,0 +1,17 @@
+                                                 Table 1
+╭────────────────────────┬─────────────────────────────────────────────────────────────────────────────╮
+│                        │                                    GENDER                                   │
+│                        ├──────────────────────────┬──────────────────────────┬───────────────────────┤
+│                        │           Male           │          Female          │         Total         │
+│                        ├───────┬──────────────────┼───────┬──────────────────┼────┬──────────────────┤
+│                        │  Mean │Standard Deviation│  Mean │Standard Deviation│Mean│Standard Deviation│
+├────────────────────────┼───────┼──────────────────┼───────┼──────────────────┼────┼──────────────────┤
+│STATUS Examined      AGE│ 52.3_a│               9.2│ 51.8_a│              10.7│52.0│              10.1│
+│       Not Available AGE│ 50.8_a│               7.4│ 49.4_a│               8.2│50.6│               7.4│
+│       Refused       AGE│59.0[1]│.                 │50.0[1]│.                 │54.5│               6.4│
+│       Total         AGE│   52.1│               9.1│   51.8│              10.7│51.9│              10.0│
+╰────────────────────────┴───────┴──────────────────┴───────┴──────────────────┴────┴──────────────────╯
+Note: Values in the same row and subtable not sharing the same subscript are significantly different at p< 0.05 in the two-sided test of equality for column means. Cells with no subscript are not included in the test. Tests assume equal variances.[2][3]
+1.  This category is not used in comparisons because the sum of case weights is less than two.
+2.  Tests are adjusted for all pairwise comparisons within a row of each innermost subtable using the Bonferroni correction.
+3.  Pairwise comparisons are not performed for some subtables because of numerical problems.
diff --git a/rust/pspp/src/spv/testdata/legacy16.spv b/rust/pspp/src/spv/testdata/legacy16.spv
new file mode 100644 (file)
index 0000000..47f844f
Binary files /dev/null and b/rust/pspp/src/spv/testdata/legacy16.spv differ