first version of xml decoder that might do something
authorBen Pfaff <blp@cs.stanford.edu>
Tue, 4 Nov 2025 17:17:39 +0000 (09:17 -0800)
committerBen Pfaff <blp@cs.stanford.edu>
Tue, 4 Nov 2025 17:17:39 +0000 (09:17 -0800)
rust/doc/src/spv/legacy-detail-xml.md
rust/pspp/src/output/spv/legacy_bin.rs
rust/pspp/src/output/spv/legacy_xml.rs

index 59f429c3de3a3c687e004447cccbb8c7153c4f4f..1f8449e8dc2c3ed0bfd77d42ae4038cfcbf18970 100644 (file)
@@ -1474,3 +1474,14 @@ printingProperties
 The `name` attribute appears only in [standalone `.stt`
 files](../tablelook.md#the-tlo-format).
 
+## Cell Data
+
+A variable named `cell` always exists.  This variable holds the data
+displayed in the table.
+
+Each `cell` value pairs with the matching values of the `categories`
+variables from [`nest` and `layer`], which place it along each dimension.
+
+XXX example
+
+[`nest` and `layer`]: #the-faceting-element
index 8af6c7f6fcc6b01b535cbbe7a9923097b3f1c48d..f1e0e4634207eada99d51fcaed2f8986e778de4a 100644 (file)
@@ -91,15 +91,16 @@ impl DataValue {
         .and_then(|v| (v >= 0.0 && v < usize::MAX as f64).then_some(v as usize))
     }
 
-    pub fn as_format(&self, format_map: &HashMap<u32, Format>) -> Format {
+    // Returns the `Format` for this value's code: the `format_map` entry if present, else `decode_format`.  This should probably be a method on some hypothetical FormatMap.
+    pub fn as_format(&self, format_map: &HashMap<i64, Format>) -> Format {
         let f = match &self.value {
-            Datum::Number(Some(number)) => *number as u32,
+            Datum::Number(Some(number)) => *number as i64, // Float-to-int `as` saturates and turns NaN into 0.
             Datum::Number(None) => 0,
             Datum::String(s) => s.parse().unwrap_or_default(),
         };
         match format_map.get(&f) {
             Some(format) => *format,
-            None => decode_format(f),
+            None => decode_format(f as u32), // NOTE(review): `as u32` wraps for codes outside the u32 range; confirm that is intended.
         }
     }
 
index db02c0f5c79eee5c5eed394fcbe68d42a8c32c76..5042e47b8736b0f498cae0104490b077ee69badc 100644 (file)
@@ -27,11 +27,12 @@ use serde::Deserialize;
 
 use crate::{
     data::Datum,
-    format::{Decimal::Dot, F8_0, Type, UncheckedFormat},
+    format::{Decimal::Dot, F8_0, F40_2, Type, UncheckedFormat},
     output::{
         pivot::{
-            self, Area, AreaStyle, Axis2, Axis3, Color, HeadingRegion, HorzAlign, Leaf, Length,
-            Look, PivotTable, RowParity, Value, VertAlign,
+            self, Area, AreaStyle, Axis2, Axis3, Category, Color, Dimension, Group, HeadingRegion,
+            HorzAlign, Leaf, Length, Look, NumberValue, PivotTable, RowParity, Value, ValueInner,
+            VertAlign,
         },
         spv::legacy_bin::DataValue,
     },
@@ -154,6 +155,16 @@ impl Map {
             }
         }
     }
+
+    fn lookup<'a>(&'a self, dv: &'a DataValue) -> &'a Datum<String> { // Maps `dv` through `self.0`; falls back to `dv.value` when there is no entry.
+        if let Datum::Number(Some(number)) = &dv.value // Only `Number(Some(_))` values are used as lookup keys.
+            && let Some(value) = self.0.get(&OrderedFloat(*number))
+        {
+            value
+        } else {
+            &dv.value // Strings, `Number(None)`, and unmapped numbers are returned unchanged.
+        }
+    }
 }
 
 #[derive(Deserialize, Debug)]
@@ -365,20 +376,24 @@ impl Visualization {
             rotate_inner_column_labels: &mut bool,
             rotate_outer_row_labels: &mut bool,
             footnotes: &pivot::Footnotes,
-        ) {
+        ) -> Dimension {
             let base_level = variables[0].1;
-            if let Ok(a) = Axis2::try_from(a)
+            let show_label = if let Ok(a) = Axis2::try_from(a)
                 && let Some(axis) = axes.get(&(base_level + variables.len()))
                 && let Some(label) = &axis.label
             {
                 let out = &mut look.areas[Area::Labels(a)];
                 *out = Area::Labels(a).default_area_style();
+                let style = label.style.get(&styles);
                 Style::decode(
-                    label.style.get(&styles),
+                    style,
                     label.text_frame_style.as_ref().and_then(|r| r.get(styles)),
                     out,
                 );
-            }
+                style.is_some_and(|s| s.visible.unwrap_or_default())
+            } else {
+                false
+            };
             if a == Axis3::Y
                 && let Some(axis) = axes.get(&(base_level + variables.len() - 1))
             {
@@ -399,46 +414,80 @@ impl Visualization {
                 }
             }
 
-            // Find the first row for each category.
-            let max_cat = variables[0].0.max_category().unwrap()/*XXX*/;
+            let variables = variables
+                .into_iter()
+                .map(|(series, _level)| *series)
+                .collect::<Vec<_>>();
+
+            // Find the first row for each category, then drop missing
+            // categories and count what's left.
+            let max_cat = variables[0].max_category().unwrap()/*XXX*/;
             let mut cat_rows = vec![None; max_cat + 1];
-            for (index, value) in variables[0].0.values.iter().enumerate() {
+            for (index, value) in variables[0].values.iter().enumerate() {
                 if let Some(row) = value.category() {
                     cat_rows[row].get_or_insert(index);
                 }
             }
-
-            // Drop missing categories and count what's left.
             let cat_rows = cat_rows.into_iter().flatten().collect::<Vec<_>>();
 
             // Make leaf categories.
             let mut cats = Vec::with_capacity(cat_rows.len());
-            for row in cat_rows.iter().copied() {
-                let dv = &variables[0].0.values[row];
-                let name = Value::new_datum(&dv.value);
-                let name = variables[0].0.add_affixes(name, &footnotes);
-                cats.push(Leaf::new(name));
+            for (index, cat_row) in cat_rows.into_iter().enumerate() {
+                let dv = &variables[0].values[cat_row];
+                let name = variables[0].new_name(dv, footnotes);
+                cats.push((Category::from(Leaf::new(name)), index..index + 1));
+            }
+            if cats.is_empty() {
+                todo!()
             }
 
             // Now group them, in one pass per grouping variable, innermost first.
             for j in 1..variables.len() {
-                // Find a sequence of categories `cat1...cat2`, that all have
-                // the same value in series `j`.  (This might be only a single
-                // category.) */
-                let series = variables[j].0;
-                let mut cat1 = 0;
-                while cat1 < cats.len() {
-                    let mut cat2 = cat1 + 1;
-                    while cat2 < cats.len() {}
+                let mut next_cats = Vec::with_capacity(cats.len());
+                let mut start = 0;
+                for end in 1..=cats.len() {
+                    let dv1 = &variables[j].values[cats[start].1.start];
+                    if end < cats.len()
+                        && variables[j].values[cats[end].1.clone()]
+                            .iter()
+                            .all(|dv| &dv.value == &dv1.value)
+                    {
+                    } else {
+                        let name = variables[j].map.lookup(dv1);
+                        if end - start > 1 || name.is_number_or(|s| s.is_empty()) {
+                            let name = variables[j].new_name(dv1, footnotes);
+                            let mut group = Group::new(name);
+                            for i in start..end {
+                                group.push(cats[i].0.clone());
+                            }
+                            next_cats.push((
+                                Category::from(group),
+                                cats[start].1.start..cats[end - 1].1.end,
+                            ));
+                        } else {
+                            next_cats.push(cats[start].clone());
+                        }
+                        start = end;
+                    }
                 }
+                cats = next_cats;
             }
 
-            todo!()
+            Dimension::new(
+                Group::new(
+                    variables[0]
+                        .label
+                        .as_ref()
+                        .map_or_else(|| Value::empty(), |label| Value::new_user_text(label)),
+                )
+                .with_multiple(cats.into_iter().map(|(category, _range)| category))
+                .with_show_label(show_label),
+            )
         }
 
-        fn decode_dimensions(
-            variables: &[VariableReference],
-            series: &HashMap<&str, Series>,
+        fn decode_dimensions<'a, 'b>(
+            variables: impl IntoIterator<Item = &'a str>,
+            series: &'b HashMap<&str, Series>,
             axes: &HashMap<usize, &Axis>,
             styles: &HashMap<&str, &Style>,
             a: Axis3,
@@ -446,15 +495,16 @@ impl Visualization {
             rotate_inner_column_labels: &mut bool,
             rotate_outer_row_labels: &mut bool,
             footnotes: &pivot::Footnotes,
-
             level_ofs: usize,
-        ) -> Vec<pivot::Dimension> {
+            dimensions: &mut Vec<(Axis3, pivot::Dimension)>,
+            coordinates: &mut Vec<&'b Series>,
+        ) {
             let variables = variables
                 .into_iter()
                 .zip(level_ofs..)
-                .map(|(vr, level)| {
+                .map(|(variable_name, level)| {
                     series
-                        .get(vr.reference.as_str())
+                        .get(variable_name)
                         .filter(|s| !s.values.is_empty())
                         .map(|s| (s, level))
                 })
@@ -464,6 +514,27 @@ impl Visualization {
                 if let Some((var, level)) = var {
                     dim_vars.push((var, level));
                 } else if !dim_vars.is_empty() {
+                    coordinates.push(dim_vars[0].0);
+                    dimensions.push((
+                        a,
+                        decode_dimension(
+                            &dim_vars,
+                            axes,
+                            styles,
+                            a,
+                            look,
+                            rotate_inner_column_labels,
+                            rotate_outer_row_labels,
+                            footnotes,
+                        ),
+                    ));
+                    dim_vars.clear();
+                }
+            }
+            if !dim_vars.is_empty() {
+                coordinates.push(dim_vars[0].0);
+                dimensions.push((
+                    a,
                     decode_dimension(
                         &dim_vars,
                         axes,
@@ -473,23 +544,9 @@ impl Visualization {
                         rotate_inner_column_labels,
                         rotate_outer_row_labels,
                         footnotes,
-                    );
-                    dim_vars.clear();
-                }
-            }
-            if !dim_vars.is_empty() {
-                decode_dimension(
-                    &dim_vars,
-                    axes,
-                    styles,
-                    a,
-                    look,
-                    rotate_inner_column_labels,
-                    rotate_outer_row_labels,
-                    footnotes,
-                );
+                    ),
+                ));
             }
-            todo!()
         }
 
         let mut rotate_inner_column_labels = false;
@@ -499,8 +556,10 @@ impl Visualization {
             .first()
             .map(|child| child.variables())
             .unwrap_or_default();
+        let mut dimensions = Vec::new();
+        let mut coordinates = Vec::new();
         decode_dimensions(
-            columns,
+            columns.into_iter().map(|vr| vr.reference.as_str()),
             &series,
             &axes,
             &styles,
@@ -510,13 +569,15 @@ impl Visualization {
             &mut rotate_outer_row_labels,
             &footnotes,
             1,
+            &mut dimensions,
+            &mut coordinates,
         );
         let rows = cross
             .get(1)
             .map(|child| child.variables())
             .unwrap_or_default();
         decode_dimensions(
-            rows,
+            rows.into_iter().map(|vr| vr.reference.as_str()),
             &series,
             &axes,
             &styles,
@@ -526,9 +587,87 @@ impl Visualization {
             &mut rotate_outer_row_labels,
             &footnotes,
             1 + columns.len(),
+            &mut dimensions,
+            &mut coordinates,
         );
 
-        todo!()
+        let mut level_ofs = columns.len() + rows.len() + 1;
+        for layers in [&graph.faceting.layers1, &graph.faceting.layers2] {
+            decode_dimensions(
+                layers.iter().map(|layer| layer.variable.as_str()),
+                &series,
+                &axes,
+                &styles,
+                Axis3::Y,
+                &mut look,
+                &mut rotate_inner_column_labels,
+                &mut rotate_outer_row_labels,
+                &footnotes,
+                level_ofs,
+                &mut dimensions,
+                &mut coordinates,
+            );
+            level_ofs += layers.len();
+        }
+
+        let mut pivot_table = PivotTable::new(dimensions);
+
+        let cell = series.get("cell").unwrap()/*XXX*/;
+        let mut coords = Vec::with_capacity(coordinates.len());
+        let (cell_formats, format_map) = graph.interval.labeling.decode_format_map(&series);
+        let cell_footnotes =
+            graph
+                .interval
+                .labeling
+                .children
+                .iter()
+                .find_map(|child| match child {
+                    LabelingChild::Footnotes(footnotes) => series.get(footnotes.variable.as_str()),
+                    _ => None,
+                });
+        for (i, cell) in cell.values.iter().enumerate() {
+            coords.clear();
+            for series in &coordinates {
+                // XXX indexing of values, and unwrap
+                coords.push(series.values[i].category().unwrap());
+            }
+
+            let format = if let Some(cell_formats) = &cell_formats {
+                // XXX indexing of values
+                cell_formats.values[i].as_format(&format_map)
+            } else {
+                F40_2
+            };
+            let mut value = cell.as_pivot_value(format);
+
+            if let Some(cell_footnotes) = &cell_footnotes {
+                // XXX indexing
+                let dv = &cell_footnotes.values[i];
+                if let Some(s) = dv.value.as_string() {
+                    for part in s.split(',') {
+                        if let Ok(index) = part.parse::<usize>()
+                            && let Some(index) = index.checked_sub(1)
+                            && let Some(footnote) = footnotes.get(index)
+                        {
+                            value = value.with_footnote(footnote);
+                        }
+                    }
+                }
+            }
+            if let Value {
+                inner: ValueInner::Number(NumberValue { value: None, .. }),
+                styling: None,
+            } = &value
+            {
+                // A system-missing value without a footnote represents an empty cell.
+            } else {
+                // XXX cell_index might be invalid?
+                pivot_table.insert(coords.as_slice(), value);
+            }
+        }
+        // XXX decode_set_cell_properties
+
+        Ok(pivot_table)
     }
 }
 
@@ -560,6 +699,12 @@ impl Series {
             .filter_map(|value| value.category())
             .max()
     }
+
+    fn new_name(&self, dv: &DataValue, footnotes: &pivot::Footnotes) -> Value { // Builds the `Value` used as a category or group name for `dv`.
+        let dv = self.map.lookup(dv); // Use the datum `self.map` maps `dv` to, or `dv`'s own value if unmapped.
+        let name = Value::new_datum(dv);
+        self.add_affixes(name, &footnotes) // `add_affixes` is defined outside this view; its result is returned as-is.
+    }
 }
 
 #[derive(Deserialize, Debug)]
@@ -1795,6 +1940,27 @@ struct Labeling {
     children: Vec<LabelingChild>,
 }
 
+impl Labeling {
+    fn decode_format_map<'a>( // Gathers format information from this labeling's `Formatting` children.
+        &self,
+        series: &'a HashMap<&str, Series>,
+    ) -> (Option<&'a Series>, HashMap<i64, crate::format::Format>) { // Returns (series named by `formatting.variable`, map of `mapping.from` to decoded format).
+        let mut map = HashMap::new();
+        let mut cell_format = None;
+        for child in &self.children {
+            if let LabelingChild::Formatting(formatting) = child {
+                cell_format = series.get(formatting.variable.as_str()); // A later `Formatting` child overwrites the series chosen by an earlier one.
+                for mapping in &formatting.mappings {
+                    if let Some(format) = &mapping.format { // Mappings with no `format` contribute nothing to the map.
+                        map.insert(mapping.from, format.decode());
+                    }
+                }
+            }
+        }
+        (cell_format, map)
+    }
+}
+
 #[derive(Deserialize, Debug)]
 #[serde(rename_all = "camelCase")]
 enum LabelingChild {