cleanup
author Ben Pfaff <blp@cs.stanford.edu>
Fri, 2 Jan 2026 20:57:52 +0000 (12:57 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Fri, 2 Jan 2026 20:57:52 +0000 (12:57 -0800)
rust/pspp/src/spv/read/legacy_xml.rs

index 51e4480e6759c230e80c101b33ad8bc9bc69530c..ad02c37f658d7c9e02272f464ba29d039dd0a7df 100644 (file)
@@ -308,6 +308,131 @@ impl Visualization {
         pivot::Footnotes::from_iter(footnotes)
     }
 
+    /// Decodes the dimensions of `graph` from `series`.
+    ///
+    /// Walks every `(axis, dimension)` pair yielded by
+    /// `graph.faceting.dimensions()` and lets `decode_axis_dimensions` append
+    /// the dimensions it finds for that axis to a shared vector.
+    ///
+    /// Returns a pair of:
+    ///
+    /// - The decoded dimensions, in the order they were appended.
+    ///
+    /// - For each series returned for [Axis3::Z], the leaf index that
+    ///   corresponds to the value reported by `graph.faceting.layer_value`
+    ///   for that series' name.  The vector is empty if no `Axis3::Z` entry
+    ///   is visited.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a series returned for [Axis3::Z] has no layer value, if that
+    /// value is not in the series' `coordinate_to_index` map, or if the
+    /// locator found there is not a leaf.
+    fn decode_dimensions<'a>(
+        &self,
+        graph: &Graph,
+        series: &'a BTreeMap<&str, Series>,
+        footnotes: &pivot::Footnotes,
+    ) -> (Vec<Dim<'a>>, Vec<usize>) {
+        // Maps each facet level's `level` number to its axis.
+        let axes = graph
+            .facet_layout
+            .children
+            .iter()
+            .filter_map(|child| child.facet_level())
+            .map(|facet_level| (facet_level.level, &facet_level.axis))
+            .collect::<HashMap<_, _>>();
+        // Maps style IDs to styles.  Styles without an ID are left out.
+        let styles = self
+            .children
+            .iter()
+            .filter_map(|child| child.style())
+            .filter_map(|style| style.id.as_ref().map(|id| (id.as_str(), style)))
+            .collect::<HashMap<_, _>>();
+        let mut dims = Vec::new();
+        // Level number assigned to the first variable of the next axis.
+        // Numbering starts at 1 and continues across axes.
+        let mut level_ofs = 1;
+        let mut current_layer = Vec::new();
+
+        for (axis, dimension) in graph.faceting.dimensions() {
+            let dim_series = decode_axis_dimensions(
+                dimension.iter().copied(),
+                &series,
+                &axes,
+                &styles,
+                axis,
+                &footnotes,
+                level_ofs,
+                &mut dims,
+            );
+            if axis == Axis3::Z {
+                // Translate each layer dimension's current value into the
+                // index of the matching leaf category.
+                current_layer = dim_series
+                    .into_iter()
+                    .map(|series| {
+                        let name = &series.name;
+                        let coordinate = graph.faceting.layer_value(&name).unwrap();
+                        series
+                            .coordinate_to_index
+                            .borrow()
+                            .get(&coordinate)
+                            .unwrap()
+                            .as_leaf()
+                            .unwrap()
+                    })
+                    .collect();
+            }
+            // Every variable name on this axis consumes one level number.
+            level_ofs += dimension.len();
+        }
+
+        (dims, current_layer)
+    }
+
+    /// Decodes the values in the `cell` series into a map from coordinates to
+    /// values.
+    ///
+    /// Each key holds one leaf index per element of `dims`, in the same order
+    /// as `dims`.
+    ///
+    /// - `footnotes`: Footnotes that cells may refer to.
+    ///
+    /// - `cell_footnotes`: If present, a series whose `i`th value is a string
+    ///   of comma-separated 1-based indexes into `footnotes` to attach to the
+    ///   `i`th cell.  Parts that do not parse as a number, or that do not
+    ///   name an existing footnote, are ignored.
+    ///
+    /// If `series` has no `cell` entry, reports
+    /// [LegacyXmlWarning::MissingData] through `warn` and returns an empty
+    /// map.
+    fn decode_data(
+        &self,
+        graph: &Graph,
+        footnotes: &pivot::Footnotes,
+        cell_footnotes: Option<&Series>,
+        dims: &[Dim],
+        series: &BTreeMap<&str, Series>,
+        warn: &mut dyn FnMut(LegacyXmlWarning),
+    ) -> HashMap<Vec<usize>, Value> {
+        let Some(cell) = series.get("cell") else {
+            warn(LegacyXmlWarning::MissingData);
+            return HashMap::default();
+        };
+
+        let mut data = HashMap::new();
+        // Reused for every cell to avoid reallocating.
+        let mut coords = Vec::with_capacity(dims.len());
+        let (cell_formats, format_map) = graph.interval.labeling.decode_format_map(&series);
+        'outer: for (i, cell) in cell.values.iter().enumerate() {
+            // Find the leaf index of cell `i` along each dimension.  If any
+            // dimension cannot supply one, the whole cell is skipped.
+            coords.clear();
+            for dim in dims {
+                if let Some(coordinate) = dim.coordinate.values.get(i)
+                    && let Some(coordinate) = coordinate.category()
+                    && let Some(locator) =
+                        dim.coordinate.coordinate_to_index.borrow().get(&coordinate)
+                    && let Some(index) = locator.as_leaf()
+                {
+                    coords.push(index);
+                } else {
+                    // XXX warn
+                    continue 'outer;
+                }
+            }
+
+            // Use the per-cell format if there is one, otherwise `F40_2`.
+            let format = if let Some(cell_formats) = &cell_formats
+                && let Some(value) = cell_formats.values.get(i)
+            {
+                value.as_format(&format_map)
+            } else {
+                F40_2
+            };
+            let mut value = cell.as_pivot_value(format);
+
+            if let Some(cell_footnotes) = &cell_footnotes
+                && let Some(dv) = cell_footnotes.values.get(i)
+            {
+                if let Some(s) = dv.value.as_string() {
+                    for part in s.split(',') {
+                        // Footnote references are 1-based; `checked_sub`
+                        // rejects 0 instead of underflowing.
+                        if let Ok(index) = part.parse::<usize>()
+                            && let Some(index) = index.checked_sub(1)
+                            && let Some(footnote) = footnotes.get(index)
+                        {
+                            value.add_footnote(footnote);
+                        }
+                    }
+                }
+            }
+
+            if let Some(datum) = value.datum()
+                && datum.is_sysmis()
+                && value.footnotes().is_empty()
+            {
+                // A system-missing value without a footnote represents an empty cell.
+            } else {
+                data.insert(coords.clone(), value);
+            }
+        }
+        data
+    }
+
     pub fn decode(
         &self,
         data: IndexMap<String, IndexMap<String, Vec<DataValue>>>,
@@ -354,306 +479,13 @@ impl Visualization {
         let caption = LabelFrame::decode_label(caption_labels, &footnotes);
 
         let series = self.decode_series(data, warn);
-
-        fn decode_dimension<'a>(
-            variables: &[(&'a Series, usize)],
-            axes: &HashMap<usize, &Axis>,
-            styles: &HashMap<&str, &Style>,
-            a: Axis3,
-
-            footnotes: &pivot::Footnotes,
-            dims: &mut Vec<Dim<'a>>,
-        ) {
-            let base_level = variables[0].1;
-            let (show_label, dim_cell, dim_font, dim_label) = if let Ok(a) = Axis2::try_from(a)
-                && let Some(axis) = axes.get(&(base_level + variables.len()))
-                && let Some(label) = &axis.label
-            {
-                let mut dimension_style = AreaStyle::default_for_area(Area::Labels(a));
-                let style = label.style.get(&styles);
-                let fg = style;
-                let bg = label.text_frame_style.as_ref().and_then(|r| r.get(styles));
-                (
-                    style.is_some_and(|s| s.visible.unwrap_or(true)),
-                    Style::decode_cell_style(fg, &mut dimension_style.cell_style)
-                        .then_some(dimension_style.cell_style),
-                    Style::decode_font_style(fg, bg, &mut dimension_style.font_style)
-                        .then_some(dimension_style.font_style),
-                    LabelFrame::decode_label(&[label], footnotes),
-                )
-            } else {
-                (false, None, None, None)
-            };
-
-            let hide_all_labels = if let Some(axis) = axes.get(&base_level)
-                && let Some(style) = axis.major_ticks.style.get(styles)
-                && style.visible == Some(false)
-            {
-                true
-            } else {
-                false
-            };
-
-            let variables = variables
-                .into_iter()
-                .map(|(series, _level)| *series)
-                .collect::<Vec<_>>();
-
-            #[derive(Clone, Debug)]
-            struct CatBuilder {
-                /// The category we've built so far.
-                category: Category,
-
-                /// The index in the series of one example of this category.
-                index: usize,
-
-                /// The range of leaf indexes covered by `category`.
-                ///
-                /// If `category` is a leaf, the range has a length of 1.
-                /// If `category` is a group, the length is at least 1.
-                leaves: Range<usize>,
-
-                /// How to find this category in its dimension.
-                location: CategoryLocator,
-            }
-
-            // Make leaf categories.
-            let mut map = BTreeMap::new();
-            for (index, value) in variables[0].values.iter().enumerate() {
-                if let Some(coordinate) = value.category() {
-                    map.entry(coordinate).or_insert(index);
-                }
-            }
-            let mut coordinate_to_index = BTreeMap::new();
-            let mut cats = Vec::<CatBuilder>::new();
-            for (coordinate, index) in map {
-                let value = &variables[0].values[index];
-                coordinate_to_index.insert(coordinate, CategoryLocator::new_leaf(cats.len()));
-                cats.push(CatBuilder {
-                    category: Category::from(Leaf::new(variables[0].new_name(value, footnotes))),
-                    index,
-                    leaves: cats.len()..cats.len() + 1,
-                    location: CategoryLocator::new_leaf(cats.len()),
-                });
-            }
-            *variables[0].coordinate_to_index.borrow_mut() = coordinate_to_index;
-
-            // Now group them, in one pass per grouping variable, innermost first.
-            for variable in &variables[1..] {
-                let mut coordinate_to_index = BTreeMap::new();
-                let mut next_cats = Vec::with_capacity(cats.len());
-                let mut start = 0;
-                for end in 1..=cats.len() {
-                    let dv1 = &variable.values[cats[start].index];
-                    if end >= cats.len() || &variable.values[cats[end].index].value != &dv1.value {
-                        let name = variable.map.lookup(dv1);
-                        if name.is_number_or(|s| !s.is_empty()) {
-                            let name = variable.new_name(dv1, footnotes);
-                            let mut group = Group::new(name);
-                            for i in start..end {
-                                group.push(cats[i].category.clone());
-                            }
-                            let next_cat = CatBuilder {
-                                category: Category::from(group),
-                                index: cats[start].index,
-                                leaves: cats[start].leaves.start..cats[end - 1].leaves.end,
-                                location: cats[start].location.parent(),
-                            };
-                            coordinate_to_index
-                                .insert(dv1.category().unwrap() /*XXX?*/, next_cat.location);
-                            next_cats.push(next_cat);
-                        } else {
-                            // XXX coordinate_to_index?
-                            for cat in &cats[start..end] {
-                                next_cats.push(cat.clone());
-                            }
-                        };
-                        start = end;
-                    }
-                }
-                *variable.coordinate_to_index.borrow_mut() = coordinate_to_index;
-                cats = next_cats;
-            }
-
-            let mut dimension_label = if let Some(dim_label) = dim_label {
-                dim_label
-            } else if let Some(label) = &variables[0].label {
-                Value::new_user_text(label)
-            } else {
-                Value::new_empty()
-            };
-            if let Some(dim_cell) = dim_cell {
-                dimension_label.set_cell_style(dim_cell);
-            }
-            if let Some(dim_font) = dim_font {
-                dimension_label.set_font_style(dim_font);
-            }
-
-            let dimension = Dimension::new(
-                Group::new(dimension_label)
-                    .with_multiple(cats.into_iter().map(|cb| cb.category))
-                    .with_show_label(show_label),
-            )
-            .with_hide_all_labels(hide_all_labels);
-
-            for variable in &variables {
-                variable.dimension_index.set(Some(dims.len()));
-            }
-            dims.push(Dim {
-                axis: a,
-                dimension,
-                coordinate: variables[0],
-            });
-        }
-
-        fn decode_dimensions<'a, 'b>(
-            variables: impl IntoIterator<Item = &'a str>,
-            series: &'b BTreeMap<&str, Series>,
-            axes: &HashMap<usize, &Axis>,
-            styles: &HashMap<&str, &Style>,
-            a: Axis3,
-            footnotes: &pivot::Footnotes,
-            level_ofs: &mut usize,
-            dims: &mut Vec<Dim<'b>>,
-        ) -> Vec<&'b Series> {
-            let variables = variables
-                .into_iter()
-                .zip(*level_ofs..)
-                .map(|(variable_name, level)| {
-                    series
-                        .get(variable_name)
-                        .filter(|s| !s.values.is_empty())
-                        .map(|s| (s, level))
-                })
-                .collect::<Vec<_>>();
-            *level_ofs += variables.len();
-            let mut dim_vars = Vec::new();
-            let mut categorical_vars = Vec::new();
-            for var in variables {
-                if let Some((var, level)) = var {
-                    dim_vars.push((var, level));
-                } else if !dim_vars.is_empty() {
-                    categorical_vars.push(dim_vars[0].0);
-                    decode_dimension(&dim_vars, axes, styles, a, footnotes, dims);
-                    dim_vars.clear();
-                }
-            }
-            if !dim_vars.is_empty() {
-                categorical_vars.push(&dim_vars[0].0);
-                decode_dimension(&dim_vars, axes, styles, a, footnotes, dims);
-            }
-            categorical_vars
-        }
-
-        struct Dim<'a> {
-            axis: Axis3,
-            dimension: pivot::Dimension,
-            coordinate: &'a Series,
-        }
-
-        let axes = graph
-            .facet_layout
-            .children
-            .iter()
-            .filter_map(|child| child.facet_level())
-            .map(|facet_level| (facet_level.level, &facet_level.axis))
-            .collect::<HashMap<_, _>>();
-        let styles = self
-            .children
-            .iter()
-            .filter_map(|child| child.style())
-            .filter_map(|style| style.id.as_ref().map(|id| (id.as_str(), style)))
-            .collect::<HashMap<_, _>>();
-        let mut dims = Vec::new();
-        let mut level_ofs = 1;
-        let dim_series = graph.faceting.dimensions().map(|axis, dimension| {
-            decode_dimensions(
-                dimension.iter().copied(),
-                &series,
-                &axes,
-                &styles,
-                axis,
-                &footnotes,
-                &mut level_ofs,
-                &mut dims,
-            )
-        });
-
-        let current_layer = dim_series[Axis3::Z]
-            .iter()
-            .map(|series| {
-                let name = &series.name;
-                let coordinate = graph.faceting.layer_value(&name).unwrap();
-                series
-                    .coordinate_to_index
-                    .borrow()
-                    .get(&coordinate)
-                    .unwrap()
-                    .as_leaf()
-                    .unwrap()
-            })
-            .collect::<Vec<_>>();
+        let (mut dims, current_layer) = self.decode_dimensions(graph, &series, &footnotes);
 
         let cell_footnotes = graph
             .interval
             .footnotes()
             .and_then(|footnotes| series.get(footnotes.variable.as_str()));
-        let mut data = HashMap::new();
-        if let Some(cell) = series.get("cell") {
-            let mut coords = Vec::with_capacity(dims.len());
-            let (cell_formats, format_map) = graph.interval.labeling.decode_format_map(&series);
-            'outer: for (i, cell) in cell.values.iter().enumerate() {
-                coords.clear();
-                for dim in &dims {
-                    if let Some(coordinate) = dim.coordinate.values.get(i)
-                        && let Some(coordinate) = coordinate.category()
-                        && let Some(locator) =
-                            dim.coordinate.coordinate_to_index.borrow().get(&coordinate)
-                        && let Some(index) = locator.as_leaf()
-                    {
-                        coords.push(index);
-                    } else {
-                        // XXX warn
-                        continue 'outer;
-                    }
-                }
-
-                let format = if let Some(cell_formats) = &cell_formats
-                    && let Some(value) = cell_formats.values.get(i)
-                {
-                    value.as_format(&format_map)
-                } else {
-                    F40_2
-                };
-                let mut value = cell.as_pivot_value(format);
-
-                if let Some(cell_footnotes) = &cell_footnotes
-                    && let Some(dv) = cell_footnotes.values.get(i)
-                {
-                    if let Some(s) = dv.value.as_string() {
-                        for part in s.split(',') {
-                            if let Ok(index) = part.parse::<usize>()
-                                && let Some(index) = index.checked_sub(1)
-                                && let Some(footnote) = footnotes.get(index)
-                            {
-                                value.add_footnote(footnote);
-                            }
-                        }
-                    }
-                }
-
-                if let Some(datum) = value.datum()
-                    && datum.is_sysmis()
-                    && value.footnotes().is_empty()
-                {
-                    // A system-missing value without a footnote represents an empty cell.
-                } else {
-                    data.insert(coords.clone(), value);
-                }
-            }
-        } else {
-            warn(LegacyXmlWarning::MissingData);
-        }
+        let mut data = self.decode_data(graph, &footnotes, cell_footnotes, &dims, &series, warn);
 
         for scp in graph
             .facet_layout
@@ -2212,3 +2044,197 @@ struct Container {
     #[serde(default)]
     label_frames: Vec<LabelFrame>,
 }
+/// Builds one dimension from `variables` and appends it to `dims`.
+///
+/// Each element of `variables` pairs a series with its level number.  The
+/// first series supplies the leaf categories; each later series groups the
+/// categories built so far.
+///
+/// - `axes`: Axes by level number, used to find the dimension's label and
+///   whether its category labels are hidden.
+///
+/// - `styles`: Styles by ID, used to resolve style references.
+///
+/// - `a`: The axis recorded in the new [Dim].
+///
+/// - `footnotes`: Footnotes passed along when decoding labels and category
+///   names.
+///
+/// Besides pushing onto `dims`, this replaces the `coordinate_to_index` map
+/// of every series in `variables` and sets each one's `dimension_index` to
+/// the position of the new dimension in `dims`.
+///
+/// # Panics
+///
+/// Panics if `variables` is empty, or if a grouping value that yields a
+/// group has no category.
+fn decode_dimension<'a>(
+    variables: &[(&'a Series, usize)],
+    axes: &HashMap<usize, &Axis>,
+    styles: &HashMap<&str, &Style>,
+    a: Axis3,
+
+    footnotes: &pivot::Footnotes,
+    dims: &mut Vec<Dim<'a>>,
+) {
+    let base_level = variables[0].1;
+    // The dimension's label comes from the axis at the level just past the
+    // last variable, and only if `a` converts to an `Axis2`.
+    let (show_label, dim_cell, dim_font, dim_label) = if let Ok(a) = Axis2::try_from(a)
+        && let Some(axis) = axes.get(&(base_level + variables.len()))
+        && let Some(label) = &axis.label
+    {
+        let mut dimension_style = AreaStyle::default_for_area(Area::Labels(a));
+        let style = label.style.get(&styles);
+        let fg = style;
+        let bg = label.text_frame_style.as_ref().and_then(|r| r.get(styles));
+        (
+            style.is_some_and(|s| s.visible.unwrap_or(true)),
+            Style::decode_cell_style(fg, &mut dimension_style.cell_style)
+                .then_some(dimension_style.cell_style),
+            Style::decode_font_style(fg, bg, &mut dimension_style.font_style)
+                .then_some(dimension_style.font_style),
+            LabelFrame::decode_label(&[label], footnotes),
+        )
+    } else {
+        (false, None, None, None)
+    };
+
+    // Category labels are hidden only if the major ticks style of the axis at
+    // the first variable's level is explicitly invisible.
+    let hide_all_labels = if let Some(axis) = axes.get(&base_level)
+        && let Some(style) = axis.major_ticks.style.get(styles)
+        && style.visible == Some(false)
+    {
+        true
+    } else {
+        false
+    };
+
+    // The level numbers are not needed past this point.
+    let variables = variables
+        .into_iter()
+        .map(|(series, _level)| *series)
+        .collect::<Vec<_>>();
+
+    #[derive(Clone, Debug)]
+    struct CatBuilder {
+        /// The category we've built so far.
+        category: Category,
+
+        /// The index in the series of one example of this category.
+        index: usize,
+
+        /// The range of leaf indexes covered by `category`.
+        ///
+        /// If `category` is a leaf, the range has a length of 1.
+        /// If `category` is a group, the length is at least 1.
+        leaves: Range<usize>,
+
+        /// How to find this category in its dimension.
+        location: CategoryLocator,
+    }
+
+    // Make leaf categories.
+    //
+    // `map` records, for each distinct coordinate, the first index at which
+    // it appears.  Iterating the `BTreeMap` then yields the coordinates in
+    // sorted order, which becomes the order of the leaves.
+    let mut map = BTreeMap::new();
+    for (index, value) in variables[0].values.iter().enumerate() {
+        if let Some(coordinate) = value.category() {
+            map.entry(coordinate).or_insert(index);
+        }
+    }
+    let mut coordinate_to_index = BTreeMap::new();
+    let mut cats = Vec::<CatBuilder>::new();
+    for (coordinate, index) in map {
+        let value = &variables[0].values[index];
+        coordinate_to_index.insert(coordinate, CategoryLocator::new_leaf(cats.len()));
+        cats.push(CatBuilder {
+            category: Category::from(Leaf::new(variables[0].new_name(value, footnotes))),
+            index,
+            leaves: cats.len()..cats.len() + 1,
+            location: CategoryLocator::new_leaf(cats.len()),
+        });
+    }
+    *variables[0].coordinate_to_index.borrow_mut() = coordinate_to_index;
+
+    // Now group them, in one pass per grouping variable, innermost first.
+    for variable in &variables[1..] {
+        let mut coordinate_to_index = BTreeMap::new();
+        let mut next_cats = Vec::with_capacity(cats.len());
+        // `start..end` is the current run of adjacent categories whose
+        // example rows share one value of `variable`.
+        let mut start = 0;
+        for end in 1..=cats.len() {
+            let dv1 = &variable.values[cats[start].index];
+            // The run ends at the last category or where the value changes.
+            if end >= cats.len() || &variable.values[cats[end].index].value != &dv1.value {
+                let name = variable.map.lookup(dv1);
+                if name.is_number_or(|s| !s.is_empty()) {
+                    // Wrap the run in a new group named after the value.
+                    let name = variable.new_name(dv1, footnotes);
+                    let mut group = Group::new(name);
+                    for i in start..end {
+                        group.push(cats[i].category.clone());
+                    }
+                    let next_cat = CatBuilder {
+                        category: Category::from(group),
+                        index: cats[start].index,
+                        leaves: cats[start].leaves.start..cats[end - 1].leaves.end,
+                        location: cats[start].location.parent(),
+                    };
+                    coordinate_to_index
+                        .insert(dv1.category().unwrap() /*XXX?*/, next_cat.location);
+                    next_cats.push(next_cat);
+                } else {
+                    // The value's name is an empty string, so no group is
+                    // made and the run's categories are carried over as is.
+                    // XXX coordinate_to_index?
+                    for cat in &cats[start..end] {
+                        next_cats.push(cat.clone());
+                    }
+                };
+                start = end;
+            }
+        }
+        *variable.coordinate_to_index.borrow_mut() = coordinate_to_index;
+        cats = next_cats;
+    }
+
+    // Prefer the label decoded from the axis, then the first variable's own
+    // label, then an empty value.
+    let mut dimension_label = if let Some(dim_label) = dim_label {
+        dim_label
+    } else if let Some(label) = &variables[0].label {
+        Value::new_user_text(label)
+    } else {
+        Value::new_empty()
+    };
+    if let Some(dim_cell) = dim_cell {
+        dimension_label.set_cell_style(dim_cell);
+    }
+    if let Some(dim_font) = dim_font {
+        dimension_label.set_font_style(dim_font);
+    }
+
+    let dimension = Dimension::new(
+        Group::new(dimension_label)
+            .with_multiple(cats.into_iter().map(|cb| cb.category))
+            .with_show_label(show_label),
+    )
+    .with_hide_all_labels(hide_all_labels);
+
+    // `dims.len()` is the index that the new dimension gets when it is pushed
+    // just below.
+    for variable in &variables {
+        variable.dimension_index.set(Some(dims.len()));
+    }
+    dims.push(Dim {
+        axis: a,
+        dimension,
+        coordinate: variables[0],
+    });
+}
+
+/// Decodes the dimensions for the variables named by `variables` and appends
+/// them to `dims`, all with axis `a`.
+///
+/// The names are numbered consecutively starting at `level_ofs`.  A name that
+/// is missing from `series`, or whose series has no values, still consumes a
+/// level number but acts as a separator: each maximal run of usable series
+/// between separators is passed to `decode_dimension` as one dimension.
+///
+/// `axes`, `styles`, and `footnotes` are passed through to
+/// `decode_dimension`.
+///
+/// Returns the first series of each run, in order.
+fn decode_axis_dimensions<'a, 'b>(
+    variables: impl IntoIterator<Item = &'a str>,
+    series: &'b BTreeMap<&str, Series>,
+    axes: &HashMap<usize, &Axis>,
+    styles: &HashMap<&str, &Style>,
+    a: Axis3,
+    footnotes: &pivot::Footnotes,
+    level_ofs: usize,
+    dims: &mut Vec<Dim<'b>>,
+) -> Vec<&'b Series> {
+    // Pair each name with its level before looking it up, so that unusable
+    // names are numbered too.  `None` marks an unusable name.
+    let variables = variables
+        .into_iter()
+        .zip(level_ofs..)
+        .map(|(variable_name, level)| {
+            series
+                .get(variable_name)
+                .filter(|s| !s.values.is_empty())
+                .map(|s| (s, level))
+        })
+        .collect::<Vec<_>>();
+    // The run of usable series collected since the last separator.
+    let mut dim_vars = Vec::new();
+    let mut categorical_vars = Vec::new();
+    for var in variables {
+        if let Some((var, level)) = var {
+            dim_vars.push((var, level));
+        } else if !dim_vars.is_empty() {
+            categorical_vars.push(dim_vars[0].0);
+            decode_dimension(&dim_vars, axes, styles, a, footnotes, dims);
+            dim_vars.clear();
+        }
+    }
+    // Flush the final run, which has no separator after it.
+    if !dim_vars.is_empty() {
+        categorical_vars.push(&dim_vars[0].0);
+        decode_dimension(&dim_vars, axes, styles, a, footnotes, dims);
+    }
+    categorical_vars
+}
+
+/// A decoded dimension, along with the axis it is on and the series it was
+/// built from.
+struct Dim<'a> {
+    /// The axis that the dimension is on.
+    axis: Axis3,
+
+    /// The dimension itself.
+    dimension: pivot::Dimension,
+
+    /// The series whose values and `coordinate_to_index` map locate a cell
+    /// along this dimension.
+    coordinate: &'a Series,
+}