From 6f98a46d3fd5da6cd7d3370e8130ca381030dedf Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 2 Jan 2026 12:57:52 -0800 Subject: [PATCH] cleanup --- rust/pspp/src/spv/read/legacy_xml.rs | 616 ++++++++++++++------------- 1 file changed, 321 insertions(+), 295 deletions(-) diff --git a/rust/pspp/src/spv/read/legacy_xml.rs b/rust/pspp/src/spv/read/legacy_xml.rs index 51e4480e67..ad02c37f65 100644 --- a/rust/pspp/src/spv/read/legacy_xml.rs +++ b/rust/pspp/src/spv/read/legacy_xml.rs @@ -308,6 +308,131 @@ impl Visualization { pivot::Footnotes::from_iter(footnotes) } + fn decode_dimensions<'a>( + &self, + graph: &Graph, + series: &'a BTreeMap<&str, Series>, + footnotes: &pivot::Footnotes, + ) -> (Vec>, Vec) { + let axes = graph + .facet_layout + .children + .iter() + .filter_map(|child| child.facet_level()) + .map(|facet_level| (facet_level.level, &facet_level.axis)) + .collect::>(); + let styles = self + .children + .iter() + .filter_map(|child| child.style()) + .filter_map(|style| style.id.as_ref().map(|id| (id.as_str(), style))) + .collect::>(); + let mut dims = Vec::new(); + let mut level_ofs = 1; + let mut current_layer = Vec::new(); + + for (axis, dimension) in graph.faceting.dimensions() { + let dim_series = decode_axis_dimensions( + dimension.iter().copied(), + &series, + &axes, + &styles, + axis, + &footnotes, + level_ofs, + &mut dims, + ); + if axis == Axis3::Z { + current_layer = dim_series + .into_iter() + .map(|series| { + let name = &series.name; + let coordinate = graph.faceting.layer_value(&name).unwrap(); + series + .coordinate_to_index + .borrow() + .get(&coordinate) + .unwrap() + .as_leaf() + .unwrap() + }) + .collect(); + } + level_ofs += dimension.len(); + } + + (dims, current_layer) + } + + fn decode_data( + &self, + graph: &Graph, + footnotes: &pivot::Footnotes, + cell_footnotes: Option<&Series>, + dims: &[Dim], + series: &BTreeMap<&str, Series>, + warn: &mut dyn FnMut(LegacyXmlWarning), + ) -> HashMap, Value> { + let Some(cell) = series.get("cell") else { + warn(LegacyXmlWarning::MissingData); + return HashMap::default(); + }; + + let mut data = HashMap::new(); + let mut coords = Vec::with_capacity(dims.len()); + let (cell_formats, format_map) = graph.interval.labeling.decode_format_map(&series); + 'outer: for (i, cell) in cell.values.iter().enumerate() { + coords.clear(); + for dim in dims { + if let Some(coordinate) = dim.coordinate.values.get(i) + && let Some(coordinate) = coordinate.category() + && let Some(locator) = + dim.coordinate.coordinate_to_index.borrow().get(&coordinate) + && let Some(index) = locator.as_leaf() + { + coords.push(index); + } else { + // XXX warn + continue 'outer; + } + } + + let format = if let Some(cell_formats) = &cell_formats + && let Some(value) = cell_formats.values.get(i) + { + value.as_format(&format_map) + } else { + F40_2 + }; + let mut value = cell.as_pivot_value(format); + + if let Some(cell_footnotes) = &cell_footnotes + && let Some(dv) = cell_footnotes.values.get(i) + { + if let Some(s) = dv.value.as_string() { + for part in s.split(',') { + if let Ok(index) = part.parse::() + && let Some(index) = index.checked_sub(1) + && let Some(footnote) = footnotes.get(index) + { + value.add_footnote(footnote); + } + } + } + } + + if let Some(datum) = value.datum() + && datum.is_sysmis() + && value.footnotes().is_empty() + { + // A system-missing value without a footnote represents an empty cell. + } else { + data.insert(coords.clone(), value); + } + } + data + } + pub fn decode( &self, data: IndexMap>>, @@ -354,306 +479,13 @@ impl Visualization { let caption = LabelFrame::decode_label(caption_labels, &footnotes); let series = self.decode_series(data, warn); - - fn decode_dimension<'a>( - variables: &[(&'a Series, usize)], - axes: &HashMap, - styles: &HashMap<&str, &Style>, - a: Axis3, - - footnotes: &pivot::Footnotes, - dims: &mut Vec>, - ) { - let base_level = variables[0].1; - let (show_label, dim_cell, dim_font, dim_label) = if let Ok(a) = Axis2::try_from(a) - && let Some(axis) = axes.get(&(base_level + variables.len())) - && let Some(label) = &axis.label - { - let mut dimension_style = AreaStyle::default_for_area(Area::Labels(a)); - let style = label.style.get(&styles); - let fg = style; - let bg = label.text_frame_style.as_ref().and_then(|r| r.get(styles)); - ( - style.is_some_and(|s| s.visible.unwrap_or(true)), - Style::decode_cell_style(fg, &mut dimension_style.cell_style) - .then_some(dimension_style.cell_style), - Style::decode_font_style(fg, bg, &mut dimension_style.font_style) - .then_some(dimension_style.font_style), - LabelFrame::decode_label(&[label], footnotes), - ) - } else { - (false, None, None, None) - }; - - let hide_all_labels = if let Some(axis) = axes.get(&base_level) - && let Some(style) = axis.major_ticks.style.get(styles) - && style.visible == Some(false) - { - true - } else { - false - }; - - let variables = variables - .into_iter() - .map(|(series, _level)| *series) - .collect::>(); - - #[derive(Clone, Debug)] - struct CatBuilder { - /// The category we've built so far. - category: Category, - - /// The index in the series of one example of this category. - index: usize, - - /// The range of leaf indexes covered by `category`. - /// - /// If `category` is a leaf, the range has a length of 1. - /// If `category` is a group, the length is at least 1. - leaves: Range, - - /// How to find this category in its dimension. - location: CategoryLocator, - } - - // Make leaf categories. - let mut map = BTreeMap::new(); - for (index, value) in variables[0].values.iter().enumerate() { - if let Some(coordinate) = value.category() { - map.entry(coordinate).or_insert(index); - } - } - let mut coordinate_to_index = BTreeMap::new(); - let mut cats = Vec::::new(); - for (coordinate, index) in map { - let value = &variables[0].values[index]; - coordinate_to_index.insert(coordinate, CategoryLocator::new_leaf(cats.len())); - cats.push(CatBuilder { - category: Category::from(Leaf::new(variables[0].new_name(value, footnotes))), - index, - leaves: cats.len()..cats.len() + 1, - location: CategoryLocator::new_leaf(cats.len()), - }); - } - *variables[0].coordinate_to_index.borrow_mut() = coordinate_to_index; - - // Now group them, in one pass per grouping variable, innermost first. - for variable in &variables[1..] { - let mut coordinate_to_index = BTreeMap::new(); - let mut next_cats = Vec::with_capacity(cats.len()); - let mut start = 0; - for end in 1..=cats.len() { - let dv1 = &variable.values[cats[start].index]; - if end >= cats.len() || &variable.values[cats[end].index].value != &dv1.value { - let name = variable.map.lookup(dv1); - if name.is_number_or(|s| !s.is_empty()) { - let name = variable.new_name(dv1, footnotes); - let mut group = Group::new(name); - for i in start..end { - group.push(cats[i].category.clone()); - } - let next_cat = CatBuilder { - category: Category::from(group), - index: cats[start].index, - leaves: cats[start].leaves.start..cats[end - 1].leaves.end, - location: cats[start].location.parent(), - }; - coordinate_to_index - .insert(dv1.category().unwrap() /*XXX?*/, next_cat.location); - next_cats.push(next_cat); - } else { - // XXX coordinate_to_index? - for cat in &cats[start..end] { - next_cats.push(cat.clone()); - } - }; - start = end; - } - } - *variable.coordinate_to_index.borrow_mut() = coordinate_to_index; - cats = next_cats; - } - - let mut dimension_label = if let Some(dim_label) = dim_label { - dim_label - } else if let Some(label) = &variables[0].label { - Value::new_user_text(label) - } else { - Value::new_empty() - }; - if let Some(dim_cell) = dim_cell { - dimension_label.set_cell_style(dim_cell); - } - if let Some(dim_font) = dim_font { - dimension_label.set_font_style(dim_font); - } - - let dimension = Dimension::new( - Group::new(dimension_label) - .with_multiple(cats.into_iter().map(|cb| cb.category)) - .with_show_label(show_label), - ) - .with_hide_all_labels(hide_all_labels); - - for variable in &variables { - variable.dimension_index.set(Some(dims.len())); - } - dims.push(Dim { - axis: a, - dimension, - coordinate: variables[0], - }); - } - - fn decode_dimensions<'a, 'b>( - variables: impl IntoIterator, - series: &'b BTreeMap<&str, Series>, - axes: &HashMap, - styles: &HashMap<&str, &Style>, - a: Axis3, - footnotes: &pivot::Footnotes, - level_ofs: &mut usize, - dims: &mut Vec>, - ) -> Vec<&'b Series> { - let variables = variables - .into_iter() - .zip(*level_ofs..) - .map(|(variable_name, level)| { - series - .get(variable_name) - .filter(|s| !s.values.is_empty()) - .map(|s| (s, level)) - }) - .collect::>(); - *level_ofs += variables.len(); - let mut dim_vars = Vec::new(); - let mut categorical_vars = Vec::new(); - for var in variables { - if let Some((var, level)) = var { - dim_vars.push((var, level)); - } else if !dim_vars.is_empty() { - categorical_vars.push(dim_vars[0].0); - decode_dimension(&dim_vars, axes, styles, a, footnotes, dims); - dim_vars.clear(); - } - } - if !dim_vars.is_empty() { - categorical_vars.push(&dim_vars[0].0); - decode_dimension(&dim_vars, axes, styles, a, footnotes, dims); - } - categorical_vars - } - - struct Dim<'a> { - axis: Axis3, - dimension: pivot::Dimension, - coordinate: &'a Series, - } - - let axes = graph - .facet_layout - .children - .iter() - .filter_map(|child| child.facet_level()) - .map(|facet_level| (facet_level.level, &facet_level.axis)) - .collect::>(); - let styles = self - .children - .iter() - .filter_map(|child| child.style()) - .filter_map(|style| style.id.as_ref().map(|id| (id.as_str(), style))) - .collect::>(); - let mut dims = Vec::new(); - let mut level_ofs = 1; - let dim_series = graph.faceting.dimensions().map(|axis, dimension| { - decode_dimensions( - dimension.iter().copied(), - &series, - &axes, - &styles, - axis, - &footnotes, - &mut level_ofs, - &mut dims, - ) - }); - - let current_layer = dim_series[Axis3::Z] - .iter() - .map(|series| { - let name = &series.name; - let coordinate = graph.faceting.layer_value(&name).unwrap(); - series - .coordinate_to_index - .borrow() - .get(&coordinate) - .unwrap() - .as_leaf() - .unwrap() - }) - .collect::>(); + let (mut dims, current_layer) = self.decode_dimensions(graph, &series, &footnotes); let cell_footnotes = graph .interval .footnotes() .and_then(|footnotes| series.get(footnotes.variable.as_str())); - let mut data = HashMap::new(); - if let Some(cell) = series.get("cell") { - let mut coords = Vec::with_capacity(dims.len()); - let (cell_formats, format_map) = graph.interval.labeling.decode_format_map(&series); - 'outer: for (i, cell) in cell.values.iter().enumerate() { - coords.clear(); - for dim in &dims { - if let Some(coordinate) = dim.coordinate.values.get(i) - && let Some(coordinate) = coordinate.category() - && let Some(locator) = - dim.coordinate.coordinate_to_index.borrow().get(&coordinate) - && let Some(index) = locator.as_leaf() - { - coords.push(index); - } else { - // XXX warn - continue 'outer; - } - } - - let format = if let Some(cell_formats) = &cell_formats - && let Some(value) = cell_formats.values.get(i) - { - value.as_format(&format_map) - } else { - F40_2 - }; - let mut value = cell.as_pivot_value(format); - - if let Some(cell_footnotes) = &cell_footnotes - && let Some(dv) = cell_footnotes.values.get(i) - { - if let Some(s) = dv.value.as_string() { - for part in s.split(',') { - if let Ok(index) = part.parse::() - && let Some(index) = index.checked_sub(1) - && let Some(footnote) = footnotes.get(index) - { - value.add_footnote(footnote); - } - } - } - } - - if let Some(datum) = value.datum() - && datum.is_sysmis() - && value.footnotes().is_empty() - { - // A system-missing value without a footnote represents an empty cell. - } else { - data.insert(coords.clone(), value); - } - } - } else { - warn(LegacyXmlWarning::MissingData); - } + let mut data = self.decode_data(graph, &footnotes, cell_footnotes, &dims, &series, warn); for scp in graph .facet_layout @@ -2212,3 +2044,197 @@ struct Container { #[serde(default)] label_frames: Vec, } +fn decode_dimension<'a>( + variables: &[(&'a Series, usize)], + axes: &HashMap, + styles: &HashMap<&str, &Style>, + a: Axis3, + + footnotes: &pivot::Footnotes, + dims: &mut Vec>, +) { + let base_level = variables[0].1; + let (show_label, dim_cell, dim_font, dim_label) = if let Ok(a) = Axis2::try_from(a) + && let Some(axis) = axes.get(&(base_level + variables.len())) + && let Some(label) = &axis.label + { + let mut dimension_style = AreaStyle::default_for_area(Area::Labels(a)); + let style = label.style.get(&styles); + let fg = style; + let bg = label.text_frame_style.as_ref().and_then(|r| r.get(styles)); + ( + style.is_some_and(|s| s.visible.unwrap_or(true)), + Style::decode_cell_style(fg, &mut dimension_style.cell_style) + .then_some(dimension_style.cell_style), + Style::decode_font_style(fg, bg, &mut dimension_style.font_style) + .then_some(dimension_style.font_style), + LabelFrame::decode_label(&[label], footnotes), + ) + } else { + (false, None, None, None) + }; + + let hide_all_labels = if let Some(axis) = axes.get(&base_level) + && let Some(style) = axis.major_ticks.style.get(styles) + && style.visible == Some(false) + { + true + } else { + false + }; + + let variables = variables + .into_iter() + .map(|(series, _level)| *series) + .collect::>(); + + #[derive(Clone, Debug)] + struct CatBuilder { + /// The category we've built so far. + category: Category, + + /// The index in the series of one example of this category. + index: usize, + + /// The range of leaf indexes covered by `category`. + /// + /// If `category` is a leaf, the range has a length of 1. + /// If `category` is a group, the length is at least 1. + leaves: Range, + + /// How to find this category in its dimension. + location: CategoryLocator, + } + + // Make leaf categories. + let mut map = BTreeMap::new(); + for (index, value) in variables[0].values.iter().enumerate() { + if let Some(coordinate) = value.category() { + map.entry(coordinate).or_insert(index); + } + } + let mut coordinate_to_index = BTreeMap::new(); + let mut cats = Vec::::new(); + for (coordinate, index) in map { + let value = &variables[0].values[index]; + coordinate_to_index.insert(coordinate, CategoryLocator::new_leaf(cats.len())); + cats.push(CatBuilder { + category: Category::from(Leaf::new(variables[0].new_name(value, footnotes))), + index, + leaves: cats.len()..cats.len() + 1, + location: CategoryLocator::new_leaf(cats.len()), + }); + } + *variables[0].coordinate_to_index.borrow_mut() = coordinate_to_index; + + // Now group them, in one pass per grouping variable, innermost first. + for variable in &variables[1..] { + let mut coordinate_to_index = BTreeMap::new(); + let mut next_cats = Vec::with_capacity(cats.len()); + let mut start = 0; + for end in 1..=cats.len() { + let dv1 = &variable.values[cats[start].index]; + if end >= cats.len() || &variable.values[cats[end].index].value != &dv1.value { + let name = variable.map.lookup(dv1); + if name.is_number_or(|s| !s.is_empty()) { + let name = variable.new_name(dv1, footnotes); + let mut group = Group::new(name); + for i in start..end { + group.push(cats[i].category.clone()); + } + let next_cat = CatBuilder { + category: Category::from(group), + index: cats[start].index, + leaves: cats[start].leaves.start..cats[end - 1].leaves.end, + location: cats[start].location.parent(), + }; + coordinate_to_index + .insert(dv1.category().unwrap() /*XXX?*/, next_cat.location); + next_cats.push(next_cat); + } else { + // XXX coordinate_to_index? + for cat in &cats[start..end] { + next_cats.push(cat.clone()); + } + }; + start = end; + } + } + *variable.coordinate_to_index.borrow_mut() = coordinate_to_index; + cats = next_cats; + } + + let mut dimension_label = if let Some(dim_label) = dim_label { + dim_label + } else if let Some(label) = &variables[0].label { + Value::new_user_text(label) + } else { + Value::new_empty() + }; + if let Some(dim_cell) = dim_cell { + dimension_label.set_cell_style(dim_cell); + } + if let Some(dim_font) = dim_font { + dimension_label.set_font_style(dim_font); + } + + let dimension = Dimension::new( + Group::new(dimension_label) + .with_multiple(cats.into_iter().map(|cb| cb.category)) + .with_show_label(show_label), + ) + .with_hide_all_labels(hide_all_labels); + + for variable in &variables { + variable.dimension_index.set(Some(dims.len())); + } + dims.push(Dim { + axis: a, + dimension, + coordinate: variables[0], + }); +} + +fn decode_axis_dimensions<'a, 'b>( + variables: impl IntoIterator, + series: &'b BTreeMap<&str, Series>, + axes: &HashMap, + styles: &HashMap<&str, &Style>, + a: Axis3, + footnotes: &pivot::Footnotes, + level_ofs: usize, + dims: &mut Vec>, +) -> Vec<&'b Series> { + let variables = variables + .into_iter() + .zip(level_ofs..) + .map(|(variable_name, level)| { + series + .get(variable_name) + .filter(|s| !s.values.is_empty()) + .map(|s| (s, level)) + }) + .collect::>(); + let mut dim_vars = Vec::new(); + let mut categorical_vars = Vec::new(); + for var in variables { + if let Some((var, level)) = var { + dim_vars.push((var, level)); + } else if !dim_vars.is_empty() { + categorical_vars.push(dim_vars[0].0); + decode_dimension(&dim_vars, axes, styles, a, footnotes, dims); + dim_vars.clear(); + } + } + if !dim_vars.is_empty() { + categorical_vars.push(&dim_vars[0].0); + decode_dimension(&dim_vars, axes, styles, a, footnotes, dims); + } + categorical_vars +} + +struct Dim<'a> { + axis: Axis3, + dimension: pivot::Dimension, + coordinate: &'a Series, +} -- 2.30.2