From: Ben Pfaff Date: Sun, 4 Jan 2026 00:58:35 +0000 (-0800) Subject: cleanup X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8bdc7086b53ad1e3037bfced2d40894620e3c76a;p=pspp cleanup --- diff --git a/rust/pspp/src/spv/read/legacy_xml.rs b/rust/pspp/src/spv/read/legacy_xml.rs index 08bf53706e..fe5bfd8e9d 100644 --- a/rust/pspp/src/spv/read/legacy_xml.rs +++ b/rust/pspp/src/spv/read/legacy_xml.rs @@ -335,417 +335,139 @@ impl Visualization { (dims, current_layer) } - fn decode_data( + fn graph(&self) -> Result<&Graph, super::Error> { + for child in &self.children { + match child { + VisChild::Graph(g) => return Ok(g), + _ => (), + } + } + Err(super::Error::LegacyMissingGraph) + } + + pub fn decode( &self, - graph: &Graph, - footnotes: &pivot::Footnotes, - dims: &[Dim], - series: &BTreeMap<&str, Series>, + binary_data: IndexMap>>>, + look: Look, warn: &mut dyn FnMut(LegacyXmlWarning), - ) -> HashMap, Value> { - let Some(cell) = series.get("cell") else { - warn(LegacyXmlWarning::MissingData); - return HashMap::default(); - }; + ) -> Result { + let graph = self.graph()?; + let (title, caption, footnotes) = self.decode_labels(graph); - let markers = if let Some(markers) = graph.interval.footnotes(false) - && let Some(series) = series.get(markers.variable.as_str()) - { - Some(( - series, - markers - .mappings - .iter() - .map(|m| (m.from.get(), m.to.as_str())) - .collect::>(), - )) - } else { - None - }; + let series = self.decode_series(binary_data, warn); + let (mut dims, current_layer) = self.decode_dimensions(graph, &series, &footnotes); - let mut data = HashMap::new(); - let mut coords = Vec::with_capacity(dims.len()); - let cell_formats = graph.interval.labeling.decode_format_map(&series); - let cell_footnotes = series.get("footnotes"); - 'outer: for (i, cell) in cell.values.iter().enumerate() { - coords.clear(); - for dim in dims { - if let Some(Some(coordinate)) = dim.coordinate.categories.get(i) - && let Some(locator) = - dim.coordinate.coordinate_to_index.borrow().get(&coordinate) - && let Some(index) = locator.as_leaf() - { - coords.push(index); - } else { - // XXX warn - continue 'outer; - } - } + let mut data = graph.decode_data(&footnotes, &dims, &series, warn); + graph.decode_styles(&look, &series, &mut dims, &mut data, &footnotes, warn); - let format = if let Some(cell_formats) = &cell_formats - && let Some(value) = cell_formats.values.get(i) - { - datum_as_format(value) - } else { - F40_2 - }; - let mut value = datum_as_pivot_value(cell, format); + Ok(PivotTable::new( + dims.into_iter() + .map(|dim| (dim.axis, dim.dimension)) + .collect::>(), + ) + .with_look(Arc::new(look)) + .with_footnotes(footnotes) + .with_data(data) + .with_layer(¤t_layer) + .with_decimal(Decimal::for_lang(&self.lang)) + .with_date(self.decode_date(warn)) + .with_optional_title(title) + .with_optional_caption(caption)) + } +} - if let Some(cell_footnotes) = &cell_footnotes - && let Some(dv) = cell_footnotes.values.get(i) - { - if let Some(s) = dv.as_string() { - for part in s.split(',') { - if let Ok(index) = part.parse::() - && let Some(index) = index.checked_sub(1) - && let Some(footnote) = footnotes.get(index) - { - value.add_footnote(footnote); - } - } - } - } - if let Some((series, mappings)) = &markers - && let Some(Some(category)) = series.categories.get(i) - && let Some(marker) = mappings.get(category) - { - value.add_subscript(*marker); - } +pub struct Series { + pub name: String, + label: Option, + categories: Vec>, + pub values: Vec>, + affixes: Vec, + coordinate_to_index: RefCell>, + dimension_index: Cell>, +} - if !value.is_empty() { - data.insert(coords.clone(), value); +impl Debug for Series { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Series") + .field("name", &self.name) + .finish_non_exhaustive() + } +} + +impl Series { + fn new(name: String, categories: Vec>, values: Vec>) -> Self { + Self { + name, + label: None, + categories, + values, + affixes: Vec::new(), + coordinate_to_index: Default::default(), + dimension_index: Default::default(), + } + } + fn with_label(self, label: Option) -> Self { + Self { label, ..self } + } + fn with_affixes(self, affixes: Vec) -> Self { + Self { affixes, ..self } + } + + fn new_name(&self, datum: &Datum, footnotes: &pivot::Footnotes) -> Value { + let mut name = Value::new_datum(datum); + for affix in &self.affixes { + if let Some(footnote) = footnotes.get(affix.defines_reference.get() - 1) { + name.add_footnote(footnote); } } - data + name } +} - fn decode_styles( - graph: &Graph, - look: &Look, - series: &BTreeMap<&str, Series>, - dims: &mut [Dim], - data: &mut HashMap, Value>, - footnotes: &pivot::Footnotes, +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +enum VisChild { + SourceVariable(SourceVariable), + DerivedVariable(DerivedVariable), + Graph(Graph), + LabelFrame(LabelFrame), + Container(Container), + Style(Style), + #[serde(other)] + Other, +} + +impl VisChild { + fn variable(&self) -> Option<&dyn Variable> { + match self { + VisChild::SourceVariable(source_variable) => Some(source_variable), + VisChild::DerivedVariable(derived_variable) => Some(derived_variable), + _ => None, + } + } + fn style(&self) -> Option<&Style> { + match self { + Self::Style(style) => Some(style), + _ => None, + } + } +} + +trait Variable { + fn decode<'a>( + &'a self, + data: &IndexMap>>>, + series: &mut BTreeMap<&'a str, Series>, warn: &mut dyn FnMut(LegacyXmlWarning), - ) { - let has_cell_footnotes = series.contains_key("footnotes"); - for scp in graph - .facet_layout - .children - .iter() - .filter_map(|child| child.set_cell_properties()) - { - #[derive(Copy, Clone, Debug, PartialEq)] - enum TargetType { - Labeling, - MajorTicks, - } + ) -> bool; + fn name(&self) -> &str; +} - impl TargetType { - fn from_id(target: &str) -> Option { - if target == "labeling" { - Some(Self::Labeling) - } else if target.ends_with("majorTicks") { - Some(Self::MajorTicks) - } else { - None - } - } - } - - #[derive(Debug)] - struct Target<'a> { - sf: &'a SetFormat, - target_type: TargetType, - } - impl<'a> Target<'a> { - fn decode( - &self, - intersect: &Intersect, - look: &Look, - series: &BTreeMap<&str, Series>, - dims: &mut [Dim], - data: &mut HashMap, Value>, - footnotes: &pivot::Footnotes, - has_cell_footnotes: bool, - ) { - match self.target_type { - TargetType::MajorTicks => { - // Formatting for individual row or column labels. - for w in intersect.wheres() { - let Some(s) = series.get(w.variable.as_str()) else { - continue; - }; - let Some(dim_index) = s.dimension_index.get() else { - continue; - }; - let dimension = &mut dims[dim_index].dimension; - let Ok(axis) = Axis2::try_from(dims[dim_index].axis) else { - continue; - }; - for index in - w.include.split(';').filter_map(|s| s.parse::().ok()) - { - if let Some(locator) = - s.coordinate_to_index.borrow().get(&index).copied() - && let Some(category) = dimension.root.category_mut(locator) - { - Style::apply_to_value( - category.name_mut(), - Some(&self.sf), - None, - None, - &look.areas[Area::Labels(axis)], - footnotes, - has_cell_footnotes, - ); - } - } - } - } - TargetType::Labeling => { - // Formatting for individual cells or groups of them - // with some dimensions in common. - let mut include = vec![HashSet::new(); dims.len()]; - for w in intersect.wheres() { - let Some(s) = series.get(w.variable.as_str()) else { - continue; - }; - let Some(dim_index) = s.dimension_index.get() else { - // Group indexes may be included even though - // they are redundant. Ignore them. - continue; - }; - for index in - w.include.split(';').filter_map(|s| s.parse::().ok()) - { - if let Some(locator) = - s.coordinate_to_index.borrow().get(&index).copied() - && let Some(leaf_index) = locator.as_leaf() - { - include[dim_index].insert(leaf_index); - } - } - } - - // XXX This is inefficient in the common case where - // all of the dimensions are matched. We should use - // a heuristic where if all of the dimensions are - // matched and the product of n[*] is less than the - // number of cells then iterate through all the - // possibilities rather than all the cells. Or even - // only do it if there is just one possibility. - for (indexes, value) in data { - let mut skip = false; - for (dimension, index) in indexes.iter().enumerate() { - if !include[dimension].is_empty() - && !include[dimension].contains(index) - { - skip = true; - break; - } - } - if !skip { - Style::apply_to_value( - value, - Some(&self.sf), - None, - None, - &look.areas[Area::Data(RowParity::Even)], - footnotes, - has_cell_footnotes, - ); - } - } - } - } - } - } - - let targets = scp - .sets - .iter() - .filter_map(|set| set.as_set_format()) - .filter_map(|sf| { - TargetType::from_id(&sf.target).map(|target_type| Target { sf, target_type }) - }) - .collect::>(); - - if let Some(union_) = &scp.union_ { - if !scp.apply_to_converse { - for intersect in &union_.intersects { - for target in &targets { - target.decode( - intersect, - &look, - &series, - dims, - data, - &footnotes, - has_cell_footnotes, - ); - } - } - } else { - // Not seen in the corpus. - warn(LegacyXmlWarning::UnsupportedApplyToConverse); - } - } else if scp.apply_to_converse { - for target in &targets { - if target.target_type == TargetType::Labeling { - for value in data.values_mut() { - Style::apply_to_value( - value, - Some(&target.sf), - None, - None, - &look.areas[Area::Data(RowParity::Even)], - &footnotes, - has_cell_footnotes, - ); - } - } - } - } - } - } - - fn graph(&self) -> Result<&Graph, super::Error> { - for child in &self.children { - match child { - VisChild::Graph(g) => return Ok(g), - _ => (), - } - } - Err(super::Error::LegacyMissingGraph) - } - - pub fn decode( - &self, - binary_data: IndexMap>>>, - look: Look, - warn: &mut dyn FnMut(LegacyXmlWarning), - ) -> Result { - let graph = self.graph()?; - let (title, caption, footnotes) = self.decode_labels(graph); - - let series = self.decode_series(binary_data, warn); - let (mut dims, current_layer) = self.decode_dimensions(graph, &series, &footnotes); - - let mut data = self.decode_data(graph, &footnotes, &dims, &series, warn); - - Self::decode_styles( - graph, &look, &series, &mut dims, &mut data, &footnotes, warn, - ); - - Ok(PivotTable::new( - dims.into_iter() - .map(|dim| (dim.axis, dim.dimension)) - .collect::>(), - ) - .with_look(Arc::new(look)) - .with_footnotes(footnotes) - .with_data(data) - .with_layer(¤t_layer) - .with_decimal(Decimal::for_lang(&self.lang)) - .with_date(self.decode_date(warn)) - .with_optional_title(title) - .with_optional_caption(caption)) - } -} - -pub struct Series { - pub name: String, - label: Option, - categories: Vec>, - pub values: Vec>, - affixes: Vec, - coordinate_to_index: RefCell>, - dimension_index: Cell>, -} - -impl Debug for Series { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Series") - .field("name", &self.name) - .finish_non_exhaustive() - } -} - -impl Series { - fn new(name: String, categories: Vec>, values: Vec>) -> Self { - Self { - name, - label: None, - categories, - values, - affixes: Vec::new(), - coordinate_to_index: Default::default(), - dimension_index: Default::default(), - } - } - fn with_label(self, label: Option) -> Self { - Self { label, ..self } - } - fn with_affixes(self, affixes: Vec) -> Self { - Self { affixes, ..self } - } - - fn new_name(&self, datum: &Datum, footnotes: &pivot::Footnotes) -> Value { - let mut name = Value::new_datum(datum); - for affix in &self.affixes { - if let Some(footnote) = footnotes.get(affix.defines_reference.get() - 1) { - name.add_footnote(footnote); - } - } - name - } -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -enum VisChild { - SourceVariable(SourceVariable), - DerivedVariable(DerivedVariable), - Graph(Graph), - LabelFrame(LabelFrame), - Container(Container), - Style(Style), - #[serde(other)] - Other, -} - -impl VisChild { - fn variable(&self) -> Option<&dyn Variable> { - match self { - VisChild::SourceVariable(source_variable) => Some(source_variable), - VisChild::DerivedVariable(derived_variable) => Some(derived_variable), - _ => None, - } - } - fn style(&self) -> Option<&Style> { - match self { - Self::Style(style) => Some(style), - _ => None, - } - } -} - -trait Variable { - fn decode<'a>( - &'a self, - data: &IndexMap>>>, - series: &mut BTreeMap<&'a str, Series>, - warn: &mut dyn FnMut(LegacyXmlWarning), - ) -> bool; - fn name(&self) -> &str; -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -struct SourceVariable { - #[serde(rename = "@id")] - id: String, +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct SourceVariable { + #[serde(rename = "@id")] + id: String, /// The `source-name` in the `tableData.bin` member. #[serde(rename = "@source")] @@ -1618,6 +1340,282 @@ struct Graph { interval: Interval, } +impl Graph { + fn decode_data( + &self, + footnotes: &pivot::Footnotes, + dims: &[Dim], + series: &BTreeMap<&str, Series>, + warn: &mut dyn FnMut(LegacyXmlWarning), + ) -> HashMap, Value> { + let Some(cell) = series.get("cell") else { + warn(LegacyXmlWarning::MissingData); + return HashMap::default(); + }; + + let markers = if let Some(markers) = self.interval.footnotes(false) + && let Some(series) = series.get(markers.variable.as_str()) + { + Some(( + series, + markers + .mappings + .iter() + .map(|m| (m.from.get(), m.to.as_str())) + .collect::>(), + )) + } else { + None + }; + + let mut data = HashMap::new(); + let mut coords = Vec::with_capacity(dims.len()); + let cell_formats = self.interval.labeling.decode_format_map(&series); + let cell_footnotes = series.get("footnotes"); + 'outer: for (i, cell) in cell.values.iter().enumerate() { + coords.clear(); + for dim in dims { + if let Some(Some(coordinate)) = dim.coordinate.categories.get(i) + && let Some(locator) = + dim.coordinate.coordinate_to_index.borrow().get(&coordinate) + && let Some(index) = locator.as_leaf() + { + coords.push(index); + } else { + // XXX warn + continue 'outer; + } + } + + let format = if let Some(cell_formats) = &cell_formats + && let Some(value) = cell_formats.values.get(i) + { + datum_as_format(value) + } else { + F40_2 + }; + let mut value = datum_as_pivot_value(cell, format); + + if let Some(cell_footnotes) = &cell_footnotes + && let Some(dv) = cell_footnotes.values.get(i) + { + if let Some(s) = dv.as_string() { + for part in s.split(',') { + if let Ok(index) = part.parse::() + && let Some(index) = index.checked_sub(1) + && let Some(footnote) = footnotes.get(index) + { + value.add_footnote(footnote); + } + } + } + } + if let Some((series, mappings)) = &markers + && let Some(Some(category)) = series.categories.get(i) + && let Some(marker) = mappings.get(category) + { + value.add_subscript(*marker); + } + + if !value.is_empty() { + data.insert(coords.clone(), value); + } + } + data + } + + fn decode_styles( + &self, + look: &Look, + series: &BTreeMap<&str, Series>, + dims: &mut [Dim], + data: &mut HashMap, Value>, + footnotes: &pivot::Footnotes, + warn: &mut dyn FnMut(LegacyXmlWarning), + ) { + let has_cell_footnotes = series.contains_key("footnotes"); + for scp in self + .facet_layout + .children + .iter() + .filter_map(|child| child.set_cell_properties()) + { + #[derive(Copy, Clone, Debug, PartialEq)] + enum TargetType { + Labeling, + MajorTicks, + } + + impl TargetType { + fn from_id(target: &str) -> Option { + if target == "labeling" { + Some(Self::Labeling) + } else if target.ends_with("majorTicks") { + Some(Self::MajorTicks) + } else { + None + } + } + } + + #[derive(Debug)] + struct Target<'a> { + sf: &'a SetFormat, + target_type: TargetType, + } + impl<'a> Target<'a> { + fn decode( + &self, + intersect: &Intersect, + look: &Look, + series: &BTreeMap<&str, Series>, + dims: &mut [Dim], + data: &mut HashMap, Value>, + footnotes: &pivot::Footnotes, + has_cell_footnotes: bool, + ) { + match self.target_type { + TargetType::MajorTicks => { + // Formatting for individual row or column labels. + for w in intersect.wheres() { + let Some(s) = series.get(w.variable.as_str()) else { + continue; + }; + let Some(dim_index) = s.dimension_index.get() else { + continue; + }; + let dimension = &mut dims[dim_index].dimension; + let Ok(axis) = Axis2::try_from(dims[dim_index].axis) else { + continue; + }; + for index in + w.include.split(';').filter_map(|s| s.parse::().ok()) + { + if let Some(locator) = + s.coordinate_to_index.borrow().get(&index).copied() + && let Some(category) = dimension.root.category_mut(locator) + { + Style::apply_to_value( + category.name_mut(), + Some(&self.sf), + None, + None, + &look.areas[Area::Labels(axis)], + footnotes, + has_cell_footnotes, + ); + } + } + } + } + TargetType::Labeling => { + // Formatting for individual cells or groups of them + // with some dimensions in common. + let mut include = vec![HashSet::new(); dims.len()]; + for w in intersect.wheres() { + let Some(s) = series.get(w.variable.as_str()) else { + continue; + }; + let Some(dim_index) = s.dimension_index.get() else { + // Group indexes may be included even though + // they are redundant. Ignore them. + continue; + }; + for index in + w.include.split(';').filter_map(|s| s.parse::().ok()) + { + if let Some(locator) = + s.coordinate_to_index.borrow().get(&index).copied() + && let Some(leaf_index) = locator.as_leaf() + { + include[dim_index].insert(leaf_index); + } + } + } + + // XXX This is inefficient in the common case where + // all of the dimensions are matched. We should use + // a heuristic where if all of the dimensions are + // matched and the product of n[*] is less than the + // number of cells then iterate through all the + // possibilities rather than all the cells. Or even + // only do it if there is just one possibility. + for (indexes, value) in data { + let mut skip = false; + for (dimension, index) in indexes.iter().enumerate() { + if !include[dimension].is_empty() + && !include[dimension].contains(index) + { + skip = true; + break; + } + } + if !skip { + Style::apply_to_value( + value, + Some(&self.sf), + None, + None, + &look.areas[Area::Data(RowParity::Even)], + footnotes, + has_cell_footnotes, + ); + } + } + } + } + } + } + + let targets = scp + .sets + .iter() + .filter_map(|set| set.as_set_format()) + .filter_map(|sf| { + TargetType::from_id(&sf.target).map(|target_type| Target { sf, target_type }) + }) + .collect::>(); + + if let Some(union_) = &scp.union_ { + if !scp.apply_to_converse { + for intersect in &union_.intersects { + for target in &targets { + target.decode( + intersect, + &look, + &series, + dims, + data, + &footnotes, + has_cell_footnotes, + ); + } + } + } else { + // Not seen in the corpus. + warn(LegacyXmlWarning::UnsupportedApplyToConverse); + } + } else if scp.apply_to_converse { + for target in &targets { + if target.target_type == TargetType::Labeling { + for value in data.values_mut() { + Style::apply_to_value( + value, + Some(&target.sf), + None, + None, + &look.areas[Area::Data(RowParity::Even)], + &footnotes, + has_cell_footnotes, + ); + } + } + } + } + } + } +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct Faceting {