From f647243e81a0011eb973dc7759d57aab676cf93a Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 4 Nov 2025 09:17:39 -0800 Subject: [PATCH] first version of xml decoder that might do something --- rust/doc/src/spv/legacy-detail-xml.md | 11 + rust/pspp/src/output/spv/legacy_bin.rs | 7 +- rust/pspp/src/output/spv/legacy_xml.rs | 270 ++++++++++++++++++++----- 3 files changed, 233 insertions(+), 55 deletions(-) diff --git a/rust/doc/src/spv/legacy-detail-xml.md b/rust/doc/src/spv/legacy-detail-xml.md index 59f429c3de..1f8449e8dc 100644 --- a/rust/doc/src/spv/legacy-detail-xml.md +++ b/rust/doc/src/spv/legacy-detail-xml.md @@ -1474,3 +1474,14 @@ printingProperties The `name` attribute appears only in [standalone `.stt` files](../tablelook.md#the-tlo-format). +## Cell Data + +A variable named `cell` always exists. This variable holds the data +displayed in the table. + +`cell` is taken along with the `categories` variables from [`nest` and +`layer`], which specify the values of the dimensions. + +XXX example + +[`nest` and `layer`]: #the-faceting-element diff --git a/rust/pspp/src/output/spv/legacy_bin.rs b/rust/pspp/src/output/spv/legacy_bin.rs index 8af6c7f6fc..f1e0e46342 100644 --- a/rust/pspp/src/output/spv/legacy_bin.rs +++ b/rust/pspp/src/output/spv/legacy_bin.rs @@ -91,15 +91,16 @@ impl DataValue { .and_then(|v| (v >= 0.0 && v < usize::MAX as f64).then_some(v as usize)) } - pub fn as_format(&self, format_map: &HashMap) -> Format { + // This should probably be a method on some hypothetical FormatMap. + pub fn as_format(&self, format_map: &HashMap) -> Format { let f = match &self.value { - Datum::Number(Some(number)) => *number as u32, + Datum::Number(Some(number)) => *number as i64, Datum::Number(None) => 0, Datum::String(s) => s.parse().unwrap_or_default(), }; match format_map.get(&f) { Some(format) => *format, - None => decode_format(f), + None => decode_format(f as u32), } } diff --git a/rust/pspp/src/output/spv/legacy_xml.rs b/rust/pspp/src/output/spv/legacy_xml.rs index db02c0f5c7..5042e47b87 100644 --- a/rust/pspp/src/output/spv/legacy_xml.rs +++ b/rust/pspp/src/output/spv/legacy_xml.rs @@ -27,11 +27,12 @@ use serde::Deserialize; use crate::{ data::Datum, - format::{Decimal::Dot, F8_0, Type, UncheckedFormat}, + format::{Decimal::Dot, F8_0, F40_2, Type, UncheckedFormat}, output::{ pivot::{ - self, Area, AreaStyle, Axis2, Axis3, Color, HeadingRegion, HorzAlign, Leaf, Length, - Look, PivotTable, RowParity, Value, VertAlign, + self, Area, AreaStyle, Axis2, Axis3, Category, Color, Dimension, Group, HeadingRegion, + HorzAlign, Leaf, Length, Look, NumberValue, PivotTable, RowParity, Value, ValueInner, + VertAlign, }, spv::legacy_bin::DataValue, }, @@ -154,6 +155,16 @@ impl Map { } } } + + fn lookup<'a>(&'a self, dv: &'a DataValue) -> &'a Datum { + if let Datum::Number(Some(number)) = &dv.value + && let Some(value) = self.0.get(&OrderedFloat(*number)) + { + value + } else { + &dv.value + } + } } #[derive(Deserialize, Debug)] @@ -365,20 +376,24 @@ impl Visualization { rotate_inner_column_labels: &mut bool, rotate_outer_row_labels: &mut bool, footnotes: &pivot::Footnotes, - ) { + ) -> Dimension { let base_level = variables[0].1; - if let Ok(a) = Axis2::try_from(a) + let show_label = if let Ok(a) = Axis2::try_from(a) && let Some(axis) = axes.get(&(base_level + variables.len())) && let Some(label) = &axis.label { let out = &mut look.areas[Area::Labels(a)]; *out = Area::Labels(a).default_area_style(); + let style = label.style.get(&styles); Style::decode( - label.style.get(&styles), + style, label.text_frame_style.as_ref().and_then(|r| r.get(styles)), out, ); - } + style.is_some_and(|s| s.visible.unwrap_or_default()) + } else { + false + }; if a == Axis3::Y && let Some(axis) = axes.get(&(base_level + variables.len() - 1)) { @@ -399,46 +414,80 @@ impl Visualization { } } - // Find the first row for each category. - let max_cat = variables[0].0.max_category().unwrap()/*XXX*/; + let variables = variables + .into_iter() + .map(|(series, _level)| *series) + .collect::>(); + + // Find the first row for each category, then drop missing + // categories and count what's left. + let max_cat = variables[0].max_category().unwrap()/*XXX*/; let mut cat_rows = vec![None; max_cat + 1]; - for (index, value) in variables[0].0.values.iter().enumerate() { + for (index, value) in variables[0].values.iter().enumerate() { if let Some(row) = value.category() { cat_rows[row].get_or_insert(index); } } - - // Drop missing categories and count what's left. let cat_rows = cat_rows.into_iter().flatten().collect::>(); // Make leaf categories. let mut cats = Vec::with_capacity(cat_rows.len()); - for row in cat_rows.iter().copied() { - let dv = &variables[0].0.values[row]; - let name = Value::new_datum(&dv.value); - let name = variables[0].0.add_affixes(name, &footnotes); - cats.push(Leaf::new(name)); + for (index, cat_row) in cat_rows.into_iter().enumerate() { + let dv = &variables[0].values[cat_row]; + let name = variables[0].new_name(dv, footnotes); + cats.push((Category::from(Leaf::new(name)), index..index + 1)); + } + if cats.is_empty() { + todo!() } // Now group them, in one pass per grouping variable, innermost first. for j in 1..variables.len() { - // Find a sequence of categories `cat1...cat2`, that all have - // the same value in series `j`. (This might be only a single - // category.) */ - let series = variables[j].0; - let mut cat1 = 0; - while cat1 < cats.len() { - let mut cat2 = cat1 + 1; - while cat2 < cats.len() {} + let mut next_cats = Vec::with_capacity(cats.len()); + let mut start = 0; + for end in 1..=cats.len() { + let dv1 = &variables[j].values[cats[start].1.start]; + if end < cats.len() + && variables[j].values[cats[end].1.clone()] + .iter() + .all(|dv| &dv.value == &dv1.value) + { + } else { + let name = variables[j].map.lookup(dv1); + if end - start > 1 || name.is_number_or(|s| s.is_empty()) { + let name = variables[j].new_name(dv1, footnotes); + let mut group = Group::new(name); + for i in start..end { + group.push(cats[i].0.clone()); + } + next_cats.push(( + Category::from(group), + cats[start].1.start..cats[end - 1].1.end, + )); + } else { + next_cats.push(cats[start].clone()); + } + start = end; + } } + cats = next_cats; } - todo!() + Dimension::new( + Group::new( + variables[0] + .label + .as_ref() + .map_or_else(|| Value::empty(), |label| Value::new_user_text(label)), + ) + .with_multiple(cats.into_iter().map(|(category, _range)| category)) + .with_show_label(show_label), + ) } - fn decode_dimensions( - variables: &[VariableReference], - series: &HashMap<&str, Series>, + fn decode_dimensions<'a, 'b>( + variables: impl IntoIterator, + series: &'b HashMap<&str, Series>, axes: &HashMap, styles: &HashMap<&str, &Style>, a: Axis3, @@ -446,15 +495,16 @@ impl Visualization { rotate_inner_column_labels: &mut bool, rotate_outer_row_labels: &mut bool, footnotes: &pivot::Footnotes, - level_ofs: usize, - ) -> Vec { + dimensions: &mut Vec<(Axis3, pivot::Dimension)>, + coordinates: &mut Vec<&'b Series>, + ) { let variables = variables .into_iter() .zip(level_ofs..) - .map(|(vr, level)| { + .map(|(variable_name, level)| { series - .get(vr.reference.as_str()) + .get(variable_name) .filter(|s| !s.values.is_empty()) .map(|s| (s, level)) }) @@ -464,6 +514,27 @@ impl Visualization { if let Some((var, level)) = var { dim_vars.push((var, level)); } else if !dim_vars.is_empty() { + coordinates.push(dim_vars[0].0); + dimensions.push(( + a, + decode_dimension( + &dim_vars, + axes, + styles, + a, + look, + rotate_inner_column_labels, + rotate_outer_row_labels, + footnotes, + ), + )); + dim_vars.clear(); + } + } + if !dim_vars.is_empty() { + coordinates.push(dim_vars[0].0); + dimensions.push(( + a, decode_dimension( &dim_vars, axes, @@ -473,23 +544,9 @@ impl Visualization { rotate_inner_column_labels, rotate_outer_row_labels, footnotes, - ); - dim_vars.clear(); - } - } - if !dim_vars.is_empty() { - decode_dimension( - &dim_vars, - axes, - styles, - a, - look, - rotate_inner_column_labels, - rotate_outer_row_labels, - footnotes, - ); + ), + )); } - todo!() } let mut rotate_inner_column_labels = false; @@ -499,8 +556,10 @@ impl Visualization { .first() .map(|child| child.variables()) .unwrap_or_default(); + let mut dimensions = Vec::new(); + let mut coordinates = Vec::new(); decode_dimensions( - columns, + columns.into_iter().map(|vr| vr.reference.as_str()), &series, &axes, &styles, @@ -510,13 +569,15 @@ impl Visualization { &mut rotate_outer_row_labels, &footnotes, 1, + &mut dimensions, + &mut coordinates, ); let rows = cross .get(1) .map(|child| child.variables()) .unwrap_or_default(); decode_dimensions( - rows, + rows.into_iter().map(|vr| vr.reference.as_str()), &series, &axes, &styles, @@ -526,9 +587,87 @@ impl Visualization { &mut rotate_outer_row_labels, &footnotes, 1 + columns.len(), + &mut dimensions, + &mut coordinates, ); - todo!() + let mut level_ofs = columns.len() + rows.len() + 1; + for layers in [&graph.faceting.layers1, &graph.faceting.layers2] { + decode_dimensions( + layers.iter().map(|layer| layer.variable.as_str()), + &series, + &axes, + &styles, + Axis3::Y, + &mut look, + &mut rotate_inner_column_labels, + &mut rotate_outer_row_labels, + &footnotes, + level_ofs, + &mut dimensions, + &mut coordinates, + ); + level_ofs += layers.len(); + } + + let mut pivot_table = PivotTable::new(dimensions); + + let cell = series.get("cell").unwrap()/*XXX*/; + let mut coords = Vec::with_capacity(coordinates.len()); + let (cell_formats, format_map) = graph.interval.labeling.decode_format_map(&series); + let cell_footnotes = + graph + .interval + .labeling + .children + .iter() + .find_map(|child| match child { + LabelingChild::Footnotes(footnotes) => series.get(footnotes.variable.as_str()), + _ => None, + }); + for (i, cell) in cell.values.iter().enumerate() { + coords.clear(); + for series in &coordinates { + // XXX indexing of values, and unwrap + coords.push(series.values[i].category().unwrap()); + } + + let format = if let Some(cell_formats) = &cell_formats { + // XXX indexing of values + cell_formats.values[i].as_format(&format_map) + } else { + F40_2 + }; + let mut value = cell.as_pivot_value(format); + + if let Some(cell_footnotes) = &cell_footnotes { + // XXX indexing + let dv = &cell_footnotes.values[i]; + if let Some(s) = dv.value.as_string() { + for part in s.split(',') { + if let Ok(index) = part.parse::() + && let Some(index) = index.checked_sub(1) + && let Some(footnote) = footnotes.get(index) + { + value = value.with_footnote(footnote); + } + } + } + } + if let Value { + inner: ValueInner::Number(NumberValue { value: None, .. }), + styling: None, + } = &value + { + // A system-missing value without a footnote represents an empty cell. + } else { + // XXX cell_index might be invalid? + pivot_table.insert(coords.as_slice(), value); + } + } + // XXX decode_set_cell_properties + + Ok(pivot_table) } } @@ -560,6 +699,12 @@ impl Series { .filter_map(|value| value.category()) .max() } + + fn new_name(&self, dv: &DataValue, footnotes: &pivot::Footnotes) -> Value { + let dv = self.map.lookup(dv); + let name = Value::new_datum(dv); + self.add_affixes(name, &footnotes) + } } #[derive(Deserialize, Debug)] @@ -1795,6 +1940,27 @@ struct Labeling { children: Vec, } +impl Labeling { + fn decode_format_map<'a>( + &self, + series: &'a HashMap<&str, Series>, + ) -> (Option<&'a Series>, HashMap) { + let mut map = HashMap::new(); + let mut cell_format = None; + for child in &self.children { + if let LabelingChild::Formatting(formatting) = child { + cell_format = series.get(formatting.variable.as_str()); + for mapping in &formatting.mappings { + if let Some(format) = &mapping.format { + map.insert(mapping.from, format.decode()); + } + } + } + } + (cell_format, map) + } +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] enum LabelingChild { -- 2.30.2