From fbf3dc8d073985ad4fec223fbbc33da0aa980408 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 1 Jan 2026 16:00:57 -0800 Subject: [PATCH] cleanup --- rust/pspp/src/spv/read.rs | 23 +-- rust/pspp/src/spv/read/legacy_bin.rs | 4 +- rust/pspp/src/spv/read/legacy_xml.rs | 239 +++++++++++++++------------ rust/pspp/src/spv/read/structure.rs | 8 +- 4 files changed, 154 insertions(+), 120 deletions(-) diff --git a/rust/pspp/src/spv/read.rs b/rust/pspp/src/spv/read.rs index 5b7de67df2..0bceac8833 100644 --- a/rust/pspp/src/spv/read.rs +++ b/rust/pspp/src/spv/read.rs @@ -27,11 +27,15 @@ use zip::{ZipArchive, result::ZipError}; use crate::{ crypto::EncryptedReader, - output::{page::PageSetup, Item}, - spv::{legacy_bin::LegacyBinWarning, read::{ - light::LightWarning, - structure::{OutlineItem, StructureMember}, - }}, + output::{Item, page::PageSetup}, + spv::{ + legacy_bin::LegacyBinWarning, + read::{ + legacy_xml::LegacyXmlWarning, + light::LightWarning, + structure::{OutlineItem, StructureMember}, + }, + }, }; mod css; @@ -71,6 +75,9 @@ pub enum WarningDetails { /// {0} LegacyBinWarning(LegacyBinWarning), + /// {0} + LegacyXmlWarning(LegacyXmlWarning), + /// Unknown page orientation {0:?}. UnknownOrientation(String), } @@ -244,12 +251,6 @@ pub enum Error { error: serde_path_to_error::Error, }, - /* - /// Error reading {member_name}: {error} - LegacyTableError { - /// The name of the file inside the ZIP file that caused the error. - member_name: String, - },*/ /// Graphs not yet implemented. GraphTodo, diff --git a/rust/pspp/src/spv/read/legacy_bin.rs b/rust/pspp/src/spv/read/legacy_bin.rs index 78c5d14857..813dd9d4db 100644 --- a/rust/pspp/src/spv/read/legacy_bin.rs +++ b/rust/pspp/src/spv/read/legacy_bin.rs @@ -158,8 +158,8 @@ pub struct DataValue { } impl DataValue { - /// Category index, if any. This is either the numeric value of the datum, - /// if there is one, falling back to the index. + /// Category index, if any. This is the numeric value of the datum, if + /// there is one, falling back to the index. pub fn category(&self) -> Option { match &self.value { Datum::Number(number) => *number, diff --git a/rust/pspp/src/spv/read/legacy_xml.rs b/rust/pspp/src/spv/read/legacy_xml.rs index 5b27c48eb3..6b59198e43 100644 --- a/rust/pspp/src/spv/read/legacy_xml.rs +++ b/rust/pspp/src/spv/read/legacy_xml.rs @@ -19,13 +19,13 @@ use std::{ collections::{BTreeMap, HashMap}, fmt::Debug, marker::PhantomData, - mem::take, num::NonZeroUsize, ops::Range, sync::Arc, }; use chrono::{NaiveDateTime, NaiveTime}; +use displaydoc::Display; use enum_map::{Enum, EnumMap}; use hashbrown::HashSet; use indexmap::IndexMap; @@ -153,13 +153,16 @@ impl Map { fn remap_vmes(&mut self, value_map: &[ValueMapEntry]) { for vme in value_map { for from in vme.from.split(';') { - let from = from.trim().parse::().unwrap(); // XXX - let to = if let Ok(to) = vme.to.trim().parse::() { - Datum::Number(Some(to)) + if let Ok(from) = from.trim().parse::() { + let to = if let Ok(to) = vme.to.trim().parse::() { + Datum::Number(Some(to)) + } else { + Datum::String(vme.to.clone()) + }; + self.0.insert(OrderedFloat(from), to); } else { - Datum::String(vme.to.clone()) - }; - self.0.insert(OrderedFloat(from), to); + // XXX warn + } } } } @@ -175,6 +178,24 @@ impl Map { } } +/// A warning decoding a legacy XML member. +#[derive(Clone, Debug, Display, thiserror::Error)] +pub enum LegacyXmlWarning { + /// Table has no data. + MissingData, + + /// Table has variables with unresolved dependencies: {0:?}. + UnresolvedDependencies(Vec), + + /// Derived variable {variable:?} has unsupported value expression {value:?}. + UnsupportedValue { + /// Name of derived variable. + variable: String, + /// Value expression. + value: String, + }, +} + #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct Visualization { @@ -200,6 +221,7 @@ impl Visualization { &self, data: IndexMap>>, mut look: Look, + warn: &mut dyn FnMut(LegacyXmlWarning), ) -> Result { let mut extension = None; let mut source_variables = Vec::new(); @@ -335,30 +357,20 @@ impl Visualization { } let mut series = BTreeMap::<&str, Series>::new(); - while let n_source = source_variables.len() - && let n_derived = derived_variables.len() - && (n_source > 0 || n_derived > 0) + while let n = source_variables.len() + derived_variables.len() + && n > 0 { - for sv in take(&mut source_variables) { - match sv.decode(&data, &series) { - Ok(s) => { - series.insert(&sv.id, s); - } - Err(()) => source_variables.push(sv), - } - } - - for dv in take(&mut derived_variables) { - match dv.decode(&series) { - Ok(s) => { - series.insert(&dv.id, s); - } - Err(()) => derived_variables.push(dv), - } - } - - if n_source == source_variables.len() && n_derived == derived_variables.len() { - unreachable!(); + source_variables.retain(|sv| !sv.decode(&data, &mut series)); + derived_variables.retain(|dv| !dv.decode(&mut series, warn)); + + if n == source_variables.len() + derived_variables.len() { + warn(LegacyXmlWarning::UnresolvedDependencies( + source_variables + .iter() + .map(|sv| sv.variable.clone()) + .chain(derived_variables.iter().map(|dv| dv.id.clone())) + .collect(), + )); } } @@ -652,7 +664,6 @@ impl Visualization { }) .collect::>(); - let cell = series.get("cell").unwrap()/*XXX*/; let mut coords = Vec::with_capacity(dims.len()); let (cell_formats, format_map) = graph.interval.labeling.decode_format_map(&series); let cell_footnotes = graph @@ -660,62 +671,58 @@ impl Visualization { .footnotes() .and_then(|footnotes| series.get(footnotes.variable.as_str())); let mut data = HashMap::new(); - 'outer: for (i, cell) in cell.values.iter().enumerate() { - coords.clear(); - for dim in &dims { - // XXX indexing of values - let Some(coordinate) = dim.coordinate.values[i].category() else { - // XXX issue warning? - continue 'outer; - }; - let Some(index) = dim - .coordinate - .coordinate_to_index - .borrow() - .get(&coordinate) - .and_then(CategoryLocator::as_leaf) - else { - panic!("can't find {coordinate}") // XXX - }; - debug_assert!( - index < dim.dimension.len(), - "{index}, {}", - dim.dimension.len() - ); - coords.push(index); - } + if let Some(cell) = series.get("cell") { + 'outer: for (i, cell) in cell.values.iter().enumerate() { + coords.clear(); + for dim in &dims { + if let Some(coordinate) = dim.coordinate.values.get(i) + && let Some(coordinate) = coordinate.category() + && let Some(locator) = + dim.coordinate.coordinate_to_index.borrow().get(&coordinate) + && let Some(index) = locator.as_leaf() + { + coords.push(index); + } else { + // XXX warn + continue 'outer; + } + } - let format = if let Some(cell_formats) = &cell_formats { - // XXX indexing of values - cell_formats.values[i].as_format(&format_map) - } else { - F40_2 - }; - let mut value = cell.as_pivot_value(format); + let format = if let Some(cell_formats) = &cell_formats + && let Some(value) = cell_formats.values.get(i) + { + value.as_format(&format_map) + } else { + F40_2 + }; + let mut value = cell.as_pivot_value(format); - if let Some(cell_footnotes) = &cell_footnotes - && let Some(dv) = cell_footnotes.values.get(i) - { - if let Some(s) = dv.value.as_string() { - for part in s.split(',') { - if let Ok(index) = part.parse::() - && let Some(index) = index.checked_sub(1) - && let Some(footnote) = footnotes.get(index) - { - value.add_footnote(footnote); + if let Some(cell_footnotes) = &cell_footnotes + && let Some(dv) = cell_footnotes.values.get(i) + { + if let Some(s) = dv.value.as_string() { + for part in s.split(',') { + if let Ok(index) = part.parse::() + && let Some(index) = index.checked_sub(1) + && let Some(footnote) = footnotes.get(index) + { + value.add_footnote(footnote); + } } } } + + if let Some(datum) = value.datum() + && datum.is_sysmis() + && value.footnotes().is_empty() + { + // A system-missing value without a footnote represents an empty cell. + } else { + data.insert(coords.clone(), value); + } } - if let Some(datum) = value.datum() - && datum.is_sysmis() - && value.footnotes().is_empty() - { - // A system-missing value without a footnote represents an empty cell. - } else { - // XXX cell_index might be invalid? - data.insert(coords.clone(), value); - } + } else { + warn(LegacyXmlWarning::MissingData); } for child in &graph.facet_layout.children { @@ -1064,7 +1071,7 @@ struct SourceVariable { /// The name of a variable within the source, corresponding to the /// `variable-name` in the `tableData.bin` member. #[serde(rename = "@sourceName")] - source_name: String, + variable: String, /// Variable label, if any. #[serde(rename = "@label")] @@ -1082,14 +1089,14 @@ struct SourceVariable { } impl SourceVariable { - fn decode( - &self, + fn decode<'a>( + &'a self, data: &IndexMap>>, - series: &BTreeMap<&str, Series>, - ) -> Result { + series: &mut BTreeMap<&'a str, Series>, + ) -> bool { let label_series = if let Some(label_variable) = &self.label_variable { let Some(label_series) = series.get(label_variable.references.as_str()) else { - return Err(()); + return false; }; Some(label_series) } else { @@ -1097,7 +1104,7 @@ impl SourceVariable { }; let data = if let Some(source) = data.get(&self.source) - && let Some(values) = source.get(&self.source_name) + && let Some(values) = source.get(&self.variable) { values.as_slice() } else { @@ -1114,10 +1121,14 @@ impl SourceVariable { } else if let Some(label_series) = label_series { map.insert_labels(&data, label_series, format); } - Ok(Series::new(self.id.clone(), data, map) - .with_format(format) - .with_affixes(affixes) - .with_label(self.label.clone())) + series.insert( + &self.id, + Series::new(self.id.clone(), data, map) + .with_format(format) + .with_affixes(affixes) + .with_label(self.label.clone()), + ); + true } } @@ -1141,7 +1152,11 @@ struct DerivedVariable { } impl DerivedVariable { - fn decode(&self, series: &BTreeMap<&str, Series>) -> Result { + fn decode<'a>( + &'a self, + series: &mut BTreeMap<&'a str, Series>, + warn: &mut dyn FnMut(LegacyXmlWarning), + ) -> bool { let mut values = if self.id == "column" || self.id == "row" { vec![] } else if let Some(rest) = self.id.strip_prefix("dimension") @@ -1149,33 +1164,44 @@ impl DerivedVariable { { vec![] } else if self.value == "constant(0)" { - let n_values = series - .values() - .find_map(|series| { + /// All the series have the same length, except for series with + /// length zero. This returns the length of the first nonempty + /// series, or `None` if there isn't one yet. + fn series_len(series: &mut BTreeMap<&str, Series>) -> Option { + series.values().find_map(|series| { if !series.values.is_empty() { Some(series.values.len()) } else { None } }) - .ok_or(())?; - (0..n_values) - .map(|_| DataValue { - index: Some(0.0), - value: Datum::Number(Some(0.0)), - }) - .collect() + } + + if let Some(n_values) = series_len(series) { + (0..n_values) + .map(|_| DataValue { + index: Some(0.0), + value: Datum::Number(Some(0.0)), + }) + .collect() + } else { + return false; + } } else if self.value.starts_with("constant") { vec![] } else if let Some(rest) = self.value.strip_prefix("map(") && let Some(var_name) = rest.strip_suffix(")") { let Some(dependency) = series.get(var_name) else { - return Err(()); + return false; }; dependency.values.clone() } else { - unreachable!() + warn(LegacyXmlWarning::UnsupportedValue { + variable: self.id.clone(), + value: self.value.clone(), + }); + vec![] }; let mut map = Map::new(); map.remap_vmes(&self.value_map); @@ -1187,7 +1213,8 @@ impl DerivedVariable { { values.clear(); } - Ok(Series::new(self.id.clone(), values, map)) + series.insert(&self.id, Series::new(self.id.clone(), values, map)); + true } } diff --git a/rust/pspp/src/spv/read/structure.rs b/rust/pspp/src/spv/read/structure.rs index ae7c11fce9..0c204914bb 100644 --- a/rust/pspp/src/spv/read/structure.rs +++ b/rust/pspp/src/spv/read/structure.rs @@ -327,7 +327,13 @@ impl Table { Ok(result) => result, Err(error) => panic!("{error:?}"), }; - let pivot_table = visualization.decode(data, *self.look.unwrap_or_default())?; + let pivot_table = + visualization.decode(data, *self.look.unwrap_or_default(), &mut |w| { + warn(Warning { + member: bin_member_name.clone(), + details: WarningDetails::LegacyXmlWarning(w), + }) + })?; Ok(pivot_table.into_item()) } else { -- 2.30.2