cleanup
authorBen Pfaff <blp@cs.stanford.edu>
Fri, 2 Jan 2026 00:00:57 +0000 (16:00 -0800)
committerBen Pfaff <blp@cs.stanford.edu>
Fri, 2 Jan 2026 00:00:57 +0000 (16:00 -0800)
rust/pspp/src/spv/read.rs
rust/pspp/src/spv/read/legacy_bin.rs
rust/pspp/src/spv/read/legacy_xml.rs
rust/pspp/src/spv/read/structure.rs

index 5b7de67df281da2beac9875be30cb840d590a27f..0bceac883397c85bc289be25e455ff209c5e1fe5 100644 (file)
@@ -27,11 +27,15 @@ use zip::{ZipArchive, result::ZipError};
 
 use crate::{
     crypto::EncryptedReader,
-    output::{page::PageSetup, Item},
-    spv::{legacy_bin::LegacyBinWarning, read::{
-        light::LightWarning,
-        structure::{OutlineItem, StructureMember},
-    }},
+    output::{Item, page::PageSetup},
+    spv::{
+        legacy_bin::LegacyBinWarning,
+        read::{
+            legacy_xml::LegacyXmlWarning,
+            light::LightWarning,
+            structure::{OutlineItem, StructureMember},
+        },
+    },
 };
 
 mod css;
@@ -71,6 +75,9 @@ pub enum WarningDetails {
     /// {0}
     LegacyBinWarning(LegacyBinWarning),
 
+    /// {0}
+    LegacyXmlWarning(LegacyXmlWarning),
+
     /// Unknown page orientation {0:?}.
     UnknownOrientation(String),
 }
@@ -244,12 +251,6 @@ pub enum Error {
         error: serde_path_to_error::Error<quick_xml::DeError>,
     },
 
-    /*
-    /// Error reading {member_name}: {error}
-    LegacyTableError {
-        /// The name of the file inside the ZIP file that caused the error.
-        member_name: String,
-    },*/
     /// Graphs not yet implemented.
     GraphTodo,
 
index 78c5d14857a836c91bc0b3950ab420fbd0b96b1c..813dd9d4db717c6308670512e011d1c3889f84ce 100644 (file)
@@ -158,8 +158,8 @@ pub struct DataValue {
 }
 
 impl DataValue {
-    /// Category index, if any.  This is either the numeric value of the datum,
-    /// if there is one, falling back to the index.
+    /// Category index, if any.  This is the numeric value of the datum, if
+    /// there is one, falling back to the index.
     pub fn category(&self) -> Option<usize> {
         match &self.value {
             Datum::Number(number) => *number,
index 5b27c48eb3cb22399466644bf25fcd3d42287e09..6b59198e432bf5bcfaba190dd6243c43dcfb28e0 100644 (file)
@@ -19,13 +19,13 @@ use std::{
     collections::{BTreeMap, HashMap},
     fmt::Debug,
     marker::PhantomData,
-    mem::take,
     num::NonZeroUsize,
     ops::Range,
     sync::Arc,
 };
 
 use chrono::{NaiveDateTime, NaiveTime};
+use displaydoc::Display;
 use enum_map::{Enum, EnumMap};
 use hashbrown::HashSet;
 use indexmap::IndexMap;
@@ -153,13 +153,16 @@ impl Map {
     fn remap_vmes(&mut self, value_map: &[ValueMapEntry]) {
         for vme in value_map {
             for from in vme.from.split(';') {
-                let from = from.trim().parse::<f64>().unwrap(); // XXX
-                let to = if let Ok(to) = vme.to.trim().parse::<f64>() {
-                    Datum::Number(Some(to))
+                if let Ok(from) = from.trim().parse::<f64>() {
+                    let to = if let Ok(to) = vme.to.trim().parse::<f64>() {
+                        Datum::Number(Some(to))
+                    } else {
+                        Datum::String(vme.to.clone())
+                    };
+                    self.0.insert(OrderedFloat(from), to);
                 } else {
-                    Datum::String(vme.to.clone())
-                };
-                self.0.insert(OrderedFloat(from), to);
+                    // XXX warn
+                }
             }
         }
     }
@@ -175,6 +178,24 @@ impl Map {
     }
 }
 
+/// A warning decoding a legacy XML member.
+#[derive(Clone, Debug, Display, thiserror::Error)]
+pub enum LegacyXmlWarning {
+    /// Table has no data.
+    MissingData,
+
+    /// Table has variables with unresolved dependencies: {0:?}.
+    UnresolvedDependencies(Vec<String>),
+
+    /// Derived variable {variable:?} has unsupported value expression {value:?}.
+    UnsupportedValue {
+        /// Name of derived variable.
+        variable: String,
+        /// Value expression.
+        value: String,
+    },
+}
+
 #[derive(Deserialize, Debug)]
 #[serde(rename_all = "camelCase")]
 pub struct Visualization {
@@ -200,6 +221,7 @@ impl Visualization {
         &self,
         data: IndexMap<String, IndexMap<String, Vec<DataValue>>>,
         mut look: Look,
+        warn: &mut dyn FnMut(LegacyXmlWarning),
     ) -> Result<PivotTable, super::Error> {
         let mut extension = None;
         let mut source_variables = Vec::new();
@@ -335,30 +357,20 @@ impl Visualization {
         }
 
         let mut series = BTreeMap::<&str, Series>::new();
-        while let n_source = source_variables.len()
-            && let n_derived = derived_variables.len()
-            && (n_source > 0 || n_derived > 0)
+        while let n = source_variables.len() + derived_variables.len()
+            && n > 0
         {
-            for sv in take(&mut source_variables) {
-                match sv.decode(&data, &series) {
-                    Ok(s) => {
-                        series.insert(&sv.id, s);
-                    }
-                    Err(()) => source_variables.push(sv),
-                }
-            }
-
-            for dv in take(&mut derived_variables) {
-                match dv.decode(&series) {
-                    Ok(s) => {
-                        series.insert(&dv.id, s);
-                    }
-                    Err(()) => derived_variables.push(dv),
-                }
-            }
-
-            if n_source == source_variables.len() && n_derived == derived_variables.len() {
-                unreachable!();
+            source_variables.retain(|sv| !sv.decode(&data, &mut series));
+            derived_variables.retain(|dv| !dv.decode(&mut series, warn));
+
+            if n == source_variables.len() + derived_variables.len() {
+                warn(LegacyXmlWarning::UnresolvedDependencies(
+                    source_variables
+                        .iter()
+                        .map(|sv| sv.variable.clone())
+                        .chain(derived_variables.iter().map(|dv| dv.id.clone()))
+                        .collect(),
+                ));
             }
         }
 
@@ -652,7 +664,6 @@ impl Visualization {
             })
             .collect::<Vec<_>>();
 
-        let cell = series.get("cell").unwrap()/*XXX*/;
         let mut coords = Vec::with_capacity(dims.len());
         let (cell_formats, format_map) = graph.interval.labeling.decode_format_map(&series);
         let cell_footnotes = graph
@@ -660,62 +671,58 @@ impl Visualization {
             .footnotes()
             .and_then(|footnotes| series.get(footnotes.variable.as_str()));
         let mut data = HashMap::new();
-        'outer: for (i, cell) in cell.values.iter().enumerate() {
-            coords.clear();
-            for dim in &dims {
-                // XXX indexing of values
-                let Some(coordinate) = dim.coordinate.values[i].category() else {
-                    // XXX issue warning?
-                    continue 'outer;
-                };
-                let Some(index) = dim
-                    .coordinate
-                    .coordinate_to_index
-                    .borrow()
-                    .get(&coordinate)
-                    .and_then(CategoryLocator::as_leaf)
-                else {
-                    panic!("can't find {coordinate}") // XXX
-                };
-                debug_assert!(
-                    index < dim.dimension.len(),
-                    "{index}, {}",
-                    dim.dimension.len()
-                );
-                coords.push(index);
-            }
+        if let Some(cell) = series.get("cell") {
+            'outer: for (i, cell) in cell.values.iter().enumerate() {
+                coords.clear();
+                for dim in &dims {
+                    if let Some(coordinate) = dim.coordinate.values.get(i)
+                        && let Some(coordinate) = coordinate.category()
+                        && let Some(locator) =
+                            dim.coordinate.coordinate_to_index.borrow().get(&coordinate)
+                        && let Some(index) = locator.as_leaf()
+                    {
+                        coords.push(index);
+                    } else {
+                        // XXX warn
+                        continue 'outer;
+                    }
+                }
 
-            let format = if let Some(cell_formats) = &cell_formats {
-                // XXX indexing of values
-                cell_formats.values[i].as_format(&format_map)
-            } else {
-                F40_2
-            };
-            let mut value = cell.as_pivot_value(format);
+                let format = if let Some(cell_formats) = &cell_formats
+                    && let Some(value) = cell_formats.values.get(i)
+                {
+                    value.as_format(&format_map)
+                } else {
+                    F40_2
+                };
+                let mut value = cell.as_pivot_value(format);
 
-            if let Some(cell_footnotes) = &cell_footnotes
-                && let Some(dv) = cell_footnotes.values.get(i)
-            {
-                if let Some(s) = dv.value.as_string() {
-                    for part in s.split(',') {
-                        if let Ok(index) = part.parse::<usize>()
-                            && let Some(index) = index.checked_sub(1)
-                            && let Some(footnote) = footnotes.get(index)
-                        {
-                            value.add_footnote(footnote);
+                if let Some(cell_footnotes) = &cell_footnotes
+                    && let Some(dv) = cell_footnotes.values.get(i)
+                {
+                    if let Some(s) = dv.value.as_string() {
+                        for part in s.split(',') {
+                            if let Ok(index) = part.parse::<usize>()
+                                && let Some(index) = index.checked_sub(1)
+                                && let Some(footnote) = footnotes.get(index)
+                            {
+                                value.add_footnote(footnote);
+                            }
                         }
                     }
                 }
+
+                if let Some(datum) = value.datum()
+                    && datum.is_sysmis()
+                    && value.footnotes().is_empty()
+                {
+                    // A system-missing value without a footnote represents an empty cell.
+                } else {
+                    data.insert(coords.clone(), value);
+                }
             }
-            if let Some(datum) = value.datum()
-                && datum.is_sysmis()
-                && value.footnotes().is_empty()
-            {
-                // A system-missing value without a footnote represents an empty cell.
-            } else {
-                // XXX cell_index might be invalid?
-                data.insert(coords.clone(), value);
-            }
+        } else {
+            warn(LegacyXmlWarning::MissingData);
         }
 
         for child in &graph.facet_layout.children {
@@ -1064,7 +1071,7 @@ struct SourceVariable {
     /// The name of a variable within the source, corresponding to the
     /// `variable-name` in the `tableData.bin` member.
     #[serde(rename = "@sourceName")]
-    source_name: String,
+    variable: String,
 
     /// Variable label, if any.
     #[serde(rename = "@label")]
@@ -1082,14 +1089,14 @@ struct SourceVariable {
 }
 
 impl SourceVariable {
-    fn decode(
-        &self,
+    fn decode<'a>(
+        &'a self,
         data: &IndexMap<String, IndexMap<String, Vec<DataValue>>>,
-        series: &BTreeMap<&str, Series>,
-    ) -> Result<Series, ()> {
+        series: &mut BTreeMap<&'a str, Series>,
+    ) -> bool {
         let label_series = if let Some(label_variable) = &self.label_variable {
             let Some(label_series) = series.get(label_variable.references.as_str()) else {
-                return Err(());
+                return false;
             };
             Some(label_series)
         } else {
@@ -1097,7 +1104,7 @@ impl SourceVariable {
         };
 
         let data = if let Some(source) = data.get(&self.source)
-            && let Some(values) = source.get(&self.source_name)
+            && let Some(values) = source.get(&self.variable)
         {
             values.as_slice()
         } else {
@@ -1114,10 +1121,14 @@ impl SourceVariable {
         } else if let Some(label_series) = label_series {
             map.insert_labels(&data, label_series, format);
         }
-        Ok(Series::new(self.id.clone(), data, map)
-            .with_format(format)
-            .with_affixes(affixes)
-            .with_label(self.label.clone()))
+        series.insert(
+            &self.id,
+            Series::new(self.id.clone(), data, map)
+                .with_format(format)
+                .with_affixes(affixes)
+                .with_label(self.label.clone()),
+        );
+        true
     }
 }
 
@@ -1141,7 +1152,11 @@ struct DerivedVariable {
 }
 
 impl DerivedVariable {
-    fn decode(&self, series: &BTreeMap<&str, Series>) -> Result<Series, ()> {
+    fn decode<'a>(
+        &'a self,
+        series: &mut BTreeMap<&'a str, Series>,
+        warn: &mut dyn FnMut(LegacyXmlWarning),
+    ) -> bool {
         let mut values = if self.id == "column" || self.id == "row" {
             vec![]
         } else if let Some(rest) = self.id.strip_prefix("dimension")
@@ -1149,33 +1164,44 @@ impl DerivedVariable {
         {
             vec![]
         } else if self.value == "constant(0)" {
-            let n_values = series
-                .values()
-                .find_map(|series| {
+            /// All the series have the same length, except for series with
+            /// length zero.  This returns the length of the first nonempty
+            /// series, or `None` if there isn't one yet.
+            fn series_len(series: &mut BTreeMap<&str, Series>) -> Option<usize> {
+                series.values().find_map(|series| {
                     if !series.values.is_empty() {
                         Some(series.values.len())
                     } else {
                         None
                     }
                 })
-                .ok_or(())?;
-            (0..n_values)
-                .map(|_| DataValue {
-                    index: Some(0.0),
-                    value: Datum::Number(Some(0.0)),
-                })
-                .collect()
+            }
+
+            if let Some(n_values) = series_len(series) {
+                (0..n_values)
+                    .map(|_| DataValue {
+                        index: Some(0.0),
+                        value: Datum::Number(Some(0.0)),
+                    })
+                    .collect()
+            } else {
+                return false;
+            }
         } else if self.value.starts_with("constant") {
             vec![]
         } else if let Some(rest) = self.value.strip_prefix("map(")
             && let Some(var_name) = rest.strip_suffix(")")
         {
             let Some(dependency) = series.get(var_name) else {
-                return Err(());
+                return false;
             };
             dependency.values.clone()
         } else {
-            unreachable!()
+            warn(LegacyXmlWarning::UnsupportedValue {
+                variable: self.id.clone(),
+                value: self.value.clone(),
+            });
+            vec![]
         };
         let mut map = Map::new();
         map.remap_vmes(&self.value_map);
@@ -1187,7 +1213,8 @@ impl DerivedVariable {
         {
             values.clear();
         }
-        Ok(Series::new(self.id.clone(), values, map))
+        series.insert(&self.id, Series::new(self.id.clone(), values, map));
+        true
     }
 }
 
index ae7c11fce98c7c0e647ce8870219a10cd211cf3b..0c204914bbca50f8e993f0d68443c0bc8710398c 100644 (file)
@@ -327,7 +327,13 @@ impl Table {
                 Ok(result) => result,
                 Err(error) => panic!("{error:?}"),
             };
-            let pivot_table = visualization.decode(data, *self.look.unwrap_or_default())?;
+            let pivot_table =
+                visualization.decode(data, *self.look.unwrap_or_default(), &mut |w| {
+                    warn(Warning {
+                        member: bin_member_name.clone(),
+                        details: WarningDetails::LegacyXmlWarning(w),
+                    })
+                })?;
 
             Ok(pivot_table.into_item())
         } else {