cleanup
authorBen Pfaff <blp@cs.stanford.edu>
Thu, 1 Jan 2026 23:07:02 +0000 (15:07 -0800)
committerBen Pfaff <blp@cs.stanford.edu>
Thu, 1 Jan 2026 23:07:02 +0000 (15:07 -0800)
rust/pspp/src/cli/show_spv.rs
rust/pspp/src/spv/read.rs
rust/pspp/src/spv/read/legacy_bin.rs
rust/pspp/src/spv/read/legacy_xml.rs
rust/pspp/src/spv/read/structure.rs

index eeeeb927610cb529a4a5dcd5f1ec6584750df948..cdc46a8ff404c24f372445af8c9978d10fe41271 100644 (file)
@@ -163,7 +163,7 @@ impl ShowSpv {
                             "While parsing {binary:?} as legacy binary SPV member"
                         ))
                     })?;
-                    let data = legacy_bin.decode();
+                    let data = legacy_bin.decode(&mut |w| eprintln!("{w}"));
                     let n_values = data
                         .values()
                         .flat_map(|map| map.values())
index 15d2127ba64e64eb68126ce4e4fd4afcdfe62ee7..5b7de67df281da2beac9875be30cb840d590a27f 100644 (file)
@@ -27,11 +27,11 @@ use zip::{ZipArchive, result::ZipError};
 
 use crate::{
     crypto::EncryptedReader,
-    output::{Item, page::PageSetup},
-    spv::read::{
+    output::{page::PageSetup, Item},
+    spv::{legacy_bin::LegacyBinWarning, read::{
         light::LightWarning,
         structure::{OutlineItem, StructureMember},
-    },
+    }},
 };
 
 mod css;
@@ -68,6 +68,9 @@ pub enum WarningDetails {
     /// {0}
     LightWarning(LightWarning),
 
+    /// {0}
+    LegacyBinWarning(LegacyBinWarning),
+
     /// Unknown page orientation {0:?}.
     UnknownOrientation(String),
 }
@@ -241,6 +244,12 @@ pub enum Error {
         error: serde_path_to_error::Error<quick_xml::DeError>,
     },
 
+    /*
+    /// Error reading {member_name}: {error}
+    LegacyTableError {
+        /// The name of the file inside the ZIP file that caused the error.
+        member_name: String,
+    },*/
     /// Graphs not yet implemented.
     GraphTodo,
 
index ff86015f0351f6a9baa5920bf2d5ca474d06b71d..78c5d14857a836c91bc0b3950ab420fbd0b96b1c 100644 (file)
@@ -6,6 +6,7 @@ use std::{
 
 use binrw::{BinRead, BinResult, binread};
 use chrono::{NaiveDateTime, NaiveTime};
+use displaydoc::Display;
 use encoding_rs::UTF_8;
 use indexmap::IndexMap;
 
@@ -16,6 +17,46 @@ use crate::{
     spv::read::light::{U32String, decode_format, parse_vec},
 };
 
+/// A warning decoding a legacy binary member.
+#[derive(Clone, Debug, Display, thiserror::Error)]
+pub enum LegacyBinWarning {
+    /// String map refers to unknown source {0:?}.
+    UnknownSource(String),
+    /// String map for source {source_name:?} refers to unknown variable {variable:?}.
+    UnknownVariable {
+        /// Source name.
+        source_name: String,
+        /// Variable name.
+        variable: String,
+    },
+    /// Datum mapping {datum_idx} for variable {variable:?} in source {source_name:?} has label index {label_idx} but there are only {n_labels} labels.
+    OutOfRangeLabelIdx {
+        /// Source name.
+        source_name: String,
+        /// Variable name.
+        variable: String,
+        /// Index into variable's datum map.
+        datum_idx: usize,
+        /// Too-large index into labels.
+        label_idx: usize,
+        /// Number of labels.
+        n_labels: usize,
+    },
+    /// Datum mapping {datum_idx} for variable {variable:?} in source {source_name:?} has value index {value_idx} but the variable has only {n_values} values.
+    OutOfRangeValueIdx {
+        /// Source name.
+        source_name: String,
+        /// Variable name.
+        variable: String,
+        /// Index into variable's datum map.
+        datum_idx: usize,
+        /// Too-large index into values.
+        value_idx: usize,
+        /// Number of values in `variable`.
+        n_values: usize,
+    },
+}
+
 /// Legacy binary data.
 #[binread]
 #[br(little)]
@@ -38,7 +79,10 @@ pub struct LegacyBin {
 impl LegacyBin {
     /// Decodes legacy binary data into a map from a series name to a map of
     /// variables, which in turn contains a vector of [DataValue]s.
-    pub fn decode(&self) -> IndexMap<String, IndexMap<String, Vec<DataValue>>> {
+    pub fn decode(
+        &self,
+        warn: &mut dyn FnMut(LegacyBinWarning),
+    ) -> IndexMap<String, IndexMap<String, Vec<DataValue>>> {
         let mut sources = IndexMap::new();
         for (metadata, data) in self.metadata.iter().zip(&self.data) {
             let mut variables = IndexMap::new();
@@ -59,13 +103,40 @@ impl LegacyBin {
         }
         if let Some(strings) = &self.strings {
             for map in &strings.source_maps {
-                let source = sources.get_mut(&map.source_name).unwrap(); // XXX unwrap
+                let Some(source) = sources.get_mut(&map.source_name) else {
+                    warn(LegacyBinWarning::UnknownSource(map.source_name.clone()));
+                    continue;
+                };
                 for var_map in &map.variable_maps {
-                    let variable = source.get_mut(&var_map.variable_name).unwrap(); // XXX unwrap
-                    for datum_map in &var_map.datum_maps {
-                        // XXX two possibly out-of-range indexes below
-                        variable[datum_map.value_idx].value =
-                            Datum::String(strings.labels[datum_map.label_idx].label.clone());
+                    let Some(variable) = source.get_mut(&var_map.variable_name) else {
+                        warn(LegacyBinWarning::UnknownVariable {
+                            source_name: map.source_name.clone(),
+                            variable: var_map.variable_name.clone(),
+                        });
+                        continue;
+                    };
+                    for (datum_idx, datum_map) in var_map.datum_maps.iter().enumerate() {
+                        let Some(label) = strings.labels.get(datum_map.label_idx) else {
+                            warn(LegacyBinWarning::OutOfRangeLabelIdx {
+                                source_name: map.source_name.clone(),
+                                variable: var_map.variable_name.clone(),
+                                datum_idx,
+                                label_idx: datum_map.label_idx,
+                                n_labels: strings.labels.len(),
+                            });
+                            continue;
+                        };
+                        let Some(value) = variable.get_mut(datum_map.value_idx) else {
+                            warn(LegacyBinWarning::OutOfRangeValueIdx {
+                                source_name: map.source_name.clone(),
+                                variable: var_map.variable_name.clone(),
+                                datum_idx,
+                                value_idx: datum_map.value_idx,
+                                n_values: variable.len(),
+                            });
+                            continue;
+                        };
+                        value.value = Datum::String(label.label.clone());
                     }
                 }
             }
@@ -279,18 +350,18 @@ struct Label {
 
 /// Parses a UTF-8 string preceded by a 32-bit length.
 #[binrw::parser(reader, endian)]
-pub(super) fn parse_utf8_string() -> BinResult<String> {
+fn parse_utf8_string() -> BinResult<String> {
     Ok(U32String::read_options(reader, endian, ())?.decode(UTF_8))
 }
 
 /// Parses a UTF-8 string that is exactly `n` bytes long and whose contents end
 /// at the first null byte.
 #[binrw::parser(reader)]
-pub(super) fn parse_fixed_utf8_string(n: usize) -> BinResult<String> {
+fn parse_fixed_utf8_string(n: usize) -> BinResult<String> {
     let mut buf = vec![0; n];
     reader.read_exact(&mut buf)?;
-    let len = buf.iter().take_while(|b| **b != 0).count();
-    Ok(
-        std::str::from_utf8(&buf[..len]).unwrap().into(), // XXX unwrap
-    )
+    if let Some(null) = buf.iter().position(|b| *b == 0) {
+        buf.truncate(null);
+    }
+    Ok(UTF_8.decode_without_bom_handling(&buf).0.into_owned())
 }
index 5987b62b6143806c767c2aedddb54f3bc0dcb6c9..5b27c48eb3cb22399466644bf25fcd3d42287e09 100644 (file)
@@ -202,7 +202,6 @@ impl Visualization {
         mut look: Look,
     ) -> Result<PivotTable, super::Error> {
         let mut extension = None;
-        let mut user_source = None;
         let mut source_variables = Vec::new();
         let mut derived_variables = Vec::new();
         let mut graph = None;
@@ -212,7 +211,7 @@ impl Visualization {
         for child in &self.children {
             match child {
                 VisChild::Extension(e) => extension = Some(e),
-                VisChild::UserSource(us) => user_source = Some(us),
+                VisChild::UserSource(_) => (),
                 VisChild::SourceVariable(source_variable) => source_variables.push(source_variable),
                 VisChild::DerivedVariable(derived_variable) => {
                     derived_variables.push(derived_variable)
@@ -244,9 +243,6 @@ impl Visualization {
             }
         }
         let Some(graph) = graph else { todo!() };
-        let Some(_user_source) = user_source else {
-            todo!()
-        };
 
         let mut axes = HashMap::new();
         let mut major_ticks = HashMap::new();
index 9dcd90c52462020833b02c6d7acdc12578dc4444..ae7c11fce98c7c0e647ce8870219a10cd211cf3b 100644 (file)
@@ -310,7 +310,12 @@ impl Table {
                     "While parsing {bin_member_name:?} as legacy binary SPV member"
                 ))
             })?;
-            let data = legacy_bin.decode();
+            let data = legacy_bin.decode(&mut |w| {
+                warn(Warning {
+                    member: bin_member_name.clone(),
+                    details: WarningDetails::LegacyBinWarning(w),
+                })
+            });
             drop(bin_member);
 
             let member = BufReader::new(archive.by_name(&xml_member_name)?);