From a5267890ffd32c4f59b324142b30111b0cc0ff28 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 1 Jan 2026 15:07:02 -0800 Subject: [PATCH] cleanup --- rust/pspp/src/cli/show_spv.rs | 2 +- rust/pspp/src/spv/read.rs | 15 ++++- rust/pspp/src/spv/read/legacy_bin.rs | 97 ++++++++++++++++++++++++---- rust/pspp/src/spv/read/legacy_xml.rs | 6 +- rust/pspp/src/spv/read/structure.rs | 7 +- 5 files changed, 104 insertions(+), 23 deletions(-) diff --git a/rust/pspp/src/cli/show_spv.rs b/rust/pspp/src/cli/show_spv.rs index eeeeb92761..cdc46a8ff4 100644 --- a/rust/pspp/src/cli/show_spv.rs +++ b/rust/pspp/src/cli/show_spv.rs @@ -163,7 +163,7 @@ impl ShowSpv { "While parsing {binary:?} as legacy binary SPV member" )) })?; - let data = legacy_bin.decode(); + let data = legacy_bin.decode(&mut |w| eprintln!("{w}")); let n_values = data .values() .flat_map(|map| map.values()) diff --git a/rust/pspp/src/spv/read.rs b/rust/pspp/src/spv/read.rs index 15d2127ba6..5b7de67df2 100644 --- a/rust/pspp/src/spv/read.rs +++ b/rust/pspp/src/spv/read.rs @@ -27,11 +27,11 @@ use zip::{ZipArchive, result::ZipError}; use crate::{ crypto::EncryptedReader, - output::{Item, page::PageSetup}, - spv::read::{ + output::{page::PageSetup, Item}, + spv::{legacy_bin::LegacyBinWarning, read::{ light::LightWarning, structure::{OutlineItem, StructureMember}, - }, + }}, }; mod css; @@ -68,6 +68,9 @@ pub enum WarningDetails { /// {0} LightWarning(LightWarning), + /// {0} + LegacyBinWarning(LegacyBinWarning), + /// Unknown page orientation {0:?}. UnknownOrientation(String), } @@ -241,6 +244,12 @@ pub enum Error { error: serde_path_to_error::Error, }, + /* + /// Error reading {member_name}: {error} + LegacyTableError { + /// The name of the file inside the ZIP file that caused the error. + member_name: String, + },*/ /// Graphs not yet implemented. GraphTodo, diff --git a/rust/pspp/src/spv/read/legacy_bin.rs b/rust/pspp/src/spv/read/legacy_bin.rs index ff86015f03..78c5d14857 100644 --- a/rust/pspp/src/spv/read/legacy_bin.rs +++ b/rust/pspp/src/spv/read/legacy_bin.rs @@ -6,6 +6,7 @@ use std::{ use binrw::{BinRead, BinResult, binread}; use chrono::{NaiveDateTime, NaiveTime}; +use displaydoc::Display; use encoding_rs::UTF_8; use indexmap::IndexMap; @@ -16,6 +17,46 @@ use crate::{ spv::read::light::{U32String, decode_format, parse_vec}, }; +/// A warning decoding a legacy binary member. +#[derive(Clone, Debug, Display, thiserror::Error)] +pub enum LegacyBinWarning { + /// String map refers to unknown source {0:?}. + UnknownSource(String), + /// String map for source {source_name:?} refers to unknown variable {variable:?}. + UnknownVariable { + /// Source name. + source_name: String, + /// Variable name. + variable: String, + }, + /// Datum mapping {datum_idx} for variable {variable:?} in source {source_name:?} has label index {label_idx} but there are only {n_labels} labels. + OutOfRangeLabelIdx { + /// Source name. + source_name: String, + /// Variable name. + variable: String, + /// Index into variable's datum map. + datum_idx: usize, + /// Too-large index into labels. + label_idx: usize, + /// Number of labels. + n_labels: usize, + }, + /// Datum mapping {datum_idx} for variable {variable:?} in source {source_name:?} has value index {value_idx} but the variable has only {n_values} values. + OutOfRangeValueIdx { + /// Source name. + source_name: String, + /// Variable name. + variable: String, + /// Index into variable's datum map. + datum_idx: usize, + /// Too-large index into values. + value_idx: usize, + /// Number of values in `variable`. + n_values: usize, + }, +} + /// Legacy binary data. #[binread] #[br(little)] @@ -38,7 +79,10 @@ pub struct LegacyBin { impl LegacyBin { /// Decodes legacy binary data into a map from a series name to a map of /// variables, which in turn contains a vector of [DataValue]s. - pub fn decode(&self) -> IndexMap>> { + pub fn decode( + &self, + warn: &mut dyn FnMut(LegacyBinWarning), + ) -> IndexMap>> { let mut sources = IndexMap::new(); for (metadata, data) in self.metadata.iter().zip(&self.data) { let mut variables = IndexMap::new(); @@ -59,13 +103,40 @@ impl LegacyBin { } if let Some(strings) = &self.strings { for map in &strings.source_maps { - let source = sources.get_mut(&map.source_name).unwrap(); // XXX unwrap + let Some(source) = sources.get_mut(&map.source_name) else { + warn(LegacyBinWarning::UnknownSource(map.source_name.clone())); + continue; + }; for var_map in &map.variable_maps { - let variable = source.get_mut(&var_map.variable_name).unwrap(); // XXX unwrap - for datum_map in &var_map.datum_maps { - // XXX two possibly out-of-range indexes below - variable[datum_map.value_idx].value = - Datum::String(strings.labels[datum_map.label_idx].label.clone()); + let Some(variable) = source.get_mut(&var_map.variable_name) else { + warn(LegacyBinWarning::UnknownVariable { + source_name: map.source_name.clone(), + variable: var_map.variable_name.clone(), + }); + continue; + }; + for (datum_idx, datum_map) in var_map.datum_maps.iter().enumerate() { + let Some(label) = strings.labels.get(datum_map.label_idx) else { + warn(LegacyBinWarning::OutOfRangeLabelIdx { + source_name: map.source_name.clone(), + variable: var_map.variable_name.clone(), + datum_idx, + label_idx: datum_map.label_idx, + n_labels: strings.labels.len(), + }); + continue; + }; + let Some(value) = variable.get_mut(datum_map.value_idx) else { + warn(LegacyBinWarning::OutOfRangeValueIdx { + source_name: map.source_name.clone(), + variable: var_map.variable_name.clone(), + datum_idx, + value_idx: datum_map.value_idx, + n_values: variable.len(), + }); + continue; + }; + value.value = Datum::String(label.label.clone()); } } } @@ -279,18 +350,18 @@ struct Label { /// Parses a UTF-8 string preceded by a 32-bit length. #[binrw::parser(reader, endian)] -pub(super) fn parse_utf8_string() -> BinResult { +fn parse_utf8_string() -> BinResult { Ok(U32String::read_options(reader, endian, ())?.decode(UTF_8)) } /// Parses a UTF-8 string that is exactly `n` bytes long and whose contents end /// at the first null byte. #[binrw::parser(reader)] -pub(super) fn parse_fixed_utf8_string(n: usize) -> BinResult { +fn parse_fixed_utf8_string(n: usize) -> BinResult { let mut buf = vec![0; n]; reader.read_exact(&mut buf)?; - let len = buf.iter().take_while(|b| **b != 0).count(); - Ok( - std::str::from_utf8(&buf[..len]).unwrap().into(), // XXX unwrap - ) + if let Some(null) = buf.iter().position(|b| *b == 0) { + buf.truncate(null); + } + Ok(UTF_8.decode_without_bom_handling(&buf).0.into_owned()) } diff --git a/rust/pspp/src/spv/read/legacy_xml.rs b/rust/pspp/src/spv/read/legacy_xml.rs index 5987b62b61..5b27c48eb3 100644 --- a/rust/pspp/src/spv/read/legacy_xml.rs +++ b/rust/pspp/src/spv/read/legacy_xml.rs @@ -202,7 +202,6 @@ impl Visualization { mut look: Look, ) -> Result { let mut extension = None; - let mut user_source = None; let mut source_variables = Vec::new(); let mut derived_variables = Vec::new(); let mut graph = None; @@ -212,7 +211,7 @@ impl Visualization { for child in &self.children { match child { VisChild::Extension(e) => extension = Some(e), - VisChild::UserSource(us) => user_source = Some(us), + VisChild::UserSource(_) => (), VisChild::SourceVariable(source_variable) => source_variables.push(source_variable), VisChild::DerivedVariable(derived_variable) => { derived_variables.push(derived_variable) @@ -244,9 +243,6 @@ impl Visualization { } } let Some(graph) = graph else { todo!() }; - let Some(_user_source) = user_source else { - todo!() - }; let mut axes = HashMap::new(); let mut major_ticks = HashMap::new(); diff --git a/rust/pspp/src/spv/read/structure.rs b/rust/pspp/src/spv/read/structure.rs index 9dcd90c524..ae7c11fce9 100644 --- a/rust/pspp/src/spv/read/structure.rs +++ b/rust/pspp/src/spv/read/structure.rs @@ -310,7 +310,12 @@ impl Table { "While parsing {bin_member_name:?} as legacy binary SPV member" )) })?; - let data = legacy_bin.decode(); + let data = legacy_bin.decode(&mut |w| { + warn(Warning { + member: bin_member_name.clone(), + details: WarningDetails::LegacyBinWarning(w), + }) + }); drop(bin_member); let member = BufReader::new(archive.by_name(&xml_member_name)?); -- 2.30.2