From 74e7bad286fa7387b24e36105b6d7ad81474ea20 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 21 Dec 2025 09:04:37 -0800 Subject: [PATCH] Work on reading legacy tables. --- rust/pspp/src/cli/show_spv.rs | 51 ++++++++++++++++++++++---- rust/pspp/src/output.rs | 12 +++--- rust/pspp/src/output/pivot/look_xml.rs | 12 +++--- rust/pspp/src/output/pivot/tlo.rs | 8 ++-- rust/pspp/src/spv.rs | 2 +- rust/pspp/src/spv/read.rs | 9 +++-- rust/pspp/src/spv/read/legacy_bin.rs | 18 ++++++++- rust/pspp/src/spv/read/legacy_xml.rs | 32 +++++++--------- 8 files changed, 93 insertions(+), 51 deletions(-) diff --git a/rust/pspp/src/cli/show_spv.rs b/rust/pspp/src/cli/show_spv.rs index 03acc6c0d4..e2b2868b92 100644 --- a/rust/pspp/src/cli/show_spv.rs +++ b/rust/pspp/src/cli/show_spv.rs @@ -15,9 +15,17 @@ // this program. If not, see . use anyhow::Result; +use binrw::{BinRead, error::ContextExt}; use clap::{Args, ValueEnum}; -use pspp::output::{Criteria, Item}; -use std::{fmt::Display, path::PathBuf}; +use pspp::{ + output::{Criteria, Item, ItemRefIterator, SpvMembers}, + spv::legacy_bin::LegacyBin, +}; +use std::{ + fmt::Display, + io::{Cursor, Read}, + path::PathBuf, +}; /// Show information about SPSS viewer files (SPV files). #[derive(Args, Clone, Debug)] @@ -116,13 +124,40 @@ impl ShowSpv { Ok(()) } Mode::LegacyData => { - let item = pspp::spv::ReadOptions::new(|e| eprintln!("{e}")) + let mut spv_file = pspp::spv::ReadOptions::new(|e| eprintln!("{e}")) .with_password(self.password) - .open_file(&self.input)? - .into_items(); - let items = self.criteria.apply(item); - for child in items {} - todo!() + .open_file(&self.input)?; + + let items = self.criteria.apply(spv_file.items); + for item in items { + for item in ItemRefIterator::with_hidden(&item) { + if let Some(spv_info) = dbg!(&item.spv_info) + && let Some(members) = &spv_info.members + && let SpvMembers::LegacyTable { xml: _, binary } = &members + { + let mut bin_member = spv_file.archive.by_name(&binary)?; + let mut bin_data = Vec::with_capacity(bin_member.size() as usize); + bin_member.read_to_end(&mut bin_data)?; + let mut cursor = Cursor::new(bin_data); + let legacy_bin = LegacyBin::read(&mut cursor).map_err(|e| { + e.with_message(format!( + "While parsing {binary:?} as legacy binary SPV member" + )) + })?; + let data = legacy_bin.decode(); + for (source, variables) in &data { + println!("source {source:?}"); + for (variable, values) in variables { + println!("\tvariable {variable:?}"); + for (index, value) in values.into_iter().enumerate() { + println!("\t\t{index} = {:?}", &value.value); + } + } + } + } + } + } + Ok(()) } Mode::GetTableLook => todo!(), Mode::ConvertTableLook => todo!(), diff --git a/rust/pspp/src/output.rs b/rust/pspp/src/output.rs index d0b29c3aab..76b45cb9f9 100644 --- a/rust/pspp/src/output.rs +++ b/rust/pspp/src/output.rs @@ -645,13 +645,13 @@ impl SpvInfo { /// Identifies ZIP file members for one kind of output item in an SPV file. #[derive(Clone, Debug)] pub enum SpvMembers { - /// Light detail members. - Light( + /// Light table detail members. + LightTable( /// `.bin` member name. String, ), - /// Legacy detail members. - Legacy { + /// Legacy table detail members. + LegacyTable { /// `.xml` member name. xml: String, /// `.bin` member name. @@ -676,8 +676,8 @@ pub enum SpvMembers { impl SpvMembers { pub fn iter(&self) -> impl Iterator { let (a, b, c) = match self { - SpvMembers::Light(a) => (Some(a), None, None), - SpvMembers::Legacy { xml, binary } => (Some(xml), Some(binary), None), + SpvMembers::LightTable(a) => (Some(a), None, None), + SpvMembers::LegacyTable { xml, binary } => (Some(xml), Some(binary), None), SpvMembers::Image(a) => (Some(a), None, None), SpvMembers::Graph { data, xml, csv } => (data.as_ref(), Some(xml), csv.as_ref()), }; diff --git a/rust/pspp/src/output/pivot/look_xml.rs b/rust/pspp/src/output/pivot/look_xml.rs index a65fe438e2..76eb9c2a47 100644 --- a/rust/pspp/src/output/pivot/look_xml.rs +++ b/rust/pspp/src/output/pivot/look_xml.rs @@ -19,13 +19,11 @@ use std::{fmt::Debug, num::ParseFloatError, str::FromStr}; use enum_map::enum_map; use serde::{Deserialize, de::Visitor}; -use crate::{ - output::pivot::{ - Axis2, FootnoteMarkerPosition, FootnoteMarkerType, - look::{ - self, Area, AreaStyle, Border, BorderStyle, BoxBorder, Color, HeadingRegion, HorzAlign, - LabelPosition, Look, RowColBorder, RowParity, VertAlign, - }, +use crate::output::pivot::{ + Axis2, FootnoteMarkerPosition, FootnoteMarkerType, + look::{ + self, Area, AreaStyle, Border, BorderStyle, BoxBorder, Color, HeadingRegion, HorzAlign, + LabelPosition, Look, RowColBorder, RowParity, VertAlign, }, }; use thiserror::Error as ThisError; diff --git a/rust/pspp/src/output/pivot/tlo.rs b/rust/pspp/src/output/pivot/tlo.rs index a7efd856e2..28f497232c 100644 --- a/rust/pspp/src/output/pivot/tlo.rs +++ b/rust/pspp/src/output/pivot/tlo.rs @@ -16,11 +16,9 @@ use std::{fmt::Debug, io::Cursor}; -use crate::{ - output::pivot::{ - Axis2, FootnoteMarkerPosition, FootnoteMarkerType, - look::{self, Border, BoxBorder, HeadingRegion, LabelPosition, RowColBorder}, - }, +use crate::output::pivot::{ + Axis2, FootnoteMarkerPosition, FootnoteMarkerType, + look::{self, Border, BoxBorder, HeadingRegion, LabelPosition, RowColBorder}, }; use crate::output::pivot::look::{Area, BorderStyle, Color, HorzAlign, Look, Stroke, VertAlign}; diff --git a/rust/pspp/src/spv.rs b/rust/pspp/src/spv.rs index 0e34b9f383..a9868f7041 100644 --- a/rust/pspp/src/spv.rs +++ b/rust/pspp/src/spv.rs @@ -30,5 +30,5 @@ mod read; mod write; -pub use read::{Error, ReadOptions, SpvFile, html}; +pub use read::{Error, ReadOptions, SpvFile, html, legacy_bin}; pub use write::Writer; diff --git a/rust/pspp/src/spv/read.rs b/rust/pspp/src/spv/read.rs index 611dd70ff2..485f859ef5 100644 --- a/rust/pspp/src/spv/read.rs +++ b/rust/pspp/src/spv/read.rs @@ -47,7 +47,7 @@ use crate::{ mod css; pub mod html; -mod legacy_bin; +pub mod legacy_bin; mod legacy_xml; mod light; @@ -737,8 +737,9 @@ impl Table { })?; let pivot_table = table.decode(&mut *warning.borrow_mut()); Ok(pivot_table.into_item().with_spv_info( - SpvInfo::new(structure_member) - .with_members(SpvMembers::Light(self.table_structure.data_path.clone())), + SpvInfo::new(structure_member).with_members(SpvMembers::LightTable( + self.table_structure.data_path.clone(), + )), )) } Some(xml_member_name) => { @@ -772,7 +773,7 @@ impl Table { )?; Ok(pivot_table.into_item().with_spv_info( - SpvInfo::new(structure_member).with_members(SpvMembers::Legacy { + SpvInfo::new(structure_member).with_members(SpvMembers::LegacyTable { xml: xml_member_name.clone(), binary: bin_member_name.clone(), }), diff --git a/rust/pspp/src/spv/read/legacy_bin.rs b/rust/pspp/src/spv/read/legacy_bin.rs index a28da59cf4..288c370701 100644 --- a/rust/pspp/src/spv/read/legacy_bin.rs +++ b/rust/pspp/src/spv/read/legacy_bin.rs @@ -1,3 +1,4 @@ +//! Legacy binary data. use std::{ collections::HashMap, io::{Read, Seek, SeekFrom}, @@ -14,6 +15,7 @@ use crate::{ spv::read::light::{U32String, decode_format, parse_vec}, }; +/// Legacy binary data. #[binread] #[br(little)] #[derive(Debug)] @@ -33,6 +35,8 @@ pub struct LegacyBin { } impl LegacyBin { + /// Decodes legacy binary data into a map from a series name to a map of + /// variables, which in turn contains a vector of [DataValue]s. pub fn decode(&self) -> HashMap>> { let mut sources = HashMap::new(); for (metadata, data) in self.metadata.iter().zip(&self.data) { @@ -69,13 +73,21 @@ impl LegacyBin { } } +/// One data value. #[derive(Clone, Debug)] pub struct DataValue { + /// Optional index. + /// + /// This is always `None` as initially decoded. pub index: Option, + + /// Data value. pub value: Datum, } impl DataValue { + /// Category index, if any. This is either the numeric value of the datum, + /// if there is one, falling back to the index. pub fn category(&self) -> Option { match &self.value { Datum::Number(number) => *number, @@ -84,7 +96,10 @@ impl DataValue { .and_then(|v| (v >= 0.0 && v < usize::MAX as f64).then_some(v as usize)) } - // This should probably be a method on some hypothetical FormatMap. + /// Interprets this data value as a [Format], first by looking it up in + /// `format_map` and otherwise by interpreting it as a [Format] directly. + /// + /// This should probably be a method on some hypothetical FormatMap. pub fn as_format(&self, format_map: &HashMap) -> Format { let f = match &self.value { Datum::Number(Some(number)) => *number as i64, @@ -97,6 +112,7 @@ impl DataValue { } } + /// Returns this data value interpreted using `format`. pub fn as_pivot_value(&self, format: Format) -> Value { if format.type_().category() == Category::Date && let Some(s) = self.value.as_string() diff --git a/rust/pspp/src/spv/read/legacy_xml.rs b/rust/pspp/src/spv/read/legacy_xml.rs index 1c5de47f69..b1eba2b86e 100644 --- a/rust/pspp/src/spv/read/legacy_xml.rs +++ b/rust/pspp/src/spv/read/legacy_xml.rs @@ -429,7 +429,7 @@ impl Visualization { .map(|(series, _level)| *series) .collect::>(); - #[derive(Clone)] + #[derive(Clone, Debug)] struct CatBuilder { /// The category we've built so far. category: Category, @@ -451,6 +451,9 @@ impl Visualization { let Some(row) = value.category() else { continue; }; + if row != cats.len() { + continue; + } coordinate_to_index.insert(row, CategoryLocator::new_leaf(index)); let name = variables[0].new_name(value, footnotes); cats.push(CatBuilder { @@ -462,21 +465,21 @@ impl Visualization { *variables[0].coordinate_to_index.borrow_mut() = coordinate_to_index; // Now group them, in one pass per grouping variable, innermost first. - for j in 1..variables.len() { + for variable in &variables[1..] { let mut coordinate_to_index = HashMap::new(); let mut next_cats = Vec::with_capacity(cats.len()); let mut start = 0; for end in 1..=cats.len() { - let dv1 = &variables[j].values[cats[start].leaves.start]; + let dv1 = &variable.values[cats[start].leaves.start]; if end < cats.len() - && variables[j].values[cats[end].leaves.clone()] + && variable.values[cats[end].leaves.clone()] .iter() .all(|dv| &dv.value == &dv1.value) { } else { - let name = variables[j].map.lookup(dv1); + let name = variable.map.lookup(dv1); let next_cat = if end - start > 1 || name.is_number_or(|s| s.is_empty()) { - let name = variables[j].new_name(dv1, footnotes); + let name = variable.new_name(dv1, footnotes); let mut group = Group::new(name); for i in start..end { group.push(cats[i].category.clone()); @@ -495,7 +498,7 @@ impl Visualization { start = end; } } - *variables[j].coordinate_to_index.borrow_mut() = coordinate_to_index; + *variable.coordinate_to_index.borrow_mut() = coordinate_to_index; cats = next_cats; } @@ -658,17 +661,7 @@ impl Visualization { coords.clear(); for dim in &dims { // XXX indexing of values, and unwrap - let coordinate = dim.coordinate.values[i].category().unwrap(); - let Some(index) = dim - .coordinate - .coordinate_to_index - .borrow() - .get(&coordinate) - .and_then(CategoryLocator::as_leaf) - else { - panic!("can't find {coordinate}") // XXX - }; - coords.push(index); + coords.push(dim.coordinate.values[i].category().unwrap()); } let format = if let Some(cell_formats) = &cell_formats { @@ -2337,7 +2330,7 @@ struct Labeling { #[serde(rename = "@variable")] variable: String, - #[serde(default)] + #[serde(rename = "$value", default)] children: Vec, } @@ -2376,6 +2369,7 @@ struct Formatting { #[serde(rename = "@variable")] variable: String, + #[serde(rename = "$value")] mappings: Vec, } -- 2.30.2