Work on reading legacy tables.
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 21 Dec 2025 17:04:37 +0000 (09:04 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 21 Dec 2025 17:04:37 +0000 (09:04 -0800)
rust/pspp/src/cli/show_spv.rs
rust/pspp/src/output.rs
rust/pspp/src/output/pivot/look_xml.rs
rust/pspp/src/output/pivot/tlo.rs
rust/pspp/src/spv.rs
rust/pspp/src/spv/read.rs
rust/pspp/src/spv/read/legacy_bin.rs
rust/pspp/src/spv/read/legacy_xml.rs

index 03acc6c0d48da4a7efbc3a95ef84beff06467aea..e2b2868b92f4560c32f06d21cea908944b83d846 100644 (file)
 // this program.  If not, see <http://www.gnu.org/licenses/>.
 
 use anyhow::Result;
+use binrw::{BinRead, error::ContextExt};
 use clap::{Args, ValueEnum};
-use pspp::output::{Criteria, Item};
-use std::{fmt::Display, path::PathBuf};
+use pspp::{
+    output::{Criteria, Item, ItemRefIterator, SpvMembers},
+    spv::legacy_bin::LegacyBin,
+};
+use std::{
+    fmt::Display,
+    io::{Cursor, Read},
+    path::PathBuf,
+};
 
 /// Show information about SPSS viewer files (SPV files).
 #[derive(Args, Clone, Debug)]
@@ -116,13 +124,40 @@ impl ShowSpv {
                 Ok(())
             }
             Mode::LegacyData => {
-                let item = pspp::spv::ReadOptions::new(|e| eprintln!("{e}"))
+                let mut spv_file = pspp::spv::ReadOptions::new(|e| eprintln!("{e}"))
                     .with_password(self.password)
-                    .open_file(&self.input)?
-                    .into_items();
-                let items = self.criteria.apply(item);
-                for child in items {}
-                todo!()
+                    .open_file(&self.input)?;
+
+                let items = self.criteria.apply(spv_file.items);
+                for item in items {
+                    for item in ItemRefIterator::with_hidden(&item) {
+                        if let Some(spv_info) = dbg!(&item.spv_info)
+                            && let Some(members) = &spv_info.members
+                            && let SpvMembers::LegacyTable { xml: _, binary } = &members
+                        {
+                            let mut bin_member = spv_file.archive.by_name(&binary)?;
+                            let mut bin_data = Vec::with_capacity(bin_member.size() as usize);
+                            bin_member.read_to_end(&mut bin_data)?;
+                            let mut cursor = Cursor::new(bin_data);
+                            let legacy_bin = LegacyBin::read(&mut cursor).map_err(|e| {
+                                e.with_message(format!(
+                                    "While parsing {binary:?} as legacy binary SPV member"
+                                ))
+                            })?;
+                            let data = legacy_bin.decode();
+                            for (source, variables) in &data {
+                                println!("source {source:?}");
+                                for (variable, values) in variables {
+                                    println!("\tvariable {variable:?}");
+                                    for (index, value) in values.into_iter().enumerate() {
+                                        println!("\t\t{index} = {:?}", &value.value);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                Ok(())
             }
             Mode::GetTableLook => todo!(),
             Mode::ConvertTableLook => todo!(),
index d0b29c3aab34ae945a56099364f0e440cfd6a42e..76b45cb9f980a992c39d3a7c75b0f9b4042c02cc 100644 (file)
@@ -645,13 +645,13 @@ impl SpvInfo {
 /// Identifies ZIP file members for one kind of output item in an SPV file.
 #[derive(Clone, Debug)]
 pub enum SpvMembers {
-    /// Light detail members.
-    Light(
+    /// Light table detail members.
+    LightTable(
         /// `.bin` member name.
         String,
     ),
-    /// Legacy detail members.
-    Legacy {
+    /// Legacy table detail members.
+    LegacyTable {
         /// `.xml` member name.
         xml: String,
         /// `.bin` member name.
@@ -676,8 +676,8 @@ pub enum SpvMembers {
 impl SpvMembers {
     pub fn iter(&self) -> impl Iterator<Item = &str> {
         let (a, b, c) = match self {
-            SpvMembers::Light(a) => (Some(a), None, None),
-            SpvMembers::Legacy { xml, binary } => (Some(xml), Some(binary), None),
+            SpvMembers::LightTable(a) => (Some(a), None, None),
+            SpvMembers::LegacyTable { xml, binary } => (Some(xml), Some(binary), None),
             SpvMembers::Image(a) => (Some(a), None, None),
             SpvMembers::Graph { data, xml, csv } => (data.as_ref(), Some(xml), csv.as_ref()),
         };
index a65fe438e24c36d6a774499806c11411adcfd079..76eb9c2a4749fc8d7da65491abf5776dbbe4148c 100644 (file)
@@ -19,13 +19,11 @@ use std::{fmt::Debug, num::ParseFloatError, str::FromStr};
 use enum_map::enum_map;
 use serde::{Deserialize, de::Visitor};
 
-use crate::{
-    output::pivot::{
-        Axis2, FootnoteMarkerPosition, FootnoteMarkerType,
-        look::{
-            self, Area, AreaStyle, Border, BorderStyle, BoxBorder, Color, HeadingRegion, HorzAlign,
-            LabelPosition, Look, RowColBorder, RowParity, VertAlign,
-        },
+use crate::output::pivot::{
+    Axis2, FootnoteMarkerPosition, FootnoteMarkerType,
+    look::{
+        self, Area, AreaStyle, Border, BorderStyle, BoxBorder, Color, HeadingRegion, HorzAlign,
+        LabelPosition, Look, RowColBorder, RowParity, VertAlign,
     },
 };
 use thiserror::Error as ThisError;
index a7efd856e205fd5fa499560f2b135f80d47d1e2d..28f497232cf079fe2f028ee6d24065c043e8c4c1 100644 (file)
 
 use std::{fmt::Debug, io::Cursor};
 
-use crate::{
-    output::pivot::{
-        Axis2, FootnoteMarkerPosition, FootnoteMarkerType,
-        look::{self, Border, BoxBorder, HeadingRegion, LabelPosition, RowColBorder},
-    },
+use crate::output::pivot::{
+    Axis2, FootnoteMarkerPosition, FootnoteMarkerType,
+    look::{self, Border, BoxBorder, HeadingRegion, LabelPosition, RowColBorder},
 };
 
 use crate::output::pivot::look::{Area, BorderStyle, Color, HorzAlign, Look, Stroke, VertAlign};
index 0e34b9f383c6f35aceb465b51f71fd7152da719e..a9868f70416b66358b27202b3746747843165c38 100644 (file)
@@ -30,5 +30,5 @@
 mod read;
 mod write;
 
-pub use read::{Error, ReadOptions, SpvFile, html};
+pub use read::{Error, ReadOptions, SpvFile, html, legacy_bin};
 pub use write::Writer;
index 611dd70ff264b11dc85a19f662ca958b983a38c8..485f859ef51219e8b5b132168f600c2c83775c3e 100644 (file)
@@ -47,7 +47,7 @@ use crate::{
 
 mod css;
 pub mod html;
-mod legacy_bin;
+pub mod legacy_bin;
 mod legacy_xml;
 mod light;
 
@@ -737,8 +737,9 @@ impl Table {
                     })?;
                 let pivot_table = table.decode(&mut *warning.borrow_mut());
                 Ok(pivot_table.into_item().with_spv_info(
-                    SpvInfo::new(structure_member)
-                        .with_members(SpvMembers::Light(self.table_structure.data_path.clone())),
+                    SpvInfo::new(structure_member).with_members(SpvMembers::LightTable(
+                        self.table_structure.data_path.clone(),
+                    )),
                 ))
             }
             Some(xml_member_name) => {
@@ -772,7 +773,7 @@ impl Table {
                 )?;
 
                 Ok(pivot_table.into_item().with_spv_info(
-                    SpvInfo::new(structure_member).with_members(SpvMembers::Legacy {
+                    SpvInfo::new(structure_member).with_members(SpvMembers::LegacyTable {
                         xml: xml_member_name.clone(),
                         binary: bin_member_name.clone(),
                     }),
index a28da59cf4856d9fa255058aac20a179af001217..288c3707017b2b05c05162bd22088c3f88b1af53 100644 (file)
@@ -1,3 +1,4 @@
+//! Legacy binary data.
 use std::{
     collections::HashMap,
     io::{Read, Seek, SeekFrom},
@@ -14,6 +15,7 @@ use crate::{
     spv::read::light::{U32String, decode_format, parse_vec},
 };
 
+/// Legacy binary data.
 #[binread]
 #[br(little)]
 #[derive(Debug)]
@@ -33,6 +35,8 @@ pub struct LegacyBin {
 }
 
 impl LegacyBin {
+    /// Decodes legacy binary data into a map from a source name to a map from
+    /// variable names to vectors of [DataValue]s.
     pub fn decode(&self) -> HashMap<String, HashMap<String, Vec<DataValue>>> {
         let mut sources = HashMap::new();
         for (metadata, data) in self.metadata.iter().zip(&self.data) {
@@ -69,13 +73,21 @@ impl LegacyBin {
     }
 }
 
+/// One data value.
 #[derive(Clone, Debug)]
 pub struct DataValue {
+    /// Optional index.
+    ///
+    /// This is always `None` as initially decoded.
     pub index: Option<f64>,
+
+    /// Data value.
     pub value: Datum<String>,
 }
 
 impl DataValue {
+    /// Category index, if any.  This is the numeric value of the datum, if
+    /// there is one, and otherwise the index.
     pub fn category(&self) -> Option<usize> {
         match &self.value {
             Datum::Number(number) => *number,
@@ -84,7 +96,10 @@ impl DataValue {
         .and_then(|v| (v >= 0.0 && v < usize::MAX as f64).then_some(v as usize))
     }
 
-    // This should probably be a method on some hypothetical FormatMap.
+    /// Interprets this data value as a [Format], first by looking it up in
+    /// `format_map` and otherwise by interpreting it as a [Format] directly.
+    ///
+    /// This should probably be a method on some hypothetical FormatMap.
     pub fn as_format(&self, format_map: &HashMap<i64, Format>) -> Format {
         let f = match &self.value {
             Datum::Number(Some(number)) => *number as i64,
@@ -97,6 +112,7 @@ impl DataValue {
         }
     }
 
+    /// Returns this data value interpreted using `format`.
     pub fn as_pivot_value(&self, format: Format) -> Value {
         if format.type_().category() == Category::Date
             && let Some(s) = self.value.as_string()
index 1c5de47f69d7181750c31cd2cebf0432337f100a..b1eba2b86e8250e38f475e93c3e5af0110f6a274 100644 (file)
@@ -429,7 +429,7 @@ impl Visualization {
                 .map(|(series, _level)| *series)
                 .collect::<Vec<_>>();
 
-            #[derive(Clone)]
+            #[derive(Clone, Debug)]
             struct CatBuilder {
                 /// The category we've built so far.
                 category: Category,
@@ -451,6 +451,9 @@ impl Visualization {
                 let Some(row) = value.category() else {
                     continue;
                 };
+                if row != cats.len() {
+                    continue;
+                }
                 coordinate_to_index.insert(row, CategoryLocator::new_leaf(index));
                 let name = variables[0].new_name(value, footnotes);
                 cats.push(CatBuilder {
@@ -462,21 +465,21 @@ impl Visualization {
             *variables[0].coordinate_to_index.borrow_mut() = coordinate_to_index;
 
             // Now group them, in one pass per grouping variable, innermost first.
-            for j in 1..variables.len() {
+            for variable in &variables[1..] {
                 let mut coordinate_to_index = HashMap::new();
                 let mut next_cats = Vec::with_capacity(cats.len());
                 let mut start = 0;
                 for end in 1..=cats.len() {
-                    let dv1 = &variables[j].values[cats[start].leaves.start];
+                    let dv1 = &variable.values[cats[start].leaves.start];
                     if end < cats.len()
-                        && variables[j].values[cats[end].leaves.clone()]
+                        && variable.values[cats[end].leaves.clone()]
                             .iter()
                             .all(|dv| &dv.value == &dv1.value)
                     {
                     } else {
-                        let name = variables[j].map.lookup(dv1);
+                        let name = variable.map.lookup(dv1);
                         let next_cat = if end - start > 1 || name.is_number_or(|s| s.is_empty()) {
-                            let name = variables[j].new_name(dv1, footnotes);
+                            let name = variable.new_name(dv1, footnotes);
                             let mut group = Group::new(name);
                             for i in start..end {
                                 group.push(cats[i].category.clone());
@@ -495,7 +498,7 @@ impl Visualization {
                         start = end;
                     }
                 }
-                *variables[j].coordinate_to_index.borrow_mut() = coordinate_to_index;
+                *variable.coordinate_to_index.borrow_mut() = coordinate_to_index;
                 cats = next_cats;
             }
 
@@ -658,17 +661,7 @@ impl Visualization {
             coords.clear();
             for dim in &dims {
                 // XXX indexing of values, and unwrap
-                let coordinate = dim.coordinate.values[i].category().unwrap();
-                let Some(index) = dim
-                    .coordinate
-                    .coordinate_to_index
-                    .borrow()
-                    .get(&coordinate)
-                    .and_then(CategoryLocator::as_leaf)
-                else {
-                    panic!("can't find {coordinate}") // XXX
-                };
-                coords.push(index);
+                coords.push(dim.coordinate.values[i].category().unwrap());
             }
 
             let format = if let Some(cell_formats) = &cell_formats {
@@ -2337,7 +2330,7 @@ struct Labeling {
     #[serde(rename = "@variable")]
     variable: String,
 
-    #[serde(default)]
+    #[serde(rename = "$value", default)]
     children: Vec<LabelingChild>,
 }
 
@@ -2376,6 +2369,7 @@ struct Formatting {
     #[serde(rename = "@variable")]
     variable: String,
 
+    #[serde(rename = "$value")]
     mappings: Vec<FormatMapping>,
 }