From 6542f06844b7efa1020bc561a493a182a923249f Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 21 Dec 2025 10:13:24 -0800 Subject: [PATCH] work on legacy output --- rust/pspp/src/spv/read/legacy_xml.rs | 59 ++++++++++++++++++---------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/rust/pspp/src/spv/read/legacy_xml.rs b/rust/pspp/src/spv/read/legacy_xml.rs index b1eba2b86e..3ce50353a2 100644 --- a/rust/pspp/src/spv/read/legacy_xml.rs +++ b/rust/pspp/src/spv/read/legacy_xml.rs @@ -27,6 +27,7 @@ use std::{ use chrono::{NaiveDateTime, NaiveTime}; use enum_map::{Enum, EnumMap}; use hashbrown::HashSet; +use itertools::Itertools; use ordered_float::OrderedFloat; use serde::Deserialize; @@ -434,6 +435,9 @@ impl Visualization { /// The category we've built so far. category: Category, + /// The index in the series of one example of this category. + index: usize, + /// The range of leaf indexes covered by `category`. /// /// If `category` is a leaf, the range has a length of 1. @@ -446,21 +450,21 @@ impl Visualization { // Make leaf categories. 
let mut coordinate_to_index = HashMap::new(); - let mut cats = Vec::new(); + let mut cats = Vec::<CatBuilder>::new(); for (index, value) in variables[0].values.iter().enumerate() { - let Some(row) = value.category() else { - continue; - }; - if row != cats.len() { - continue; + if let Some(coordinate) = value.category() + && !coordinate_to_index.contains_key(&coordinate) + { + coordinate_to_index.insert(coordinate, CategoryLocator::new_leaf(cats.len())); + cats.push(CatBuilder { + category: Category::from(Leaf::new( + variables[0].new_name(value, footnotes), + )), + index, + leaves: cats.len()..cats.len() + 1, + location: CategoryLocator::new_leaf(cats.len()), + }); } - coordinate_to_index.insert(row, CategoryLocator::new_leaf(index)); - let name = variables[0].new_name(value, footnotes); - cats.push(CatBuilder { - category: Category::from(Leaf::new(name)), - leaves: cats.len()..cats.len() + 1, - location: CategoryLocator::new_leaf(cats.len()), - }); } *variables[0].coordinate_to_index.borrow_mut() = coordinate_to_index; @@ -470,15 +474,12 @@ impl Visualization { let mut next_cats = Vec::with_capacity(cats.len()); let mut start = 0; for end in 1..=cats.len() { - let dv1 = &variable.values[cats[start].leaves.start]; - if end < cats.len() - && variable.values[cats[end].leaves.clone()] - .iter() - .all(|dv| &dv.value == &dv1.value) - { + let dv1 = &variable.values[cats[start].index]; + if end < cats.len() && &variable.values[cats[end].index].value == &dv1.value { + println!("categories {start}..={end} have same value {dv1:?}"); } else { let name = variable.map.lookup(dv1); - let next_cat = if end - start > 1 || name.is_number_or(|s| s.is_empty()) { + let next_cat = if end - start > 1 || name.is_number_or(|s| !s.is_empty()) { let name = variable.new_name(dv1, footnotes); let mut group = Group::new(name); for i in start..end { @@ -486,6 +487,7 @@ impl Visualization { } CatBuilder { category: Category::from(group), + index: cats[start].index, leaves: 
cats[start].leaves.start..cats[end - 1].leaves.end, location: cats[start].location.parent(), } @@ -661,7 +663,22 @@ impl Visualization { coords.clear(); for dim in &dims { // XXX indexing of values, and unwrap - coords.push(dim.coordinate.values[i].category().unwrap()); + let coordinate = dim.coordinate.values[i].category().unwrap(); + let Some(index) = dim + .coordinate + .coordinate_to_index + .borrow() + .get(&coordinate) + .and_then(CategoryLocator::as_leaf) + else { + panic!("can't find {coordinate}") // XXX + }; + debug_assert!( + index < dim.dimension.len(), + "{index}, {}", + dim.dimension.len() + ); + coords.push(index); } let format = if let Some(cell_formats) = &cell_formats { -- 2.30.2