work on legacy output
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 21 Dec 2025 18:13:24 +0000 (10:13 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 21 Dec 2025 18:13:24 +0000 (10:13 -0800)
rust/pspp/src/spv/read/legacy_xml.rs

index b1eba2b86e8250e38f475e93c3e5af0110f6a274..3ce50353a24fcac888aae63131b0d9e3fe630276 100644 (file)
@@ -27,6 +27,7 @@ use std::{
 use chrono::{NaiveDateTime, NaiveTime};
 use enum_map::{Enum, EnumMap};
 use hashbrown::HashSet;
+use itertools::Itertools;
 use ordered_float::OrderedFloat;
 use serde::Deserialize;
 
@@ -434,6 +435,9 @@ impl Visualization {
                 /// The category we've built so far.
                 category: Category,
 
+                /// The index in the series of one example of this category.
+                index: usize,
+
                 /// The range of leaf indexes covered by `category`.
                 ///
                 /// If `category` is a leaf, the range has a length of 1.
@@ -446,21 +450,21 @@ impl Visualization {
 
             // Make leaf categories.
             let mut coordinate_to_index = HashMap::new();
-            let mut cats = Vec::new();
+            let mut cats = Vec::<CatBuilder>::new();
             for (index, value) in variables[0].values.iter().enumerate() {
-                let Some(row) = value.category() else {
-                    continue;
-                };
-                if row != cats.len() {
-                    continue;
+                if let Some(coordinate) = value.category()
+                    && !coordinate_to_index.contains_key(&coordinate)
+                {
+                    coordinate_to_index.insert(coordinate, CategoryLocator::new_leaf(cats.len()));
+                    cats.push(CatBuilder {
+                        category: Category::from(Leaf::new(
+                            variables[0].new_name(value, footnotes),
+                        )),
+                        index,
+                        leaves: cats.len()..cats.len() + 1,
+                        location: CategoryLocator::new_leaf(cats.len()),
+                    });
                 }
-                coordinate_to_index.insert(row, CategoryLocator::new_leaf(index));
-                let name = variables[0].new_name(value, footnotes);
-                cats.push(CatBuilder {
-                    category: Category::from(Leaf::new(name)),
-                    leaves: cats.len()..cats.len() + 1,
-                    location: CategoryLocator::new_leaf(cats.len()),
-                });
             }
             *variables[0].coordinate_to_index.borrow_mut() = coordinate_to_index;
 
@@ -470,15 +474,12 @@ impl Visualization {
                 let mut next_cats = Vec::with_capacity(cats.len());
                 let mut start = 0;
                 for end in 1..=cats.len() {
-                    let dv1 = &variable.values[cats[start].leaves.start];
-                    if end < cats.len()
-                        && variable.values[cats[end].leaves.clone()]
-                            .iter()
-                            .all(|dv| &dv.value == &dv1.value)
-                    {
+                    let dv1 = &variable.values[cats[start].index];
+                    if end < cats.len() && &variable.values[cats[end].index].value == &dv1.value {
+                        println!("categories {start}..={end} have same value {dv1:?}");
                     } else {
                         let name = variable.map.lookup(dv1);
-                        let next_cat = if end - start > 1 || name.is_number_or(|s| s.is_empty()) {
+                        let next_cat = if end - start > 1 || name.is_number_or(|s| !s.is_empty()) {
                             let name = variable.new_name(dv1, footnotes);
                             let mut group = Group::new(name);
                             for i in start..end {
@@ -486,6 +487,7 @@ impl Visualization {
                             }
                             CatBuilder {
                                 category: Category::from(group),
+                                index: cats[start].index,
                                 leaves: cats[start].leaves.start..cats[end - 1].leaves.end,
                                 location: cats[start].location.parent(),
                             }
@@ -661,7 +663,22 @@ impl Visualization {
             coords.clear();
             for dim in &dims {
                 // XXX indexing of values, and unwrap
-                coords.push(dim.coordinate.values[i].category().unwrap());
+                let coordinate = dim.coordinate.values[i].category().unwrap();
+                let Some(index) = dim
+                    .coordinate
+                    .coordinate_to_index
+                    .borrow()
+                    .get(&coordinate)
+                    .and_then(CategoryLocator::as_leaf)
+                else {
+                    panic!("can't find {coordinate}") // XXX
+                };
+                debug_assert!(
+                    index < dim.dimension.len(),
+                    "{index}, {}",
+                    dim.dimension.len()
+                );
+                coords.push(index);
             }
 
             let format = if let Some(cell_formats) = &cell_formats {