more fixes
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 27 Dec 2025 17:34:00 +0000 (09:34 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 27 Dec 2025 17:34:00 +0000 (09:34 -0800)
rust/pspp/src/spv/read/legacy_xml.rs
rust/pspp/src/spv/read/tests.rs
rust/pspp/src/spv/testdata/legacy13.expected [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy13.spv [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy14.expected [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy14.spv [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy15.expected [new file with mode: 0644]
rust/pspp/src/spv/testdata/legacy15.spv [new file with mode: 0644]

index c7fda83c11782b095b6b64dc7ad3b43be95d25ca..99510240a209623dd5313cacf130b8dbd0a397c5 100644 (file)
@@ -453,28 +453,29 @@ impl Visualization {
             }
 
             // Make leaf categories.
-            let mut coordinate_to_index = HashMap::new();
-            let mut cats = Vec::<CatBuilder>::new();
+            let mut map = BTreeMap::new();
             for (index, value) in variables[0].values.iter().enumerate() {
-                if let Some(coordinate) = value.category()
-                    && !coordinate_to_index.contains_key(&coordinate)
-                {
-                    coordinate_to_index.insert(coordinate, CategoryLocator::new_leaf(cats.len()));
-                    cats.push(CatBuilder {
-                        category: Category::from(Leaf::new(
-                            variables[0].new_name(value, footnotes),
-                        )),
-                        index,
-                        leaves: cats.len()..cats.len() + 1,
-                        location: CategoryLocator::new_leaf(cats.len()),
-                    });
+                if let Some(coordinate) = value.category() {
+                    map.entry(coordinate).or_insert(index);
                 }
             }
+            let mut coordinate_to_index = BTreeMap::new();
+            let mut cats = Vec::<CatBuilder>::new();
+            for (coordinate, index) in map {
+                let value = &variables[0].values[index];
+                coordinate_to_index.insert(coordinate, CategoryLocator::new_leaf(cats.len()));
+                cats.push(CatBuilder {
+                    category: Category::from(Leaf::new(variables[0].new_name(value, footnotes))),
+                    index,
+                    leaves: cats.len()..cats.len() + 1,
+                    location: CategoryLocator::new_leaf(cats.len()),
+                });
+            }
             *variables[0].coordinate_to_index.borrow_mut() = coordinate_to_index;
 
             // Now group them, in one pass per grouping variable, innermost first.
             for variable in &variables[1..] {
-                let mut coordinate_to_index = HashMap::new();
+                let mut coordinate_to_index = BTreeMap::new();
                 let mut next_cats = Vec::with_capacity(cats.len());
                 let mut start = 0;
                 for end in 1..=cats.len() {
@@ -1020,7 +1021,7 @@ struct Series {
     values: Vec<DataValue>,
     map: Map,
     affixes: Vec<Affix>,
-    coordinate_to_index: RefCell<HashMap<usize, CategoryLocator>>,
+    coordinate_to_index: RefCell<BTreeMap<usize, CategoryLocator>>,
     dimension_index: Cell<Option<usize>>,
 }
 
index e9cc08ec64428cc8fedfe5b0f45fa3aee8bf492d..d7cb877056a6c710a72470be53e343963d3b4d9f 100644 (file)
@@ -95,6 +95,13 @@ fn legacy14() {
     test_raw_spvfile("legacy14");
 }
 
+/// Checks that categories are ordered correctly when the first row has some
+/// missing cells (in this case, "Beta" lacks a value in the first row).
+#[test]
+fn legacy15() {
+    test_raw_spvfile("legacy15");
+}
+
 fn test_raw_spvfile(name: &str) {
     let input_filename = Path::new("src/spv/testdata")
         .join(name)
diff --git a/rust/pspp/src/spv/testdata/legacy13.expected b/rust/pspp/src/spv/testdata/legacy13.expected
new file mode 100644 (file)
index 0000000..7e60923
--- /dev/null
@@ -0,0 +1,13 @@
+                                             Chi-Square Tests
+╭────────────────────────────┬────────┬──┬─────────────────────┬────────────────────┬────────────────────╮
+│                            │  Value │df│Asymp. Sig. (2-sided)│Exact Sig. (2-sided)│Exact Sig. (1-sided)│
+├────────────────────────────┼────────┼──┼─────────────────────┼────────────────────┼────────────────────┤
+│Pearson Chi-Square          │9.479[a]│ 1│                 .002│                    │                    │
+│Continuity Correction[b]    │   7.898│ 1│                 .005│                    │                    │
+│Likelihood Ratio            │   9.731│ 1│                 .002│                    │                    │
+│Fisher's Exact Test         │        │  │                     │                .003│                .002│
+│Linear-by-Linear Association│   9.321│ 1│                 .002│                    │                    │
+│N of Valid Cases[b]         │      60│  │                     │                    │                    │
+╰────────────────────────────┴────────┴──┴─────────────────────┴────────────────────┴────────────────────╯
+a. 0 cells (.0%) have expected count less than 5. The minimum expected count is 10.27.
+b. Computed only for a 2x2 table
diff --git a/rust/pspp/src/spv/testdata/legacy13.spv b/rust/pspp/src/spv/testdata/legacy13.spv
new file mode 100644 (file)
index 0000000..a76403f
Binary files /dev/null and b/rust/pspp/src/spv/testdata/legacy13.spv differ
diff --git a/rust/pspp/src/spv/testdata/legacy14.expected b/rust/pspp/src/spv/testdata/legacy14.expected
new file mode 100644 (file)
index 0000000..7494300
--- /dev/null
@@ -0,0 +1,13 @@
+                            Frequencies
+                 Frequency│Percent│Valid Percent│Cumulative Percent
+──────────────────────────┼───────┼─────────────┼──────────────────
+Valid Variable A        44│   55.0│         55.0│              55.0
+     ╶────────────────────┼───────┼─────────────┼──────────────────
+      Variable B        13│   16.3│         16.3│              71.3
+     ╶────────────────────┼───────┼─────────────┼──────────────────
+      Variable C        15│   18.8│         18.8│              90.0
+     ╶────────────────────┼───────┼─────────────┼──────────────────
+      Variable D         8│   10.0│         10.0│             100.0
+     ╶────────────────────┼───────┼─────────────┼──────────────────
+      Total             80│  100.0│        100.0│
+──────────────────────────┴───────┴─────────────┴──────────────────
diff --git a/rust/pspp/src/spv/testdata/legacy14.spv b/rust/pspp/src/spv/testdata/legacy14.spv
new file mode 100644 (file)
index 0000000..023c192
Binary files /dev/null and b/rust/pspp/src/spv/testdata/legacy14.spv differ
diff --git a/rust/pspp/src/spv/testdata/legacy15.expected b/rust/pspp/src/spv/testdata/legacy15.expected
new file mode 100644 (file)
index 0000000..ed45243
--- /dev/null
@@ -0,0 +1,10 @@
+                                 Coefficients[a]
+╭────────────┬────────────────────────────┬─────────────────────────┬──────┬────╮
+│            │ Unstandardized Coefficients│Standardized Coefficients│      │    │
+│            ├────────────┬───────────────┼─────────────────────────┤      │    │
+│Model       │      B     │   Std. Error  │           Beta          │   t  │Sig.│
+├────────────┼────────────┼───────────────┼─────────────────────────┼──────┼────┤
+│1 (Constant)│      59.146│         18.854│                         │ 3.137│.016│
+│  Variable A│       -.664│           .585│                    -.395│-1.136│.293│
+╰────────────┴────────────┴───────────────┴─────────────────────────┴──────┴────╯
+a. Dependent Variable: A
diff --git a/rust/pspp/src/spv/testdata/legacy15.spv b/rust/pspp/src/spv/testdata/legacy15.spv
new file mode 100644 (file)
index 0000000..63e0e66
Binary files /dev/null and b/rust/pspp/src/spv/testdata/legacy15.spv differ