From ec171cb5758ed3dcf179ccbb2562d5ae962982e0 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Sat, 11 Oct 2025 17:52:54 -0700
Subject: [PATCH] work

---
 rust/Cargo.lock                        |   7 +
 rust/doc/src/invoking/pspp-show-spv.md |   4 +-
 rust/pspp/Cargo.toml                   |   1 +
 rust/pspp/src/output.rs                | 310 ++++++++++++++++++++++---
 4 files changed, 287 insertions(+), 35 deletions(-)

diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index ca955f116f..07d54ed734 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -227,6 +227,12 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "bit-vec"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -1856,6 +1862,7 @@ dependencies = [
  "aes",
  "anyhow",
  "binrw",
+ "bit-vec",
  "cairo-rs",
  "chardetng",
  "chrono",
diff --git a/rust/doc/src/invoking/pspp-show-spv.md b/rust/doc/src/invoking/pspp-show-spv.md
index e5854bb99b..386f2c32b7 100644
--- a/rust/doc/src/invoking/pspp-show-spv.md
+++ b/rust/doc/src/invoking/pspp-show-spv.md
@@ -83,7 +83,9 @@ only objects that satisfy all of them are selected:
   The `--subtypes` option matches particular tables within a command,
   case insensitively.  Subtypes are not necessarily unique: two
   commands that produce similar output tables may use the same
-  subtype.  Subtypes are always in English and `dir` will print them.
+  subtype.  Only tables have subtypes, so specifying `--subtypes` will
+  exclude other kinds of objects.  Subtypes are always in English and
+  `dir` will print them.
 
   The `--labels` option matches the labels in table output (that is,
   the table titles).  Labels are affected by the output language,
diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml
index 1f60f971cf..47e4671e8c 100644
--- a/rust/pspp/Cargo.toml
+++ b/rust/pspp/Cargo.toml
@@ -57,6 +57,7 @@ serde_path_to_error = "0.1.20"
 html_parser = "0.7.0"
 paper-sizes = { path = "../paper-sizes", features = ["serde"] }
 enumset = "1.1.10"
+bit-vec = "0.8.0"
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
diff --git a/rust/pspp/src/output.rs b/rust/pspp/src/output.rs
index 563d5ef5fc..544f2353a3 100644
--- a/rust/pspp/src/output.rs
+++ b/rust/pspp/src/output.rs
@@ -18,12 +18,14 @@
 use std::{
     borrow::Cow,
     collections::BTreeMap,
+    iter::once,
     mem::take,
     str::FromStr,
     sync::{Arc, OnceLock},
 };
 
 use anyhow::anyhow;
+use bit_vec::BitVec;
 use clap::{ArgAction, ArgMatches, Args, FromArgMatches, value_parser};
 use enum_map::EnumMap;
 use enumset::{EnumSet, EnumSetType};
@@ -87,6 +89,17 @@ impl Item {
         }
     }
 
+    /// Returns a new group item suitable as the root node of an output document.
+    ///
+    /// A root node is a group whose own properties are mostly disregarded.
+    /// Instead of having root nodes, it would make just as much sense to just
+    /// keep around arrays of nodes that would serve as the top level of an
+    /// output document, but we'd need more special cases instead of just using
+    /// the existing support for group items.
+    pub fn new_root() -> Self {
+        Self::new(Details::Group(Vec::new())).with_label(Some(String::from("Output")))
+    }
+
     pub fn label(&self) -> Cow<'static, str> {
         match &self.label {
             Some(label) => Cow::from(label.clone()),
@@ -98,6 +111,10 @@ impl Item {
         Self { show, ..self }
     }
 
+    pub fn with_label(self, label: Option<String>) -> Self {
+        Self { label, ..self }
+    }
+
     pub fn with_command_name(self, command_name: Option<String>) -> Self {
         Self {
             command_name,
@@ -141,6 +158,43 @@ impl Details {
         }
     }
 
+    pub fn as_mut_group(&mut self) -> Option<&mut Vec<Arc<Item>>> {
+        match self {
+            Self::Group(children) => Some(children),
+            _ => None,
+        }
+    }
+
+    pub fn children(&self) -> impl Iterator<Item = &Arc<Item>> {
+        match self {
+            Self::Group(children) => Some(children.iter()),
+            _ => None,
+        }
+        .into_iter()
+        .flatten()
+    }
+
+    pub fn as_message(&self) -> Option<&Diagnostic> {
+        match self {
+            Self::Message(diagnostic) => Some(diagnostic),
+            _ => None,
+        }
+    }
+
+    pub fn as_table(&self) -> Option<&PivotTable> {
+        match self {
+            Self::Table(table) => Some(table),
+            _ => None,
+        }
+    }
+
+    pub fn as_text(&self) -> Option<&Text> {
+        match self {
+            Self::Text(text) => Some(text),
+            _ => None,
+        }
+    }
+
     pub fn command_name(&self) -> Option<&String> {
         match self {
             Details::Chart
@@ -156,7 +210,7 @@ impl Details {
     pub fn label(&self) -> Cow<'static, str> {
         match self {
-            Details::Chart => todo!(),
-            Details::Image => todo!(),
+            Details::Chart => Cow::from("Chart"),
+            Details::Image => Cow::from("Image"),
             Details::Group(_) => Cow::from("Group"),
             Details::Message(diagnostic) => Cow::from(diagnostic.severity.as_title_str()),
             Details::PageBreak => Cow::from("Page Break"),
@@ -165,8 +219,24 @@ impl Details {
         }
     }
 
+    pub fn is_group(&self) -> bool {
+        matches!(self, Self::Group(_))
+    }
+
+    pub fn is_message(&self) -> bool {
+        matches!(self, Self::Message(_))
+    }
+
     pub fn is_page_break(&self) -> bool {
         matches!(self, Self::PageBreak)
     }
+
+    pub fn is_table(&self) -> bool {
+        matches!(self, Self::Table(_))
+    }
+
+    pub fn is_text(&self) -> bool {
+        matches!(self, Self::Text(_))
+    }
 }
 
@@ -372,6 +442,14 @@ impl SpvInfo {
             ..self
         }
     }
+
+    pub fn member_names(&self) -> Vec<&str> {
+        let mut member_names = vec![self.structure_member.as_str()];
+        if let Some(members) = &self.members {
+            member_names.extend(members.iter());
+        }
+        member_names
+    }
 }
 
 /// Identifies ZIP file members for one kind of output item in an SPV file.
@@ -396,6 +474,17 @@ pub enum SpvMembers {
     ),
 }
 
+impl SpvMembers {
+    pub fn iter(&self) -> impl Iterator<Item = &str> {
+        let (a, b) = match self {
+            SpvMembers::Light(a) => (a.as_str(), None),
+            SpvMembers::Legacy { xml: a, binary: b } => (a.as_str(), Some(b.as_str())),
+            SpvMembers::Image(a) => (a.as_str(), None),
+        };
+        once(a).chain(b)
+    }
+}
+
 /// Classifications for output items.  These only roughly correspond to the
 /// output item types; for example, "warnings" are a subset of text items.
 #[derive(Debug, EnumSetType)]
@@ -494,9 +583,9 @@ pub struct Selection {
     pub nth_commands: Vec<usize>,
 
     /// Include the objects with the given 1-based indexes within each of the
-    /// commands that are included.  Indexes are 1-based.  Negative indexes
-    /// count backward from the last object in a command.
-    pub instances: Vec<isize>,
+    /// commands that are included.  Indexes are 1-based.  Index 0 represents
+    /// the last instance in a command.
+    pub instances: Vec<usize>,
 
     /// Include only XML and binary member names that match.  Without any member
     /// names, include all objects.
@@ -506,23 +595,24 @@ pub struct Selection {
 impl Selection {
     pub fn parse_nth_commands(s: &str) -> Result<Vec<usize>, anyhow::Error> {
         s.split(',')
-            .map(|s| match s.parse::<usize>() {
-                Ok(0) => Err(anyhow!("--nth-commmands values must be positive")),
-                Ok(n) => Ok(n),
+            .map(|s| match s.trim().parse::<usize>() {
+                Ok(n) if n > 0 => Ok(n),
+                Ok(_) => Err(anyhow!("--nth-commands values must be positive")),
                 Err(error) => Err(error.into()),
             })
             .collect()
     }
 
-    pub fn parse_instances(s: &str) -> Result<Vec<isize>, anyhow::Error> {
+    pub fn parse_instances(s: &str) -> Result<Vec<usize>, anyhow::Error> {
         s.split(',')
             .map(|s| {
+                let s = s.trim();
                 if s == "last" {
-                    Ok(-1)
+                    Ok(0)
                 } else {
-                    match s.parse::<isize>() {
-                        Ok(0) => Err(anyhow!("--instances values must be nonzero")),
-                        Ok(n) => Ok(n),
+                    match s.parse::<usize>() {
+                        Ok(n) if n > 0 => Ok(n),
+                        Ok(_) => Err(anyhow!("--instances values must be positive")),
                         Err(error) => Err(error.into()),
                     }
                 }
@@ -589,7 +679,12 @@ impl Default for StringMatch {
 }
 
 impl StringMatch {
-    fn matches(&self, s: &str) -> bool {
+    /// Returns true if this is `Exclude` with no strings listed.
+    pub fn is_default(&self) -> bool {
+        matches!(self, Self::Exclude(strings) if strings.is_empty())
+    }
+
+    pub fn matches(&self, s: &str) -> bool {
         fn inner(items: &[String], s: &str) -> bool {
             items.iter().any(|item| match item.strip_suffix('*') {
                 Some(prefix) => s.starts_with(prefix),
@@ -626,33 +721,206 @@ pub struct Criteria(pub Vec<Selection>);
 impl Criteria {
     /// Returns a new output item whose children are all the (direct and
     /// indirect) children of `item` that meet the criteria.
-    fn apply(&self, mut item: Item) -> Item {
-        fn take_children(item: &mut Item) -> Vec<Arc<Item>> {
-            match &mut item.details {
-                Details::Group(items) => take(items),
-                _ => Vec::new(),
-            }
+    fn apply(&self, item: Item) -> Item {
+        /// Returns references to the direct children of `item`, if any.
+        fn take_children(item: &Item) -> Vec<&Item> {
+            item.details.children().map(|item| &**item).collect()
         }
-        fn flatten_children(
-            children: Vec<Arc<Item>>,
+        /// Appends `children` and all of their descendants to `items` in
+        /// preorder, recording each item's depth in `depths`.
+        fn flatten_children<'a>(
+            children: Vec<&'a Item>,
             depth: usize,
-            items: &mut Vec<Item>,
+            items: &mut Vec<&'a Item>,
             depths: &mut Vec<usize>,
         ) {
             for child in children {
-                flatten(Arc::unwrap_or_clone(child), depth, items, depths);
+                flatten(child, depth, items, depths);
             }
         }
-        fn flatten(mut item: Item, depth: usize, items: &mut Vec<Item>, depths: &mut Vec<usize>) {
-            let children = take_children(&mut item);
+        /// Appends `item`, followed by all of its descendants, to `items`.
+        fn flatten<'a>(
+            item: &'a Item,
+            depth: usize,
+            items: &mut Vec<&'a Item>,
+            depths: &mut Vec<usize>,
+        ) {
+            let children = take_children(item);
             items.push(item);
             depths.push(depth);
             flatten_children(children, depth + 1, items, depths);
         }
 
+        /// Sets the bit in `include` for each element of `items` that
+        /// `selection` selects.  Bits that are already set stay set, so calling
+        /// this once per selection accumulates the union of the selections.
+        fn select_matches(
+            items: &[&Item],
+            depths: &[usize],
+            selection: &Selection,
+            include: &mut BitVec,
+        ) {
+            let mut instance_within_command = 0;
+            let mut last_instance = None;
+            let mut command_item = None;
+            let mut command_command_item = None;
+            let mut nth_command = 0;
+            for (index, (item, depth)) in std::iter::zip(items, depths).enumerate() {
+                if *depth == 0 {
+                    // Each depth-0 item is treated as the start of a new
+                    // command, so flush the previous command's pending
+                    // `last` instance, if any.
+                    command_item = Some(index);
+                    if let Some(last_instance) = last_instance.take() {
+                        include.set(last_instance, true);
+                    }
+                    instance_within_command = 0;
+                }
+                if !selection.classes.contains(item.class()) {
+                    continue;
+                }
+                if let Some(visible) = selection.visible
+                    && !item.details.is_group()
+                    && visible != item.show
+                {
+                    continue;
+                }
+                if let Some(error) = selection.error
+                    && error != item.spv_info.as_ref().is_some_and(|info| info.error)
+                {
+                    continue;
+                }
+                if !selection
+                    .commands
+                    .matches(item.command_name.as_deref().unwrap_or(""))
+                {
+                    continue;
+                }
+                if !selection.nth_commands.is_empty() {
+                    // Count each command only once, the first time that one
+                    // of its items gets this far.
+                    if command_item != command_command_item {
+                        command_command_item = command_item;
+                        nth_command += 1;
+                    }
+                    if !selection.nth_commands.contains(&nth_command) {
+                        continue;
+                    }
+                }
+                if !selection.subtypes.is_default() {
+                    let Some(table) = item.details.as_table() else {
+                        continue;
+                    };
+                    let subtype = table.subtype().display(table).to_string();
+                    if !selection.subtypes.matches(&subtype) {
+                        continue;
+                    }
+                }
+                if !selection.labels.matches(&item.label()) {
+                    continue;
+                }
+                if !selection.members.is_empty() {
+                    let Some(spv_info) = item.spv_info.as_ref() else {
+                        continue;
+                    };
+                    let member_names = spv_info.member_names();
+                    if !selection
+                        .members
+                        .iter()
+                        .any(|name| member_names.contains(&name.as_str()))
+                    {
+                        continue;
+                    }
+                }
+                if !selection.instances.is_empty() {
+                    if *depth == 0 {
+                        continue;
+                    }
+                    instance_within_command += 1;
+                    if !selection.instances.contains(&instance_within_command) {
+                        // Instance 0 stands for `last`: remember this item
+                        // as the candidate until the command ends.
+                        if selection.instances.contains(&0) {
+                            last_instance = Some(index);
+                        }
+                        continue;
+                    }
+                    // This instance was requested by number, so no earlier
+                    // candidate is still the command's last instance.
+                    last_instance = None;
+                }
+
+                include.set(index, true);
+            }
+
+            // The final command has no following depth-0 item to flush its
+            // pending `last` instance, so do that here.
+            if let Some(last_instance) = last_instance {
+                include.set(last_instance, true);
+            }
+        }
+
+        /// Rebuilds the subtrees rooted at `items`, appending the result to
+        /// `out`.  `index` is the preorder position of the next item, which
+        /// is also its bit number in `include`.
+        fn unflatten_items(
+            items: Vec<Arc<Item>>,
+            index: &mut usize,
+            include: &BitVec,
+            out: &mut Vec<Arc<Item>>,
+        ) {
+            for item in items {
+                unflatten_item(Arc::unwrap_or_clone(item), index, include, out);
+            }
+        }
+
+        /// Consumes the bits in `include` for `item` and all of its
+        /// descendants, advancing `index` past them.  Appends `item` to `out`
+        /// if its bit is set.  A group keeps only its selected descendants;
+        /// if the group itself is not selected, they are appended to `out`
+        /// in its place.
+        fn unflatten_item(
+            mut item: Item,
+            index: &mut usize,
+            include: &BitVec,
+            out: &mut Vec<Arc<Item>>,
+        ) {
+            let include_item = include[*index];
+            *index += 1;
+            if let Details::Group(children) = &mut item.details {
+                let in_children = take(children);
+                if !include_item {
+                    unflatten_items(in_children, index, include, out);
+                    return;
+                }
+                unflatten_items(in_children, index, include, children);
+            }
+            if include_item {
+                out.push(Arc::new(item));
+            }
+        }
+
         let mut items = Vec::new();
         let mut depths = Vec::new();
-        flatten_children(take_children(&mut item), 0, &mut items, &mut depths);
-        todo!()
+        flatten_children(take_children(&item), 0, &mut items, &mut depths);
+
+        let mut include = BitVec::from_elem(items.len(), false);
+        for selection in &self.0 {
+            select_matches(&items, &depths, selection, &mut include);
+        }
+
+        // `items` does not include `item` itself, so bit 0 belongs to its
+        // first child: rebuild from the children, not from `item`.
+        let mut root = item;
+        let children = root.details.as_mut_group().map(take).unwrap_or_default();
+        let mut output = Item::new_root();
+        let mut index = 0;
+        unflatten_items(
+            children,
+            &mut index,
+            &include,
+            output.details.as_mut_group().expect("root item is a group"),
+        );
+        output
     }
 }
@@ -673,7 +941,7 @@ impl FromArgMatches for Criteria {
             Subtypes(StringMatch),
             Labels(StringMatch),
             NthCommands(Vec<usize>),
-            Instances(Vec<isize>),
+            Instances(Vec<usize>),
             ShowHidden(bool),
             Errors(bool),
         }
@@ -777,10 +1045,17 @@ struct SelectionArgs {
     #[arg(long, required = false, value_parser = StringMatch::from_str, action = ArgAction::Append)]
     labels: StringMatch,
 
-    /// Include only the Nth (1-based) instance of the selected commands.
+    /// Include only objects from the Nth (1-based) command that matches
+    /// `--command`.
     #[arg(long, required = false, value_parser = Selection::parse_nth_commands, action = ArgAction::Append)]
     nth_commands: Vec<usize>,
 
+    /// Include only the given instances of an object that matches the other
+    /// criteria within a single command.  Each instance may be a number (1 for
+    /// the first, and so on), or `last` for the last instance.
+    #[arg(long, required = false, value_parser = Selection::parse_instances, action = ArgAction::Append)]
+    instances: Vec<usize>,
+
     /// Include hidden objects in the output (by default, they are excluded)
     #[arg(long, required = false, action = ArgAction::Append)]
     show_hidden: bool,
@@ -837,11 +1112,10 @@ mod tests {
     #[test]
     fn parse_instances() {
         assert_eq!(Selection::parse_instances("1").unwrap(), vec![1]);
-        assert_eq!(
-            Selection::parse_instances("2,3,-2,-3").unwrap(),
-            vec![2, 3, -2, -3]
-        );
-        assert_eq!(Selection::parse_instances("last,1").unwrap(), vec![-1, 1]);
+        assert_eq!(Selection::parse_instances("2,3").unwrap(), vec![2, 3]);
+        assert_eq!(Selection::parse_instances("last,1").unwrap(), vec![0, 1]);
+        assert_eq!(Selection::parse_instances(" 2 , last ").unwrap(), vec![2, 0]);
+        assert!(Selection::parse_instances("-1").is_err());
         assert!(Selection::parse_instances("0").is_err());
     }
 
-- 
2.30.2