From dff14236b176dd4a5b2e5145255bb453783c3e8a Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 10 Oct 2025 15:34:25 -0700 Subject: [PATCH] can parse selections --- rust/pspp/src/output.rs | 301 +++++++++++++++++++++++++++++++++++--- rust/pspp/src/show_spv.rs | 77 +--------- 2 files changed, 286 insertions(+), 92 deletions(-) diff --git a/rust/pspp/src/output.rs b/rust/pspp/src/output.rs index 6e60673776..1ea6c5c561 100644 --- a/rust/pspp/src/output.rs +++ b/rust/pspp/src/output.rs @@ -17,12 +17,16 @@ #![allow(dead_code)] use std::{ borrow::Cow, + collections::BTreeMap, str::FromStr, sync::{Arc, OnceLock}, }; +use anyhow::anyhow; +use clap::{ArgAction, ArgMatches, Args, FromArgMatches, value_parser}; use enum_map::EnumMap; use enumset::{EnumSet, EnumSetType}; +use itertools::Itertools; use pivot::PivotTable; use serde::Serialize; @@ -460,43 +464,109 @@ impl Item { } } -pub struct Match { +#[derive(Clone, Debug)] +pub struct Selection { /// - `None`: Include all objects. - /// - `Some(false)`: Include only visible objects. - /// - `Some(true)`: Include only hidden objects. - hidden: Option, + /// - `Some(true)`: Include only visible objects. + /// - `Some(false)`: Include only hidden objects. + pub visible: Option, /// - `None`: Include all objects. /// - `Some(false)`: Include only objects with no error on loading. /// - `Some(true)`: Include only objects with an error on loading. - error: Option, + pub error: Option, /// Classes to include. - classes: EnumSet, + pub classes: EnumSet, /// Command names to match. - commands: StringMatch, + pub commands: StringMatch, /// Subtypes to match. - subtypes: StringMatch, + pub subtypes: StringMatch, /// Labels to match. - labels: StringMatch, + pub labels: StringMatch, - /// Include objects under commands with indexes listed in COMMANDS. Indexes - /// are 1-based. Everything is included if N_COMMANDS is 0. - nth_commands: Vec, + /// Include objects under commands with the given 1-based indexes. 
Without + /// any indexes, include all objects. + pub nth_commands: Vec, + + /// Include the objects with the given 1-based indexes within each of the + /// commands that are included. Negative indexes count backward from the + /// last object in a command. + pub instances: Vec, + + /// Include only XML and binary member names that match. Without any member + /// names, include all objects. + pub members: Vec, +} + +impl Selection { + pub fn parse_nth_commands(s: &str) -> Result, anyhow::Error> { + s.split(',') + .map(|s| match s.parse::() { + Ok(0) => Err(anyhow!("--nth-commands values must be positive")), + Ok(n) => Ok(n), + Err(error) => Err(error.into()), + }) + .collect() + } - /// Include XML and binary member names that match (except that everything - /// is included by default if empty). - members: Vec, + pub fn parse_instances(s: &str) -> Result, anyhow::Error> { + s.split(',') + .map(|s| match s.parse::() { + Ok(0) => Err(anyhow!("--instances values must be nonzero")), + Ok(n) => Ok(n), + Err(error) => Err(error.into()), + }) + .collect() + } - /// Include the objects with indexes listed in INSTANCES within each of the - /// commands that are included. Indexes are 1-based. Index -1 means the - /// last object within a command. 
- instances: Vec, + pub fn parse_classes(s: &str) -> Result, anyhow::Error> { + if s.is_empty() { + return Ok(EnumSet::all()); + } + let (s, invert) = match s.strip_prefix('^') { + Some(rest) => (rest, true), + None => (s, false), + }; + let mut classes = EnumSet::empty(); + for name in s.split(',') { + if name == "all" { + classes = EnumSet::all(); + } else { + classes.insert( + name.trim() + .parse() + .map_err(|_| anyhow!("unknown output class `{name}`"))?, + ); + } + } + if invert { + classes = !classes; + } + Ok(classes) + } } +impl Default for Selection { + fn default() -> Self { + Self { + visible: Some(true), + error: None, + classes: EnumSet::all(), + commands: Default::default(), + subtypes: Default::default(), + labels: Default::default(), + nth_commands: Default::default(), + members: Default::default(), + instances: Default::default(), + } + } +} + +#[derive(Clone, Debug)] pub enum StringMatch { Include(Vec), Exclude(Vec), @@ -525,9 +595,200 @@ impl StringMatch { } } +/// Can't fail. 
+#[derive(Debug, thiserror::Error)] +pub enum Infallible {} + +impl FromStr for StringMatch { + type Err = Infallible; + + fn from_str(s: &str) -> Result { + if let Some(rest) = s.strip_prefix("^") { + Ok(Self::Exclude(rest.split(",").map_into().collect())) + } else { + Ok(Self::Include(s.split(",").map_into().collect())) + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct Selections(pub Vec); + +impl FromArgMatches for Selections { + fn from_arg_matches(matches: &ArgMatches) -> Result { + let mut this = Self::default(); + this.update_from_arg_matches(matches)?; + Ok(this) + } + + fn update_from_arg_matches(&mut self, matches: &ArgMatches) -> Result<(), clap::Error> { + #[derive(Debug)] + enum Value { + Or, + Classes(EnumSet), + Commands(StringMatch), + Subtypes(StringMatch), + Labels(StringMatch), + NthCommands(Vec), + Instances(Vec), + ShowHidden(bool), + Errors(bool), + } + + fn extract( + matches: &ArgMatches, + id: &clap::Id, + output: &mut BTreeMap, + f: F, + ) where + F: Fn(T) -> Value, + { + for (value, index) in matches + .try_get_many::(id.as_str()) + .unwrap() + .unwrap() + .zip(matches.indices_of(id.as_str()).unwrap()) + { + output.insert(index, f(value.clone())); + } + } + + println!("{:#?}", matches.ids()); + let mut values = BTreeMap::new(); + for id in matches.ids() { + if matches.try_get_many::(id.as_str()).is_ok() { + // ignore groups + continue; + } + let value_source = matches + .value_source(id.as_str()) + .expect("id came from matches"); + if value_source != clap::parser::ValueSource::CommandLine { + // Any other source just gets tacked on at the end (like default values) + continue; + } + match id.as_str() { + "or" => extract(matches, id, &mut values, |_: bool| Value::Or), + "select" => extract(matches, id, &mut values, Value::Classes), + "commands" => extract(matches, id, &mut values, Value::Commands), + "subtypes" => extract(matches, id, &mut values, Value::Subtypes), + "labels" => extract(matches, id, &mut values, Value::Labels), + 
"nth-commands" => extract(matches, id, &mut values, Value::NthCommands), + "instances" => extract(matches, id, &mut values, Value::Instances), + "show-hidden" => extract(matches, id, &mut values, Value::ShowHidden), + "errors" => extract(matches, id, &mut values, Value::Errors), + _ => unreachable!(), + } + } + + if values.is_empty() { + return Ok(()); + } + + let mut selection = Selection::default(); + for value in values.into_values() { + match value { + Value::Or => self.0.push(std::mem::take(&mut selection)), + Value::Classes(classes) => selection.classes = classes, + Value::Commands(commands) => selection.commands = commands, + Value::Subtypes(subtypes) => selection.subtypes = subtypes, + Value::Labels(labels) => selection.labels = labels, + Value::NthCommands(nth_commands) => selection.nth_commands = nth_commands, + Value::Instances(instances) => selection.instances = instances, + Value::ShowHidden(show) => selection.visible = if show { None } else { Some(true) }, + Value::Errors(only) => selection.error = if only { Some(true) } else { None }, + } + } + self.0.push(selection); + Ok(()) + } +} + +impl Args for Selections { + fn augment_args(cmd: clap::Command) -> clap::Command { + SelectionArgs::augment_args(cmd.next_help_heading("Input selection options")) + } + + fn augment_args_for_update(cmd: clap::Command) -> clap::Command { + Self::augment_args(cmd) + } +} + +/// Show information about SPSS viewer files (SPV files). +#[derive(Args, Clone, Debug)] +struct SelectionArgs { + /// Classes of objects to include or, with leading `^`, to exclude. The + /// supported classes are: charts, headings, logs, models, tables, texts, + /// trees, warnings, outlineheaders, pagetitle, notes, unknown, other. + #[arg(long, required = false, value_parser = Selection::parse_classes, action = ArgAction::Append)] + select: EnumSet, + + /// Identifiers of commands to include or, with leading `^`, to exclude. 
+ #[arg(long, required = false, value_parser = StringMatch::from_str, action = ArgAction::Append)] + commands: StringMatch, + + /// Table subtypes to include or, with leading `^`, to exclude. + #[arg(long, required = false, value_parser = StringMatch::from_str, action = ArgAction::Append)] + subtypes: StringMatch, + + /// Labels (table titles) to include or, with leading `^`, to exclude. + #[arg(long, required = false, value_parser = StringMatch::from_str, action = ArgAction::Append)] + labels: StringMatch, + + /// Include only the Nth (1-based) instance of the selected commands. + #[arg(long, required = false, value_parser = Selection::parse_nth_commands, action = ArgAction::Append)] + nth_commands: Vec, + + /// Include hidden objects in the output (by default, they are excluded). + #[arg(long, required = false, action = ArgAction::Append)] + show_hidden: bool, + + /// Include only objects that cause an error when read (by default, objects + /// with and without errors are included). + #[arg(long, required = false, action = ArgAction::Append)] + errors: bool, + + /// Include only XML and binary member names that match. Without any member + /// names, include all objects. + pub members: Vec, + + /// Separate two groups of selection options. 
+ #[arg(long, action = ArgAction::Append, long = "or", num_args = 0, value_parser = value_parser!(bool), default_missing_value = "true", default_value = "false")] + _or: bool, +} + #[cfg(test)] mod tests { - use crate::output::StringMatch; + use enumset::EnumSet; + + use crate::output::{Class, Selection, StringMatch}; + + #[test] + fn parse_classes() { + assert_eq!(Selection::parse_classes("").unwrap(), EnumSet::all()); + assert_eq!( + Selection::parse_classes("tables").unwrap(), + EnumSet::only(Class::Tables) + ); + assert_eq!( + Selection::parse_classes("tables,pagetitle").unwrap(), + EnumSet::only(Class::Tables) | EnumSet::only(Class::PageTitle) + ); + assert_eq!( + Selection::parse_classes("^tables,pagetitle").unwrap(), + !(EnumSet::only(Class::Tables) | EnumSet::only(Class::PageTitle)) + ); + } + + #[test] + fn parse_nth_commands() { + assert_eq!(Selection::parse_nth_commands("1").unwrap(), vec![1]); + assert_eq!( + Selection::parse_nth_commands("1,2,3").unwrap(), + vec![1, 2, 3] + ); + assert!(Selection::parse_nth_commands("0").is_err()); + } #[test] fn string_matches() { diff --git a/rust/pspp/src/show_spv.rs b/rust/pspp/src/show_spv.rs index 179ef79e97..35ef9786ae 100644 --- a/rust/pspp/src/show_spv.rs +++ b/rust/pspp/src/show_spv.rs @@ -14,10 +14,9 @@ // You should have received a copy of the GNU General Public License along with // this program. If not, see . -use anyhow::{Result, anyhow}; +use anyhow::Result; use clap::{Args, ValueEnum}; -use enumset::EnumSet; -use pspp::output::Class; +use pspp::output::Selections; use std::{fmt::Display, path::PathBuf}; /// Show information about SPSS viewer files (SPV files). @@ -34,49 +33,8 @@ pub struct ShowSpv { #[arg(required = true)] input: PathBuf, - /// Classes of objects to include or, with leading `^`, to exclude. The - /// supported classes are: charts, headings, logs, models, tables, texts, - /// trees, warnings, outlineheaders, pagetitle, notes, unknown, other. 
- #[arg(long, required = false, value_parser = parse_select, help_heading = "Input selection options")] - select: EnumSet, - - /// Identifiers of commands to include or, with leading `^`, to exclude. - #[arg(long, required = false)] - commands: String, - - /// Table subtypes to include or, with leading `^`, to exclude. - #[arg(long, required = false)] - subtypes: String, - - /// Labels (table titles) to include or, with leading `^`, to exclude. - #[arg(long, required = false)] - labels: String, -} - -fn parse_select(s: &str) -> Result, anyhow::Error> { - if s.is_empty() { - return Ok(EnumSet::all()); - } - let (s, invert) = match s.strip_prefix('^') { - Some(rest) => (rest, true), - None => (s, false), - }; - let mut classes = EnumSet::empty(); - for name in s.split(',') { - if name == "all" { - classes = EnumSet::all(); - } else { - classes.insert( - name.trim() - .parse() - .map_err(|_| anyhow!("unknown output class `{name}`"))?, - ); - } - } - if invert { - classes = !classes; - } - Ok(classes) + #[command(flatten)] + selection: Selections, } /// What to show in a system file. @@ -111,32 +69,7 @@ impl Display for Mode { impl ShowSpv { pub fn run(self) -> Result<()> { + println!("{:#?}", &self); todo!() } } - -#[cfg(test)] -mod tests { - use enumset::EnumSet; - - use crate::show_spv::parse_select; - - #[test] - fn test_parse_select() { - assert_eq!(parse_select("").unwrap(), EnumSet::all()); - assert_eq!( - parse_select("tables").unwrap(), - EnumSet::only(pspp::output::Class::Tables) - ); - assert_eq!( - parse_select("tables,pagetitle").unwrap(), - EnumSet::only(pspp::output::Class::Tables) - | EnumSet::only(pspp::output::Class::PageTitle) - ); - assert_eq!( - parse_select("^tables,pagetitle").unwrap(), - !(EnumSet::only(pspp::output::Class::Tables) - | EnumSet::only(pspp::output::Class::PageTitle)) - ); - } -} -- 2.30.2