work
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 12 Oct 2025 00:52:54 +0000 (17:52 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 12 Oct 2025 00:52:54 +0000 (17:52 -0700)
rust/Cargo.lock
rust/doc/src/invoking/pspp-show-spv.md
rust/pspp/Cargo.toml
rust/pspp/src/output.rs

index ca955f116f0f7479b2e4e33c3f6ba2dfb0753706..07d54ed734b780bf99a661ea79ab45620224f7d1 100644 (file)
@@ -227,6 +227,12 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "bit-vec"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -1856,6 +1862,7 @@ dependencies = [
  "aes",
  "anyhow",
  "binrw",
+ "bit-vec",
  "cairo-rs",
  "chardetng",
  "chrono",
index e5854bb99bf56b87d1807498c77ce974c15c0dd4..386f2c32b7b31f427800e38b2a6689fe287bb701 100644 (file)
@@ -83,7 +83,9 @@ only objects that satisfy all of them are selected:
   The `--subtypes` option matches particular tables within a command,
   case insensitively.  Subtypes are not necessarily unique: two
   commands that produce similar output tables may use the same
-  subtype.  Subtypes are always in English and `dir` will print them.
+  subtype.  Only tables have subtypes, so specifying `--subtypes` will
+  exclude other kinds of objects.  Subtypes are always in English and
+  `dir` will print them.
 
   The `--labels` option matches the labels in table output (that is,
   the table titles).  Labels are affected by the output language,
index 1f60f971cfbf00cbd2b08a7ceac5d743e50cdcfc..47e4671e8cf54e13508acae6fd009d65b4a886d4 100644 (file)
@@ -57,6 +57,7 @@ serde_path_to_error = "0.1.20"
 html_parser = "0.7.0"
 paper-sizes = { path = "../paper-sizes", features = ["serde"] }
 enumset = "1.1.10"
+bit-vec = "0.8.0"
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
index 563d5ef5fc70de507b06e1f2307594f257741d28..544f2353a3ec9cb77bc0537c970d76d78e92fe0a 100644 (file)
 use std::{
     borrow::Cow,
     collections::BTreeMap,
+    iter::once,
     mem::take,
     str::FromStr,
     sync::{Arc, OnceLock},
 };
 
 use anyhow::anyhow;
+use bit_vec::BitVec;
 use clap::{ArgAction, ArgMatches, Args, FromArgMatches, value_parser};
 use enum_map::EnumMap;
 use enumset::{EnumSet, EnumSetType};
@@ -87,6 +89,17 @@ impl Item {
         }
     }
 
+    /// Returns a new group item suitable as the root node of an output document.
+    ///
+    /// A root node is a group whose own properties are mostly disregarded.
+    /// Instead of having root nodes, it would make just as much sense to just
+    /// keep around arrays of nodes that would serve as the top level of an
+    /// output document, but we'd need more special cases instead of just using
+    /// the existing support for group items.
+    pub fn new_root() -> Self {
+        Self::new(Details::Group(Vec::new())).with_label(Some(String::from("Output")))
+    }
+
     pub fn label(&self) -> Cow<'static, str> {
         match &self.label {
             Some(label) => Cow::from(label.clone()),
@@ -98,6 +111,10 @@ impl Item {
         Self { show, ..self }
     }
 
+    pub fn with_label(self, label: Option<String>) -> Self {
+        Self { label, ..self }
+    }
+
     pub fn with_command_name(self, command_name: Option<String>) -> Self {
         Self {
             command_name,
@@ -141,6 +158,43 @@ impl Details {
         }
     }
 
+    pub fn as_mut_group(&mut self) -> Option<&mut Vec<Arc<Item>>> {
+        match self {
+            Self::Group(children) => Some(children),
+            _ => None,
+        }
+    }
+
+    pub fn children(&self) -> impl Iterator<Item = &Arc<Item>> {
+        match self {
+            Self::Group(children) => Some(children.iter()),
+            _ => None,
+        }
+        .into_iter()
+        .flatten()
+    }
+
+    pub fn as_message(&self) -> Option<&Diagnostic> {
+        match self {
+            Self::Message(diagnostic) => Some(diagnostic),
+            _ => None,
+        }
+    }
+
+    pub fn as_table(&self) -> Option<&PivotTable> {
+        match self {
+            Self::Table(table) => Some(table),
+            _ => None,
+        }
+    }
+
+    pub fn as_text(&self) -> Option<&Text> {
+        match self {
+            Self::Text(text) => Some(text),
+            _ => None,
+        }
+    }
+
     pub fn command_name(&self) -> Option<&String> {
         match self {
             Details::Chart
@@ -156,7 +210,7 @@ impl Details {
     pub fn label(&self) -> Cow<'static, str> {
         match self {
             Details::Chart => todo!(),
-            Details::Image => todo!(),
+            Details::Image => Cow::from("Image"),
             Details::Group(_) => Cow::from("Group"),
             Details::Message(diagnostic) => Cow::from(diagnostic.severity.as_title_str()),
             Details::PageBreak => Cow::from("Page Break"),
@@ -165,9 +219,25 @@ impl Details {
         }
     }
 
+    /// Returns true if this is a [`Details::Group`].
+    pub fn is_group(&self) -> bool {
+        matches!(self, Self::Group(_))
+    }
+
+    pub fn is_message(&self) -> bool {
+        matches!(self, Self::Message(_))
+    }
+
     /// Returns true if this is a [`Details::PageBreak`].
     pub fn is_page_break(&self) -> bool {
         matches!(self, Self::PageBreak)
     }
+
+    pub fn is_table(&self) -> bool {
+        matches!(self, Self::Table(_))
+    }
+
+    pub fn is_text(&self) -> bool {
+        matches!(self, Self::Text(_))
+    }
 }
 
 impl<A> FromIterator<A> for Details
@@ -372,6 +442,14 @@ impl SpvInfo {
             ..self
         }
     }
+
+    pub fn member_names(&self) -> Vec<&str> {
+        let mut member_names = vec![self.structure_member.as_str()];
+        if let Some(members) = &self.members {
+            member_names.extend(members.iter());
+        }
+        member_names
+    }
 }
 
 /// Identifies ZIP file members for one kind of output item in an SPV file.
@@ -396,6 +474,17 @@ pub enum SpvMembers {
     ),
 }
 
+impl SpvMembers {
+    pub fn iter(&self) -> impl Iterator<Item = &str> {
+        let (a, b) = match self {
+            SpvMembers::Light(a) => (a.as_str(), None),
+            SpvMembers::Legacy { xml: a, binary: b } => (a.as_str(), Some(b.as_str())),
+            SpvMembers::Image(a) => (a.as_str(), None),
+        };
+        once(a).chain(once(b).flatten())
+    }
+}
+
 /// Classifications for output items.  These only roughly correspond to the
 /// output item types; for example, "warnings" are a subset of text items.
 #[derive(Debug, EnumSetType)]
@@ -494,9 +583,9 @@ pub struct Selection {
     pub nth_commands: Vec<usize>,
 
     /// Include the objects with the given 1-based indexes within each of the
-    /// commands that are included.  Indexes are 1-based.  Negative indexes
-    /// count backward from the last object in a command.
-    pub instances: Vec<isize>,
+    /// commands that are included.  Indexes are 1-based.  Index 0 represents
+    /// the last instance in a command.
+    pub instances: Vec<usize>,
 
     /// Include only XML and binary member names that match.  Without any member
     /// names, include all objects.
@@ -506,23 +595,24 @@ pub struct Selection {
 impl Selection {
     pub fn parse_nth_commands(s: &str) -> Result<Vec<usize>, anyhow::Error> {
         s.split(',')
-            .map(|s| match s.parse::<usize>() {
-                Ok(0) => Err(anyhow!("--nth-commmands values must be positive")),
-                Ok(n) => Ok(n),
+            .map(|s| match s.trim().parse::<usize>() {
+                Ok(n) if n > 0 => Ok(n),
+                Ok(_) => Err(anyhow!("--nth-commmands values must be positive")),
                 Err(error) => Err(error.into()),
             })
             .collect()
     }
 
-    pub fn parse_instances(s: &str) -> Result<Vec<isize>, anyhow::Error> {
+    pub fn parse_instances(s: &str) -> Result<Vec<usize>, anyhow::Error> {
         s.split(',')
             .map(|s| {
+                let s = s.trim();
                 if s == "last" {
-                    Ok(-1)
+                    Ok(0)
                 } else {
-                    match s.parse::<isize>() {
-                        Ok(0) => Err(anyhow!("--instances values must be nonzero")),
-                        Ok(n) => Ok(n),
+                    match s.parse::<usize>() {
+                        Ok(n) if n > 0 => Ok(n),
+                        Ok(_) => Err(anyhow!("--instances values must be positive")),
                         Err(error) => Err(error.into()),
                     }
                 }
@@ -589,7 +679,16 @@ impl Default for StringMatch {
 }
 
 impl StringMatch {
-    fn matches(&self, s: &str) -> bool {
+    pub fn is_default(&self) -> bool {
+        if let Self::Exclude(strings) = self
+            && strings.is_empty()
+        {
+            true
+        } else {
+            false
+        }
+    }
+    pub fn matches(&self, s: &str) -> bool {
         fn inner(items: &[String], s: &str) -> bool {
             items.iter().any(|item| match item.strip_suffix('*') {
                 Some(prefix) => s.starts_with(prefix),
@@ -626,33 +725,172 @@ pub struct Criteria(pub Vec<Selection>);
 impl Criteria {
     /// Returns a new output item whose children are all the (direct and
     /// indirect) children of `item` that meet the criteria.
-    fn apply(&self, mut item: Item) -> Item {
-        fn take_children(item: &mut Item) -> Vec<Arc<Item>> {
-            match &mut item.details {
-                Details::Group(items) => take(items),
-                _ => Vec::new(),
-            }
+    fn apply(&self, item: Item) -> Item {
+        fn take_children(item: &Item) -> Vec<&Item> {
+            item.details.children().map(|item| &**item).collect()
         }
-        fn flatten_children(
-            children: Vec<Arc<Item>>,
+        fn flatten_children<'a>(
+            children: Vec<&'a Item>,
             depth: usize,
-            items: &mut Vec<Item>,
+            items: &mut Vec<&'a Item>,
             depths: &mut Vec<usize>,
         ) {
             for child in children {
-                flatten(Arc::unwrap_or_clone(child), depth, items, depths);
+                flatten(child, depth, items, depths);
             }
         }
-        fn flatten(mut item: Item, depth: usize, items: &mut Vec<Item>, depths: &mut Vec<usize>) {
-            let children = take_children(&mut item);
+        fn flatten<'a>(
+            item: &'a Item,
+            depth: usize,
+            items: &mut Vec<&'a Item>,
+            depths: &mut Vec<usize>,
+        ) {
+            let children = take_children(item);
             items.push(item);
             depths.push(depth);
             flatten_children(children, depth + 1, items, depths);
         }
 
+        fn select_matches(
+            items: &[&Item],
+            depths: &[usize],
+            selection: &Selection,
+            include: &mut BitVec,
+        ) {
+            let mut instance_within_command = 0;
+            let mut last_instance = None;
+            let mut command_item = None;
+            let mut command_command_item = None;
+            let mut nth_command = 0;
+            for (index, (item, depth)) in std::iter::zip(items, depths).enumerate() {
+                if *depth == 0 {
+                    command_item = Some(index);
+                    if let Some(last_instance) = last_instance.take() {
+                        include.set(last_instance, true);
+                    }
+                    instance_within_command = 0;
+                }
+                if !selection.classes.contains(item.class()) {
+                    continue;
+                }
+                if let Some(visible) = selection.visible
+                    && !item.details.is_group()
+                    && visible != item.show
+                {
+                    continue;
+                }
+                if let Some(error) = selection.error
+                    && error
+                        != item
+                            .spv_info
+                            .as_ref()
+                            .map_or(false, |spv_info| spv_info.error)
+                {
+                    continue;
+                }
+                if !selection
+                    .commands
+                    .matches(item.command_name.as_ref().map_or("", |name| name.as_str()))
+                {
+                    continue;
+                }
+                if !selection.nth_commands.is_empty() {
+                    if command_item != command_command_item {
+                        command_command_item = command_command_item;
+                        nth_command += 1;
+                    }
+                    if !selection.nth_commands.contains(&nth_command) {
+                        continue;
+                    }
+                }
+                if !selection.subtypes.is_default() {
+                    let Some(table) = item.details.as_table() else {
+                        continue;
+                    };
+                    let subtype = table.subtype().display(table).to_string();
+                    if !selection.subtypes.matches(&subtype) {
+                        continue;
+                    }
+                }
+                if !selection.labels.matches(&item.label()) {
+                    continue;
+                }
+                if !selection.members.is_empty() {
+                    let Some(spv_info) = item.spv_info.as_ref() else {
+                        continue;
+                    };
+                    let member_names = spv_info.member_names();
+                    if !selection
+                        .members
+                        .iter()
+                        .any(|name| member_names.contains(&name.as_str()))
+                    {
+                        continue;
+                    }
+                }
+                if !selection.instances.is_empty() {
+                    if *depth == 0 {
+                        continue;
+                    }
+                    instance_within_command += 1;
+                    if !selection.instances.contains(&instance_within_command) {
+                        if selection.instances.contains(&0) {
+                            last_instance = Some(index);
+                        }
+                        continue;
+                    }
+                }
+
+                include.set(index, true);
+            }
+        }
+        /// Rebuilds each of `items` in turn via `unflatten_item`, appending
+        /// the survivors to `out`.
+        ///
+        /// `index` is the position in `include` assigned to the first item.
+        ///
+        /// NOTE(review): `index` advances by one per sibling, but `flatten`
+        /// gives every descendant its own position too, so a sibling that
+        /// follows a non-empty group appears to get the wrong index.  The
+        /// number of positions consumed probably needs to be passed back from
+        /// `unflatten_item` -- confirm.
+        fn unflatten_items(
+            items: Vec<Arc<Item>>,
+            mut index: usize,
+            include: &BitVec,
+            out: &mut Vec<Arc<Item>>,
+        ) {
+            for item in items {
+                unflatten_item(Arc::unwrap_or_clone(item), index, include, out);
+                index += 1;
+            }
+        }
+        /// Rebuilds one item of the output tree according to `include`.
+        ///
+        /// An item whose bit is set is appended to `out`.  A group whose bit
+        /// is clear is dropped, but its children are still processed and any
+        /// survivors are appended directly to `out`.  A group whose bit is
+        /// set keeps its surviving children as its own children.
+        ///
+        /// NOTE(review): unfinished.  The trailing `todo!()` is reached, and
+        /// panics, on every path except the early return for an excluded
+        /// group.
+        fn unflatten_item(
+            mut item: Item,
+            mut index: usize,
+            include: &BitVec,
+            out: &mut Vec<Arc<Item>>,
+        ) {
+            let include_item = include[index];
+            // Children, if any, start at the position after this item.
+            index += 1;
+            match item.details {
+                Details::Group(ref mut children) => {
+                    // Detach the children so they can be filtered and then
+                    // re-attached either here or to the parent.
+                    let in_children = take(children);
+                    if !include_item {
+                        unflatten_items(in_children, index, include, out);
+                        return;
+                    }
+                    unflatten_items(in_children, index, include, children);
+                }
+                _ => {}
+            }
+            if include_item {
+                out.push(Arc::new(item));
+            }
+            todo!()
+        }
+
         let mut items = Vec::new();
         let mut depths = Vec::new();
-        flatten_children(take_children(&mut item), 0, &mut items, &mut depths);
+        flatten_children(take_children(&item), 0, &mut items, &mut depths);
+
+        let mut include = BitVec::from_elem(items.len(), false);
+        for selection in &self.0 {
+            select_matches(&items, &depths, selection, &mut include);
+        }
+
+        let mut output = Item::new_root();
+        unflatten_item(item, 0, &include, output.details.as_mut_group().unwrap());
         todo!()
     }
 }
@@ -673,7 +911,7 @@ impl FromArgMatches for Criteria {
             Subtypes(StringMatch),
             Labels(StringMatch),
             NthCommands(Vec<usize>),
-            Instances(Vec<isize>),
+            Instances(Vec<usize>),
             ShowHidden(bool),
             Errors(bool),
         }
@@ -777,10 +1015,17 @@ struct SelectionArgs {
     #[arg(long, required = false, value_parser = StringMatch::from_str, action = ArgAction::Append)]
     labels: StringMatch,
 
-    /// Include only the Nth (1-based) instance of the selected commands.
+    /// Include only objects from the Nth (1-based) command that matches
+    /// `--command`.
     #[arg(long, required = false, value_parser = Selection::parse_nth_commands, action = ArgAction::Append)]
     nth_commands: Vec<usize>,
 
+    /// Include only the given instances of an object that matches the other
+    /// criteria within a single command.  Each instance may be a number (1 for
+    /// the first, and so on), or `last` for the last instance.
+    #[arg(long, required = false, value_parser = Selection::parse_instances, action = ArgAction::Append)]
+    instances: Vec<usize>,
+
     /// Include hidden objects in the output (by default, they are excluded)
     #[arg(long, required = false, action = ArgAction::Append)]
     show_hidden: bool,
@@ -837,11 +1082,8 @@ mod tests {
     #[test]
     fn parse_instances() {
         assert_eq!(Selection::parse_instances("1").unwrap(), vec![1]);
-        assert_eq!(
-            Selection::parse_instances("2,3,-2,-3").unwrap(),
-            vec![2, 3, -2, -3]
-        );
-        assert_eq!(Selection::parse_instances("last,1").unwrap(), vec![-1, 1]);
+        assert_eq!(Selection::parse_instances("2,3").unwrap(), vec![2, 3]);
+        assert_eq!(Selection::parse_instances("last,1").unwrap(), vec![0, 1]);
         assert!(Selection::parse_instances("0").is_err());
     }