work

author Ben Pfaff <blp@cs.stanford.edu>

Sat, 26 Apr 2025 02:42:28 +0000 (19:42 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Sat, 26 Apr 2025 02:42:28 +0000 (19:42 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 26 Apr 2025 02:42:28 +0000 (19:42 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 26 Apr 2025 02:42:28 +0000 (19:42 -0700)
diff --git a/rust/pspp/src/cooked.rs b/rust/pspp/src/cooked.rs

index c82c41fbe333c8cce1fe1854c4a7ce4341e0d4c2..8de34d5ac63179ccf26e47884e31b6c7deb19b9f 100644 (file)
--- a/rust/pspp/src/cooked.rs
+++ b/rust/pspp/src/cooked.rs
@@ -811,8 +811,7 @@ impl MultipleResponseSet {
  
          Ok(MultipleResponseSet {
              name: mr_set_name,
-            min_width,
-            max_width,
+            width: min_width..=max_width,
              label: input.label.to_string(),
              mr_type,
              variables,
diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs

index 91e41acbcfae59892ec552e6769967b34ff6ecb6..d0941be0612066e9c75a8c70b0235fdec452c84e 100644 (file)
--- a/rust/pspp/src/dictionary.rs
+++ b/rust/pspp/src/dictionary.rs
@@ -1,10 +1,12 @@
+//! Dictionaries and variables.
+
  use core::str;
  use std::{
      cmp::Ordering,
      collections::{HashMap, HashSet},
      fmt::{Debug, Formatter, Result as FmtResult},
      hash::Hash,
-    ops::{Bound, RangeBounds},
+    ops::{Bound, RangeBounds, RangeInclusive},
  };
  
  use encoding_rs::Encoding;
@@ -19,8 +21,10 @@ use crate::{
      raw::{Alignment, CategoryLabels, Measure, MissingValues, RawString, VarType},
  };
  
+/// An index within [Dictionary::variables].
  pub type DictIndex = usize;
  
+/// [VarType], plus a width for [VarType::String].
  #[derive(Copy, Clone, Debug, PartialEq, Eq)]
  pub enum VarWidth {
      Numeric,
@@ -265,19 +269,53 @@ impl From<&[u8]> for Value {
      }
  }
  
+/// A collection of variables, plus additional metadata.
  #[derive(Clone, Debug)]
  pub struct Dictionary {
+    /// The variables.
      pub variables: IndexSet<ByIdentifier<Variable>>,
+
+    /// Indexes into `variables` of the `SPLIT FILE` variables.
      pub split_file: Vec<DictIndex>,
+
+    /// Index of the weight variable, if any.
+    ///
+    /// The weight variable must be numeric.
      pub weight: Option<DictIndex>,
+
+    /// Index of the filter variable, if any.
+    ///
+    /// The filter variable must be numeric.  If there is a filter variable,
+    /// then data analysis excludes cases whose filter value is zero or system-
+    /// or user-missing.
      pub filter: Option<DictIndex>,
+
+    /// An optional limit on the number of cases read by procedures.
      pub case_limit: Option<u64>,
+
+    /// Optional label (name) for the dictionary.
      pub file_label: Option<String>,
+
+    /// Optional additional documentation associated with the dictionary.
      pub documents: Vec<String>,
+
+    /// Named collections of variables within the dictionary.
      pub vectors: HashSet<ByIdentifier<Vector>>,
+
+    /// Attributes for the dictionary itself.
+    ///
+    /// Individual variables can have their own attributes.
      pub attributes: Attributes,
+
+    /// Multiple response sets.
      pub mrsets: HashSet<ByIdentifier<MultipleResponseSet>>,
+
+    /// Variable sets.
+    ///
+    /// Only the GUI makes use of variable sets.
      pub variable_sets: HashSet<ByIdentifier<VariableSet>>,
+
+    /// Character encoding for the dictionary and the data.
      pub encoding: &'static Encoding,
  }
  
@@ -285,6 +323,7 @@ pub struct Dictionary {
  pub struct DuplicateVariableName;
  
  impl Dictionary {
+    /// Creates a new, empty dictionary with the specified `encoding`.
      pub fn new(encoding: &'static Encoding) -> Self {
          Self {
              variables: IndexSet::new(),
@@ -302,10 +341,12 @@ impl Dictionary {
          }
      }
  
+    /// Returns a reference to the weight variable, if any.
      pub fn weight_var(&self) -> Option<&Variable> {
          self.weight.map(|index| &self.variables[index].0)
      }
  
+    /// Returns references to all the split variables, if any.
      pub fn split_vars(&self) -> Vec<&Variable> {
          self.split_file
              .iter()
@@ -313,7 +354,10 @@ impl Dictionary {
              .collect()
      }
  
-    pub fn add_var(&mut self, variable: Variable) -> Result<usize, DuplicateVariableName> {
+    /// Adds `variable` at the end of the dictionary and returns its index.  The
+    /// operation fails if the dictionary already contains a variable with the
+    /// same name (or a variant with different case).
+    pub fn add_var(&mut self, variable: Variable) -> Result<DictIndex, DuplicateVariableName> {
          let (index, inserted) = self.variables.insert_full(ByIdentifier::new(variable));
          if inserted {
              Ok(index)
@@ -322,7 +366,16 @@ impl Dictionary {
          }
      }
  
+    /// Reorders the variables in the dictionary so that the variable with
+    /// 0-based index `from_index` is moved to `to_index`.  Other variables stay
+    /// in the same relative positions.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `from_index` or `to_index` is not valid.
      pub fn reorder_var(&mut self, from_index: DictIndex, to_index: DictIndex) {
+        debug_assert!(from_index < self.variables.len());
+        debug_assert!(to_index < self.variables.len());
          if from_index != to_index {
              self.variables.move_index(from_index, to_index);
              self.update_dict_indexes(&|index| {
@@ -346,6 +399,8 @@ impl Dictionary {
          }
      }
  
+    /// Evaluates `keep` on each variable in the dictionary and deletes
+    /// variables for which it returns false.
      pub fn retain_vars<F>(&mut self, keep: F)
      where
          F: Fn(&Variable) -> bool,
@@ -365,6 +420,12 @@ impl Dictionary {
          }
      }
  
+    /// Deletes the variables whose indexes are in the given `range`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if any part of `range` is outside the valid range of variable
+    /// indexes.
      pub fn delete_vars<R>(&mut self, range: R)
      where
          R: RangeBounds<DictIndex>,
@@ -432,6 +493,9 @@ impl Dictionary {
              .collect();
      }
  
+    /// Attempts to change the name of the variable with the given `index` to
+    /// `new_name`.  Returns true if successful, false if `new_name` would
+    /// duplicate the name of some other variable.
      pub fn try_rename_var(&mut self, index: usize, new_name: Identifier) -> bool {
          let mut variable = self.variables.swap_remove_index(index).unwrap();
          let may_rename = !self.variables.contains(&new_name.0);
@@ -444,6 +508,11 @@ impl Dictionary {
          may_rename
      }
  
+    /// Changes the name of the variable with given `index` to `new_name`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the new name duplicates the name of some existing variable.
      pub fn rename_var(&mut self, index: usize, new_name: Identifier) {
          assert!(self.try_rename_var(index, new_name));
      }
@@ -525,21 +594,59 @@ impl TryFrom<&Attributes> for Option<Role> {
      }
  }
  
+/// A variable, usually inside a [Dictionary].
  #[derive(Clone, Debug)]
  pub struct Variable {
+    /// The variable's name.
+    ///
+    /// PSPP variable names are case-insensitive.
      pub name: Identifier,
+
+    /// Variable width.
      pub width: VarWidth,
+
+    /// User-missing values.
+    ///
+    /// Numeric variables also have a system-missing value (represented as
+    /// `None`).
+    ///
+    /// Both kinds of missing values are excluded from most analyses.
      pub missing_values: MissingValues,
+
+    /// Output format used in most contexts.
      pub print_format: Format,
+
+    /// Output format used on the `WRITE` command.
      pub write_format: Format,
+
+    /// Value labels, to associate a number (or a string) with a more meaningful
+    /// description, e.g. 1 -> Apple, 2 -> Banana, ...
      pub value_labels: HashMap<Value, String>,
+
+    /// Variable label, an optional meaningful description for the variable
+    /// itself.
      pub label: Option<String>,
+
+    /// Measurement level for the variable's data.
      pub measure: Option<Measure>,
+
+    /// Role in data analysis.
      pub role: Option<Role>,
+
+    /// Width of data column in GUI.
      pub display_width: u32,
+
+    /// Data alignment in GUI.
      pub alignment: Alignment,
+
+    /// Whether to retain values of the variable from one case to the next.
      pub leave: bool,
+
+    /// Short names for the variable, kept for compatibility with old software
+    /// that supported at most 8-character variable names.
      pub short_names: Vec<Identifier>,
+
+    /// Variable attributes.
      pub attributes: Attributes,
  }
  
@@ -602,13 +709,22 @@ impl HasIdentifier for Vector {
      }
  }
  
+/// Variables that represent multiple responses to a survey question.
  #[derive(Clone, Debug)]
  pub struct MultipleResponseSet {
+    /// The set's name.
      pub name: Identifier,
+
+    /// A description for the set.
      pub label: String,
-    pub min_width: VarWidth,
-    pub max_width: VarWidth,
+
+    /// Range of widths among the variables.
+    pub width: RangeInclusive<VarWidth>,
+
+    /// What kind of multiple response set this is.
      pub mr_type: MultipleResponseType,
+
+    /// The variables comprising the set.
      pub variables: Vec<DictIndex>,
  }
  
@@ -628,12 +744,21 @@ impl HasIdentifier for MultipleResponseSet {
      }
  }
  
+/// The type of a [MultipleResponseSet].
  #[derive(Clone, Debug)]
  pub enum MultipleResponseType {
+    /// A "multiple dichotomy set", analogous to a survey question with a set of
+    /// checkboxes.  Each variable in the set is treated in a Boolean fashion:
+    /// one value (the "counted value") means that the box was checked, and any
+    /// other value means that it was not.
      MultipleDichotomy {
          value: Value,
          labels: CategoryLabels,
      },
+
+    /// A "multiple category set", a survey question where the respondent is
+    /// instructed to list up to N choices.  Each variable represents one of the
+    /// responses.
      MultipleCategory,
  }
  
diff --git a/rust/pspp/src/lex/command_name.rs b/rust/pspp/src/lex/command_name.rs

index 0e9957d4b309eac93abd0c19aab644c20777892b..5b6b382c90ac7c86e48e889115f40f147662f264 100644 (file)
--- a/rust/pspp/src/lex/command_name.rs
+++ b/rust/pspp/src/lex/command_name.rs
@@ -17,7 +17,7 @@ pub struct Match {
  ///      match, then `string` does not match `command` and the function returns false.
  ///
  ///   4. Otherwise, `string` and `command` match.  Set *MISSING_WORDS to n - m.  Set
-///      *EXACT to false if any of the S[i] were found to be abbreviated in the
+///      *EXACT to false if any of the `S[i]` were found to be abbreviated in the
  ///      comparisons done in step 3, or to true if they were all exactly equal
  ///      (modulo case).  Return true.
  pub fn command_match(command: &str, string: &str) -> Option<Match> {
diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs

index 34e6ae850ce6b2cc11b9ea98f9f341e1c9f66f6a..bbe0b9b96fcb528c2e35b0029bb4d7eb141555b6 100644 (file)
--- a/rust/pspp/src/lex/lexer.rs
+++ b/rust/pspp/src/lex/lexer.rs
@@ -29,7 +29,7 @@ use super::{
      token::Token,
  };
  
-/// Error handling for a [`Reader`].
+/// Error handling for a syntax reader.
  #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
  pub enum ErrorHandling {
      /// Discard input line and continue reading.
diff --git a/rust/pspp/src/lex/mod.rs b/rust/pspp/src/lex/mod.rs

index 97b347ff2e24bad25e9b672b106e62ef79a71bd5..e2e79aa2c566601ef2ef9e0b4838d2953e3881f8 100644 (file)
--- a/rust/pspp/src/lex/mod.rs
+++ b/rust/pspp/src/lex/mod.rs
@@ -2,7 +2,7 @@
  //!
  //! PSPP divides traditional "lexical analysis" or "tokenization" into two
  //! phases: a lower-level phase called "segmentation" and a higher-level phase
-//! called "scanning".  [super::segment] implements the segmentation phase and
+//! called "scanning".  [segment] implements the segmentation phase and
  //! this module the scanning phase.
  //!
  //! Scanning accepts as input a stream of segments, which are UTF-8 strings each
diff --git a/rust/pspp/src/lex/scan/mod.rs b/rust/pspp/src/lex/scan/mod.rs

index 4b665437c7362425d6cd93c81652ea12a0268e34..d99bfb16457515e047eb5055fdc45ee06829dab4 100644 (file)
--- a/rust/pspp/src/lex/scan/mod.rs
+++ b/rust/pspp/src/lex/scan/mod.rs
@@ -2,8 +2,8 @@
  //!
  //! PSPP divides traditional "lexical analysis" or "tokenization" into two
  //! phases: a lower-level phase called "segmentation" and a higher-level phase
-//! called "scanning".  [segment] implements the segmentation phase and [scan]
-//! the scanning phase.
+//! called "scanning".  [mod segment] implements the segmentation phase and [mod
+//! scan] the scanning phase.
  //!
  //! Scanning accepts as input a stream of segments, which are UTF-8 strings each
  //! labeled with a segment type.  It outputs a stream of "scan tokens", which
diff --git a/rust/pspp/src/raw.rs b/rust/pspp/src/raw.rs

index dfb5e40570a36915df4bdc2f7f7514321e6a0c1b..5c3eb8547260e3afa82515b767028c23efe006e2 100644 (file)
--- a/rust/pspp/src/raw.rs
+++ b/rust/pspp/src/raw.rs
@@ -2028,10 +2028,16 @@ fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> {
      Ok((string.into(), rest))
  }
  
+/// [Level of measurement](https://en.wikipedia.org/wiki/Level_of_measurement).
  #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
  pub enum Measure {
+    /// Nominal values can only be compared for equality.
      Nominal,
+
+    /// Ordinal values can be meaningfully ordered.
      Ordinal,
+
+    /// Scale values can be meaningfully compared for the degree of difference.
      Scale,
  }
author	Ben Pfaff <blp@cs.stanford.edu>
	Sat, 26 Apr 2025 02:42:28 +0000 (19:42 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Sat, 26 Apr 2025 02:42:28 +0000 (19:42 -0700)
rust/pspp/src/cooked.rs		patch \| blob \| history
rust/pspp/src/dictionary.rs		patch \| blob \| history
rust/pspp/src/lex/command_name.rs		patch \| blob \| history
rust/pspp/src/lex/lexer.rs		patch \| blob \| history
rust/pspp/src/lex/mod.rs		patch \| blob \| history
rust/pspp/src/lex/scan/mod.rs		patch \| blob \| history
rust/pspp/src/raw.rs		patch \| blob \| history