From: Ben Pfaff Date: Sat, 26 Apr 2025 02:42:28 +0000 (-0700) Subject: work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b908b334970f84b50254db95f26b37a6c6c972da;p=pspp work --- diff --git a/rust/pspp/src/cooked.rs b/rust/pspp/src/cooked.rs index c82c41fbe3..8de34d5ac6 100644 --- a/rust/pspp/src/cooked.rs +++ b/rust/pspp/src/cooked.rs @@ -811,8 +811,7 @@ impl MultipleResponseSet { Ok(MultipleResponseSet { name: mr_set_name, - min_width, - max_width, + width: min_width..=max_width, label: input.label.to_string(), mr_type, variables, diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 91e41acbcf..d0941be061 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -1,10 +1,12 @@ +//! Dictionaries and variables. + use core::str; use std::{ cmp::Ordering, collections::{HashMap, HashSet}, fmt::{Debug, Formatter, Result as FmtResult}, hash::Hash, - ops::{Bound, RangeBounds}, + ops::{Bound, RangeBounds, RangeInclusive}, }; use encoding_rs::Encoding; @@ -19,8 +21,10 @@ use crate::{ raw::{Alignment, CategoryLabels, Measure, MissingValues, RawString, VarType}, }; +/// An index within [Dictionary::variables]. pub type DictIndex = usize; +/// [VarType], plus a width for [VarType::String]. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum VarWidth { Numeric, @@ -265,19 +269,53 @@ impl From<&[u8]> for Value { } } +/// A collection of variables, plus additional metadata. #[derive(Clone, Debug)] pub struct Dictionary { + /// The variables. pub variables: IndexSet>, + + /// Indexes into `variables` of the `SPLIT FILE` variables. pub split_file: Vec, + + /// Index of the weight variable, if any. + /// + /// The weight variable must be numeric. pub weight: Option, + + /// Index of the filter variable, if any. + /// + /// The filter variable must be numeric. If there is a filter variable, + /// then data analysis excludes cases whose filter value is zero or system- + /// or user-missing. 
pub filter: Option, + + /// An optional limit on the number of cases read by procedures. pub case_limit: Option, + + /// Optional label (name) for the dictionary. pub file_label: Option, + + /// Optional additional documentation associated with the dictionary. pub documents: Vec, + + /// Named collections of variables within the dictionary. pub vectors: HashSet>, + + /// Attributes for the dictionary itself. + /// + /// Individual variables can have their own attributes. pub attributes: Attributes, + + /// Multiple response sets. pub mrsets: HashSet>, + + /// Variable sets. + /// + /// Only the GUI makes use of variable sets. pub variable_sets: HashSet>, + + /// Character encoding for the dictionary and the data. pub encoding: &'static Encoding, } @@ -285,6 +323,7 @@ pub struct Dictionary { pub struct DuplicateVariableName; impl Dictionary { + /// Creates a new, empty dictionary with the specified `encoding`. pub fn new(encoding: &'static Encoding) -> Self { Self { variables: IndexSet::new(), @@ -302,10 +341,12 @@ impl Dictionary { } } + /// Returns a reference to the weight variable, if any. pub fn weight_var(&self) -> Option<&Variable> { self.weight.map(|index| &self.variables[index].0) } + /// Returns references to all the split variables, if any. pub fn split_vars(&self) -> Vec<&Variable> { self.split_file .iter() @@ -313,7 +354,10 @@ impl Dictionary { .collect() } - pub fn add_var(&mut self, variable: Variable) -> Result { + /// Adds `variable` at the end of the dictionary and returns its index. The + /// operation fails if the dictionary already contains a variable with the + /// same name (or a variant with different case). + pub fn add_var(&mut self, variable: Variable) -> Result { let (index, inserted) = self.variables.insert_full(ByIdentifier::new(variable)); if inserted { Ok(index) @@ -322,7 +366,16 @@ impl Dictionary { } } + /// Reorders the variables in the dictionary so that the variable with + /// 0-based index `from_index` is moved to `to_index`. 
Other variables stay + /// in the same relative positions. + /// + /// # Panics + /// + /// Panics if `from_index` or `to_index` is not valid. pub fn reorder_var(&mut self, from_index: DictIndex, to_index: DictIndex) { + debug_assert!(from_index < self.variables.len()); + debug_assert!(to_index < self.variables.len()); if from_index != to_index { self.variables.move_index(from_index, to_index); self.update_dict_indexes(&|index| { @@ -346,6 +399,8 @@ impl Dictionary { } } + /// Evaluates `keep` on each variable in the dictionary and deletes + /// variables for which it returns false. pub fn retain_vars(&mut self, keep: F) where F: Fn(&Variable) -> bool, @@ -365,6 +420,12 @@ impl Dictionary { } } + /// Deletes the variables whose indexes are in the given `range`. + /// + /// # Panics + /// + /// Panics if any part of `range` is outside the valid range of variable + /// indexes. pub fn delete_vars(&mut self, range: R) where R: RangeBounds, @@ -432,6 +493,9 @@ impl Dictionary { .collect(); } + /// Attempts to change the name of the variable with the given `index` to + /// `new_name`. Returns true if successful, false if `new_name` would + /// duplicate the name of some other variable. pub fn try_rename_var(&mut self, index: usize, new_name: Identifier) -> bool { let mut variable = self.variables.swap_remove_index(index).unwrap(); let may_rename = !self.variables.contains(&new_name.0); @@ -444,6 +508,11 @@ impl Dictionary { may_rename } + /// Changes the name of the variable with the given `index` to `new_name`. + /// + /// # Panics + /// + /// Panics if the new name duplicates the name of some existing variable. pub fn rename_var(&mut self, index: usize, new_name: Identifier) { assert!(self.try_rename_var(index, new_name)); } @@ -525,21 +594,59 @@ impl TryFrom<&Attributes> for Option { } } +/// A variable, usually inside a [Dictionary]. #[derive(Clone, Debug)] pub struct Variable { + /// The variable's name. + /// + /// PSPP variable names are case-insensitive. 
pub name: Identifier, + + /// Variable width. pub width: VarWidth, + + /// User-missing values. + /// + /// Numeric variables also have a system-missing value (represented as + /// `None`). + /// + /// Both kinds of missing values are excluded from most analyses. pub missing_values: MissingValues, + + /// Output format used in most contexts. pub print_format: Format, + + /// Output format used on the `WRITE` command. pub write_format: Format, + + /// Value labels, to associate a number (or a string) with a more meaningful + /// description, e.g. 1 -> Apple, 2 -> Banana, ... pub value_labels: HashMap, + + /// Variable label, an optional meaningful description for the variable + /// itself. pub label: Option, + + /// Measurement level for the variable's data. pub measure: Option, + + /// Role in data analysis. pub role: Option, + + /// Width of data column in GUI. pub display_width: u32, + + /// Data alignment in GUI. pub alignment: Alignment, + + /// Whether to retain values of the variable from one case to the next. pub leave: bool, + + /// For compatibility with old software that supported at most 8-character + /// variable names. pub short_names: Vec, + + /// Variable attributes. pub attributes: Attributes, } @@ -602,13 +709,22 @@ impl HasIdentifier for Vector { } } +/// Variables that represent multiple responses to a survey question. #[derive(Clone, Debug)] pub struct MultipleResponseSet { + /// The set's name. pub name: Identifier, + + /// A description for the set. pub label: String, - pub min_width: VarWidth, - pub max_width: VarWidth, + + /// Range of widths among the variables. + pub width: RangeInclusive, + + /// What kind of multiple response set this is. pub mr_type: MultipleResponseType, + + /// The variables comprising the set. pub variables: Vec, } @@ -628,12 +744,21 @@ impl HasIdentifier for MultipleResponseSet { } } +/// The type of a [MultipleResponseSet]. 
#[derive(Clone, Debug)] pub enum MultipleResponseType { + /// A "multiple dichotomy set", analogous to a survey question with a set of + /// checkboxes. Each variable in the set is treated in a Boolean fashion: + /// one value (the "counted value") means that the box was checked, and any + /// other value means that it was not. MultipleDichotomy { value: Value, labels: CategoryLabels, }, + + /// A "multiple category set", a survey question where the respondent is + /// instructed to list up to N choices. Each variable represents one of the + /// responses. MultipleCategory, } diff --git a/rust/pspp/src/lex/command_name.rs b/rust/pspp/src/lex/command_name.rs index 0e9957d4b3..5b6b382c90 100644 --- a/rust/pspp/src/lex/command_name.rs +++ b/rust/pspp/src/lex/command_name.rs @@ -17,7 +17,7 @@ pub struct Match { /// match, then `string` does not match `command` and the function returns false. /// /// 4. Otherwise, `string` and `command` match. Set *MISSING_WORDS to n - m. Set -/// *EXACT to false if any of the S[i] were found to be abbreviated in the +/// *EXACT to false if any of the `S[i]` were found to be abbreviated in the /// comparisons done in step 3, or to true if they were all exactly equal /// (modulo case). Return true. pub fn command_match(command: &str, string: &str) -> Option { diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs index 34e6ae850c..bbe0b9b96f 100644 --- a/rust/pspp/src/lex/lexer.rs +++ b/rust/pspp/src/lex/lexer.rs @@ -29,7 +29,7 @@ use super::{ token::Token, }; -/// Error handling for a [`Reader`]. +/// Error handling for a syntax reader. #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] pub enum ErrorHandling { /// Discard input line and continue reading. diff --git a/rust/pspp/src/lex/mod.rs b/rust/pspp/src/lex/mod.rs index 97b347ff2e..e2e79aa2c5 100644 --- a/rust/pspp/src/lex/mod.rs +++ b/rust/pspp/src/lex/mod.rs @@ -2,7 +2,7 @@ //! //! PSPP divides traditional "lexical analysis" or "tokenization" into two //! 
phases: a lower-level phase called "segmentation" and a higher-level phase -//! called "scanning". [super::segment] implements the segmentation phase and +//! called "scanning". [segment] implements the segmentation phase and //! this module the scanning phase. //! //! Scanning accepts as input a stream of segments, which are UTF-8 strings each diff --git a/rust/pspp/src/lex/scan/mod.rs b/rust/pspp/src/lex/scan/mod.rs index 4b665437c7..d99bfb1645 100644 --- a/rust/pspp/src/lex/scan/mod.rs +++ b/rust/pspp/src/lex/scan/mod.rs @@ -2,8 +2,8 @@ //! //! PSPP divides traditional "lexical analysis" or "tokenization" into two //! phases: a lower-level phase called "segmentation" and a higher-level phase -//! called "scanning". [segment] implements the segmentation phase and [scan] -//! the scanning phase. +//! called "scanning". [mod segment] implements the segmentation phase and [mod +//! scan] the scanning phase. //! //! Scanning accepts as input a stream of segments, which are UTF-8 strings each //! labeled with a segment type. It outputs a stream of "scan tokens", which diff --git a/rust/pspp/src/raw.rs b/rust/pspp/src/raw.rs index dfb5e40570..5c3eb85472 100644 --- a/rust/pspp/src/raw.rs +++ b/rust/pspp/src/raw.rs @@ -2028,10 +2028,16 @@ fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> { Ok((string.into(), rest)) } +/// [Level of measurement](https://en.wikipedia.org/wiki/Level_of_measurement). #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Measure { + /// Nominal values can only be compared for equality. Nominal, + + /// Ordinal values can be meaningfully ordered. Ordinal, + + /// Scale values can be meaningfully compared for the degree of difference. Scale, }