+//! Dictionaries and variables.
+
use core::str;
use std::{
cmp::Ordering,
collections::{HashMap, HashSet},
fmt::{Debug, Formatter, Result as FmtResult},
hash::Hash,
- ops::{Bound, RangeBounds},
+ ops::{Bound, RangeBounds, RangeInclusive},
};
use encoding_rs::Encoding;
raw::{Alignment, CategoryLabels, Measure, MissingValues, RawString, VarType},
};
+/// An index within [Dictionary::variables].
pub type DictIndex = usize;
+/// [VarType], plus a width for [VarType::String].
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum VarWidth {
Numeric,
}
}
+/// A collection of variables, plus additional metadata.
#[derive(Clone, Debug)]
pub struct Dictionary {
+ /// The variables.
pub variables: IndexSet<ByIdentifier<Variable>>,
+
+ /// Indexes into `variables` of the `SPLIT FILE` variables.
pub split_file: Vec<DictIndex>,
+
+ /// Index of the weight variable, if any.
+ ///
+ /// The weight variable must be numeric.
pub weight: Option<DictIndex>,
+
+ /// Index of the filter variable, if any.
+ ///
+ /// The filter variable must be numeric. If there is a filter variable,
+ /// then data analysis excludes cases whose filter value is zero or system-
+ /// or user-missing.
pub filter: Option<DictIndex>,
+
+ /// An optional limit on the number of cases read by procedures.
pub case_limit: Option<u64>,
+
+ /// Optional label (name) for the dictionary.
pub file_label: Option<String>,
+
+ /// Optional additional documentation associated with the dictionary.
pub documents: Vec<String>,
+
+ /// Named collections of variables within the dictionary.
pub vectors: HashSet<ByIdentifier<Vector>>,
+
+ /// Attributes for the dictionary itself.
+ ///
+ /// Individual variables can have their own attributes.
pub attributes: Attributes,
+
+ /// Multiple response sets.
pub mrsets: HashSet<ByIdentifier<MultipleResponseSet>>,
+
+ /// Variable sets.
+ ///
+ /// Only the GUI makes use of variable sets.
pub variable_sets: HashSet<ByIdentifier<VariableSet>>,
+
+ /// Character encoding for the dictionary and the data.
pub encoding: &'static Encoding,
}
pub struct DuplicateVariableName;
impl Dictionary {
+ /// Creates a new, empty dictionary with the specified `encoding`.
pub fn new(encoding: &'static Encoding) -> Self {
Self {
variables: IndexSet::new(),
}
}
+ /// Returns a reference to the weight variable, if any.
pub fn weight_var(&self) -> Option<&Variable> {
self.weight.map(|index| &self.variables[index].0)
}
+ /// Returns references to all the split variables, if any.
pub fn split_vars(&self) -> Vec<&Variable> {
self.split_file
.iter()
.collect()
}
- pub fn add_var(&mut self, variable: Variable) -> Result<usize, DuplicateVariableName> {
+ /// Adds `variable` at the end of the dictionary and returns its index. The
+ /// operation fails if the dictionary already contains a variable with the
+ /// same name (or a variant with different case).
+ pub fn add_var(&mut self, variable: Variable) -> Result<DictIndex, DuplicateVariableName> {
let (index, inserted) = self.variables.insert_full(ByIdentifier::new(variable));
if inserted {
Ok(index)
}
}
+ /// Reorders the variables in the dictionary so that the variable with
+ /// 0-based index `from_index` is moved to `to_index`. Other variables stay
+ /// in the same relative positions.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `from_index` or `to_index` is out of range.
pub fn reorder_var(&mut self, from_index: DictIndex, to_index: DictIndex) {
+ debug_assert!(from_index < self.variables.len());
+ debug_assert!(to_index < self.variables.len());
if from_index != to_index {
self.variables.move_index(from_index, to_index);
self.update_dict_indexes(&|index| {
}
}
+ /// Evaluates `keep` on each variable in the dictionary and deletes
+ /// variables for which it returns false.
pub fn retain_vars<F>(&mut self, keep: F)
where
F: Fn(&Variable) -> bool,
}
}
+ /// Deletes the variables whose indexes are in the given `range`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if any part of `range` is outside the valid range of variable
+ /// indexes.
pub fn delete_vars<R>(&mut self, range: R)
where
R: RangeBounds<DictIndex>,
.collect();
}
+ /// Attempts to change the name of the variable with the given `index` to
+ /// `new_name`. Returns true if successful, false if `new_name` would
+ /// duplicate the name of some other variable.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `index` is not a valid variable index.
pub fn try_rename_var(&mut self, index: usize, new_name: Identifier) -> bool {
let mut variable = self.variables.swap_remove_index(index).unwrap();
let may_rename = !self.variables.contains(&new_name.0);
may_rename
}
+ /// Changes the name of the variable with the given `index` to `new_name`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `index` is invalid or `new_name` duplicates another variable's name.
pub fn rename_var(&mut self, index: usize, new_name: Identifier) {
assert!(self.try_rename_var(index, new_name));
}
}
}
+/// A variable, usually inside a [Dictionary].
#[derive(Clone, Debug)]
pub struct Variable {
+ /// The variable's name.
+ ///
+ /// PSPP variable names are case-insensitive.
pub name: Identifier,
+
+ /// Variable width.
pub width: VarWidth,
+
+ /// User-missing values.
+ ///
+ /// Numeric variables also have a system-missing value (represented as
+ /// `None`).
+ ///
+ /// Both kinds of missing values are excluded from most analyses.
pub missing_values: MissingValues,
+
+ /// Output format used in most contexts.
pub print_format: Format,
+
+ /// Output format used on the `WRITE` command.
pub write_format: Format,
+
+ /// Value labels, to associate a number (or a string) with a more meaningful
+ /// description, e.g. 1 -> Apple, 2 -> Banana, ...
pub value_labels: HashMap<Value, String>,
+
+ /// Variable label, an optional meaningful description for the variable
+ /// itself.
pub label: Option<String>,
+
+ /// Measurement level for the variable's data.
pub measure: Option<Measure>,
+
+ /// Role in data analysis.
pub role: Option<Role>,
+
+ /// Width of data column in GUI.
pub display_width: u32,
+
+ /// Data alignment in GUI.
pub alignment: Alignment,
+
+ /// Whether to retain values of the variable from one case to the next.
pub leave: bool,
+
+ /// Short names for the variable, for compatibility with old software that
+ /// supported at most 8-character variable names.
pub short_names: Vec<Identifier>,
+
+ /// Variable attributes.
pub attributes: Attributes,
}
}
}
+/// A set of variables that represent multiple responses to a survey question.
#[derive(Clone, Debug)]
pub struct MultipleResponseSet {
+ /// The set's name.
pub name: Identifier,
+
+ /// A description for the set.
pub label: String,
- pub min_width: VarWidth,
- pub max_width: VarWidth,
+
+ /// Range of widths among the set's variables.
+ pub width: RangeInclusive<VarWidth>,
+
+ /// What kind of multiple response set this is.
pub mr_type: MultipleResponseType,
+
+ /// Indexes, within [Dictionary::variables], of the variables comprising the set.
pub variables: Vec<DictIndex>,
}
}
}
+/// The type of a [MultipleResponseSet].
#[derive(Clone, Debug)]
pub enum MultipleResponseType {
+ /// A "multiple dichotomy set", analogous to a survey question with a set of
+ /// checkboxes. Each variable in the set is treated in a Boolean fashion:
+ /// one value (the "counted value") means that the box was checked, and any
+ /// other value means that it was not.
MultipleDichotomy {
value: Value,
labels: CategoryLabels,
},
+
+ /// A "multiple category set", a survey question where the respondent is
+ /// instructed to list up to N choices. Each variable represents one of the
+ /// responses.
MultipleCategory,
}