work on making dictionary fields private
author Ben Pfaff <blp@cs.stanford.edu>
Wed, 23 Jul 2025 00:26:14 +0000 (17:26 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Wed, 23 Jul 2025 00:26:14 +0000 (17:26 -0700)
rust/pspp/src/dictionary.rs
rust/pspp/src/sys/cooked.rs
rust/pspp/src/sys/write.rs

index 623d7f3b06e4233d7f462a75ad775064035437a1..181639c3c19ceede88b81fc9fdb9f92d41893cbc 100644 (file)
@@ -20,7 +20,7 @@ use core::str;
 use std::{
     borrow::Cow,
     cmp::Ordering,
-    collections::{BTreeMap, BTreeSet, HashMap, HashSet},
+    collections::{btree_set, BTreeMap, BTreeSet, HashMap, HashSet},
     fmt::{Debug, Display, Formatter, Result as FmtResult},
     hash::{DefaultHasher, Hash, Hasher},
     ops::{Bound, Index, Not, RangeBounds, RangeInclusive},
@@ -260,19 +260,19 @@ pub struct Dictionary {
     pub variables: IndexSet<ByIdentifier<Variable>>,
 
     /// Indexes into `variables` of the `SPLIT FILE` variables.
-    pub split_file: Vec<DictIndex>,
+    split_file: Vec<DictIndex>,
 
     /// Index of the weight variable, if any.
     ///
     /// The weight variable must be numeric.
-    pub weight: Option<DictIndex>,
+    weight: Option<DictIndex>,
 
     /// Index of the filter variable, if any.
     ///
     /// The filter variable must be numeric.  If there is a filter variable,
     /// then data analysis excludes cases whose filter value is zero or system-
     /// or user-missing.
-    pub filter: Option<DictIndex>,
+    filter: Option<DictIndex>,
 
     /// An optional limit on the number of cases read by procedures.
     pub case_limit: Option<u64>,
@@ -292,12 +292,12 @@ pub struct Dictionary {
     pub attributes: Attributes,
 
     /// Multiple response sets.
-    pub mrsets: BTreeSet<ByIdentifier<MultipleResponseSet>>,
+    pub mrsets: BTreeSet<ByIdentifier<DictIndexMultipleResponseSet>>,
 
     /// Variable sets.
     ///
     /// Only the GUI makes use of variable sets.
-    pub variable_sets: Vec<VariableSet>,
+    pub variable_sets: Vec<DictIndexVariableSet>,
 
     /// Character encoding for the dictionary and the data.
     pub encoding: &'static Encoding,
@@ -316,8 +316,8 @@ impl Serialize for Dictionary {
         map.serialize_field("documents", &self.documents)?;
         map.serialize_field("vectors", &self.vectors())?;
         map.serialize_field("attributes", &self.attributes)?;
-        map.serialize_field("mrsets", &self.mrsets)?;
-        //variable sets
+        map.serialize_field("mrsets", &self.mrsets())?;
+        map.serialize_field("variable_sets", &self.variable_sets())?;
         map.serialize_field("encoding", self.encoding)?;
         map.end()
     }
@@ -335,6 +335,16 @@ pub enum AddVarError {
     },
 }
 
+/// Weight variable must be numeric.
+#[derive(Debug, ThisError)]
+#[error("Weight variable must be numeric.")]
+pub struct InvalidWeightVariable;
+
+/// Filter variable must be numeric.
+#[derive(Debug, ThisError)]
+#[error("Filter variable must be numeric.")]
+pub struct InvalidFilterVariable;
+
 impl Dictionary {
     /// Creates a new, empty dictionary with the specified `encoding`.
     pub fn new(encoding: &'static Encoding) -> Self {
@@ -359,23 +369,78 @@ impl Dictionary {
         self.weight.map(|index| &self.variables[index].0)
     }
 
+    /// Returns the weight variable's dictionary index.
+    pub fn weight_index(&self) -> Option<DictIndex> {
+        self.weight
+    }
+
+    /// Sets the weight variable to the variable with the given dictionary
+    /// index.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `dict_index` is not a valid dictionary index.
+    pub fn set_weight(
+        &mut self,
+        dict_index: Option<DictIndex>,
+    ) -> Result<(), InvalidWeightVariable> {
+        if let Some(dict_index) = dict_index
+            && !self.variables[dict_index].width.is_numeric()
+        {
+            Err(InvalidWeightVariable)
+        } else {
+            self.weight = dict_index;
+            Ok(())
+        }
+    }
+
     /// Returns a reference to the filter variable, if any.
     pub fn filter_var(&self) -> Option<&Variable> {
         self.filter.map(|index| &self.variables[index].0)
     }
 
-    /// Returns references to all the split variables, if any.
-    pub fn split_vars(&self) -> Vec<&Variable> {
-        self.split_file
-            .iter()
-            .map(|index| &self.variables[*index].0)
-            .collect()
+    /// Returns the filter variable's dictionary index.
+    pub fn filter_index(&self) -> Option<DictIndex> {
+        self.filter
+    }
+
+    /// Sets the filter variable to the variable with the given dictionary
+    /// index.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `dict_index` is not a valid dictionary index.
+    pub fn set_filter(
+        &mut self,
+        dict_index: Option<DictIndex>,
+    ) -> Result<(), InvalidFilterVariable> {
+        if let Some(dict_index) = dict_index
+            && !self.variables[dict_index].width.is_numeric()
+        {
+            Err(InvalidFilterVariable)
+        } else {
+            self.filter = dict_index;
+            Ok(())
+        }
+    }
+
+    /// Returns the split variables.
+    pub fn split_vars(&self) -> MappedVariables<'_> {
+        MappedVariables::new(self, &self.split_file)
     }
 
     pub fn vectors(&self) -> Vectors<'_> {
         Vectors::new(self)
     }
 
+    pub fn mrsets(&self) -> MultipleResponseSets<'_> {
+        MultipleResponseSets::new(self)
+    }
+
+    pub fn variable_sets(&self) -> VariableSets<'_> {
+        VariableSets::new(self)
+    }
+
     /// Adds `variable` at the end of the dictionary and returns its index.
     ///
     /// The operation fails if the dictionary already contains a variable with
@@ -1519,9 +1584,214 @@ impl<'a> Serialize for Vectors<'a> {
     }
 }
 
+pub struct VariableSet<'a> {
+    dictionary: &'a Dictionary,
+    variable_set: &'a DictIndexVariableSet,
+}
+
+impl<'a> VariableSet<'a> {
+    pub fn name(&self) -> &'a String {
+        &self.variable_set.name
+    }
+    pub fn variables(&self) -> MappedVariables<'a> {
+        MappedVariables::new(self.dictionary, &self.variable_set.variables)
+    }
+}
+
+#[derive(Debug)]
+pub struct VariableSets<'a>(&'a Dictionary);
+
+impl<'a> VariableSets<'a> {
+    fn new(dictionary: &'a Dictionary) -> Self {
+        Self(dictionary)
+    }
+    pub fn len(&self) -> usize {
+        self.0.variable_sets.len()
+    }
+    pub fn get(&self, index: usize) -> Option<VariableSet<'a>> {
+        self.0
+            .variable_sets
+            .get(index)
+            .map(|variable_set| VariableSet {
+                dictionary: self.0,
+                variable_set: &*variable_set,
+            })
+    }
+    pub fn iter(&self) -> VariableSetsIter<'a> {
+        VariableSetsIter::new(self.0)
+    }
+}
+
+impl<'a> IntoIterator for &VariableSets<'a> {
+    type Item = VariableSet<'a>;
+
+    type IntoIter = VariableSetsIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl<'a> Serialize for VariableSets<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let mut map = serializer.serialize_map(Some(self.len()))?;
+        for variable_set in self {
+            map.serialize_key(variable_set.name())?;
+            map.serialize_value(&variable_set.variables())?;
+        }
+        map.end()
+    }
+}
+
+pub struct VariableSetsIter<'a> {
+    dictionary: &'a Dictionary,
+    iter: std::slice::Iter<'a, DictIndexVariableSet>,
+}
+
+impl<'a> VariableSetsIter<'a> {
+    fn new(dictionary: &'a Dictionary) -> Self {
+        Self {
+            dictionary,
+            iter: dictionary.variable_sets.iter(),
+        }
+    }
+}
+impl<'a> Iterator for VariableSetsIter<'a> {
+    type Item = VariableSet<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next().map(|variable_set| VariableSet {
+            dictionary: self.dictionary,
+            variable_set,
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSets<'a>(&'a Dictionary);
+
+impl<'a> MultipleResponseSets<'a> {
+    fn new(dictionary: &'a Dictionary) -> Self {
+        Self(dictionary)
+    }
+
+    pub fn len(&self) -> usize {
+        self.0.mrsets.len()
+    }
+
+    pub fn get(&self, name: &Identifier) -> Option<MultipleResponseSet<'a>> {
+        self.0
+            .mrsets
+            .get(&name.0)
+            .map(|mrset| MultipleResponseSet::new(self.0, mrset))
+    }
+
+    pub fn iter(&self) -> MultipleResponseSetIter<'a> {
+        MultipleResponseSetIter::new(self.0)
+    }
+}
+
+impl<'a> IntoIterator for &MultipleResponseSets<'a> {
+    type Item = MultipleResponseSet<'a>;
+
+    type IntoIter = MultipleResponseSetIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+pub struct MultipleResponseSetIter<'a> {
+    dictionary: &'a Dictionary,
+    iter: btree_set::Iter<'a, ByIdentifier<DictIndexMultipleResponseSet>>,
+}
+
+impl<'a> MultipleResponseSetIter<'a> {
+    fn new(dictionary: &'a Dictionary) -> Self {
+        Self {
+            dictionary,
+            iter: dictionary.mrsets.iter(),
+        }
+    }
+}
+
+impl<'a> Iterator for MultipleResponseSetIter<'a> {
+    type Item = MultipleResponseSet<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter
+            .next()
+            .map(|set| MultipleResponseSet::new(self.dictionary, set))
+    }
+}
+
+impl<'a> Serialize for MultipleResponseSets<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let mut seq = serializer.serialize_seq(Some(self.len()))?;
+        for set in self {
+            seq.serialize_element(&set)?;
+        }
+        seq.end()
+    }
+}
+
+/// Variables that represent multiple responses to a survey question.
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSet<'a> {
+    dictionary: &'a Dictionary,
+    mrset: &'a DictIndexMultipleResponseSet,
+}
+
+impl<'a> MultipleResponseSet<'a> {
+    fn new(dictionary: &'a Dictionary, mrset: &'a DictIndexMultipleResponseSet) -> Self {
+        Self { dictionary, mrset }
+    }
+
+    pub fn name(&self) -> &Identifier {
+        &self.mrset.name
+    }
+
+    pub fn label(&self) -> &String {
+        &self.mrset.label
+    }
+
+    pub fn width(&self) -> RangeInclusive<VarWidth> {
+        self.mrset.width.clone()
+    }
+
+    pub fn mr_type(&self) -> &MultipleResponseType {
+        &self.mrset.mr_type
+    }
+
+    pub fn variables(&self) -> MappedVariables<'a> {
+        MappedVariables::new(self.dictionary, &self.mrset.variables)
+    }
+}
+
+impl<'a> Serialize for MultipleResponseSet<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let mut map = serializer.serialize_map(Some(5))?;
+        map.serialize_entry("name", self.name())?;
+        map.serialize_entry("label", self.label())?;
+        map.serialize_entry("width", &self.width())?;
+        map.serialize_entry("type", self.mr_type())?;
+        map.serialize_entry("variables", &self.variables())?;
+        map.end()
+    }
+}
+
 /// Variables that represent multiple responses to a survey question.
 #[derive(Clone, Debug, Serialize)]
-pub struct MultipleResponseSet {
+pub struct DictIndexMultipleResponseSet {
     /// The set's name.
     pub name: Identifier,
 
@@ -1538,7 +1808,7 @@ pub struct MultipleResponseSet {
     pub variables: Vec<DictIndex>,
 }
 
-impl MultipleResponseSet {
+impl DictIndexMultipleResponseSet {
     fn with_updated_dict_indexes(
         mut self,
         f: impl Fn(DictIndex) -> Option<DictIndex>,
@@ -1548,7 +1818,7 @@ impl MultipleResponseSet {
     }
 }
 
-impl HasIdentifier for MultipleResponseSet {
+impl HasIdentifier for DictIndexMultipleResponseSet {
     fn identifier(&self) -> &UniCase<String> {
         &self.name.0
     }
@@ -1604,12 +1874,12 @@ pub enum CategoryLabels {
 }
 
 #[derive(Clone, Debug)]
-pub struct VariableSet {
+pub struct DictIndexVariableSet {
     pub name: String,
     pub variables: Vec<DictIndex>,
 }
 
-impl VariableSet {
+impl DictIndexVariableSet {
     fn with_updated_dict_indexes(
         mut self,
         f: impl Fn(DictIndex) -> Option<DictIndex>,
index c4c67bf753b1e9169e38aa949271ca6fefe8596f..14aafcf167f45ffc7fc2fc15093a5825e3bbbbf2 100644 (file)
@@ -28,8 +28,8 @@ use crate::{
     crypto::EncryptedFile,
     data::{Datum, RawString},
     dictionary::{
-        Dictionary, InvalidRole, MissingValues, MissingValuesError, MultipleResponseSet,
-        MultipleResponseType, VarWidth, Variable, VariableSet,
+        DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, InvalidRole, MissingValues,
+        MissingValuesError, MultipleResponseType, VarWidth, Variable,
     },
     endian::Endian,
     format::{Error as FormatError, Format, UncheckedFormat},
@@ -970,20 +970,17 @@ impl Records {
                 });
             } else {
                 let (var_index, dict_index) = var_index_map.range(..=&index).last().unwrap();
-                let variable = &dictionary.variables[*dict_index];
                 if *var_index == index {
-                    if variable.is_numeric() {
-                        dictionary.weight = Some(*dict_index);
-                    } else {
+                    if dictionary.set_weight(Some(*dict_index)).is_err() {
                         warn(Error::InvalidWeightVar {
                             index: weight_index,
-                            name: variable.name.clone(),
+                            name: dictionary.variables[*dict_index].name.clone(),
                         });
                     }
                 } else {
                     warn(Error::WeightIndexStringContinuation {
                         index: weight_index,
-                        name: variable.name.clone(),
+                        name: dictionary.variables[*dict_index].name.clone(),
                     });
                 }
             }
@@ -1075,7 +1072,7 @@ impl Records {
             .iter()
             .flat_map(|record| record.sets.iter())
         {
-            match MultipleResponseSet::decode(&dictionary, record, &mut warn) {
+            match DictIndexMultipleResponseSet::decode(&dictionary, record, &mut warn) {
                 Ok(mrset) => {
                     dictionary.mrsets.insert(ByIdentifier::new(mrset));
                 }
@@ -1291,7 +1288,7 @@ impl Records {
                 };
                 variables.push(dict_index);
             }
-            let variable_set = VariableSet {
+            let variable_set = DictIndexVariableSet {
                 name: record.name,
                 variables,
             };
@@ -1531,7 +1528,7 @@ impl Decoder {
     }
 }
 
-impl MultipleResponseSet {
+impl DictIndexMultipleResponseSet {
     fn decode(
         dictionary: &Dictionary,
         input: &raw::records::MultipleResponseSet<Identifier, String>,
@@ -1581,7 +1578,7 @@ impl MultipleResponseSet {
 
         let mr_type = MultipleResponseType::decode(&mr_set_name, &input.mr_type, min_width)?;
 
-        Ok(MultipleResponseSet {
+        Ok(DictIndexMultipleResponseSet {
             name: mr_set_name,
             width: min_width..=max_width,
             label: input.label.to_string(),
index 293c9beee18e0a7b6bff0eb31b9b8a9dc1bc5127..859bdad154e85bb6ebaa331978d2bf8292243c6f 100644 (file)
@@ -206,7 +206,7 @@ where
                 Some(Compression::ZLib) => 2,
                 None => 0,
             },
-            weight_index: if let Some(weight_index) = self.dictionary.weight {
+            weight_index: if let Some(weight_index) = self.dictionary.weight_index() {
                 count_segments(&self.case_vars[..weight_index]) + 1
             } else {
                 0