use std::{
borrow::Cow,
cmp::Ordering,
- collections::{BTreeMap, BTreeSet, HashMap, HashSet},
+ collections::{btree_set, BTreeMap, BTreeSet, HashMap, HashSet},
fmt::{Debug, Display, Formatter, Result as FmtResult},
hash::{DefaultHasher, Hash, Hasher},
ops::{Bound, Index, Not, RangeBounds, RangeInclusive},
pub variables: IndexSet<ByIdentifier<Variable>>,
/// Indexes into `variables` of the `SPLIT FILE` variables.
- pub split_file: Vec<DictIndex>,
+ split_file: Vec<DictIndex>,
/// Index of the weight variable, if any.
///
/// The weight variable must be numeric.
- pub weight: Option<DictIndex>,
+ weight: Option<DictIndex>,
/// Index of the filter variable, if any.
///
/// The filter variable must be numeric. If there is a filter variable,
/// then data analysis excludes cases whose filter value is zero or system-
/// or user-missing.
- pub filter: Option<DictIndex>,
+ filter: Option<DictIndex>,
/// An optional limit on the number of cases read by procedures.
pub case_limit: Option<u64>,
pub attributes: Attributes,
/// Multiple response sets.
- pub mrsets: BTreeSet<ByIdentifier<MultipleResponseSet>>,
+ pub mrsets: BTreeSet<ByIdentifier<DictIndexMultipleResponseSet>>,
/// Variable sets.
///
/// Only the GUI makes use of variable sets.
- pub variable_sets: Vec<VariableSet>,
+ pub variable_sets: Vec<DictIndexVariableSet>,
/// Character encoding for the dictionary and the data.
pub encoding: &'static Encoding,
map.serialize_field("documents", &self.documents)?;
map.serialize_field("vectors", &self.vectors())?;
map.serialize_field("attributes", &self.attributes)?;
- map.serialize_field("mrsets", &self.mrsets)?;
- //variable sets
+ map.serialize_field("mrsets", &self.mrsets())?;
+ map.serialize_field("variable_sets", &self.variable_sets())?;
map.serialize_field("encoding", self.encoding)?;
map.end()
}
},
}
+/// Error returned by [`Dictionary::set_weight`] when the variable proposed as
+/// the weight variable is not numeric.
+#[derive(Debug, ThisError)]
+#[error("Weight variable must be numeric.")]
+pub struct InvalidWeightVariable;
+
+/// Error returned by [`Dictionary::set_filter`] when the variable proposed as
+/// the filter variable is not numeric.
+#[derive(Debug, ThisError)]
+#[error("Filter variable must be numeric.")]
+pub struct InvalidFilterVariable;
+
impl Dictionary {
/// Creates a new, empty dictionary with the specified `encoding`.
pub fn new(encoding: &'static Encoding) -> Self {
self.weight.map(|index| &self.variables[index].0)
}
+ /// Returns the dictionary index of the weight variable, or `None` if the
+ /// dictionary has no weight variable.
+ pub fn weight_index(&self) -> Option<DictIndex> {
+ self.weight
+ }
+
+ /// Makes the variable at `dict_index` the weight variable, or clears the
+ /// weight variable if `dict_index` is `None`.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`InvalidWeightVariable`], leaving the current weight variable
+ /// untouched, if the variable at `dict_index` is not numeric.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `dict_index` is not a valid dictionary index.
+ pub fn set_weight(
+     &mut self,
+     dict_index: Option<DictIndex>,
+ ) -> Result<(), InvalidWeightVariable> {
+     match dict_index {
+         // Only a present index needs validating.
+         Some(index) if !self.variables[index].width.is_numeric() => {
+             Err(InvalidWeightVariable)
+         }
+         // `None`, or a numeric variable: record the new setting.
+         _ => {
+             self.weight = dict_index;
+             Ok(())
+         }
+     }
+ }
+
+
/// Returns a reference to the filter variable, or `None` if the dictionary
/// has no filter variable.
pub fn filter_var(&self) -> Option<&Variable> {
    let index = self.filter?;
    Some(&self.variables[index].0)
}
- /// Returns references to all the split variables, if any.
- pub fn split_vars(&self) -> Vec<&Variable> {
- self.split_file
- .iter()
- .map(|index| &self.variables[*index].0)
- .collect()
+ /// Returns the dictionary index of the filter variable, or `None` if the
+ /// dictionary has no filter variable.
+ pub fn filter_index(&self) -> Option<DictIndex> {
+ self.filter
+ }
+
+ /// Makes the variable at `dict_index` the filter variable, or clears the
+ /// filter variable if `dict_index` is `None`.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`InvalidFilterVariable`], leaving the current filter variable
+ /// untouched, if the variable at `dict_index` is not numeric.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `dict_index` is not a valid dictionary index.
+ pub fn set_filter(
+     &mut self,
+     dict_index: Option<DictIndex>,
+ ) -> Result<(), InvalidFilterVariable> {
+     match dict_index {
+         // Only a present index needs validating.
+         Some(index) if !self.variables[index].width.is_numeric() => {
+             Err(InvalidFilterVariable)
+         }
+         // `None`, or a numeric variable: record the new setting.
+         _ => {
+             self.filter = dict_index;
+             Ok(())
+         }
+     }
+ }
+
+
+ /// Returns the `SPLIT FILE` variables as a [`MappedVariables`] view built
+ /// from this dictionary and its stored split-file indexes.
+ pub fn split_vars(&self) -> MappedVariables<'_> {
+ MappedVariables::new(self, &self.split_file)
}
/// Returns a [`Vectors`] value constructed over this dictionary.
pub fn vectors(&self) -> Vectors<'_> {
Vectors::new(self)
}
+ /// Returns a view of the dictionary's multiple response sets.
+ pub fn mrsets(&self) -> MultipleResponseSets<'_> {
+ MultipleResponseSets::new(self)
+ }
+
+ /// Returns a view of the dictionary's variable sets.
+ pub fn variable_sets(&self) -> VariableSets<'_> {
+ VariableSets::new(self)
+ }
+
/// Adds `variable` at the end of the dictionary and returns its index.
///
/// The operation fails if the dictionary already contains a variable with
}
}
+/// A variable set, viewed through the dictionary that contains it.
+///
+/// Pairs a [`DictIndexVariableSet`] with its [`Dictionary`] so that the set's
+/// members can be handed out through [`MappedVariables`] instead of as raw
+/// dictionary indexes.
+#[derive(Clone, Debug)]
+pub struct VariableSet<'a> {
+    /// The dictionary that owns the set.
+    dictionary: &'a Dictionary,
+    /// The underlying set, whose members are stored as dictionary indexes.
+    variable_set: &'a DictIndexVariableSet,
+}
+
+impl<'a> VariableSet<'a> {
+ /// Returns the variable set's name.
+ pub fn name(&self) -> &'a String {
+ &self.variable_set.name
+ }
+ /// Returns the set's members as a [`MappedVariables`] view built from the
+ /// dictionary and the set's stored dictionary indexes.
+ pub fn variables(&self) -> MappedVariables<'a> {
+ MappedVariables::new(self.dictionary, &self.variable_set.variables)
+ }
+}
+
+/// A view of all of the variable sets in a [`Dictionary`].
+///
+/// Obtained from [`Dictionary::variable_sets`].
+#[derive(Clone, Debug)]
+pub struct VariableSets<'a>(&'a Dictionary);
+
+impl<'a> VariableSets<'a> {
+    /// Creates a view of the variable sets in `dictionary`.
+    fn new(dictionary: &'a Dictionary) -> Self {
+        Self(dictionary)
+    }
+
+    /// Returns the number of variable sets.
+    pub fn len(&self) -> usize {
+        self.0.variable_sets.len()
+    }
+
+    /// Returns true if the dictionary has no variable sets.
+    pub fn is_empty(&self) -> bool {
+        self.0.variable_sets.is_empty()
+    }
+
+    /// Returns the variable set at position `index`, or `None` if `index` is
+    /// out of range.
+    pub fn get(&self, index: usize) -> Option<VariableSet<'a>> {
+        let dictionary = self.0;
+        dictionary
+            .variable_sets
+            .get(index)
+            .map(|variable_set| VariableSet {
+                dictionary,
+                variable_set,
+            })
+    }
+
+    /// Returns an iterator over the variable sets, in stored order.
+    pub fn iter(&self) -> VariableSetsIter<'a> {
+        VariableSetsIter::new(self.0)
+    }
+}
+
+/// Allows `for variable_set in &variable_sets`, delegating to
+/// [`VariableSets::iter`].
+impl<'a> IntoIterator for &VariableSets<'a> {
+ type Item = VariableSet<'a>;
+
+ type IntoIter = VariableSetsIter<'a>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.iter()
+ }
+}
+
+/// Serializes the variable sets as a map from each set's name to its
+/// variables.
+impl<'a> Serialize for VariableSets<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let mut map = serializer.serialize_map(Some(self.len()))?;
+        for variable_set in self {
+            // Key and value are both available at once, so hand them to the
+            // serializer together, as `MultipleResponseSet` does.
+            map.serialize_entry(variable_set.name(), &variable_set.variables())?;
+        }
+        map.end()
+    }
+}
+
+/// Iterator over the variable sets in a [`Dictionary`], yielding a
+/// [`VariableSet`] for each one.
+pub struct VariableSetsIter<'a> {
+ dictionary: &'a Dictionary,
+ iter: std::slice::Iter<'a, DictIndexVariableSet>,
+}
+
+impl<'a> VariableSetsIter<'a> {
+    /// Creates an iterator positioned at the first variable set in
+    /// `dictionary`.
+    fn new(dictionary: &'a Dictionary) -> Self {
+        let iter = dictionary.variable_sets.iter();
+        Self { dictionary, iter }
+    }
+}
+impl<'a> Iterator for VariableSetsIter<'a> {
+    type Item = VariableSet<'a>;
+
+    /// Yields the next variable set, paired with the dictionary.
+    fn next(&mut self) -> Option<Self::Item> {
+        let variable_set = self.iter.next()?;
+        Some(VariableSet {
+            dictionary: self.dictionary,
+            variable_set,
+        })
+    }
+
+    /// Forwards the underlying slice iterator's hint; each inner item maps to
+    /// exactly one yielded item, so the bounds carry over unchanged.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+}
+
+/// A view of all of the multiple response sets in a [`Dictionary`].
+///
+/// Obtained from [`Dictionary::mrsets`].
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSets<'a>(&'a Dictionary);
+
+impl<'a> MultipleResponseSets<'a> {
+    /// Creates a view of the multiple response sets in `dictionary`.
+    fn new(dictionary: &'a Dictionary) -> Self {
+        Self(dictionary)
+    }
+
+    /// Returns the number of multiple response sets.
+    pub fn len(&self) -> usize {
+        self.0.mrsets.len()
+    }
+
+    /// Returns true if the dictionary has no multiple response sets.
+    pub fn is_empty(&self) -> bool {
+        self.0.mrsets.is_empty()
+    }
+
+    /// Returns the multiple response set named `name`, or `None` if the
+    /// dictionary has no set with that name.
+    pub fn get(&self, name: &Identifier) -> Option<MultipleResponseSet<'a>> {
+        let dictionary = self.0;
+        dictionary
+            .mrsets
+            .get(&name.0)
+            .map(|mrset| MultipleResponseSet::new(dictionary, mrset))
+    }
+
+    /// Returns an iterator over the multiple response sets.
+    pub fn iter(&self) -> MultipleResponseSetIter<'a> {
+        MultipleResponseSetIter::new(self.0)
+    }
+}
+
+/// Allows `for mrset in &mrsets`, delegating to
+/// [`MultipleResponseSets::iter`].
+impl<'a> IntoIterator for &MultipleResponseSets<'a> {
+ type Item = MultipleResponseSet<'a>;
+
+ type IntoIter = MultipleResponseSetIter<'a>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.iter()
+ }
+}
+
+/// Iterator over the multiple response sets in a [`Dictionary`], yielding a
+/// [`MultipleResponseSet`] for each one.
+pub struct MultipleResponseSetIter<'a> {
+ dictionary: &'a Dictionary,
+ iter: btree_set::Iter<'a, ByIdentifier<DictIndexMultipleResponseSet>>,
+}
+
+impl<'a> MultipleResponseSetIter<'a> {
+    /// Creates an iterator positioned at the first multiple response set in
+    /// `dictionary`.
+    fn new(dictionary: &'a Dictionary) -> Self {
+        let iter = dictionary.mrsets.iter();
+        Self { dictionary, iter }
+    }
+}
+
+impl<'a> Iterator for MultipleResponseSetIter<'a> {
+    type Item = MultipleResponseSet<'a>;
+
+    /// Yields the next multiple response set, paired with the dictionary.
+    fn next(&mut self) -> Option<Self::Item> {
+        let set = self.iter.next()?;
+        Some(MultipleResponseSet::new(self.dictionary, set))
+    }
+
+    /// Forwards the underlying set iterator's hint; each inner item maps to
+    /// exactly one yielded item, so the bounds carry over unchanged.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+}
+
+/// Serializes the multiple response sets as a sequence with one element per
+/// set.
+impl<'a> Serialize for MultipleResponseSets<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let mut elements = serializer.serialize_seq(Some(self.len()))?;
+        // Stops at, and propagates, the first element that fails.
+        self.iter()
+            .try_for_each(|mrset| elements.serialize_element(&mrset))?;
+        elements.end()
+    }
+}
+
+/// Variables that represent multiple responses to a survey question.
+///
+/// This is a borrowed view that pairs a [`DictIndexMultipleResponseSet`] with
+/// the [`Dictionary`] that contains it.
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSet<'a> {
+ dictionary: &'a Dictionary,
+ mrset: &'a DictIndexMultipleResponseSet,
+}
+
+impl<'a> MultipleResponseSet<'a> {
+    /// Creates a view of `mrset` within `dictionary`.
+    fn new(dictionary: &'a Dictionary, mrset: &'a DictIndexMultipleResponseSet) -> Self {
+        Self { dictionary, mrset }
+    }
+
+    /// Returns the set's name.
+    ///
+    /// The reference borrows from the dictionary (lifetime `'a`), not from
+    /// this view, so it remains usable after the view is dropped.
+    pub fn name(&self) -> &'a Identifier {
+        &self.mrset.name
+    }
+
+    /// Returns the set's label.
+    ///
+    /// The reference borrows from the dictionary (lifetime `'a`), not from
+    /// this view.
+    pub fn label(&self) -> &'a String {
+        &self.mrset.label
+    }
+
+    /// Returns a copy of the set's stored width range.
+    pub fn width(&self) -> RangeInclusive<VarWidth> {
+        self.mrset.width.clone()
+    }
+
+    /// Returns the set's multiple response type.
+    ///
+    /// The reference borrows from the dictionary (lifetime `'a`), not from
+    /// this view.
+    pub fn mr_type(&self) -> &'a MultipleResponseType {
+        &self.mrset.mr_type
+    }
+
+    /// Returns the set's members as a [`MappedVariables`] view built from the
+    /// dictionary and the set's stored dictionary indexes.
+    pub fn variables(&self) -> MappedVariables<'a> {
+        MappedVariables::new(self.dictionary, &self.mrset.variables)
+    }
+}
+
+/// Serializes the set as a map with the entries `name`, `label`, `width`,
+/// `type`, and `variables`, in that order.
+impl<'a> Serialize for MultipleResponseSet<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        // Number of entries written below.
+        const ENTRY_COUNT: usize = 5;
+
+        let width = self.width();
+        let variables = self.variables();
+
+        let mut state = serializer.serialize_map(Some(ENTRY_COUNT))?;
+        state.serialize_entry("name", self.name())?;
+        state.serialize_entry("label", self.label())?;
+        state.serialize_entry("width", &width)?;
+        state.serialize_entry("type", self.mr_type())?;
+        state.serialize_entry("variables", &variables)?;
+        state.end()
+    }
+}
+
/// Variables that represent multiple responses to a survey question.
#[derive(Clone, Debug, Serialize)]
-pub struct MultipleResponseSet {
+pub struct DictIndexMultipleResponseSet {
/// The set's name.
pub name: Identifier,
pub variables: Vec<DictIndex>,
}
-impl MultipleResponseSet {
+impl DictIndexMultipleResponseSet {
fn with_updated_dict_indexes(
mut self,
f: impl Fn(DictIndex) -> Option<DictIndex>,
}
}
-impl HasIdentifier for MultipleResponseSet {
+/// Identifies a multiple response set by the `UniCase<String>` inside its
+/// `name`.
+impl HasIdentifier for DictIndexMultipleResponseSet {
fn identifier(&self) -> &UniCase<String> {
&self.name.0
}
}
#[derive(Clone, Debug)]
-pub struct VariableSet {
+pub struct DictIndexVariableSet {
pub name: String,
pub variables: Vec<DictIndex>,
}
-impl VariableSet {
+impl DictIndexVariableSet {
fn with_updated_dict_indexes(
mut self,
f: impl Fn(DictIndex) -> Option<DictIndex>,
crypto::EncryptedFile,
data::{Datum, RawString},
dictionary::{
- Dictionary, InvalidRole, MissingValues, MissingValuesError, MultipleResponseSet,
- MultipleResponseType, VarWidth, Variable, VariableSet,
+ DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, InvalidRole, MissingValues,
+ MissingValuesError, MultipleResponseType, VarWidth, Variable,
},
endian::Endian,
format::{Error as FormatError, Format, UncheckedFormat},
});
} else {
let (var_index, dict_index) = var_index_map.range(..=&index).last().unwrap();
- let variable = &dictionary.variables[*dict_index];
if *var_index == index {
- if variable.is_numeric() {
- dictionary.weight = Some(*dict_index);
- } else {
+ if dictionary.set_weight(Some(*dict_index)).is_err() {
warn(Error::InvalidWeightVar {
index: weight_index,
- name: variable.name.clone(),
+ name: dictionary.variables[*dict_index].name.clone(),
});
}
} else {
warn(Error::WeightIndexStringContinuation {
index: weight_index,
- name: variable.name.clone(),
+ name: dictionary.variables[*dict_index].name.clone(),
});
}
}
.iter()
.flat_map(|record| record.sets.iter())
{
- match MultipleResponseSet::decode(&dictionary, record, &mut warn) {
+ match DictIndexMultipleResponseSet::decode(&dictionary, record, &mut warn) {
Ok(mrset) => {
dictionary.mrsets.insert(ByIdentifier::new(mrset));
}
};
variables.push(dict_index);
}
- let variable_set = VariableSet {
+ let variable_set = DictIndexVariableSet {
name: record.name,
variables,
};
}
}
-impl MultipleResponseSet {
+impl DictIndexMultipleResponseSet {
fn decode(
dictionary: &Dictionary,
input: &raw::records::MultipleResponseSet<Identifier, String>,
let mr_type = MultipleResponseType::decode(&mr_set_name, &input.mr_type, min_width)?;
- Ok(MultipleResponseSet {
+ Ok(DictIndexMultipleResponseSet {
name: mr_set_name,
width: min_width..=max_width,
label: input.label.to_string(),