+use std::{
+ collections::{HashMap, HashSet},
+ fmt::Debug,
+};
+
+use encoding_rs::Encoding;
+use indexmap::IndexSet;
+
+use crate::{
+ cooked::{Alignment, Measure, MissingValues, Value, VarWidth},
+ format::Format,
+ identifier::{ByIdentifier, HasIdentifier, Identifier},
+ raw::CategoryLabels,
+};
+
+pub type DictIndex = usize;
+
+#[derive(Clone, Debug)]
+pub struct Dictionary {
+ pub variables: IndexSet<ByIdentifier<Variable>>,
+ pub split_file: Vec<DictIndex>,
+ pub weight: Option<DictIndex>,
+ pub filter: Option<DictIndex>,
+ pub case_limit: Option<u64>,
+ pub file_label: Option<String>,
+ pub documents: Vec<String>,
+ pub vectors: HashSet<ByIdentifier<Vector>>,
+ pub attributes: HashSet<ByIdentifier<Attribute>>,
+ pub mrsets: HashSet<ByIdentifier<MultipleResponseSet>>,
+ pub variable_sets: HashSet<ByIdentifier<VariableSet>>,
+ pub encoding: &'static Encoding,
+}
+
+impl Dictionary {
+ pub fn new(encoding: &'static Encoding) -> Self {
+ Self {
+ variables: IndexSet::new(),
+ split_file: Vec::new(),
+ weight: None,
+ filter: None,
+ case_limit: None,
+ file_label: None,
+ documents: Vec::new(),
+ vectors: HashSet::new(),
+ attributes: HashSet::new(),
+ mrsets: HashSet::new(),
+ variable_sets: HashSet::new(),
+ encoding,
+ }
+ }
+
+ pub fn delete_vars(&mut self, start: DictIndex, count: usize) {
+ self.update_dict_indexes(&|index| {
+ if index < start {
+ Some(index)
+ } else if index < start + count {
+ None
+ } else {
+ Some(index - count)
+ }
+ })
+ }
+
+ fn update_dict_indexes<F>(&mut self, f: &F)
+ where
+ F: Fn(DictIndex) -> Option<DictIndex>,
+ {
+ update_dict_index_vec(&mut self.split_file, f);
+ self.weight = self.weight.map(|index| f(index)).flatten();
+ self.filter = self.filter.map(|index| f(index)).flatten();
+ self.vectors = self
+ .vectors
+ .drain()
+ .filter_map(|vector_by_id| {
+ vector_by_id
+ .0
+ .with_updated_dict_indexes(f)
+ .map(|vector| ByIdentifier::new(vector))
+ })
+ .collect();
+ self.mrsets = self
+ .mrsets
+ .drain()
+ .filter_map(|mrset_by_id| {
+ mrset_by_id
+ .0
+ .with_updated_dict_indexes(f)
+ .map(|mrset| ByIdentifier::new(mrset))
+ })
+ .collect();
+ self.variable_sets = self
+ .variable_sets
+ .drain()
+ .filter_map(|var_set_by_id| {
+ var_set_by_id
+ .0
+ .with_updated_dict_indexes(f)
+ .map(|var_set| ByIdentifier::new(var_set))
+ })
+ .collect();
+ }
+}
+
+fn update_dict_index_vec<F>(dict_indexes: &mut Vec<DictIndex>, f: F)
+where
+ F: Fn(DictIndex) -> Option<DictIndex>,
+{
+ dict_indexes.retain_mut(|index| {
+ if let Some(new) = f(*index) {
+ *index = new;
+ true
+ } else {
+ false
+ }
+ });
+}
+
/// The role recorded for a variable; this is the type of `Variable::role`.
///
/// Note that [`Role::None`] is an ordinary variant of this enum and is
/// unrelated to [`Option::None`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Role {
    Input,
    Target,
    Both,
    /// The explicit "no role" variant (not `Option::None`).
    None,
    Partition,
    Split,
}
+
+#[derive(Clone, Debug)]
+pub struct Variable {
+ pub name: Identifier,
+ pub width: VarWidth,
+ pub missing_values: MissingValues,
+ pub print_format: Format,
+ pub write_format: Format,
+ pub value_labels: HashMap<Value, String>,
+ pub label: Option<String>,
+ pub measure: Measure,
+ pub role: Role,
+ pub display_width: u32,
+ pub alignment: Alignment,
+ pub leave: bool,
+ pub short_names: Vec<Identifier>,
+ pub attributes: HashSet<ByIdentifier<Attribute>>,
+}
+
+impl HasIdentifier for Variable {
+ fn identifier(&self) -> &Identifier {
+ &self.name
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct Vector {
+ pub name: Identifier,
+ pub variables: Vec<DictIndex>,
+}
+
+impl Vector {
+ fn with_updated_dict_indexes(
+ mut self,
+ f: impl Fn(DictIndex) -> Option<DictIndex>,
+ ) -> Option<Self> {
+ update_dict_index_vec(&mut self.variables, f);
+ (!self.variables.is_empty()).then_some(self)
+ }
+}
+
+impl HasIdentifier for Vector {
+ fn identifier(&self) -> &Identifier {
+ &self.name
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct Attribute {
+ pub name: Identifier,
+ pub values: Vec<String>,
+}
+
+impl HasIdentifier for Attribute {
+ fn identifier(&self) -> &Identifier {
+ &self.name
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSet {
+ pub name: Identifier,
+ pub label: String,
+ pub mr_type: MultipleResponseType,
+ pub variables: Vec<DictIndex>,
+}
+
+impl MultipleResponseSet {
+ fn with_updated_dict_indexes(
+ mut self,
+ f: impl Fn(DictIndex) -> Option<DictIndex>,
+ ) -> Option<Self> {
+ update_dict_index_vec(&mut self.variables, f);
+ (self.variables.len() > 1).then_some(self)
+ }
+}
+
+impl HasIdentifier for MultipleResponseSet {
+ fn identifier(&self) -> &Identifier {
+ &self.name
+ }
+}
+
+#[derive(Clone, Debug)]
+pub enum MultipleResponseType {
+ MultipleDichotomy {
+ value: Value,
+ labels: CategoryLabels,
+ },
+ MultipleCategory,
+}
+
+#[derive(Clone, Debug)]
+pub struct VariableSet {
+ pub name: Identifier,
+ pub variables: Vec<DictIndex>,
+}
+
+impl VariableSet {
+ fn with_updated_dict_indexes(
+ mut self,
+ f: impl Fn(DictIndex) -> Option<DictIndex>,
+ ) -> Option<Self> {
+ update_dict_index_vec(&mut self.variables, f);
+ (!self.variables.is_empty()).then_some(self)
+ }
+}
+
+impl HasIdentifier for VariableSet {
+ fn identifier(&self) -> &Identifier {
+ &self.name
+ }
+}
+
#[cfg(test)]
mod test {
    use std::collections::HashSet;

    use crate::identifier::Identifier;

    use super::{ByIdentifier, HasIdentifier};

    /// Minimal test fixture: an identifier plus a payload, so that two values
    /// can share a name while still comparing unequal.
    #[derive(PartialEq, Eq, Debug, Clone)]
    struct Variable {
        name: Identifier,
        value: i32,
    }

    impl HasIdentifier for Variable {
        fn identifier(&self) -> &Identifier {
            &self.name
        }
    }

    /// `ByIdentifier` must compare and hash by identifier alone, ignoring the
    /// rest of the wrapped value.
    #[test]
    fn by_identifier_compares_by_identifier_only() {
        // Variables should not be the same if their values differ.
        let abcd = Identifier::new_utf8("abcd").unwrap();
        let abcd1 = Variable {
            name: abcd.clone(),
            value: 1,
        };
        let abcd2 = Variable {
            name: abcd,
            value: 2,
        };
        assert_ne!(abcd1, abcd2);

        // But `ByIdentifier` should treat them the same.
        let abcd1_by_name = ByIdentifier::new(abcd1);
        let abcd2_by_name = ByIdentifier::new(abcd2);
        assert_eq!(abcd1_by_name, abcd2_by_name);

        // And a `HashSet` of `ByIdentifier` should also treat them the same:
        // the second insert is rejected and the first value stays in the set.
        let mut vars: HashSet<ByIdentifier<Variable>> = HashSet::new();
        assert!(vars.insert(ByIdentifier::new(abcd1_by_name.0.clone())));
        assert!(!vars.insert(ByIdentifier::new(abcd2_by_name.0.clone())));
        assert_eq!(
            vars.get(&Identifier::new_utf8("abcd").unwrap())
                .unwrap()
                .0
                .value,
            1
        );
    }
}