continue crypto work
[pspp] / rust / src / dictionary.rs
diff --git a/rust/src/dictionary.rs b/rust/src/dictionary.rs
deleted file mode 100644 (file)
index e9eca11..0000000
+++ /dev/null
@@ -1,417 +0,0 @@
-use std::{
-    collections::{HashMap, HashSet},
-    fmt::Debug,
-    ops::{Bound, RangeBounds},
-};
-
-use encoding_rs::Encoding;
-use indexmap::IndexSet;
-
-use crate::{
-    cooked::{Value, VarWidth},
-    format::Spec,
-    identifier::{ByIdentifier, HasIdentifier, Identifier},
-    raw::{Alignment, CategoryLabels, Measure, MissingValues, VarType},
-};
-
-pub type DictIndex = usize;
-
-#[derive(Clone, Debug)]
-pub struct Dictionary {
-    pub variables: IndexSet<ByIdentifier<Variable>>,
-    pub split_file: Vec<DictIndex>,
-    pub weight: Option<DictIndex>,
-    pub filter: Option<DictIndex>,
-    pub case_limit: Option<u64>,
-    pub file_label: Option<String>,
-    pub documents: Vec<String>,
-    pub vectors: HashSet<ByIdentifier<Vector>>,
-    pub attributes: HashSet<ByIdentifier<Attribute>>,
-    pub mrsets: HashSet<ByIdentifier<MultipleResponseSet>>,
-    pub variable_sets: HashSet<ByIdentifier<VariableSet>>,
-    pub encoding: &'static Encoding,
-}
-
-impl Dictionary {
-    pub fn new(encoding: &'static Encoding) -> Self {
-        Self {
-            variables: IndexSet::new(),
-            split_file: Vec::new(),
-            weight: None,
-            filter: None,
-            case_limit: None,
-            file_label: None,
-            documents: Vec::new(),
-            vectors: HashSet::new(),
-            attributes: HashSet::new(),
-            mrsets: HashSet::new(),
-            variable_sets: HashSet::new(),
-            encoding,
-        }
-    }
-
-    pub fn add_var(&mut self, variable: Variable) -> Result<(), ()> {
-        if self.variables.insert(ByIdentifier::new(variable)) {
-            Ok(())
-        } else {
-            Err(())
-        }
-    }
-
-    pub fn reorder_var(&mut self, from_index: DictIndex, to_index: DictIndex) {
-        if from_index != to_index {
-            self.variables.move_index(from_index, to_index);
-            self.update_dict_indexes(&|index| {
-                if index == from_index {
-                    Some(to_index)
-                } else if from_index < to_index {
-                    if index > from_index && index <= to_index {
-                        Some(index - 1)
-                    } else {
-                        Some(index)
-                    }
-                } else {
-                    if index >= to_index && index < from_index {
-                        Some(index + 1)
-                    } else {
-                        Some(index)
-                    }
-                }
-            })
-        }
-    }
-
-    pub fn retain_vars<F>(&mut self, keep: F)
-    where
-        F: Fn(&Variable) -> bool,
-    {
-        let mut deleted = Vec::new();
-        let mut index = 0;
-        self.variables.retain(|var_by_id| {
-            let keep = keep(&var_by_id.0);
-            if !keep {
-                deleted.push(index);
-            }
-            index += 1;
-            keep
-        });
-        if !deleted.is_empty() {
-            self.update_dict_indexes(&|index| match deleted.binary_search(&index) {
-                Ok(_) => None,
-                Err(position) => Some(position),
-            })
-        }
-    }
-
-    pub fn delete_vars<R>(&mut self, range: R)
-    where
-        R: RangeBounds<DictIndex>,
-    {
-        let start = match range.start_bound() {
-            Bound::Included(&start) => start,
-            Bound::Excluded(&start) => start + 1,
-            Bound::Unbounded => 0,
-        };
-        let end = match range.end_bound() {
-            Bound::Included(&end) => end + 1,
-            Bound::Excluded(&end) => end,
-            Bound::Unbounded => self.variables.len(),
-        };
-        if end > start {
-            self.variables.drain(start..end);
-            self.update_dict_indexes(&|index| {
-                if index < start {
-                    Some(index)
-                } else if index < end {
-                    None
-                } else {
-                    Some(index - end - start)
-                }
-            })
-        }
-    }
-
-    fn update_dict_indexes<F>(&mut self, f: &F)
-    where
-        F: Fn(DictIndex) -> Option<DictIndex>,
-    {
-        update_dict_index_vec(&mut self.split_file, f);
-        self.weight = self.weight.map(|index| f(index)).flatten();
-        self.filter = self.filter.map(|index| f(index)).flatten();
-        self.vectors = self
-            .vectors
-            .drain()
-            .filter_map(|vector_by_id| {
-                vector_by_id
-                    .0
-                    .with_updated_dict_indexes(f)
-                    .map(|vector| ByIdentifier::new(vector))
-            })
-            .collect();
-        self.mrsets = self
-            .mrsets
-            .drain()
-            .filter_map(|mrset_by_id| {
-                mrset_by_id
-                    .0
-                    .with_updated_dict_indexes(f)
-                    .map(|mrset| ByIdentifier::new(mrset))
-            })
-            .collect();
-        self.variable_sets = self
-            .variable_sets
-            .drain()
-            .filter_map(|var_set_by_id| {
-                var_set_by_id
-                    .0
-                    .with_updated_dict_indexes(f)
-                    .map(|var_set| ByIdentifier::new(var_set))
-            })
-            .collect();
-    }
-}
-
-fn update_dict_index_vec<F>(dict_indexes: &mut Vec<DictIndex>, f: F)
-where
-    F: Fn(DictIndex) -> Option<DictIndex>,
-{
-    dict_indexes.retain_mut(|index| {
-        if let Some(new) = f(*index) {
-            *index = new;
-            true
-        } else {
-            false
-        }
-    });
-}
-
-#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
-pub enum Role {
-    Input,
-    Target,
-    Both,
-    None,
-    Partition,
-    Split,
-}
-
-impl Default for Role {
-    fn default() -> Self {
-        Self::Input
-    }
-}
-
-pub enum DictClass {
-    Ordinary,
-    System,
-    Scratch,
-}
-
-impl DictClass {
-    pub fn from_identifier(id: &Identifier) -> Self {
-        if id.0.starts_with('$') {
-            Self::System
-        } else if id.0.starts_with('#') {
-            Self::Scratch
-        } else {
-            Self::Ordinary
-        }
-    }
-
-    pub fn must_leave(self) -> bool {
-        match self {
-            DictClass::Ordinary => false,
-            DictClass::System => false,
-            DictClass::Scratch => true,
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct Variable {
-    pub name: Identifier,
-    pub width: VarWidth,
-    pub missing_values: MissingValues,
-    pub print_format: Spec,
-    pub write_format: Spec,
-    pub value_labels: HashMap<Value, String>,
-    pub label: Option<String>,
-    pub measure: Option<Measure>,
-    pub role: Role,
-    pub display_width: u32,
-    pub alignment: Alignment,
-    pub leave: bool,
-    pub short_names: Vec<Identifier>,
-    pub attributes: HashSet<ByIdentifier<Attribute>>,
-}
-
-impl Variable {
-    pub fn new(name: Identifier, width: VarWidth) -> Self {
-        let var_type = VarType::from_width(width);
-        let leave = DictClass::from_identifier(&name).must_leave();
-        Self {
-            name,
-            width,
-            missing_values: MissingValues::default(),
-            print_format: Spec::default_for_width(width),
-            write_format: Spec::default_for_width(width),
-            value_labels: HashMap::new(),
-            label: None,
-            measure: Measure::default_for_type(var_type),
-            role: Role::default(),
-            display_width: width.default_display_width(),
-            alignment: Alignment::default_for_type(var_type),
-            leave,
-            short_names: Vec::new(),
-            attributes: HashSet::new()
-        }
-    }
-}
-
-impl HasIdentifier for Variable {
-    fn identifier(&self) -> &Identifier {
-        &self.name
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct Vector {
-    pub name: Identifier,
-    pub variables: Vec<DictIndex>,
-}
-
-impl Vector {
-    fn with_updated_dict_indexes(
-        mut self,
-        f: impl Fn(DictIndex) -> Option<DictIndex>,
-    ) -> Option<Self> {
-        update_dict_index_vec(&mut self.variables, f);
-        (!self.variables.is_empty()).then_some(self)
-    }
-}
-
-impl HasIdentifier for Vector {
-    fn identifier(&self) -> &Identifier {
-        &self.name
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct Attribute {
-    pub name: Identifier,
-    pub values: Vec<String>,
-}
-
-impl HasIdentifier for Attribute {
-    fn identifier(&self) -> &Identifier {
-        &self.name
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct MultipleResponseSet {
-    pub name: Identifier,
-    pub label: String,
-    pub mr_type: MultipleResponseType,
-    pub variables: Vec<DictIndex>,
-}
-
-impl MultipleResponseSet {
-    fn with_updated_dict_indexes(
-        mut self,
-        f: impl Fn(DictIndex) -> Option<DictIndex>,
-    ) -> Option<Self> {
-        update_dict_index_vec(&mut self.variables, f);
-        (self.variables.len() > 1).then_some(self)
-    }
-}
-
-impl HasIdentifier for MultipleResponseSet {
-    fn identifier(&self) -> &Identifier {
-        &self.name
-    }
-}
-
-#[derive(Clone, Debug)]
-pub enum MultipleResponseType {
-    MultipleDichotomy {
-        value: Value,
-        labels: CategoryLabels,
-    },
-    MultipleCategory,
-}
-
-#[derive(Clone, Debug)]
-pub struct VariableSet {
-    pub name: Identifier,
-    pub variables: Vec<DictIndex>,
-}
-
-impl VariableSet {
-    fn with_updated_dict_indexes(
-        mut self,
-        f: impl Fn(DictIndex) -> Option<DictIndex>,
-    ) -> Option<Self> {
-        update_dict_index_vec(&mut self.variables, f);
-        (!self.variables.is_empty()).then_some(self)
-    }
-}
-
-impl HasIdentifier for VariableSet {
-    fn identifier(&self) -> &Identifier {
-        &self.name
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use std::collections::HashSet;
-
-    use crate::identifier::Identifier;
-
-    use super::{ByIdentifier, HasIdentifier};
-
-    #[derive(PartialEq, Eq, Debug, Clone)]
-    struct Variable {
-        name: Identifier,
-        value: i32,
-    }
-
-    impl HasIdentifier for Variable {
-        fn identifier(&self) -> &Identifier {
-            &self.name
-        }
-    }
-
-    #[test]
-    fn test() {
-        // Variables should not be the same if their values differ.
-        let abcd = Identifier::new_utf8("abcd").unwrap();
-        let abcd1 = Variable {
-            name: abcd.clone(),
-            value: 1,
-        };
-        let abcd2 = Variable {
-            name: abcd,
-            value: 2,
-        };
-        assert_ne!(abcd1, abcd2);
-
-        // But `ByName` should treat them the same.
-        let abcd1_by_name = ByIdentifier::new(abcd1);
-        let abcd2_by_name = ByIdentifier::new(abcd2);
-        assert_eq!(abcd1_by_name, abcd2_by_name);
-
-        // And a `HashSet` of `ByName` should also treat them the same.
-        let mut vars: HashSet<ByIdentifier<Variable>> = HashSet::new();
-        assert!(vars.insert(ByIdentifier::new(abcd1_by_name.0.clone())));
-        assert!(!vars.insert(ByIdentifier::new(abcd2_by_name.0.clone())));
-        assert_eq!(
-            vars.get(&Identifier::new_utf8("abcd").unwrap())
-                .unwrap()
-                .0
-                .value,
-            1
-        );
-    }
-}