2 collections::{HashMap, HashSet},
6 use encoding_rs::Encoding;
7 use indexmap::IndexSet;
10 cooked::{Alignment, Measure, MissingValues, Value, VarWidth},
12 identifier::{ByIdentifier, HasIdentifier, Identifier},
/// Index type used by the `Vec<DictIndex>` / `Option<DictIndex>` fields in this
/// file; it is a plain `usize`.
///
/// NOTE(review): presumably a position within `Dictionary::variables` — confirm.
16 pub type DictIndex = usize;
/// A dictionary: an ordered set of variables together with settings and named
/// groupings that refer to variables by `DictIndex`.
///
/// The `DictIndex`-typed fields are rewritten by `update_dict_indexes` whenever
/// indexes need to be remapped (for example from `delete_vars`).
18 #[derive(Clone, Debug)]
19 pub struct Dictionary {
/// Variables, wrapped in `ByIdentifier` and stored in an `IndexSet`.
20 pub variables: IndexSet<ByIdentifier<Variable>>,
/// Variable indexes; remapped (and pruned) by `update_dict_indexes`.
21 pub split_file: Vec<DictIndex>,
/// Optional variable index; becomes `None` if the remapping yields `None`.
22 pub weight: Option<DictIndex>,
/// Optional variable index; becomes `None` if the remapping yields `None`.
23 pub filter: Option<DictIndex>,
/// Optional case limit (its interpretation is not visible in this chunk).
24 pub case_limit: Option<u64>,
/// Optional file label.
25 pub file_label: Option<String>,
/// Document strings, one `String` per entry.
26 pub documents: Vec<String>,
/// Vectors, wrapped in `ByIdentifier`.
27 pub vectors: HashSet<ByIdentifier<Vector>>,
/// Attributes, wrapped in `ByIdentifier`.
28 pub attributes: HashSet<ByIdentifier<Attribute>>,
/// Multiple response sets, wrapped in `ByIdentifier`.
29 pub mrsets: HashSet<ByIdentifier<MultipleResponseSet>>,
/// Variable sets, wrapped in `ByIdentifier`.
30 pub variable_sets: HashSet<ByIdentifier<VariableSet>>,
/// Character encoding associated with this dictionary (`encoding_rs`).
31 pub encoding: &'static Encoding,
/// Creates a dictionary for the given `encoding`.
///
/// Every collection initialised in the lines visible here starts out empty.
/// NOTE(review): the initialisers for the remaining fields are outside this
/// view — confirm they default to `None`.
35 pub fn new(encoding: &'static Encoding) -> Self {
37 variables: IndexSet::new(),
38 split_file: Vec::new(),
43 documents: Vec::new(),
44 vectors: HashSet::new(),
45 attributes: HashSet::new(),
46 mrsets: HashSet::new(),
47 variable_sets: HashSet::new(),
/// Remaps every stored `DictIndex` to account for the `count` variables
/// starting at `start`, by handing a classifying closure to
/// `update_dict_indexes`.
///
/// NOTE(review): the branch bodies are outside this view; presumably indexes
/// below `start` are kept, indexes inside the range map to `None`, and later
/// indexes shift down by `count` — confirm.
52 pub fn delete_vars(&mut self, start: DictIndex, count: usize) {
53 self.update_dict_indexes(&|index| {
// Tests whether `index` lies below the end of the range `start..start + count`.
56 } else if index < start + count {
/// Rewrites every `DictIndex` held by the dictionary through `f`.
///
/// `f` maps an old index to `Some(new_index)`, or to `None` when the index is
/// no longer valid. Optional indexes become `None` in that case, and grouped
/// items are rebuilt through their own `with_updated_dict_indexes`, which may
/// drop the whole item.
64 fn update_dict_indexes<F>(&mut self, f: &F)
66 F: Fn(DictIndex) -> Option<DictIndex>,
68 update_dict_index_vec(&mut self.split_file, f);
// NOTE(review): `.map(|index| f(index)).flatten()` is equivalent to
// `.and_then(f)`; clippy's `map_flatten` lint suggests the shorter form.
69 self.weight = self.weight.map(|index| f(index)).flatten();
70 self.filter = self.filter.map(|index| f(index)).flatten();
// Vectors: keep only those for which the remapping returns `Some`.
74 .filter_map(|vector_by_id| {
77 .with_updated_dict_indexes(f)
// NOTE(review): the closure is redundant; `.map(ByIdentifier::new)` would do.
78 .map(|vector| ByIdentifier::new(vector))
// Multiple response sets: same treatment as vectors.
84 .filter_map(|mrset_by_id| {
87 .with_updated_dict_indexes(f)
88 .map(|mrset| ByIdentifier::new(mrset))
// Variable sets: same treatment as vectors.
91 self.variable_sets = self
94 .filter_map(|var_set_by_id| {
97 .with_updated_dict_indexes(f)
98 .map(|var_set| ByIdentifier::new(var_set))
/// Applies `f` to each index in `dict_indexes`, in place.
///
/// Uses `Vec::retain_mut`, so the closure can both modify an element and
/// decide whether it stays in the vector.
/// NOTE(review): the closure body is only partly visible; presumably a `Some`
/// result overwrites the element and keeps it, while `None` removes it —
/// confirm.
104 fn update_dict_index_vec<F>(dict_indexes: &mut Vec<DictIndex>, f: F)
106 F: Fn(DictIndex) -> Option<DictIndex>,
108 dict_indexes.retain_mut(|index| {
// `f` receives the current index by value.
109 if let Some(new) = f(*index) {
118 #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
/// A single variable in a dictionary, together with its metadata.
///
/// NOTE(review): some fields of this struct are outside this view.
128 #[derive(Clone, Debug)]
129 pub struct Variable {
/// The variable's name.
130 pub name: Identifier,
/// Missing-value specification.
132 pub missing_values: MissingValues,
/// Format used for printing.
133 pub print_format: Format,
/// Format used for writing.
134 pub write_format: Format,
/// Labels keyed by value.
135 pub value_labels: HashMap<Value, String>,
/// Optional variable label.
136 pub label: Option<String>,
/// Measure setting.
137 pub measure: Measure,
/// Display width.
139 pub display_width: u32,
/// Alignment setting.
140 pub alignment: Alignment,
/// Short names associated with this variable.
142 pub short_names: Vec<Identifier>,
/// Attributes, wrapped in `ByIdentifier`.
143 pub attributes: HashSet<ByIdentifier<Attribute>>,
/// Exposes the `Identifier` of a `Variable`.
// NOTE(review): the method body is outside this view; presumably it returns `&self.name`.
146 impl HasIdentifier for Variable {
147 fn identifier(&self) -> &Identifier {
// NOTE(review): the `struct` header is outside this view; judging by the
// `impl HasIdentifier for Vector` that follows, this is presumably `Vector`.
/// A named list of variable indexes.
152 #[derive(Clone, Debug)]
/// The name of this item.
154 pub name: Identifier,
/// Indexes of the member variables.
155 pub variables: Vec<DictIndex>,
/// Remaps `self.variables` through `f` and yields `Some(self)` if at least one
/// variable index remains, or `None` if the list ended up empty.
159 fn with_updated_dict_indexes(
161 f: impl Fn(DictIndex) -> Option<DictIndex>,
163 update_dict_index_vec(&mut self.variables, f);
// `bool::then_some` turns the non-empty check into `Some(self)` / `None`.
164 (!self.variables.is_empty()).then_some(self)
/// Exposes the `Identifier` of a `Vector`.
// NOTE(review): the method body is outside this view; presumably it returns `&self.name`.
168 impl HasIdentifier for Vector {
169 fn identifier(&self) -> &Identifier {
/// A named attribute carrying a list of string values.
174 #[derive(Clone, Debug)]
175 pub struct Attribute {
/// The attribute's name.
176 pub name: Identifier,
/// The attribute's values.
177 pub values: Vec<String>,
/// Exposes the `Identifier` of an `Attribute`.
// NOTE(review): the method body is outside this view; presumably it returns `&self.name`.
180 impl HasIdentifier for Attribute {
181 fn identifier(&self) -> &Identifier {
/// A named multiple response set over a list of variable indexes.
///
/// NOTE(review): at least one field of this struct is outside this view.
186 #[derive(Clone, Debug)]
187 pub struct MultipleResponseSet {
/// The set's name.
188 pub name: Identifier,
/// The kind of multiple response set.
190 pub mr_type: MultipleResponseType,
/// Indexes of the member variables.
191 pub variables: Vec<DictIndex>,
194 impl MultipleResponseSet {
/// Remaps `self.variables` through `f` and yields `Some(self)` only if more
/// than one variable index remains; otherwise yields `None`.
195 fn with_updated_dict_indexes(
197 f: impl Fn(DictIndex) -> Option<DictIndex>,
199 update_dict_index_vec(&mut self.variables, f);
// Unlike the other `with_updated_dict_indexes` methods in this file, which
// only require a non-empty list, this one requires at least two members.
200 (self.variables.len() > 1).then_some(self)
/// Exposes the `Identifier` of a `MultipleResponseSet`.
// NOTE(review): the method body is outside this view; presumably it returns `&self.name`.
204 impl HasIdentifier for MultipleResponseSet {
205 fn identifier(&self) -> &Identifier {
/// The kind of a `MultipleResponseSet` (stored in its `mr_type` field).
///
/// NOTE(review): the variant names are outside this view; at least one variant
/// carries a `labels: CategoryLabels` field.
210 #[derive(Clone, Debug)]
211 pub enum MultipleResponseType {
214 labels: CategoryLabels,
/// A named set of variables, stored as a list of variable indexes.
219 #[derive(Clone, Debug)]
220 pub struct VariableSet {
/// The set's name.
221 pub name: Identifier,
/// Indexes of the member variables.
222 pub variables: Vec<DictIndex>,
/// Remaps `self.variables` through `f` and yields `Some(self)` if at least one
/// variable index remains, or `None` if the list ended up empty.
226 fn with_updated_dict_indexes(
228 f: impl Fn(DictIndex) -> Option<DictIndex>,
230 update_dict_index_vec(&mut self.variables, f);
// `bool::then_some` turns the non-empty check into `Some(self)` / `None`.
231 (!self.variables.is_empty()).then_some(self)
/// Exposes the `Identifier` of a `VariableSet`.
// NOTE(review): the method body is outside this view; presumably it returns `&self.name`.
235 impl HasIdentifier for VariableSet {
236 fn identifier(&self) -> &Identifier {
243 use std::collections::HashSet;
245 use crate::identifier::Identifier;
247 use super::{ByIdentifier, HasIdentifier};
// Test-only `Variable` stand-in. It derives `PartialEq`/`Eq`, so two instances
// compare by all of their fields, which the test below relies on.
// NOTE(review): the struct definition itself is outside this view.
249 #[derive(PartialEq, Eq, Debug, Clone)]
// Gives the test type an identifier to be looked up by.
255 impl HasIdentifier for Variable {
256 fn identifier(&self) -> &Identifier {
263 // Two variables that share a name but differ in value must not compare equal.
264 let abcd = Identifier::new_utf8("abcd").unwrap();
265 let abcd1 = Variable {
269 let abcd2 = Variable {
273 assert_ne!(abcd1, abcd2);
275 // But `ByIdentifier` should treat them the same.
276 let abcd1_by_name = ByIdentifier::new(abcd1);
277 let abcd2_by_name = ByIdentifier::new(abcd2);
278 assert_eq!(abcd1_by_name, abcd2_by_name);
280 // And a `HashSet` of `ByIdentifier` should also treat them the same.
281 let mut vars: HashSet<ByIdentifier<Variable>> = HashSet::new();
// The first insert succeeds; the second is rejected because an element that
// compares equal is already present.
282 assert!(vars.insert(ByIdentifier::new(abcd1_by_name.0.clone())));
283 assert!(!vars.insert(ByIdentifier::new(abcd2_by_name.0.clone())));
// Lookup with a bare `Identifier` key; `HashSet::get` allows this only when
// the element type implements `Borrow<Identifier>`.
285 vars.get(&Identifier::new_utf8("abcd").unwrap())