2 collections::{HashMap, HashSet},
4 ops::{Bound, RangeBounds},
7 use encoding_rs::Encoding;
8 use indexmap::IndexSet;
11 cooked::{Alignment, Measure, MissingValues, Value, VarWidth},
13 identifier::{ByIdentifier, HasIdentifier, Identifier},
17 pub type DictIndex = usize;
19 #[derive(Clone, Debug)]
20 pub struct Dictionary {
21 pub variables: IndexSet<ByIdentifier<Variable>>,
22 pub split_file: Vec<DictIndex>,
23 pub weight: Option<DictIndex>,
24 pub filter: Option<DictIndex>,
25 pub case_limit: Option<u64>,
26 pub file_label: Option<String>,
27 pub documents: Vec<String>,
28 pub vectors: HashSet<ByIdentifier<Vector>>,
29 pub attributes: HashSet<ByIdentifier<Attribute>>,
30 pub mrsets: HashSet<ByIdentifier<MultipleResponseSet>>,
31 pub variable_sets: HashSet<ByIdentifier<VariableSet>>,
32 pub encoding: &'static Encoding,
36 pub fn new(encoding: &'static Encoding) -> Self {
38 variables: IndexSet::new(),
39 split_file: Vec::new(),
44 documents: Vec::new(),
45 vectors: HashSet::new(),
46 attributes: HashSet::new(),
47 mrsets: HashSet::new(),
48 variable_sets: HashSet::new(),
53 pub fn reorder_var(&mut self, from_index: DictIndex, to_index: DictIndex) {
54 if from_index != to_index {
55 self.variables.move_index(from_index, to_index);
56 self.update_dict_indexes(&|index| {
57 if index == from_index {
59 } else if from_index < to_index {
60 if index > from_index && index <= to_index {
66 if index >= to_index && index < from_index {
76 pub fn retain_vars<F>(&mut self, keep: F)
78 F: Fn(&Variable) -> bool,
80 let mut deleted = Vec::new();
82 self.variables.retain(|var_by_id| {
83 let keep = keep(&var_by_id.0);
90 if !deleted.is_empty() {
91 self.update_dict_indexes(&|index| match deleted.binary_search(&index) {
93 Err(position) => Some(position),
98 pub fn delete_vars<R>(&mut self, range: R)
100 R: RangeBounds<DictIndex>,
102 let start = match range.start_bound() {
103 Bound::Included(&start) => start,
104 Bound::Excluded(&start) => start + 1,
105 Bound::Unbounded => 0,
107 let end = match range.end_bound() {
108 Bound::Included(&end) => end + 1,
109 Bound::Excluded(&end) => end,
110 Bound::Unbounded => self.variables.len(),
113 self.variables.drain(start..end);
114 self.update_dict_indexes(&|index| {
117 } else if index < end {
120 Some(index - end - start)
126 fn update_dict_indexes<F>(&mut self, f: &F)
128 F: Fn(DictIndex) -> Option<DictIndex>,
130 update_dict_index_vec(&mut self.split_file, f);
131 self.weight = self.weight.map(|index| f(index)).flatten();
132 self.filter = self.filter.map(|index| f(index)).flatten();
136 .filter_map(|vector_by_id| {
139 .with_updated_dict_indexes(f)
140 .map(|vector| ByIdentifier::new(vector))
146 .filter_map(|mrset_by_id| {
149 .with_updated_dict_indexes(f)
150 .map(|mrset| ByIdentifier::new(mrset))
153 self.variable_sets = self
156 .filter_map(|var_set_by_id| {
159 .with_updated_dict_indexes(f)
160 .map(|var_set| ByIdentifier::new(var_set))
166 fn update_dict_index_vec<F>(dict_indexes: &mut Vec<DictIndex>, f: F)
168 F: Fn(DictIndex) -> Option<DictIndex>,
170 dict_indexes.retain_mut(|index| {
171 if let Some(new) = f(*index) {
180 #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
190 #[derive(Clone, Debug)]
191 pub struct Variable {
192 pub name: Identifier,
194 pub missing_values: MissingValues,
195 pub print_format: Format,
196 pub write_format: Format,
197 pub value_labels: HashMap<Value, String>,
198 pub label: Option<String>,
199 pub measure: Measure,
201 pub display_width: u32,
202 pub alignment: Alignment,
204 pub short_names: Vec<Identifier>,
205 pub attributes: HashSet<ByIdentifier<Attribute>>,
208 impl HasIdentifier for Variable {
209 fn identifier(&self) -> &Identifier {
214 #[derive(Clone, Debug)]
216 pub name: Identifier,
217 pub variables: Vec<DictIndex>,
221 fn with_updated_dict_indexes(
223 f: impl Fn(DictIndex) -> Option<DictIndex>,
225 update_dict_index_vec(&mut self.variables, f);
226 (!self.variables.is_empty()).then_some(self)
230 impl HasIdentifier for Vector {
231 fn identifier(&self) -> &Identifier {
236 #[derive(Clone, Debug)]
237 pub struct Attribute {
238 pub name: Identifier,
239 pub values: Vec<String>,
242 impl HasIdentifier for Attribute {
243 fn identifier(&self) -> &Identifier {
248 #[derive(Clone, Debug)]
249 pub struct MultipleResponseSet {
250 pub name: Identifier,
252 pub mr_type: MultipleResponseType,
253 pub variables: Vec<DictIndex>,
256 impl MultipleResponseSet {
257 fn with_updated_dict_indexes(
259 f: impl Fn(DictIndex) -> Option<DictIndex>,
261 update_dict_index_vec(&mut self.variables, f);
262 (self.variables.len() > 1).then_some(self)
266 impl HasIdentifier for MultipleResponseSet {
267 fn identifier(&self) -> &Identifier {
272 #[derive(Clone, Debug)]
273 pub enum MultipleResponseType {
276 labels: CategoryLabels,
281 #[derive(Clone, Debug)]
282 pub struct VariableSet {
283 pub name: Identifier,
284 pub variables: Vec<DictIndex>,
288 fn with_updated_dict_indexes(
290 f: impl Fn(DictIndex) -> Option<DictIndex>,
292 update_dict_index_vec(&mut self.variables, f);
293 (!self.variables.is_empty()).then_some(self)
297 impl HasIdentifier for VariableSet {
298 fn identifier(&self) -> &Identifier {
305 use std::collections::HashSet;
307 use crate::identifier::Identifier;
309 use super::{ByIdentifier, HasIdentifier};
311 #[derive(PartialEq, Eq, Debug, Clone)]
317 impl HasIdentifier for Variable {
318 fn identifier(&self) -> &Identifier {
325 // Variables should not be the same if their values differ.
326 let abcd = Identifier::new_utf8("abcd").unwrap();
327 let abcd1 = Variable {
331 let abcd2 = Variable {
335 assert_ne!(abcd1, abcd2);
337 // But `ByIdentifier` should treat them the same.
338 let abcd1_by_name = ByIdentifier::new(abcd1);
339 let abcd2_by_name = ByIdentifier::new(abcd2);
340 assert_eq!(abcd1_by_name, abcd2_by_name);
342 // And a `HashSet` of `ByIdentifier` should also treat them the same.
343 let mut vars: HashSet<ByIdentifier<Variable>> = HashSet::new();
344 assert!(vars.insert(ByIdentifier::new(abcd1_by_name.0.clone())));
345 assert!(!vars.insert(ByIdentifier::new(abcd2_by_name.0.clone())));
347 vars.get(&Identifier::new_utf8("abcd").unwrap())