2 collections::{HashMap, HashSet},
4 ops::{Bound, RangeBounds},
7 use encoding_rs::Encoding;
8 use indexmap::IndexSet;
11 cooked::{Value, VarWidth},
13 identifier::{ByIdentifier, HasIdentifier, Identifier},
14 raw::{Alignment, CategoryLabels, Measure, MissingValues, VarType},
17 pub type DictIndex = usize;
19 #[derive(Clone, Debug)]
20 pub struct Dictionary {
21 pub variables: IndexSet<ByIdentifier<Variable>>,
22 pub split_file: Vec<DictIndex>,
23 pub weight: Option<DictIndex>,
24 pub filter: Option<DictIndex>,
25 pub case_limit: Option<u64>,
26 pub file_label: Option<String>,
27 pub documents: Vec<String>,
28 pub vectors: HashSet<ByIdentifier<Vector>>,
29 pub attributes: HashMap<Identifier, Vec<String>>,
30 pub mrsets: HashSet<ByIdentifier<MultipleResponseSet>>,
31 pub variable_sets: HashSet<ByIdentifier<VariableSet>>,
32 pub encoding: &'static Encoding,
36 pub fn new(encoding: &'static Encoding) -> Self {
38 variables: IndexSet::new(),
39 split_file: Vec::new(),
44 documents: Vec::new(),
45 vectors: HashSet::new(),
46 attributes: HashMap::new(),
47 mrsets: HashSet::new(),
48 variable_sets: HashSet::new(),
53 pub fn add_var(&mut self, variable: Variable) -> Result<(), ()> {
54 if self.variables.insert(ByIdentifier::new(variable)) {
61 pub fn reorder_var(&mut self, from_index: DictIndex, to_index: DictIndex) {
62 if from_index != to_index {
63 self.variables.move_index(from_index, to_index);
64 self.update_dict_indexes(&|index| {
65 if index == from_index {
67 } else if from_index < to_index {
68 if index > from_index && index <= to_index {
74 if index >= to_index && index < from_index {
84 pub fn retain_vars<F>(&mut self, keep: F)
86 F: Fn(&Variable) -> bool,
88 let mut deleted = Vec::new();
90 self.variables.retain(|var_by_id| {
91 let keep = keep(&var_by_id.0);
98 if !deleted.is_empty() {
99 self.update_dict_indexes(&|index| match deleted.binary_search(&index) {
101 Err(position) => Some(position),
106 pub fn delete_vars<R>(&mut self, range: R)
108 R: RangeBounds<DictIndex>,
110 let start = match range.start_bound() {
111 Bound::Included(&start) => start,
112 Bound::Excluded(&start) => start + 1,
113 Bound::Unbounded => 0,
115 let end = match range.end_bound() {
116 Bound::Included(&end) => end + 1,
117 Bound::Excluded(&end) => end,
118 Bound::Unbounded => self.variables.len(),
121 self.variables.drain(start..end);
122 self.update_dict_indexes(&|index| {
125 } else if index < end {
128 Some(index - end - start)
134 fn update_dict_indexes<F>(&mut self, f: &F)
136 F: Fn(DictIndex) -> Option<DictIndex>,
138 update_dict_index_vec(&mut self.split_file, f);
139 self.weight = self.weight.map(|index| f(index)).flatten();
140 self.filter = self.filter.map(|index| f(index)).flatten();
144 .filter_map(|vector_by_id| {
147 .with_updated_dict_indexes(f)
148 .map(|vector| ByIdentifier::new(vector))
154 .filter_map(|mrset_by_id| {
157 .with_updated_dict_indexes(f)
158 .map(|mrset| ByIdentifier::new(mrset))
161 self.variable_sets = self
164 .filter_map(|var_set_by_id| {
167 .with_updated_dict_indexes(f)
168 .map(|var_set| ByIdentifier::new(var_set))
174 fn update_dict_index_vec<F>(dict_indexes: &mut Vec<DictIndex>, f: F)
176 F: Fn(DictIndex) -> Option<DictIndex>,
178 dict_indexes.retain_mut(|index| {
179 if let Some(new) = f(*index) {
188 #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
198 impl Default for Role {
199 fn default() -> Self {
211 pub fn from_identifier(id: &Identifier) -> Self {
212 if id.0.starts_with('$') {
214 } else if id.0.starts_with('#') {
221 pub fn must_leave(self) -> bool {
223 DictClass::Ordinary => false,
224 DictClass::System => false,
225 DictClass::Scratch => true,
230 #[derive(Clone, Debug)]
231 pub struct Variable {
232 pub name: Identifier,
234 pub missing_values: MissingValues,
235 pub print_format: Spec,
236 pub write_format: Spec,
237 pub value_labels: HashMap<Value, String>,
238 pub label: Option<String>,
239 pub measure: Option<Measure>,
241 pub display_width: u32,
242 pub alignment: Alignment,
244 pub short_names: Vec<Identifier>,
245 pub attributes: HashSet<ByIdentifier<Attribute>>,
249 pub fn new(name: Identifier, width: VarWidth) -> Self {
250 let var_type = VarType::from_width(width);
251 let leave = DictClass::from_identifier(&name).must_leave();
255 missing_values: MissingValues::default(),
256 print_format: Spec::default_for_width(width),
257 write_format: Spec::default_for_width(width),
258 value_labels: HashMap::new(),
260 measure: Measure::default_for_type(var_type),
261 role: Role::default(),
262 display_width: width.default_display_width(),
263 alignment: Alignment::default_for_type(var_type),
265 short_names: Vec::new(),
266 attributes: HashSet::new()
271 impl HasIdentifier for Variable {
272 fn identifier(&self) -> &Identifier {
277 #[derive(Clone, Debug)]
279 pub name: Identifier,
280 pub variables: Vec<DictIndex>,
284 fn with_updated_dict_indexes(
286 f: impl Fn(DictIndex) -> Option<DictIndex>,
288 update_dict_index_vec(&mut self.variables, f);
289 (!self.variables.is_empty()).then_some(self)
293 impl HasIdentifier for Vector {
294 fn identifier(&self) -> &Identifier {
299 #[derive(Clone, Debug)]
300 pub struct Attribute {
301 pub name: Identifier,
302 pub values: Vec<String>,
305 impl HasIdentifier for Attribute {
306 fn identifier(&self) -> &Identifier {
311 #[derive(Clone, Debug)]
312 pub struct MultipleResponseSet {
313 pub name: Identifier,
315 pub mr_type: MultipleResponseType,
316 pub variables: Vec<DictIndex>,
319 impl MultipleResponseSet {
320 fn with_updated_dict_indexes(
322 f: impl Fn(DictIndex) -> Option<DictIndex>,
324 update_dict_index_vec(&mut self.variables, f);
325 (self.variables.len() > 1).then_some(self)
329 impl HasIdentifier for MultipleResponseSet {
330 fn identifier(&self) -> &Identifier {
335 #[derive(Clone, Debug)]
336 pub enum MultipleResponseType {
339 labels: CategoryLabels,
344 #[derive(Clone, Debug)]
345 pub struct VariableSet {
346 pub name: Identifier,
347 pub variables: Vec<DictIndex>,
351 fn with_updated_dict_indexes(
353 f: impl Fn(DictIndex) -> Option<DictIndex>,
355 update_dict_index_vec(&mut self.variables, f);
356 (!self.variables.is_empty()).then_some(self)
360 impl HasIdentifier for VariableSet {
361 fn identifier(&self) -> &Identifier {
368 use std::collections::HashSet;
370 use crate::identifier::Identifier;
372 use super::{ByIdentifier, HasIdentifier};
374 #[derive(PartialEq, Eq, Debug, Clone)]
380 impl HasIdentifier for Variable {
381 fn identifier(&self) -> &Identifier {
388 // Variables should not be the same if their values differ.
389 let abcd = Identifier::new_utf8("abcd").unwrap();
390 let abcd1 = Variable {
394 let abcd2 = Variable {
398 assert_ne!(abcd1, abcd2);
400 // But `ByName` should treat them the same.
401 let abcd1_by_name = ByIdentifier::new(abcd1);
402 let abcd2_by_name = ByIdentifier::new(abcd2);
403 assert_eq!(abcd1_by_name, abcd2_by_name);
405 // And a `HashSet` of `ByName` should also treat them the same.
406 let mut vars: HashSet<ByIdentifier<Variable>> = HashSet::new();
407 assert!(vars.insert(ByIdentifier::new(abcd1_by_name.0.clone())));
408 assert!(!vars.insert(ByIdentifier::new(abcd2_by_name.0.clone())));
410 vars.get(&Identifier::new_utf8("abcd").unwrap())