encoding: &'static Encoding,
}
+impl PartialEq for Dictionary {
+ fn eq(&self, other: &Self) -> bool {
+ // We have to compare the dereferenced versions of fields that use
+ // [ByIdentifier. Otherwise we would just be comparing their names.
+ self.variables
+ .iter()
+ .map(|var| &*var)
+ .eq(other.variables.iter().map(|var| &*var))
+ && self.split_file == other.split_file
+ && self.weight == other.weight
+ && self.filter == other.filter
+ && self.case_limit == other.case_limit
+ && self.file_label == other.file_label
+ && self.documents == other.documents
+ && self
+ .vectors
+ .iter()
+ .map(|vector| &*vector)
+ .eq(other.vectors.iter().map(|vector| &*vector))
+ && self.attributes == other.attributes
+ && self
+ .mrsets
+ .iter()
+ .map(|mrset| &*mrset)
+ .eq(other.mrsets.iter().map(|mrset| &*mrset))
+ && self.variable_sets == other.variable_sets
+ && self.encoding == other.encoding
+ }
+}
+
impl Serialize for Dictionary {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
#[error("Filter variable must be numeric.")]
pub struct InvalidFilterVariable;
+/// Invalid dictionary index.
+#[derive(Debug, ThisError)]
+#[error("Invalid dictionary index.")]
+pub struct DictIndexError;
+
impl Dictionary {
/// Creates a new, empty dictionary with the specified `encoding`.
pub fn new(encoding: &'static Encoding) -> Self {
/// Returns the split variables.
pub fn split_vars(&self) -> MappedVariables<'_> {
- MappedVariables::new(self, &self.split_file)
+ MappedVariables::new_unchecked(self, &self.split_file)
}
pub fn vectors(&self) -> Vectors<'_> {
Vectors::new(self)
}
+ pub fn vectors_mut(&mut self) -> VectorsMut<'_> {
+ VectorsMut::new(self)
+ }
+
pub fn mrsets(&self) -> MultipleResponseSets<'_> {
MultipleResponseSets::new(self)
}
}
impl<'a> Vector<'a> {
+ fn new_unchecked(dictionary: &'a Dictionary, vector: &'a DictIndexVector) -> Self {
+ Self { dictionary, vector }
+ }
+ pub fn new(
+ dictionary: &'a Dictionary,
+ vector: &'a DictIndexVector,
+ ) -> Result<Self, DictIndexError> {
+ MappedVariables::new(dictionary, &vector.variables)?;
+ Ok(Self::new_unchecked(dictionary, vector))
+ }
pub fn name(&self) -> &'a Identifier {
&self.vector.name
}
pub fn variables(&self) -> MappedVariables<'a> {
- MappedVariables::new(self.dictionary, &self.vector.variables)
+ MappedVariables::new_unchecked(self.dictionary, &self.vector.variables)
}
}
}
impl<'a> MappedVariables<'a> {
- fn new(dictionary: &'a Dictionary, dict_indexes: &'a [DictIndex]) -> Self {
+ fn new_unchecked(dictionary: &'a Dictionary, dict_indexes: &'a [DictIndex]) -> Self {
Self {
dictionary,
dict_indexes,
}
}
+ pub fn new(
+ dictionary: &'a Dictionary,
+ dict_indexes: &'a [DictIndex],
+ ) -> Result<Self, DictIndexError> {
+ if dict_indexes
+ .iter()
+ .all(|dict_index| *dict_index < dictionary.variables.len())
+ {
+ Ok(Self::new_unchecked(dictionary, dict_indexes))
+ } else {
+ Err(DictIndexError)
+ }
+ }
+
pub fn len(&self) -> usize {
self.dict_indexes.len()
}
type Item = Vector<'a>;
fn next(&mut self) -> Option<Self::Item> {
- self.iter.next().map(|vector| Vector {
- dictionary: self.dictionary,
- vector,
- })
+ self.iter
+ .next()
+ .map(|vector| Vector::new_unchecked(self.dictionary, vector))
+ }
+}
+
+#[derive(Debug)]
+pub struct VectorsMut<'a>(&'a mut Dictionary);
+
+impl<'a> VectorsMut<'a> {
+ fn new(dictionary: &'a mut Dictionary) -> Self {
+ Self(dictionary)
+ }
+ pub fn as_vectors(&'a self) -> Vectors<'a> {
+ Vectors(self.0)
+ }
+ pub fn insert(&mut self, vector: DictIndexVector) -> Result<(), DictIndexError> {
+ Vector::new(self.0, &vector)?;
+ self.0.vectors.insert(ByIdentifier(vector));
+ Ok(())
}
}
self.0.vectors.len()
}
pub fn get(&self, name: &Identifier) -> Option<Vector<'a>> {
- self.0.vectors.get(&name.0).map(|vector| Vector {
- dictionary: self.0,
- vector: &*vector,
- })
+ self.0
+ .vectors
+ .get(&name.0)
+ .map(|vector| Vector::new_unchecked(self.0, &*vector))
}
pub fn iter(&self) -> VectorsIter<'a> {
VectorsIter::new(self.0)
&self.variable_set.name
}
pub fn variables(&self) -> MappedVariables<'a> {
- MappedVariables::new(self.dictionary, &self.variable_set.variables)
+ MappedVariables::new_unchecked(self.dictionary, &self.variable_set.variables)
}
}
}
pub fn variables(&self) -> MappedVariables<'a> {
- MappedVariables::new(self.dictionary, &self.mrset.variables)
+ MappedVariables::new_unchecked(self.dictionary, &self.mrset.variables)
}
}
}
#[cfg(test)]
-mod test {
- use std::collections::HashSet;
-
+mod tests {
use encoding_rs::{UTF_8, WINDOWS_1252};
use smallvec::SmallVec;
- use unicase::UniCase;
use crate::{
- dictionary::Dictionary,
+ dictionary::{DictIndexVector, Dictionary},
identifier::Identifier,
variable::{VarWidth, Variable},
};
- use super::{ByIdentifier, HasIdentifier};
-
- #[derive(PartialEq, Eq, Debug, Clone)]
- struct SimpleVar {
- name: Identifier,
- value: i32,
- }
-
- impl HasIdentifier for SimpleVar {
- fn identifier(&self) -> &UniCase<String> {
- &self.name.0
- }
- }
-
- #[test]
- fn test() {
- // Variables should not be the same if their values differ.
- let abcd = Identifier::new("abcd").unwrap();
- let abcd1 = SimpleVar {
- name: abcd.clone(),
- value: 1,
- };
- let abcd2 = SimpleVar {
- name: abcd,
- value: 2,
- };
- assert_ne!(abcd1, abcd2);
-
- // But `ByName` should treat them the same.
- let abcd1_by_name = ByIdentifier::new(abcd1);
- let abcd2_by_name = ByIdentifier::new(abcd2);
- assert_eq!(abcd1_by_name, abcd2_by_name);
-
- // And a `HashSet` of `ByName` should also treat them the same.
- let mut vars: HashSet<ByIdentifier<SimpleVar>> = HashSet::new();
- assert!(vars.insert(ByIdentifier::new(abcd1_by_name.0.clone())));
- assert!(!vars.insert(ByIdentifier::new(abcd2_by_name.0.clone())));
- assert_eq!(
- vars.get(&UniCase::new(String::from("abcd")))
- .unwrap()
- .0
- .value,
- 1
- );
- }
-
#[test]
fn short_names() {
for (variables, expected, encoding) in [
assert_eq!(expected, dict.short_names());
}
}
+
+ #[test]
+ fn codepage_to_unicode() {
+ let mut dictionary = Dictionary::new(WINDOWS_1252);
+
+ dictionary
+ .add_var(Variable::new(
+ Identifier::new("ééééééééééééééééééééééééééééééééa").unwrap(),
+ VarWidth::Numeric,
+ WINDOWS_1252,
+ ))
+ .unwrap();
+ dictionary
+ .add_var(Variable::new(
+ Identifier::new("ééééééééééééééééééééééééééééééééb").unwrap(),
+ VarWidth::Numeric,
+ WINDOWS_1252,
+ ))
+ .unwrap();
+
+ dictionary
+ .vectors_mut()
+ .insert(DictIndexVector {
+ name: Identifier::new("àààààààààààààààààààààààààààààààà").unwrap(),
+ variables: vec![0, 1],
+ })
+ .unwrap();
+ dictionary
+ .vectors_mut()
+ .insert(DictIndexVector {
+ name: Identifier::new("ààààààààààààààààààààààààààààààààx").unwrap(),
+ variables: vec![0, 1],
+ })
+ .unwrap();
+
+ dictionary.codepage_to_unicode();
+ dbg!(&dictionary);
+ todo!()
+ }
}
#[cfg(test)]
mod tests {
+ use std::collections::HashSet;
+
use encoding_rs::{Encoding, UTF_8, WINDOWS_1252};
+ use unicase::UniCase;
use crate::identifier::Identifier;
+ use super::{ByIdentifier, HasIdentifier};
+
+ #[derive(PartialEq, Eq, Debug, Clone)]
+ struct SimpleVar {
+ name: Identifier,
+ value: i32,
+ }
+
+ impl HasIdentifier for SimpleVar {
+ fn identifier(&self) -> &UniCase<String> {
+ &self.name.0
+ }
+ }
+
+ #[test]
+ fn identifier() {
+ // Variables should not be the same if their values differ.
+ let abcd = Identifier::new("abcd").unwrap();
+ let abcd1 = SimpleVar {
+ name: abcd.clone(),
+ value: 1,
+ };
+ let abcd2 = SimpleVar {
+ name: abcd,
+ value: 2,
+ };
+ assert_ne!(abcd1, abcd2);
+
+ // But [ByIdentifier]` should treat them the same.
+ let abcd1_by_name = ByIdentifier::new(abcd1);
+ let abcd2_by_name = ByIdentifier::new(abcd2);
+ assert_eq!(abcd1_by_name, abcd2_by_name);
+
+ // And a [HashSet] of [ByIdentifier] should also treat them the same.
+ let mut vars: HashSet<ByIdentifier<SimpleVar>> = HashSet::new();
+ assert!(vars.insert(ByIdentifier::new(abcd1_by_name.0.clone())));
+ assert!(!vars.insert(ByIdentifier::new(abcd2_by_name.0.clone())));
+ assert_eq!(
+ vars.get(&UniCase::new(String::from("abcd")))
+ .unwrap()
+ .0
+ .value,
+ 1
+ );
+ }
+
#[test]
fn with_suffix() {
for (head, suffix, encoding, max_len, expected) in [
self.0.get(value).map(|s| s.as_str())
}
- pub fn insert(&mut self, value: Datum<ByteString>, label: String) -> Option<String> {
- self.0.insert(value, label)
+ pub fn insert(&mut self, value: Datum<ByteString>, label: impl Into<String>) -> Option<String> {
+ self.0.insert(value, label.into())
}
pub fn is_resizable(&self, width: VarWidth) -> bool {
}
}
+// Currently doesn't filter out duplicates (should it?).
#[derive(Clone, Default, Serialize, PartialEq)]
pub struct MissingValues {
/// Individual missing values, up to 3 of them.
use crate::{
data::{ByteString, Datum, RawString, WithEncoding},
- variable::{MissingValues, VarWidth},
+ variable::{MissingValues, ValueLabels, VarWidth},
};
#[test]
assert_eq!(&actual, &expected);
}
+
+ #[test]
+ fn value_labels_codepage_to_unicode() {
+ fn windows_1252(s: &str) -> Datum<ByteString> {
+ Datum::String(ByteString::from(WINDOWS_1252.encode(s).0))
+ }
+
+ let mut actual = ValueLabels::new();
+ actual.insert(windows_1252("abcd"), "Label 1");
+ actual.insert(windows_1252("éèäî"), "Label 2");
+ actual.codepage_to_unicode(WINDOWS_1252);
+
+ let mut expected = ValueLabels::new();
+ expected.insert(Datum::String(ByteString::from("abcd ")), "Label 1");
+ expected.insert(Datum::String(ByteString::from("éèäî ")), "Label 2");
+
+ assert_eq!(&actual, &expected);
+ }
}