};
use crate::{
- variable::{VarType, VarWidth},
format::DisplayPlain,
+ variable::{VarType, VarWidth},
};
pub trait RawString: Debug + PartialEq + Eq + PartialOrd + Ord + Hash {
ByteStr(self.raw_string_bytes())
}
+ fn without_trailing_spaces(&self) -> ByteStr<'_> {
+ let mut raw = self.raw_string_bytes();
+ while let Some(trimmed) = raw.strip_suffix(b" ") {
+ raw = trimmed;
+ }
+ ByteStr(raw)
+ }
+
fn as_encoded(&self, encoding: &'static Encoding) -> WithEncoding<ByteStr<'_>>
where
Self: Sized,
use unicase::UniCase;
use crate::{
- data::{ByteString, Datum},
+ data::{ByteString, Datum, RawString},
identifier::{ByIdentifier, HasIdentifier, Identifier},
output::pivot::{
Axis3, Dimension, Display26Adic, Footnote, Footnotes, Group, PivotTable, Value,
pub attributes: Attributes,
/// Multiple response sets.
- pub mrsets: BTreeSet<ByIdentifier<DictIndexMultipleResponseSet>>,
+ mrsets: BTreeSet<ByIdentifier<DictIndexMultipleResponseSet>>,
/// Variable sets.
///
pub struct InvalidFilterVariable;
/// Invalid dictionary index.
-#[derive(Debug, ThisError)]
-#[error("Invalid dictionary index.")]
-pub struct DictIndexError;
+#[derive(Debug, Clone, ThisError)]
+#[error("Invalid index {index} in dictionary with {n} variables.")]
+pub struct DictIndexError {
+ index: usize,
+ n: usize,
+}
impl Dictionary {
/// Creates a new, empty dictionary with the specified `encoding`.
MultipleResponseSets::new(self)
}
+ pub fn mrsets_mut(&mut self) -> MultipleResponseSetsMut<'_> {
+ MultipleResponseSetsMut::new(self)
+ }
+
pub fn variable_sets(&self) -> VariableSets<'_> {
VariableSets::new(self)
}
}
self.variables = variables;
- let mut vectors = HashSet::new();
let mut index = 0;
- for mut vector in self.vectors.drain() {
+ let mut vectors = self.vectors.drain().collect::<Vec<_>>();
+ vectors.sort();
+ for mut vector in vectors {
vector.codepage_to_unicode();
- while vectors.contains(&vector) {
+ while self.vectors.contains(&vector) {
index += 1;
vector.name = Identifier::new(format!("Vec{index}")).unwrap();
}
- vectors.insert(vector);
+ self.vectors.insert(vector);
}
- self.vectors = vectors;
self.attributes.codepage_to_unicode();
mrset.codepage_to_unicode();
while mrsets.contains(&mrset) {
index += 1;
- mrset.name = Identifier::new(format!("Mr{index}")).unwrap();
+ mrset.name = Identifier::new(format!("MrSet{index}")).unwrap();
}
mrsets.insert(mrset);
}
dictionary: &'a Dictionary,
dict_indexes: &'a [DictIndex],
) -> Result<Self, DictIndexError> {
- if dict_indexes
- .iter()
- .all(|dict_index| *dict_index < dictionary.variables.len())
- {
- Ok(Self::new_unchecked(dictionary, dict_indexes))
- } else {
- Err(DictIndexError)
+ let n = dictionary.variables.len();
+ for index in dict_indexes.iter().copied() {
+ if index >= n {
+ return Err(DictIndexError { index, n });
+ }
}
+ Ok(Self::new_unchecked(dictionary, dict_indexes))
}
pub fn len(&self) -> usize {
pub fn iter(&self) -> MappedVariablesIter<'a> {
MappedVariablesIter::new(self.dictionary, self.dict_indexes.iter())
}
+
+ pub fn dict_indexes(&self) -> &[DictIndex] {
+ self.dict_indexes
+ }
}
impl<'a> Index<usize> for MappedVariables<'a> {
}
}
+#[derive(Debug)]
+pub struct MultipleResponseSetsMut<'a>(&'a mut Dictionary);
+
+#[derive(ThisError, Clone, Debug)]
+pub enum MrSetError {
+ #[error("{0}")]
+ DictIndexError(#[from] DictIndexError),
+
+ /// Counted value {value} has width {width}, but it must be no wider than
+ /// {max_width}, the width of the narrowest variable in multiple response
+ /// set {mr_set}.
+ #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
+ TooWideMDGroupCountedValue {
+ /// Multiple response set name.
+ mr_set: Identifier,
+ /// Counted value.
+ value: String,
+ /// Width of counted value.
+ width: usize,
+ /// Maximum allowed width of counted value.
+ max_width: u16,
+ },
+
+ /// Multiple response set {0} contains both string and numeric variables.
+ #[error("Multiple response set {0} contains both string and numeric variables.")]
+ MixedMrSet(
+ /// Multiple response set name.
+ Identifier,
+ ),
+}
+
+impl<'a> MultipleResponseSetsMut<'a> {
+ fn new(dictionary: &'a mut Dictionary) -> Self {
+ Self(dictionary)
+ }
+
+ pub fn mrsets(&'a self) -> MultipleResponseSets<'a> {
+ MultipleResponseSets::new(self.0)
+ }
+
+ pub fn insert(&mut self, mrset: DictIndexMultipleResponseSet) -> Result<(), MrSetError> {
+ MultipleResponseSet::new(self.0, &mrset)?;
+ self.0.mrsets.insert(ByIdentifier(mrset));
+ Ok(())
+ }
+}
+
#[derive(Clone, Debug)]
pub struct MultipleResponseSets<'a>(&'a Dictionary);
self.0
.mrsets
.get(&name.0)
- .map(|mrset| MultipleResponseSet::new(self.0, mrset))
+ .map(|mrset| MultipleResponseSet::new_unchecked(self.0, mrset))
}
pub fn iter(&self) -> MultipleResponseSetIter<'a> {
fn next(&mut self) -> Option<Self::Item> {
self.iter
.next()
- .map(|set| MultipleResponseSet::new(self.dictionary, set))
+ .map(|set| MultipleResponseSet::new_unchecked(self.dictionary, set))
}
}
}
impl<'a> MultipleResponseSet<'a> {
- fn new(dictionary: &'a Dictionary, mrset: &'a DictIndexMultipleResponseSet) -> Self {
+ fn new_unchecked(dictionary: &'a Dictionary, mrset: &'a DictIndexMultipleResponseSet) -> Self {
Self { dictionary, mrset }
}
+ fn new(
+ dictionary: &'a Dictionary,
+ mrset: &'a DictIndexMultipleResponseSet,
+ ) -> Result<Self, MrSetError> {
+ let variables = MappedVariables::new(dictionary, &mrset.variables)?;
+ let (min_width, _max_width) = Self::widths(&variables)
+ .ok_or_else(|| MrSetError::MixedMrSet(mrset.name.clone()))?
+ .into_inner();
+
+ if let MultipleResponseType::MultipleDichotomy { datum, labels: _ } = &mrset.mr_type {
+ match (datum, min_width) {
+ (Datum::Number(_), VarWidth::Numeric) => (),
+ (Datum::String(s), VarWidth::String(min_width)) => {
+ if s.without_trailing_spaces().len() > min_width as usize {}
+ }
+ _ => return Err(MrSetError::MixedMrSet(mrset.name.clone())),
+ }
+ }
+ Ok(Self::new_unchecked(dictionary, mrset))
+ }
+
+ fn widths(variables: &MappedVariables<'_>) -> Option<RangeInclusive<VarWidth>> {
+ variables
+ .iter()
+ .map(|v| Some((v.width, v.width)))
+ .reduce(|a, b| {
+ let (na, wa) = a?;
+ let (nb, wb) = b?;
+ Some((VarWidth::narrower(na, nb)?, VarWidth::wider(wa, wb)?))
+ })
+ .flatten()
+ .map(|(min_width, max_width)| min_width..=max_width)
+ }
+
pub fn name(&self) -> &Identifier {
&self.mrset.name
}
}
pub fn width(&self) -> RangeInclusive<VarWidth> {
- self.mrset.width.clone()
+ Self::widths(&self.variables()).unwrap()
}
pub fn mr_type(&self) -> &MultipleResponseType {
/// A description for the set.
pub label: String,
- /// Range of widths among the variables.
- pub width: RangeInclusive<VarWidth>,
-
/// What kind of multiple response set this is.
pub mr_type: MultipleResponseType,
use smallvec::SmallVec;
use crate::{
- dictionary::{DictIndexVector, Dictionary},
+ data::Datum,
+ dictionary::{
+ CategoryLabels, DictIndexMultipleResponseSet, DictIndexVector, Dictionary,
+ MultipleResponseType,
+ },
identifier::Identifier,
variable::{VarWidth, Variable},
};
.vectors_mut()
.insert(DictIndexVector {
name: Identifier::new("ààààààààààààààààààààààààààààààààx").unwrap(),
+ variables: vec![1, 0],
+ })
+ .unwrap();
+
+ dictionary
+ .mrsets_mut()
+ .insert(DictIndexMultipleResponseSet {
+ name: Identifier::new("üüüüüüüüüüüüüüüüüüüüüüüüüüüüüüüüasdf").unwrap(),
+ label: String::from("my mcgroup"),
+ mr_type: MultipleResponseType::MultipleCategory,
+ variables: vec![0, 1],
+ })
+ .unwrap();
+ dictionary
+ .mrsets_mut()
+ .insert(DictIndexMultipleResponseSet {
+ name: Identifier::new("üüüüüüüüüüüüüüüüüüüüüüüüüüüüüüüüquux").unwrap(),
+ label: String::new(),
+ mr_type: MultipleResponseType::MultipleDichotomy {
+ datum: Datum::Number(Some(55.0)),
+ labels: CategoryLabels::VarLabels,
+ },
variables: vec![0, 1],
})
.unwrap();
dictionary.codepage_to_unicode();
dbg!(&dictionary);
- todo!()
+ assert_eq!(
+ &dictionary.variables[0].name,
+ "éééééééééééééééééééééééééééééééé"
+ );
+ assert_eq!(&dictionary.variables[1].name, "Var1");
+ assert_eq!(
+ dictionary
+ .vectors()
+ .get(&Identifier::new("àààààààààààààààààààààààààààààààà").unwrap())
+ .unwrap()
+ .variables()
+ .dict_indexes(),
+ &[0, 1]
+ );
+ assert_eq!(
+ dictionary
+ .vectors()
+ .get(&Identifier::new("Vec1").unwrap())
+ .unwrap()
+ .variables()
+ .dict_indexes(),
+ &[1, 0]
+ );
+ assert!(matches!(
+ dictionary
+ .mrsets()
+ .get(&Identifier::new("üüüüüüüüüüüüüüüüüüüüüüüüüüüüüüüü").unwrap())
+ .unwrap()
+ .mr_type(),
+ MultipleResponseType::MultipleCategory
+ ));
+ assert!(matches!(
+ dictionary
+ .mrsets()
+ .get(&Identifier::new("MrSet1").unwrap())
+ .unwrap()
+ .mr_type(),
+ MultipleResponseType::MultipleDichotomy { .. }
+ ));
}
}
crypto::EncryptedFile,
data::{ByteString, Case, Datum, MutRawString, RawString},
dictionary::{
- DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, MultipleResponseType,
+ DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, MrSetError,
+ MultipleResponseType,
},
format::{Error as FormatError, Format, UncheckedFormat},
hexfloat::HexFloat,
- identifier::{ByIdentifier, Error as IdError, Identifier},
+ identifier::{Error as IdError, Identifier},
output::pivot::{Group, Value},
sys::{
raw::{
},
serialize_endian,
},
- variable::{InvalidRole, MissingValues, MissingValuesError, VarWidth, Variable},
+ variable::{InvalidRole, MissingValues, MissingValuesError, VarType, VarWidth, Variable},
};
use anyhow::{anyhow, Error as AnyError};
use binrw::{BinRead, BinWrite, Endian};
Identifier,
),
- /// Multiple response set {0} contains both string and numeric variables.
- #[error("Multiple response set {0} contains both string and numeric variables.")]
- MixedMrSet(
- /// Multiple response set name.
- Identifier,
- ),
+ /// Error adding multiple response set.
+ #[error("{0}")]
+ MrSetError(#[from] MrSetError),
/// Invalid numeric format for counted value {number} in multiple response set {mr_set}.
#[error(
number: String,
},
- /// Counted value {value} has width {width}, but it must be no wider than
- /// {max_width}, the width of the narrowest variable in multiple response
- /// set {mr_set}.
- #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
- TooWideMDGroupCountedValue {
- /// Multiple response set name.
- mr_set: Identifier,
- /// Counted value.
- value: String,
- /// Width of counted value.
- width: usize,
- /// Maximum allowed width of counted value.
- max_width: u16,
- },
-
/// Ignoring long string value label for unknown variable {0}.
#[error("Ignoring long string value label for unknown variable {0}.")]
UnknownLongStringValueLabelVariable(
{
match DictIndexMultipleResponseSet::decode(&dictionary, record, &mut warn) {
Ok(mrset) => {
- dictionary.mrsets.insert(ByIdentifier::new(mrset));
+ dictionary.mrsets_mut().insert(mrset).unwrap();
}
Err(error) => warn(error),
}
_ => (),
}
- let Some((Some(min_width), Some(max_width))) = variables
+ let Ok(var_type) = variables
.iter()
- .copied()
- .map(|dict_index| dictionary.variables[dict_index].width)
- .map(|w| (Some(w), Some(w)))
- .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
+ .map(|dict_index| VarType::from(dictionary.variables[*dict_index].width))
+ .all_equal_value()
else {
- return Err(Error::MixedMrSet(mr_set_name));
+ return Err(MrSetError::MixedMrSet(mr_set_name).into());
};
- let mr_type = MultipleResponseType::decode(&mr_set_name, &input.mr_type, min_width)?;
+ let mr_type = MultipleResponseType::decode(&mr_set_name, &input.mr_type, var_type)?;
Ok(DictIndexMultipleResponseSet {
name: mr_set_name,
- width: min_width..=max_width,
label: input.label.to_string(),
mr_type,
variables,
fn decode(
mr_set: &Identifier,
input: &raw::records::MultipleResponseType,
- min_width: VarWidth,
+ var_type: VarType,
) -> Result<Self, Error> {
match input {
raw::records::MultipleResponseType::MultipleDichotomy { value, labels } => {
- let value = match min_width {
- VarWidth::Numeric => {
+ let value = match var_type {
+ VarType::Numeric => {
let string = String::from_utf8_lossy(&value.0);
let number: f64 = string.trim().parse().map_err(|_| {
Error::InvalidMDGroupCountedValue {
})?;
Datum::Number(Some(number))
}
- VarWidth::String(max_width) => {
- let mut value = value.0.as_slice();
- while value.ends_with(b" ") {
- value = &value[..value.len() - 1];
- }
- let width = value.len();
- if width > max_width as usize {
- return Err(Error::TooWideMDGroupCountedValue {
- mr_set: mr_set.clone(),
- value: String::from_utf8_lossy(value).into(),
- width,
- max_width,
- });
- };
- Datum::String(value.into())
+ VarType::String => {
+ let mut value = value.clone();
+ value.trim_end();
+ Datum::String(value)
}
};
Ok(MultipleResponseType::MultipleDichotomy {
let mut output = Vec::new();
for set in self
.dictionary
- .mrsets
+ .mrsets()
.iter()
- .filter(|set| set.mr_type.supported_before_v14() == pre_v14)
+ .filter(|set| set.mr_type().supported_before_v14() == pre_v14)
{
- output.extend_from_slice(&self.dictionary.encoding().encode(&set.name).0[..]);
+ output.extend_from_slice(&self.dictionary.encoding().encode(set.name()).0[..]);
output.push(b'=');
- match &set.mr_type {
+ match set.mr_type() {
MultipleResponseType::MultipleDichotomy { datum, labels } => {
let leader = match labels {
CategoryLabels::VarLabels => b"D".as_slice(),
MultipleResponseType::MultipleCategory => write!(&mut output, "C ").unwrap(),
}
- let label = if set.mr_type.label_from_var_label() {
+ let label = if set.mr_type().label_from_var_label() {
Cow::from(&[])
} else {
- self.dictionary.encoding().encode(&set.label).0
+ self.dictionary.encoding().encode(set.label()).0
};
write!(&mut output, "{} ", label.len()).unwrap();
output.extend_from_slice(&label[..]);
- for variable in set.variables.iter().copied() {
+ for variable in set.variables().dict_indexes().iter().copied() {
// Only lowercase ASCII characters because other characters
// might expand upon lowercasing.
let short_name = self.short_names[variable][0].as_str().to_ascii_lowercase();
CategoryLabels, DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary,
MultipleResponseType,
},
- identifier::{ByIdentifier, Identifier},
+ identifier::Identifier,
sys::{
raw::{
records::{
}
}
dictionary
- .mrsets
- .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+ .mrsets_mut()
+ .insert(DictIndexMultipleResponseSet {
name: Identifier::new("$a").unwrap(),
label: String::from("my mcgroup"),
- width: VarWidth::Numeric..=VarWidth::Numeric,
mr_type: MultipleResponseType::MultipleCategory,
variables: vec![0, 1, 2],
- }));
+ })
+ .unwrap();
dictionary
- .mrsets
- .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+ .mrsets_mut()
+ .insert(DictIndexMultipleResponseSet {
name: Identifier::new("$b").unwrap(),
label: String::new(),
- width: VarWidth::Numeric..=VarWidth::Numeric,
mr_type: MultipleResponseType::MultipleDichotomy {
datum: Datum::Number(Some(55.0)),
labels: CategoryLabels::VarLabels,
},
variables: vec![6, 4, 5, 3],
- }));
+ })
+ .unwrap();
dictionary
- .mrsets
- .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+ .mrsets_mut()
+ .insert(DictIndexMultipleResponseSet {
name: Identifier::new("$c").unwrap(),
label: String::from("mdgroup #2"),
- width: VarWidth::String(3)..=VarWidth::String(3),
mr_type: MultipleResponseType::MultipleDichotomy {
datum: Datum::String("Yes".into()),
labels: CategoryLabels::VarLabels,
},
variables: vec![7, 8, 9],
- }));
+ })
+ .unwrap();
dictionary
- .mrsets
- .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+ .mrsets_mut()
+ .insert(DictIndexMultipleResponseSet {
name: Identifier::new("$d").unwrap(),
label: String::from("third mdgroup"),
- width: VarWidth::Numeric..=VarWidth::Numeric,
mr_type: MultipleResponseType::MultipleDichotomy {
datum: Datum::Number(Some(34.0)),
labels: CategoryLabels::CountedValues {
},
},
variables: vec![10, 11, 12],
- }));
+ })
+ .unwrap();
dictionary
- .mrsets
- .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+ .mrsets_mut()
+ .insert(DictIndexMultipleResponseSet {
name: Identifier::new("$e").unwrap(),
label: String::new(),
- width: VarWidth::String(6)..=VarWidth::String(6),
mr_type: MultipleResponseType::MultipleDichotomy {
datum: Datum::String("choice".into()),
labels: CategoryLabels::CountedValues {
},
},
variables: vec![13, 14, 15],
- }));
+ })
+ .unwrap();
fn get_mrsets(dictionary: &Dictionary, pre_v14: bool) -> String {
let mut raw = Vec::new();
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
pub enum VarWidth {
Numeric,
- String(u16),
+ String(u16), // XXX change to NonZeroU16, or to 1..=32767 range type
}
impl VarWidth {
}
}
- fn width_predicate(
- a: Option<VarWidth>,
- b: Option<VarWidth>,
- f: impl Fn(u16, u16) -> u16,
- ) -> Option<VarWidth> {
+ fn width_predicate(a: VarWidth, b: VarWidth, f: impl Fn(u16, u16) -> u16) -> Option<VarWidth> {
match (a, b) {
- (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
- (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
- Some(VarWidth::String(f(a, b)))
- }
+ (VarWidth::Numeric, VarWidth::Numeric) => Some(VarWidth::Numeric),
+ (VarWidth::String(a), VarWidth::String(b)) => Some(VarWidth::String(f(a, b))),
_ => None,
}
}
/// - Numerical variable widths are equally wide.
/// - Longer strings are wider than shorter strings.
/// - Numerical and string types are incomparable, so result in `None`.
- /// - Any `None` in the input yields `None` in the output.
- pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+ pub fn wider(a: VarWidth, b: VarWidth) -> Option<VarWidth> {
Self::width_predicate(a, b, |a, b| a.max(b))
}
/// Returns the narrower of `self` and `other` (see [`Self::wider`]).
- pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+ pub fn narrower(a: VarWidth, b: VarWidth) -> Option<VarWidth> {
Self::width_predicate(a, b, |a, b| a.min(b))
}