From 240a640e30cefb5b69918bbee600475655c2bcb7 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 21 Aug 2023 08:55:01 -0700 Subject: [PATCH] work --- rust/src/cooked.rs | 131 ++++++++++++++++++++++++++++++++++++++++++--- rust/src/raw.rs | 4 +- 2 files changed, 127 insertions(+), 8 deletions(-) diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index 02f5c23d79..8d748778b9 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -4,6 +4,7 @@ use crate::{ format::{Error as FormatError, Spec, UncheckedSpec}, identifier::{Error as IdError, Identifier}, raw::{self, MissingValues, VarType}, + CategoryLabels, {endian::Endian, Compression}, }; use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; @@ -73,6 +74,24 @@ pub enum Error { )] InvalidLongStringValueLabel(Identifier), + #[error("Invalid multiple response set name. {0}")] + InvalidMrSetName(#[from] IdError), + + #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")] + UnknownMrSetVariable { + mr_set: Identifier, + short_name: Identifier, + }, + + #[error("Multiple response set {0} has no variables.")] + EmptyMrSet(Identifier), + + #[error("Multiple response set {0} has only one variable.")] + OneVarMrSet(Identifier), + + #[error("Multiple response set {0} contains both string and numeric variables.")] + MixedMrSet(Identifier), + #[error("Details TBD")] TBD, } @@ -87,7 +106,7 @@ pub enum Record { FloatInfo(FloatInfoRecord), VariableSets(VariableSetRecord), VarDisplay(VarDisplayRecord), - //MultipleResponse(MultipleResponseRecord), + MultipleResponse(MultipleResponseRecord), //LongStringValueLabels(LongStringValueLabelRecord), Encoding(EncodingRecord), NumberOfCases(NumberOfCasesRecord), @@ -103,9 +122,9 @@ pub enum Record { //Case(Vec), } -pub use crate::raw::IntegerInfoRecord; -pub use crate::raw::FloatInfoRecord; pub use crate::raw::EncodingRecord; +pub use crate::raw::FloatInfoRecord; +pub use crate::raw::IntegerInfoRecord; pub use crate::raw::NumberOfCasesRecord; type DictIndex = usize; @@ -146,6 +165,14 @@ impl Decoder { } output } + pub fn decode_identifier( + &self, + input: &[u8], + warn: &impl Fn(Error), + ) -> Result { + let s = self.decode_string(input, warn); + Identifier::new(&s, self.encoding) + } fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> { let max_index = self.n_dict_indexes - 1; if dict_index == 0 || dict_index as usize > max_index { @@ -263,6 +290,21 @@ impl VarWidth { VarWidth::String(w) => div_ceil(w as usize, 8), } } + + /// Returns the wider of `self` and `other`: + /// - Numerical variable widths are equally wide. + /// - Longer strings are wider than shorter strings. + /// - Numerical and string types are incomparable, so result in `None`. + /// - Any `None` in the input yields `None` in the output. + pub fn wider(a: Option, b: Option) -> Option { + match (a, b) { + (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric), + (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => { + Some(VarWidth::String(a.max(b))) + } + _ => None, + } + } } impl From for VarType { @@ -312,8 +354,7 @@ impl VariableRecord { }) } }; - let name = decoder.decode_string(&input.name.0, &warn); - let name = match Identifier::new(&name, decoder.encoding) { + let name = match decoder.decode_identifier(&input.name.0, &warn) { Ok(name) => { if !decoder.var_names.contains_key(&name) { name @@ -508,7 +549,10 @@ impl ValueLabelRecord { .iter() .map(|(value, label)| { let label = decoder.decode_string(&label.0, &warn); - let value = Value::decode(raw::Value::from_raw(*value, var_type, decoder.endian), &decoder); + let value = Value::decode( + raw::Value::from_raw(*value, var_type, decoder.endian), + &decoder, + ); (value, label.into()) }) .collect(); @@ -755,6 +799,81 @@ pub struct VarDisplay { #[derive(Clone, Debug)] pub struct VarDisplayRecord(pub Vec); +#[derive(Clone, Debug)] +pub enum MultipleResponseType { + MultipleDichotomy { + value: Value, + labels: CategoryLabels, + }, + MultipleCategory, +} + +#[derive(Clone, Debug)] +pub struct MultipleResponseSet { + pub name: Identifier, + pub label: String, + pub mr_type: MultipleResponseType, + pub dict_indexes: Vec, +} + +impl MultipleResponseSet { + fn decode( + decoder: &Decoder, + input: &raw::MultipleResponseSet, + warn: &impl Fn(Error), + ) -> Result, Error> { + let mr_set_name = decoder + .decode_identifier(&input.name.0, warn) + .map_err(|error| Error::InvalidMrSetName(error))?; + + let label = decoder.decode_string(&input.label.0, warn).into(); + + let dict_indexes = Vec::with_capacity(input.short_names.len()); + for &short_name in input.short_names.iter() { + let short_name = match decoder.decode_identifier(&short_name.0, warn) { + Ok(name) => name, + Err(error) => { + warn(Error::InvalidMrSetName(error)); + continue; + } + }; + let Some(dict_index) = decoder.var_names.get(&short_name) else { + warn(Error::UnknownMrSetVariable { + mr_set: mr_set_name.clone(), + short_name: short_name.clone(), + }); + continue; + }; + dict_indexes.push(dict_index); + } + + match dict_indexes.len() { + 0 => return Err(Error::EmptyMrSet(mr_set_name)), + 1 => return Err(Error::OneVarMrSet(mr_set_name)), + _ => (), + } + + let Some(var_width) = dict_indexes + .iter() + .map(|&dict_index| Some(decoder.variables[dict_index].width)) + .reduce(|a, b| VarWidth::wider(a, b)) + .flatten() + else { + return Err(Error::MixedMrSet(mr_set_name)); + }; + } +} + +#[derive(Clone, Debug)] +pub struct MultipleResponseRecord(Vec); + +impl Decode for MultipleResponseRecord { + type Input = raw::MultipleResponseRecord; + + fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result { + } +} + #[cfg(test)] mod test { use encoding_rs::WINDOWS_1252; diff --git a/rust/src/raw.rs b/rust/src/raw.rs index b5968e67b9..a9f463425c 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -1210,7 +1210,7 @@ pub struct MultipleResponseSet { pub name: UnencodedString, pub label: UnencodedString, pub mr_type: MultipleResponseType, - pub vars: Vec, + pub short_names: Vec, } impl MultipleResponseSet { @@ -1280,7 +1280,7 @@ impl MultipleResponseSet { name: name.into(), label: label.into(), mr_type, - vars, + short_names: vars, }, input, )) -- 2.30.2