work
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 21 Aug 2023 15:55:01 +0000 (08:55 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 21 Aug 2023 15:55:01 +0000 (08:55 -0700)
rust/src/cooked.rs
rust/src/raw.rs

index 02f5c23d79d079a6e1509959833433821375868c..8d748778b99797151813894b98d2d258b9f3dce2 100644 (file)
@@ -4,6 +4,7 @@ use crate::{
     format::{Error as FormatError, Spec, UncheckedSpec},
     identifier::{Error as IdError, Identifier},
     raw::{self, MissingValues, VarType},
+    CategoryLabels,
     {endian::Endian, Compression},
 };
 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
@@ -73,6 +74,24 @@ pub enum Error {
     )]
     InvalidLongStringValueLabel(Identifier),
 
+    #[error("Invalid multiple response set name.  {0}")]
+    InvalidMrSetName(#[from] IdError),
+
+    #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
+    UnknownMrSetVariable {
+        mr_set: Identifier,
+        short_name: Identifier,
+    },
+
+    #[error("Multiple response set {0} has no variables.")]
+    EmptyMrSet(Identifier),
+
+    #[error("Multiple response set {0} has only one variable.")]
+    OneVarMrSet(Identifier),
+
+    #[error("Multiple response set {0} contains both string and numeric variables.")]
+    MixedMrSet(Identifier),
+
     #[error("Details TBD")]
     TBD,
 }
@@ -87,7 +106,7 @@ pub enum Record {
     FloatInfo(FloatInfoRecord),
     VariableSets(VariableSetRecord),
     VarDisplay(VarDisplayRecord),
-    //MultipleResponse(MultipleResponseRecord),
+    MultipleResponse(MultipleResponseRecord),
     //LongStringValueLabels(LongStringValueLabelRecord),
     Encoding(EncodingRecord),
     NumberOfCases(NumberOfCasesRecord),
@@ -103,9 +122,9 @@ pub enum Record {
     //Case(Vec<Value>),
 }
 
-pub use crate::raw::IntegerInfoRecord;
-pub use crate::raw::FloatInfoRecord;
 pub use crate::raw::EncodingRecord;
+pub use crate::raw::FloatInfoRecord;
+pub use crate::raw::IntegerInfoRecord;
 pub use crate::raw::NumberOfCasesRecord;
 
 type DictIndex = usize;
@@ -146,6 +165,14 @@ impl Decoder {
         }
         output
     }
+    /// Decodes `input` with `decode_string` and builds an `Identifier` from the
+    /// resulting text via `Identifier::new`, using this decoder's encoding.
+    ///
+    /// `warn` is forwarded unchanged to `decode_string`.  A failure reported by
+    /// `Identifier::new` is returned to the caller as the `Err` value.
+    pub fn decode_identifier(
+        &self,
+        input: &[u8],
+        warn: &impl Fn(Error),
+    ) -> Result<Identifier, IdError> {
+        Identifier::new(&self.decode_string(input, warn), self.encoding)
+    }
     fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
         let max_index = self.n_dict_indexes - 1;
         if dict_index == 0 || dict_index as usize > max_index {
@@ -263,6 +290,21 @@ impl VarWidth {
             VarWidth::String(w) => div_ceil(w as usize, 8),
         }
     }
+
+    /// Combines two optional widths `a` and `b` into the wider of the two:
+    /// - Two numeric widths yield `Some(VarWidth::Numeric)`.
+    /// - Two string widths yield the string width with the larger length.
+    /// - A numeric width paired with a string width yields `None`.
+    /// - If either `a` or `b` is `None`, the result is `None`.
+    pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+        // `?` applies the "any `None` in, `None` out" rule before matching.
+        match (a?, b?) {
+            (VarWidth::Numeric, VarWidth::Numeric) => Some(VarWidth::Numeric),
+            (VarWidth::String(x), VarWidth::String(y)) => Some(VarWidth::String(x.max(y))),
+            _ => None,
+        }
+    }
 }
 
 impl From<VarWidth> for VarType {
@@ -312,8 +354,7 @@ impl VariableRecord {
                 })
             }
         };
-        let name = decoder.decode_string(&input.name.0, &warn);
-        let name = match Identifier::new(&name, decoder.encoding) {
+        let name = match decoder.decode_identifier(&input.name.0, &warn) {
             Ok(name) => {
                 if !decoder.var_names.contains_key(&name) {
                     name
@@ -508,7 +549,10 @@ impl ValueLabelRecord {
             .iter()
             .map(|(value, label)| {
                 let label = decoder.decode_string(&label.0, &warn);
-                let value = Value::decode(raw::Value::from_raw(*value, var_type, decoder.endian), &decoder);
+                let value = Value::decode(
+                    raw::Value::from_raw(*value, var_type, decoder.endian),
+                    &decoder,
+                );
                 (value, label.into())
             })
             .collect();
@@ -755,6 +799,81 @@ pub struct VarDisplay {
 #[derive(Clone, Debug)]
 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
 
+/// The kind of a multiple response set, as stored in
+/// `MultipleResponseSet::mr_type`.
+#[derive(Clone, Debug)]
+pub enum MultipleResponseType {
+    /// A multiple-dichotomy set.
+    MultipleDichotomy {
+        // NOTE(review): presumably the value that counts as a positive
+        // response for each variable in the set -- confirm against the raw
+        // record reader.
+        value: Value,
+        // NOTE(review): presumably selects where category labels come from --
+        // confirm against the definition of `CategoryLabels`.
+        labels: CategoryLabels,
+    },
+    /// A multiple-category set; it carries no extra data.
+    MultipleCategory,
+}
+
+/// A decoded multiple response set.
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSet {
+    /// Name of the set, validated as an identifier.
+    pub name: Identifier,
+    /// Label text of the set, decoded to a `String`.
+    pub label: String,
+    /// Kind of set, with any kind-specific data.
+    pub mr_type: MultipleResponseType,
+    /// Dictionary indexes of the variables that belong to the set.
+    pub dict_indexes: Vec<DictIndex>,
+}
+
+impl MultipleResponseSet {
+    /// Decodes the raw multiple response set `input` using `decoder`.
+    ///
+    /// Short names that are not valid identifiers, or that do not name a
+    /// variable known to `decoder.var_names`, are reported through `warn` and
+    /// skipped.
+    ///
+    /// # Errors
+    ///
+    /// - `Error::InvalidMrSetName` if the set's own name is not a valid
+    ///   identifier.
+    /// - `Error::EmptyMrSet` or `Error::OneVarMrSet` if fewer than two variables
+    ///   remain after skipping unusable short names.
+    /// - `Error::MixedMrSet` if `VarWidth::wider` cannot combine the widths of
+    ///   the remaining variables (string and numeric variables mixed).
+    ///
+    /// # Panics
+    ///
+    /// Decoding is unfinished: once validation succeeds the function reaches
+    /// `todo!()` instead of returning a set.
+    fn decode(
+        decoder: &Decoder,
+        input: &raw::MultipleResponseSet,
+        warn: &impl Fn(Error),
+    ) -> Result<Option<Self>, Error> {
+        let mr_set_name = decoder
+            .decode_identifier(&input.name.0, warn)
+            .map_err(Error::InvalidMrSetName)?;
+
+        // The annotation gives `.into()` a concrete target; it matches the type
+        // of the `label` field this value is destined for.
+        let label: String = decoder.decode_string(&input.label.0, warn).into();
+
+        // Must be `mut`: indexes are pushed as short names are resolved.
+        let mut dict_indexes = Vec::with_capacity(input.short_names.len());
+        // Iterate by reference; the raw strings are only read, never consumed.
+        for short_name in &input.short_names {
+            let short_name = match decoder.decode_identifier(&short_name.0, warn) {
+                Ok(name) => name,
+                Err(error) => {
+                    // NOTE(review): this reuses the "set name" error variant for
+                    // an invalid *variable* name, so the message is misleading;
+                    // a dedicated variant would be clearer.
+                    warn(Error::InvalidMrSetName(error));
+                    continue;
+                }
+            };
+            let Some(dict_index) = decoder.var_names.get(&short_name) else {
+                warn(Error::UnknownMrSetVariable {
+                    mr_set: mr_set_name.clone(),
+                    short_name,
+                });
+                continue;
+            };
+            // NOTE(review): if `var_names.get` hands back a reference, copy the
+            // index out before storing it in the `dict_indexes` field.
+            dict_indexes.push(dict_index);
+        }
+
+        match dict_indexes.len() {
+            0 => return Err(Error::EmptyMrSet(mr_set_name)),
+            1 => return Err(Error::OneVarMrSet(mr_set_name)),
+            _ => (),
+        }
+
+        // Fold all member widths together; `None` means the set mixes string
+        // and numeric variables.
+        let Some(var_width) = dict_indexes
+            .iter()
+            .map(|&dict_index| Some(decoder.variables[dict_index].width))
+            .reduce(VarWidth::wider)
+            .flatten()
+        else {
+            return Err(Error::MixedMrSet(mr_set_name));
+        };
+
+        // TODO: decode `input.mr_type` and return the finished set.  The
+        // original body ended here without producing a value, which cannot
+        // satisfy the declared return type; `todo!()` marks the gap explicitly.
+        let _ = (label, var_width);
+        todo!("finish decoding multiple response set {mr_set_name}")
+    }
+}
+
+/// A decoded multiple response record: a list of multiple response sets.
+#[derive(Clone, Debug)]
+pub struct MultipleResponseRecord(Vec<MultipleResponseSet>);
+
+impl Decode for MultipleResponseRecord {
+    type Input = raw::MultipleResponseRecord;
+
+    // NOTE(review): not implemented yet.  The body is empty, so it evaluates
+    // to `()` rather than the declared `Result<Self, Error>`.  Presumably it
+    // should decode each raw set with `MultipleResponseSet::decode` and
+    // collect the results -- TODO confirm against `raw::MultipleResponseRecord`.
+    fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
+    }
+}
+
 #[cfg(test)]
 mod test {
     use encoding_rs::WINDOWS_1252;
index b5968e67b94a24780bab517702b5dbe6dfbba710..a9f463425c8e348c3f0929161ecceaee251ce8a9 100644 (file)
@@ -1210,7 +1210,7 @@ pub struct MultipleResponseSet {
     pub name: UnencodedString,
     pub label: UnencodedString,
     pub mr_type: MultipleResponseType,
-    pub vars: Vec<UnencodedString>,
+    pub short_names: Vec<UnencodedString>,
 }
 
 impl MultipleResponseSet {
@@ -1280,7 +1280,7 @@ impl MultipleResponseSet {
                 name: name.into(),
                 label: label.into(),
                 mr_type,
-                vars,
+                short_names: vars,
             },
             input,
         ))