long string missing values
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 18 Nov 2023 01:07:33 +0000 (17:07 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 18 Nov 2023 01:07:33 +0000 (17:07 -0800)
rust/src/cooked.rs
rust/src/raw.rs

index f3c6adbbc4f15e0f93a0f304740c035cc5ea8294..d00f3f3c34f7a6087570c6315e98b0539f82c2ab 100644 (file)
@@ -5,7 +5,7 @@ use crate::{
     endian::Endian,
     format::{Error as FormatError, Spec, UncheckedSpec},
     identifier::{Error as IdError, Identifier},
-    raw::{self, MissingValues, UnencodedStr, VarType},
+    raw::{self, UnencodedStr, VarType},
 };
 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
 use encoding_rs::{DecoderResult, Encoding};
@@ -169,6 +169,7 @@ pub enum Record {
     VariableSets(VariableSetRecord),
     VarDisplay(VarDisplayRecord),
     MultipleResponse(MultipleResponseRecord),
+    LongStringMissingValues(LongStringMissingValuesRecord),
     LongStringValueLabels(LongStringValueLabelRecord),
     Encoding(EncodingRecord),
     NumberOfCases(NumberOfCasesRecord),
@@ -302,6 +303,11 @@ pub fn decode(
                     output.push(Record::MultipleResponse(mrr))
                 }
             }
+            raw::Record::LongStringMissingValues(ref input) => {
+                if let Some(mrr) = LongStringMissingValuesRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::LongStringMissingValues(mrr))
+                }
+            }
             raw::Record::LongStringValueLabels(ref input) => {
                 if let Some(mrr) =
                     LongStringValueLabelRecord::try_decode(&mut decoder, input, warn)?
@@ -580,6 +586,31 @@ pub struct VariableRecord {
     pub label: Option<String>,
 }
 
+#[derive(Clone, Debug)]
+pub struct MissingValues {
+    /// Individual missing values, up to 3 of them.
+    pub values: Vec<Value>,
+
+    /// Optional range of missing values.
+    pub range: Option<(Value, Value)>,
+}
+
+impl Decode<raw::MissingValues> for MissingValues {
+    fn decode(decoder: &Decoder, input: &raw::MissingValues, _warn: impl Fn(Error)) -> Self {
+        MissingValues {
+            values: input
+                .values
+                .iter()
+                .map(|value| Value::decode(value, decoder))
+                .collect(),
+            range: input
+                .range
+                .as_ref()
+                .map(|(low, high)| (Value::decode(low, decoder), Value::decode(high, decoder))),
+        }
+    }
+}
+
 fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
     UncheckedSpec::try_from(raw)
         .and_then(Spec::try_from)
@@ -672,7 +703,7 @@ impl TryDecode for VariableRecord {
             name,
             print_format,
             write_format,
-            missing_values: input.missing_values.clone(),
+            missing_values: MissingValues::decode(decoder, &input.missing_values, warn),
             label,
         }))
     }
@@ -746,7 +777,7 @@ pub enum Value {
 }
 
 impl Value {
-    pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
+    pub fn decode(raw: &raw::Value, decoder: &Decoder) -> Self {
         match raw {
             raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
             raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
@@ -817,7 +848,7 @@ impl TryDecode for ValueLabelRecord {
             .map(|(value, label)| {
                 let label = decoder.decode_string(&label.0, &warn);
                 let value = Value::decode(
-                    raw::Value::from_raw(*value, var_type, decoder.endian),
+                    &raw::Value::from_raw(value, var_type, decoder.endian),
                     decoder,
                 );
                 ValueLabel { value, label }
@@ -1305,6 +1336,56 @@ impl TryDecode for MultipleResponseRecord {
     }
 }
 
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValues {
+    /// Variable name.
+    pub var_name: Identifier,
+
+    /// Missing values.
+    pub missing_values: MissingValues,
+}
+
+impl LongStringMissingValues {
+    fn decode(
+        decoder: &Decoder,
+        input: &raw::LongStringMissingValues,
+        warn: &impl Fn(Error),
+    ) -> Result<Self, Error> {
+        let var_name = decoder.decode_string(&input.var_name.0, warn);
+        let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
+            .map_err(Error::InvalidLongStringValueLabelName)?;
+
+        let missing_values = MissingValues::decode(decoder, &input.missing_values, warn);
+
+        Ok(LongStringMissingValues {
+            var_name,
+            missing_values
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValuesRecord(Vec<LongStringMissingValues>);
+
+impl TryDecode for LongStringMissingValuesRecord {
+    type Input = raw::LongStringMissingValueSet;
+
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<Self>, Error> {
+        let mut labels = Vec::with_capacity(input.0.len());
+        for label in &input.0 {
+            match LongStringMissingValues::decode(decoder, label, &warn) {
+                Ok(set) => labels.push(set),
+                Err(error) => warn(error),
+            }
+        }
+        Ok(Some(LongStringMissingValuesRecord(labels)))
+    }
+}
+
 #[derive(Clone, Debug)]
 pub struct LongStringValueLabels {
     pub var_name: Identifier,
index db508ae93ffe2c9f42f749163b8a98dd7b04f09b..8722febe887afa1ab73f7e34ab3f526a4b831ac2 100644 (file)
@@ -543,13 +543,13 @@ impl Debug for Value {
 impl Value {
     fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Value, IoError> {
         Ok(Self::from_raw(
-            UntypedValue(read_bytes(r)?),
+            &UntypedValue(read_bytes(r)?),
             var_type,
             endian,
         ))
     }
 
-    pub fn from_raw(raw: UntypedValue, var_type: VarType, endian: Endian) -> Value {
+    pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Value {
         match var_type {
             VarType::String => Value::String(UnencodedStr(raw.0)),
             VarType::Numeric => {
@@ -579,7 +579,7 @@ impl Value {
                     });
                 }
             };
-            values.push(Value::from_raw(UntypedValue(raw), var_type, endian));
+            values.push(Value::from_raw(&UntypedValue(raw), var_type, endian));
         }
         Ok(Some(values))
     }
@@ -630,7 +630,7 @@ impl Value {
                         }
                     }
                     253 => {
-                        break Value::from_raw(UntypedValue(read_bytes(reader)?), var_type, endian)
+                        break Value::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian)
                     }
                     254 => match var_type {
                         VarType::String => break Value::String(UnencodedStr(*b"        ")), // XXX EBCDIC
@@ -1410,7 +1410,7 @@ pub struct LongStringMissingValues {
 }
 
 #[derive(Clone, Debug)]
-pub struct LongStringMissingValueSet(Vec<LongStringMissingValues>);
+pub struct LongStringMissingValueSet(pub Vec<LongStringMissingValues>);
 
 impl ExtensionRecord for LongStringMissingValueSet {
     const SUBTYPE: u32 = 22;