work
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 24 Dec 2023 17:57:39 +0000 (09:57 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 24 Dec 2023 17:57:39 +0000 (09:57 -0800)
rust/src/cooked.rs
rust/src/dictionary.rs
rust/src/raw.rs

index 095bfe507db1bc1297168fa7e4a8bfdefaa072f8..e17e9f3e7a87271bffcf5bc6b97d1b0956570bdf 100644 (file)
@@ -8,7 +8,7 @@ use crate::{
     endian::Endian,
     format::{Error as FormatError, Spec, UncheckedSpec},
     identifier::{Error as IdError, Identifier},
-    raw::{self, RawStr, RawString, VarType, RawDocumentLine},
+    raw::{self, RawStr, RawString, VarType, RawDocumentLine, VarDisplayRecord},
 };
 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
 use encoding_rs::{DecoderResult, Encoding};
@@ -151,12 +151,6 @@ pub enum Error {
     #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
     MalformedString { encoding: String, text: String },
 
-    #[error("Invalid variable measurement level value {0}")]
-    InvalidMeasurement(u32),
-
-    #[error("Invalid variable display alignment value {0}")]
-    InvalidAlignment(u32),
-
     #[error("Details TBD")]
     TBD,
 }
@@ -372,9 +366,7 @@ pub fn decode(
     }
     // XXX weight
     if let Some(raw) = h.var_display {
-        if let Some(vdr) = VarDisplayRecord::try_decode(&mut decoder, raw, warn)? {
-            output.push(Record::VarDisplay(vdr))
-        }
+        output.push(Record::VarDisplay(raw.clone()));
     }
 
     // Decode records that use short names.
@@ -1169,93 +1161,6 @@ impl VariableAttributeRecord {
     }
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum Measure {
-    Nominal,
-    Ordinal,
-    Scale,
-}
-
-impl Measure {
-    fn try_decode(source: u32) -> Result<Option<Measure>, Error> {
-        match source {
-            0 => Ok(None),
-            1 => Ok(Some(Measure::Nominal)),
-            2 => Ok(Some(Measure::Ordinal)),
-            3 => Ok(Some(Measure::Scale)),
-            _ => Err(Error::InvalidMeasurement(source)),
-        }
-    }
-}
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum Alignment {
-    Left,
-    Right,
-    Center,
-}
-
-impl Alignment {
-    fn try_decode(source: u32) -> Result<Option<Alignment>, Error> {
-        match source {
-            0 => Ok(None),
-            1 => Ok(Some(Alignment::Left)),
-            2 => Ok(Some(Alignment::Right)),
-            3 => Ok(Some(Alignment::Center)),
-            _ => Err(Error::InvalidAlignment(source)),
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct VarDisplay {
-    pub measure: Option<Measure>,
-    pub width: Option<u32>,
-    pub alignment: Option<Alignment>,
-}
-
-#[derive(Clone, Debug)]
-pub struct VarDisplayRecord(pub Vec<VarDisplay>);
-
-impl TryDecode for VarDisplayRecord {
-    type Input = raw::VarDisplayRecord;
-    fn try_decode(
-        decoder: &mut Decoder,
-        input: &Self::Input,
-        warn: impl Fn(Error),
-    ) -> Result<Option<Self>, Error> {
-        let n_vars = decoder.variables.len();
-        let n_per_var = if input.0.len() == 3 * n_vars {
-            3
-        } else if input.0.len() == 2 * n_vars {
-            2
-        } else {
-            return Err(Error::TBD);
-        };
-
-        let var_displays = input
-            .0
-            .chunks(n_per_var)
-            .map(|chunk| {
-                let (measure, width, alignment) = match n_per_var == 3 {
-                    true => (chunk[0], Some(chunk[1]), chunk[2]),
-                    false => (chunk[0], None, chunk[1]),
-                };
-                let measure = Measure::try_decode(measure).warn_on_error(&warn).flatten();
-                let alignment = Alignment::try_decode(alignment)
-                    .warn_on_error(&warn)
-                    .flatten();
-                VarDisplay {
-                    measure,
-                    width,
-                    alignment,
-                }
-            })
-            .collect();
-        Ok(Some(VarDisplayRecord(var_displays)))
-    }
-}
-
 #[derive(Clone, Debug)]
 pub enum MultipleResponseType {
     MultipleDichotomy {
index 7e98575c5ff714462dc577d0368689da889caafc..f9886641f7f582398627b9ab937cf63b1ab8caa3 100644 (file)
@@ -8,10 +8,10 @@ use encoding_rs::Encoding;
 use indexmap::IndexSet;
 
 use crate::{
-    cooked::{Alignment, Measure, MissingValues, Value, VarWidth},
+    cooked::{MissingValues, Value, VarWidth},
     format::Format,
     identifier::{ByIdentifier, HasIdentifier, Identifier},
-    raw::CategoryLabels,
+    raw::{CategoryLabels, Alignment, Measure},
 };
 
 pub type DictIndex = usize;
index ac2d1960acab3c9f03cfcc682666699acfbe748d..18e71d3623f5e4d98de552cc8653100f5b631f23 100644 (file)
@@ -152,6 +152,12 @@ pub enum Error {
     #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
     MalformedString { encoding: String, text: String },
 
+    #[error("Invalid variable measurement level value {0}")]
+    InvalidMeasurement(u32),
+
+    #[error("Invalid variable display alignment value {0}")]
+    InvalidAlignment(u32),
+
     #[error("Details TBD")]
     TBD,
 }
@@ -198,7 +204,7 @@ impl Record {
             2 => Ok(Some(VariableRecord::read(reader, endian)?)),
             3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
             6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
-            7 => Extension::read(reader, endian, warn),
+            7 => Extension::read(reader, endian, var_types.len(), warn),
             999 => Ok(Some(Record::EndOfHeaders(
                 endian.parse(read_bytes(reader)?),
             ))),
@@ -403,7 +409,7 @@ impl Decoder {
         }
         output
     }
-        
+
     fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> {
         self.decode_slice(input.0.as_slice())
     }
@@ -1464,7 +1470,11 @@ impl DocumentRecord<RawDocumentLine> {
     fn decode<'a>(&'a self, decoder: &Decoder) -> DocumentRecord<Cow<'a, str>> {
         DocumentRecord {
             offsets: self.offsets.clone(),
-            lines: self.lines.iter().map(|s| decoder.decode_slice(&s.0)).collect(),
+            lines: self
+                .lines
+                .iter()
+                .map(|s| decoder.decode_slice(&s.0))
+                .collect(),
         }
     }
 }
@@ -1692,23 +1702,97 @@ fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Error> {
     Ok((string.into(), rest))
 }
 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Measure {
+    Nominal,
+    Ordinal,
+    Scale,
+}
+
+impl Measure {
+    fn try_decode(source: u32) -> Result<Option<Measure>, Error> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Measure::Nominal)),
+            2 => Ok(Some(Measure::Ordinal)),
+            3 => Ok(Some(Measure::Scale)),
+            _ => Err(Error::InvalidMeasurement(source)),
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Alignment {
+    Left,
+    Right,
+    Center,
+}
+
+impl Alignment {
+    fn try_decode(source: u32) -> Result<Option<Alignment>, Error> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Alignment::Left)),
+            2 => Ok(Some(Alignment::Right)),
+            3 => Ok(Some(Alignment::Center)),
+            _ => Err(Error::InvalidAlignment(source)),
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VarDisplay {
+    pub measure: Option<Measure>,
+    pub width: Option<u32>,
+    pub alignment: Option<Alignment>,
+}
+
 #[derive(Clone, Debug)]
-pub struct VarDisplayRecord(pub Vec<u32>);
+pub struct VarDisplayRecord(pub Vec<VarDisplay>);
 
-impl ExtensionRecord for VarDisplayRecord {
+impl VarDisplayRecord {
     const SUBTYPE: u32 = 11;
-    const SIZE: Option<u32> = Some(4);
-    const COUNT: Option<u32> = None;
-    const NAME: &'static str = "variable display record";
 
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Error> {
-        ext.check_size::<Self>()?;
+    fn parse(
+        ext: &Extension,
+        n_vars: usize,
+        endian: Endian,
+        warn: &Box<dyn Fn(Error)>,
+    ) -> Result<Record, Error> {
+        if ext.size != 4 {
+            return Err(Error::BadRecordSize {
+                offset: ext.offsets.start,
+                record: String::from("variable display record"),
+                size: ext.size,
+                expected_size: 4,
+            });
+        }
 
+        let has_width = if ext.count as usize == 3 * n_vars {
+            true
+        } else if ext.count as usize == 2 * n_vars {
+            false
+        } else {
+            return Err(Error::TBD);
+        };
+
+        let mut var_displays = Vec::new();
         let mut input = &ext.data[..];
-        let display = (0..ext.count)
-            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
-            .collect();
-        Ok(Record::VarDisplay(VarDisplayRecord(display)))
+        for _ in 0..n_vars {
+            let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
+                .warn_on_error(&warn)
+                .flatten();
+            let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap()));
+            let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
+                .warn_on_error(&warn)
+                .flatten();
+            var_displays.push(VarDisplay {
+                measure,
+                width,
+                alignment,
+            });
+        }
+        Ok(Record::VarDisplay(VarDisplayRecord(var_displays)))
     }
 }
 
@@ -1859,7 +1943,7 @@ impl VariableSet {
 #[derive(Clone, Debug)]
 pub struct VariableSetRecord {
     pub offsets: Range<u64>,
-    pub sets: Vec<VariableSet>
+    pub sets: Vec<VariableSet>,
 }
 
 impl VariableSetRecord {
@@ -1871,7 +1955,10 @@ impl VariableSetRecord {
                 sets.push(set)
             }
         }
-        VariableSetRecord { offsets: source.offsets.clone(), sets }
+        VariableSetRecord {
+            offsets: source.offsets.clone(),
+            sets,
+        }
     }
 }
 
@@ -1935,6 +2022,7 @@ impl Extension {
     fn read<R: Read + Seek>(
         r: &mut R,
         endian: Endian,
+        n_vars: usize,
         warn: &Box<dyn Fn(Error)>,
     ) -> Result<Option<Record>, Error> {
         let subtype = endian.parse(read_bytes(r)?);
@@ -1962,7 +2050,7 @@ impl Extension {
         let result = match subtype {
             IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian),
             FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian),
-            VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, endian),
+            VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, n_vars, endian, warn),
             MultipleResponseRecord::SUBTYPE | 19 => {
                 MultipleResponseRecord::parse(&extension, endian)
             }