work
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 19 Aug 2023 22:10:53 +0000 (15:10 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 19 Aug 2023 23:29:32 +0000 (16:29 -0700)
rust/src/cooked.rs
rust/src/format.rs
rust/src/raw.rs

index 3c0d2e4ff60af1210c0c3a01640a18835bf06aa2..93fe21f2bd39edbb0963ea35c94445439dad530b 100644 (file)
@@ -1,12 +1,12 @@
-use std::{borrow::Cow, collections::{HashSet, HashMap}};
+use std::{borrow::Cow, collections::{HashSet, HashMap}, cmp::Ordering};
 
 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
 use encoding_rs::Encoding;
 use num::integer::div_ceil;
 use crate::{
-    format::{Spec, UncheckedSpec, Width},
+    format::{Spec, UncheckedSpec},
     identifier::{Error as IdError, Identifier},
-    raw::{self, MissingValues},
+    raw::{self, MissingValues, VarType},
     {endian::Endian, Compression},
 };
 use thiserror::Error as ThisError;
@@ -57,10 +57,10 @@ impl Decoder {
             assert!(self.n_generated_names < usize::MAX);
         }
     }
-    fn take_dict_indexes(&mut self, id: &Identifier, width: Width) -> usize {
+    fn take_dict_indexes(&mut self, id: &Identifier, width: VarWidth) -> usize {
         let n = match width {
-            0 => 1,
-            w => div_ceil(w, 8) as usize,
+            VarWidth::Numeric => 1,
+            VarWidth::String(w) => div_ceil(w as usize, 8),
         };
         let dict_index = self.n_dict_indexes;
         self.dict_indexes.insert(self.n_dict_indexes, id.clone());
@@ -122,8 +122,33 @@ impl Decode for Header {
     }
 }
 
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum VarWidth { // Width of a variable: either numeric, or a string with an explicit width.
+    Numeric, // numeric variable; carries no width
+    String(u16), // string variable; payload is its width (presumably in bytes -- TODO confirm)
+}
+
+impl PartialOrd for VarWidth { // Partial order: widths compare only within the same kind.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        match (self, other) {
+            (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal), // two numeric widths are always equal
+            (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)), // two strings order by their widths
+            _ => None, // numeric vs. string: incomparable
+        }
+    }
+}
+
+impl From<VarWidth> for VarType { // Keeps only the numeric/string distinction of a `VarWidth`.
+    fn from(source: VarWidth) -> Self {
+        match source {
+            VarWidth::Numeric => VarType::Numeric,
+            VarWidth::String(_) => VarType::String, // the string's width is discarded
+        }
+    }
+}
+
 pub struct Variable {
-    pub width: Width,
+    pub width: VarWidth,
     pub name: Identifier,
     pub print_format: Spec,
     pub write_format: Spec,
@@ -131,7 +156,7 @@ pub struct Variable {
     pub label: Option<String>,
 }
 
-fn decode_format(raw: raw::Spec, name: &str, width: Width) -> Spec {
+fn decode_format(raw: raw::Spec, name: &str, width: VarWidth) -> Spec {
     UncheckedSpec::try_from(raw)
         .and_then(Spec::try_from)
         .and_then(|x| x.check_width_compatibility(Some(name), width))
@@ -147,8 +172,9 @@ impl Variable {
         input: &crate::raw::Variable,
         warn: impl Fn(Error),
     ) -> Result<Option<Variable>, Error> {
-        match input.width {
-            0..=255 => (),
+        let width = match input.width {
+            0 => VarWidth::Numeric,
+            w @ 1..=255 => VarWidth::String(w as u16),
             -1 => return Ok(None),
             _ => {
                 return Err(Error::BadVariableWidth {
@@ -157,7 +183,6 @@ impl Variable {
                 })
             }
         };
-        let width = input.width as Width;
         let name = decoder.decode_string(&input.name.0, &warn);
         let name = match Identifier::new(&name, decoder.encoding) {
             Ok(name) => {
@@ -234,6 +259,11 @@ impl VariableSet {
     }
 }
 
+/*
+pub struct ValueLabelRecord {
+    pub labels: Vec<(
+}
+*/
 pub struct VariableSetRecord(Vec<VariableSet>);
 
 impl TextRecord for VariableSetRecord {
index 857c05e67f16490ea4dc7b7ac55ee3e21816469a..0fc82a3987670b0cf08b79e1d386df78057d0a5d 100644 (file)
@@ -5,7 +5,10 @@ use std::{
 
 use thiserror::Error as ThisError;
 
-use crate::raw::{VarType, self};
+use crate::{
+    cooked::VarWidth,
+    raw::{self, VarType},
+};
 
 #[derive(ThisError, Debug)]
 pub enum Error {
@@ -173,8 +176,6 @@ pub enum Format {
     AHex,
 }
 
-pub const MAX_STRING: Width = 32767;
-
 pub type Width = u16;
 pub type SignedWidth = i16;
 
@@ -185,8 +186,8 @@ impl Format {
         match self {
             Self::P | Self::PK | Self::PIBHex | Self::RBHex => 16,
             Self::IB | Self::PIB | Self::RB => 8,
-            Self::A => MAX_STRING,
-            Self::AHex => MAX_STRING * 2,
+            Self::A => 32767,
+            Self::AHex => 32767 * 2,
             _ => 40,
         }
     }
@@ -305,10 +306,12 @@ impl Format {
     pub fn var_type(self) -> VarType {
         match self {
             Self::A | Self::AHex => VarType::String,
-            _ => VarType::Number,
+            _ => VarType::Numeric, // every format other than A and AHex is numeric
         }
     }
 
+    /// Checks whether this format is valid for a variable with the given
+    /// `var_type`.
     pub fn check_type_compatibility(
         self,
         variable: Option<&str>,
@@ -316,7 +319,7 @@ impl Format {
     ) -> Result<(), Error> {
         let my_type = self.var_type();
         match (my_type, var_type) {
-            (VarType::Number, VarType::String) => {
+            (VarType::Numeric, VarType::String) => {
                 if let Some(variable) = variable {
                     Err(Error::NamedVariableNotCompatibleWithNumericFormat {
                         variable: variable.into(),
@@ -326,7 +329,7 @@ impl Format {
                     Err(Error::UnnamedVariableNotCompatibleWithNumericFormat(self))
                 }
             }
-            (VarType::String, VarType::Number) => {
+            (VarType::String, VarType::Numeric) => {
                 if let Some(variable) = variable {
                     Err(Error::NamedVariableNotCompatibleWithStringFormat {
                         variable: variable.into(),
@@ -404,10 +407,18 @@ impl Spec {
         self.d
     }
 
-    pub fn default_for_width(w: Width) -> Self {
-        match w {
-            0 => Spec { format: Format::F, w: 8, d: 2 },
-            _ => Spec { format: Format::A, w: w, d: 0 },
+    pub fn default_for_width(var_width: VarWidth) -> Self { // Builds the default spec for a variable of width `var_width`.
+        match var_width {
+            VarWidth::Numeric => Spec { // numeric variables default to format F with w = 8, d = 2
+                format: Format::F,
+                w: 8,
+                d: 2,
+            },
+            VarWidth::String(w) => Spec { // string variables default to format A with the variable's own width
+                format: Format::A,
+                w,
+                d: 0,
+            },
         }
     }
 
@@ -425,11 +436,11 @@ impl Spec {
         Self { format, w, d }
     }
 
-    pub fn var_width(self) -> Width {
+    pub fn var_width(self) -> VarWidth { // Derives a variable width from this spec's format and `w`.
         match self.format {
-            Format::A => self.w,
-            Format::AHex => self.w / 2,
-            _ => 0,
+            Format::A => VarWidth::String(self.w), // A: string width equals the spec width
+            Format::AHex => VarWidth::String(self.w / 2), // AHex: string width is half the spec width (integer division)
+            _ => VarWidth::Numeric, // all other formats yield a numeric width
         }
     }
 
@@ -437,33 +448,43 @@ impl Spec {
         self.format.var_type()
     }
 
-    pub fn check_width_compatibility(self, variable: Option<&str>, w: Width) -> Result<Self, Error> {
-        self.format.check_type_compatibility(variable, self.var_type())?;
-        let expected_width = self.var_width();
-        if w != expected_width {
-            let bad_spec = self;
-            let good_spec = if self.format == Format::A {
-                Spec { w, ..self }
-            } else {
-                Spec { w: w * 2, ..self }
-            };
-            if let Some(variable) = variable {
-                Err(Error::NamedStringVariableBadSpecWidth {
-                    variable: variable.into(),
-                    width: w,
-                    bad_spec,
-                    good_spec,
-                })
-            } else {
-                Err(Error::UnnamedStringVariableBadSpecWidth {
-                    width: w,
-                    bad_spec,
-                    good_spec,
-                })
+    /// Checks whether this format specification is valid for a variable with
+    /// width `var_width`.
+    pub fn check_width_compatibility(
+        self,
+        variable: Option<&str>,
+        var_width: VarWidth,
+    ) -> Result<Self, Error> {
+        // Verify that the format is right for the variable's type.
+        self.format
+            .check_type_compatibility(variable, var_width.into())?; // type comes from the variable's width, not from the spec itself
+
+        if let VarWidth::String(w) = var_width { // only string variables get a width check; numeric ones fall through to Ok
+            if var_width != self.var_width() { // the width implied by the spec differs from the variable's width
+                let bad_spec = self;
+                let good_spec = if self.format == Format::A { // suggested replacement: same spec, width adjusted to the variable
+                    Spec { w, ..self }
+                } else {
+                    Spec { w: w * 2, ..self } // non-A format: spec width is twice the variable width (mirrors the `/ 2` in var_width for AHex)
+                };
+                if let Some(variable) = variable { // choose the named or unnamed error variant
+                    return Err(Error::NamedStringVariableBadSpecWidth {
+                        variable: variable.into(),
+                        width: w,
+                        bad_spec,
+                        good_spec,
+                    });
+                } else {
+                    return Err(Error::UnnamedStringVariableBadSpecWidth {
+                        width: w,
+                        bad_spec,
+                        good_spec,
+                    });
+                }
             }
-        } else {
-            Ok(self)
         }
+
+        Ok(self) // compatible: the spec is returned unchanged
     }
 }
 
index ab3ad844d9a3cee79b731e00733ebb0a2e0145f3..9fb92f52b07defbb4467e2942d567f1e0a7f53c2 100644 (file)
@@ -326,14 +326,14 @@ impl TryFrom<[u8; 4]> for Magic {
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
 pub enum VarType {
-    Number,
+    Numeric, // renamed from `Number` by this change
     String,
 }
 
 impl VarType {
     fn from_width(width: i32) -> VarType {
         match width {
-            0 => VarType::Number,
+            0 => VarType::Numeric, // width 0 maps to numeric; every other width maps to string
             _ => VarType::String,
         }
     }
@@ -506,7 +506,7 @@ impl Value {
     pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
         match var_type {
             VarType::String => Value::String(UnencodedStr(raw)),
-            VarType::Number => {
+            VarType::Numeric => {
                 let number: f64 = endian.parse(raw);
                 Value::Number((number != -f64::MAX).then_some(number))
             }
@@ -567,7 +567,7 @@ impl Value {
                 match code {
                     0 => (),
                     1..=251 => match var_type {
-                        VarType::Number => break Value::Number(Some(code as f64 - bias)),
+                        VarType::Numeric => break Value::Number(Some(code as f64 - bias)),
                         VarType::String => {
                             break Value::String(UnencodedStr(endian.to_bytes(code as f64 - bias)))
                         }
@@ -586,7 +586,7 @@ impl Value {
                     253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
                     254 => match var_type {
                         VarType::String => break Value::String(UnencodedStr(*b"        ")), // XXX EBCDIC
-                        VarType::Number => {
+                        VarType::Numeric => {
                             return Err(Error::CompressedStringExpected {
                                 offset: case_start,
                                 case_ofs: reader.stream_position()? - case_start,
@@ -594,7 +594,7 @@ impl Value {
                         }
                     },
                     255 => match var_type {
-                        VarType::Number => break Value::Number(None),
+                        VarType::Numeric => break Value::Number(None),
                         VarType::String => {
                             return Err(Error::CompressedNumberExpected {
                                 offset: case_start,