work
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 13 Aug 2023 17:25:57 +0000 (10:25 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 13 Aug 2023 17:25:57 +0000 (10:25 -0700)
rust/src/cooked.rs
rust/src/format.rs [new file with mode: 0644]

index c84ffd65de177cc97b091db6e059aeac7c33777a..ae87b5abc225673c496ca28ddda14c470fc7e326 100644 (file)
@@ -6,7 +6,7 @@ use encoding_rs::Encoding;
 use crate::{
     Error,
     {endian::Endian, CategoryLabels, Compression},
-    format::UncheckedFormat,
+    format::Spec,
 };
 
 pub struct Decoder {
@@ -73,8 +73,8 @@ impl Decode for Header {
 pub struct Variable {
     pub width: i32,
     pub name: String,
-    pub print_format: UncheckedFormat,
-    pub write_format: UncheckedFormat,
+    pub print_format: Spec,
+    pub write_format: Spec,
 }
 
 #[derive(Clone)]
diff --git a/rust/src/format.rs b/rust/src/format.rs
new file mode 100644 (file)
index 0000000..81f50ba
--- /dev/null
@@ -0,0 +1,576 @@
+use std::{
+    fmt::{Display, Formatter, Result as FmtResult},
+    ops::RangeInclusive,
+};
+
+use thiserror::Error as ThisError;
+
+use crate::raw::VarType;
+
+/// Errors produced while decoding or validating format specifications.
+///
+/// Each variant carries the offending value so that its `#[error]` message
+/// can show it.
+#[derive(ThisError, Debug)]
+pub enum Error {
+    /// A raw format code that `TryFrom<u16> for Format` does not recognize.
+    #[error("Unknown format type {value}")]
+    UnknownFormat { value: u16 },
+
+    /// The width is not a multiple of the format's `width_step()`.
+    #[error("Output format {0} specifies width {}, but {} requires an even width.", .0.w, .0.format)]
+    OddWidthNotAllowed(UncheckedSpec),
+
+    /// The width lies outside `min_width()..=max_width()` for the format.
+    #[error("Output format {0} specifies width {}, but {} requires a width between {} and {}.", .0.w, .0.format, .0.format.min_width(), .0.format.max_width())]
+    BadWidth(UncheckedSpec),
+
+    /// Decimal places were requested for a format that never allows them.
+    #[error("Output format {0} specifies decimal places, but {} format does not allow any decimals.", .0.format)]
+    DecimalsNotAllowedForFormat(UncheckedSpec),
+
+    /// Decimal places were requested, but none are allowed at the given width.
+    #[error("Output format {0} specifies {} decimal places, but with a width of {}, {} does not allow any decimal places.", .0.d, .0.w, .0.format)]
+    DecimalsNotAllowedForWidth(UncheckedSpec),
+
+    /// More decimal places were requested than the given width allows;
+    /// `max_d` is the largest number allowed at that width.
+    #[error("Output format {spec} specifies {} decimal places but, with a width of {}, {} allows at most {max_d} decimal places.", .spec.d, .spec.w, .spec.format)]
+    TooManyDecimalsForWidth {
+        spec: UncheckedSpec,
+        max_d: Decimals,
+    },
+
+    /// A numeric format was checked against a string variable and no
+    /// variable name was supplied.
+    #[error("String variable is not compatible with numeric format {0}.")]
+    UnnamedVariableNotCompatibleWithNumericFormat(Format),
+
+    /// A string format was checked against a numeric variable and no
+    /// variable name was supplied.
+    #[error("Numeric variable is not compatible with string format {0}.")]
+    UnnamedVariableNotCompatibleWithStringFormat(Format),
+
+    /// A numeric format was checked against the named string variable.
+    #[error("String variable {variable} is not compatible with numeric format {format}.")]
+    NamedVariableNotCompatibleWithNumericFormat { variable: String, format: Format },
+
+    /// A string format was checked against the named numeric variable.
+    #[error("Numeric variable {variable} is not compatible with string format {format}.")]
+    NamedVariableNotCompatibleWithStringFormat { variable: String, format: Format },
+
+    /// The named variable's width does not match the width implied by
+    /// `bad_spec`; `good_spec` is the suggested replacement.
+    #[error("String variable {variable} with width {width} is not compatible with format {bad_spec}.  Use format {good_spec} instead.")]
+    NamedStringVariableBadSpecWidth {
+        variable: String,
+        width: Width,
+        bad_spec: Spec,
+        good_spec: Spec,
+    },
+
+    /// Same as `NamedStringVariableBadSpecWidth`, for the case where no
+    /// variable name was supplied.
+    #[error("String variable with width {width} is not compatible with format {bad_spec}.  Use format {good_spec} instead.")]
+    UnnamedStringVariableBadSpecWidth {
+        width: Width,
+        bad_spec: Spec,
+        good_spec: Spec,
+    },
+}
+
+/// Coarse grouping of [`Format`] values.
+///
+/// Every format maps to exactly one category through
+/// `impl From<Format> for Category`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum Category {
+    // Numeric formats.
+    /// `F`, `Comma`, `Dot`, `Dollar`, `Pct`, and `E`.
+    Basic,
+    /// The `CC(_)` formats.
+    Custom,
+    /// `N` and `Z`.
+    Legacy,
+    /// `P`, `PK`, `IB`, `PIB`, and `RB`.
+    Binary,
+    /// `PIBHex` and `RBHex`.
+    Hex,
+    /// Date formats, including `DateTime` and `YMDHMS`.
+    Date,
+    /// `MTime`, `Time`, and `DTime`.
+    Time,
+    /// `WkDay` and `Month`.
+    DateComponent,
+
+    // String formats.
+    /// `A` and `AHex`.
+    String,
+}
+
+impl From<Format> for Category {
+    fn from(source: Format) -> Self {
+        match source {
+            Format::F | Format::Comma | Format::Dot | Format::Dollar | Format::Pct | Format::E => {
+                Self::Basic
+            }
+            Format::CC(_) => Self::Custom,
+            Format::N | Format::Z => Self::Legacy,
+            Format::P | Format::PK | Format::IB | Format::PIB | Format::RB => Self::Binary,
+            Format::PIBHex | Format::RBHex => Self::Hex,
+            Format::Date
+            | Format::ADate
+            | Format::EDate
+            | Format::JDate
+            | Format::SDate
+            | Format::QYr
+            | Format::MoYr
+            | Format::WkYr
+            | Format::DateTime
+            | Format::YMDHMS => Self::Date,
+            Format::MTime | Format::Time | Format::DTime => Self::Time,
+            Format::WkDay | Format::Month => Self::DateComponent,
+            Format::A | Format::AHex => Self::String,
+        }
+    }
+}
+
+/// Selects one of the five custom currency formats wrapped by
+/// `Format::CC`.
+///
+/// The `Display` impl writes the single letter `A` through `E`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum CC {
+    A,
+    B,
+    C,
+    D,
+    E,
+}
+
+impl Display for CC {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let s = match self {
+            CC::A => "A",
+            CC::B => "B",
+            CC::C => "C",
+            CC::D => "D",
+            CC::E => "E",
+        };
+        write!(f, "{}", s)
+    }
+}
+
+/// A format type, without width or decimals.
+///
+/// `A` and `AHex` are the string formats (see `Format::var_type`); every
+/// other variant is numeric.  Raw numeric codes are decoded by
+/// `TryFrom<u16> for Format`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum Format {
+    // Basic numeric formats.
+    F,
+    Comma,
+    Dot,
+    Dollar,
+    Pct,
+    E,
+
+    // Custom currency formats.
+    /// One of the five custom currency formats, selected by the [`CC`] value.
+    CC(CC),
+
+    // Legacy numeric formats.
+    N,
+    Z,
+
+    // Binary and hexadecimal formats.
+    P,
+    PK,
+    IB,
+    PIB,
+    PIBHex,
+    RB,
+    RBHex,
+
+    // Time and date formats.
+    Date,
+    ADate,
+    EDate,
+    JDate,
+    SDate,
+    QYr,
+    MoYr,
+    WkYr,
+    DateTime,
+    YMDHMS,
+    MTime,
+    Time,
+    DTime,
+
+    // Date component formats.
+    WkDay,
+    Month,
+
+    // String formats.
+    A,
+    AHex,
+}
+
+/// Maximum width of format `A`; format `AHex` allows twice this
+/// (see `Format::max_width`).
+pub const MAX_STRING: Width = 32767;
+
+// Width of a format specification.
+type Width = u16;
+// Signed counterpart of `Width`, used by `Format::max_decimals` so that
+// intermediate results such as `width - 21` may go negative before clamping.
+type SignedWidth = i16;
+
+// Number of decimal places in a format specification.
+type Decimals = u8;
+
+impl Format {
+    pub fn max_width(self) -> Width {
+        match self {
+            Self::P | Self::PK | Self::PIBHex | Self::RBHex => 16,
+            Self::IB | Self::PIB | Self::RB => 8,
+            Self::A => MAX_STRING,
+            Self::AHex => MAX_STRING * 2,
+            _ => 40,
+        }
+    }
+
+    pub fn min_width(self) -> Width {
+        match self {
+            // Basic numeric formats.
+            Self::F => 1,
+            Self::Comma => 1,
+            Self::Dot => 1,
+            Self::Dollar => 2,
+            Self::Pct => 2,
+            Self::E => 6,
+
+            // Custom currency formats.
+            Self::CC(_) => 2,
+
+            // Legacy numeric formats.
+            Self::N => 1,
+            Self::Z => 1,
+
+            // Binary and hexadecimal formats.
+            Self::P => 1,
+            Self::PK => 1,
+            Self::IB => 1,
+            Self::PIB => 1,
+            Self::PIBHex => 2,
+            Self::RB => 2,
+            Self::RBHex => 4,
+
+            // Time and date formats.
+            Self::Date => 9,
+            Self::ADate => 8,
+            Self::EDate => 8,
+            Self::JDate => 5,
+            Self::SDate => 8,
+            Self::QYr => 6,
+            Self::MoYr => 6,
+            Self::WkYr => 8,
+            Self::DateTime => 17,
+            Self::YMDHMS => 16,
+            Self::MTime => 5,
+            Self::Time => 5,
+            Self::DTime => 8,
+
+            // Date component formats.
+            Self::WkDay => 2,
+            Self::Month => 3,
+
+            // String formats.
+            Self::A => 1,
+            Self::AHex => 2,
+        }
+    }
+
+    pub fn width_range(self) -> RangeInclusive<Width> {
+        self.min_width()..=self.max_width()
+    }
+
+    pub fn max_decimals(self, width: Width) -> Decimals {
+        let width = width.clamp(1, 40) as SignedWidth;
+        let max = match self {
+            Self::F | Self::Comma | Self::Dot | Self::CC(_) => width - 1,
+            Self::Dollar | Self::Pct => width - 2,
+            Self::E => width - 7,
+            Self::N | Self::Z => width,
+            Self::P => width * 2 - 1,
+            Self::PK => width * 2,
+            Self::IB | Self::PIB => max_digits_for_bytes(width as usize) as SignedWidth,
+            Self::PIBHex => 0,
+            Self::RB | Self::RBHex => 16,
+            Self::Date
+            | Self::ADate
+            | Self::EDate
+            | Self::JDate
+            | Self::SDate
+            | Self::QYr
+            | Self::MoYr
+            | Self::WkYr => 0,
+            Self::DateTime => width - 21,
+            Self::YMDHMS => width - 20,
+            Self::MTime => width - 6,
+            Self::Time => width - 9,
+            Self::DTime => width - 12,
+            Self::WkDay | Self::Month | Self::A | Self::AHex => 0,
+        };
+        max.clamp(0, 16) as Decimals
+    }
+
+    pub fn takes_decimals(self) -> bool {
+        self.max_decimals(Width::MAX) > 0
+    }
+
+    pub fn category(self) -> Category {
+        self.into()
+    }
+
+    pub fn width_step(self) -> Width {
+        if self.category() == Category::Hex || self == Self::AHex {
+            2
+        } else {
+            1
+        }
+    }
+
+    pub fn clamp_width(self, width: Width) -> Width {
+        let (min, max) = self.width_range().into_inner();
+        let width = width.clamp(min, max);
+        if self.width_step() == 2 {
+            width / 2 * 2
+        } else {
+            width
+        }
+    }
+
+    pub fn var_type(self) -> VarType {
+        match self {
+            Self::A | Self::AHex => VarType::String,
+            _ => VarType::Number,
+        }
+    }
+
+    pub fn check_type_compatibility(
+        self,
+        variable: Option<&str>,
+        var_type: VarType,
+    ) -> Result<(), Error> {
+        let my_type = self.var_type();
+        match (my_type, var_type) {
+            (VarType::Number, VarType::String) => {
+                if let Some(variable) = variable {
+                    Err(Error::NamedVariableNotCompatibleWithNumericFormat {
+                        variable: variable.into(),
+                        format: self,
+                    })
+                } else {
+                    Err(Error::UnnamedVariableNotCompatibleWithNumericFormat(self))
+                }
+            }
+            (VarType::String, VarType::Number) => {
+                if let Some(variable) = variable {
+                    Err(Error::NamedVariableNotCompatibleWithStringFormat {
+                        variable: variable.into(),
+                        format: self,
+                    })
+                } else {
+                    Err(Error::UnnamedVariableNotCompatibleWithStringFormat(self))
+                }
+            }
+            _ => Ok(()),
+        }
+    }
+}
+
+impl Display for Format {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let s = match self {
+            Self::F => "F",
+            Self::Comma => "COMMA",
+            Self::Dot => "DOT",
+            Self::Dollar => "DOLLAR",
+            Self::Pct => "PCT",
+            Self::E => "E",
+            Self::CC(cc) => return write!(f, "{}", cc),
+            Self::N => "N",
+            Self::Z => "Z",
+            Self::P => "P",
+            Self::PK => "PK",
+            Self::IB => "IB",
+            Self::PIB => "PIB",
+            Self::PIBHex => "PIBHEX",
+            Self::RB => "RB",
+            Self::RBHex => "RBHEX",
+            Self::Date => "DATE",
+            Self::ADate => "ADATE",
+            Self::EDate => "EDATE",
+            Self::JDate => "JDATE",
+            Self::SDate => "SDATE",
+            Self::QYr => "QYR",
+            Self::MoYr => "MOYR",
+            Self::WkYr => "WKYR",
+            Self::DateTime => "DATETIME",
+            Self::YMDHMS => "YMDHMS",
+            Self::MTime => "MTIME",
+            Self::Time => "TIME",
+            Self::DTime => "DTIME",
+            Self::WkDay => "WKDAY",
+            Self::Month => "MONTH",
+            Self::A => "A",
+            Self::AHex => "AHEX",
+        };
+        write!(f, "{}", s)
+    }
+}
+
+fn max_digits_for_bytes(bytes: usize) -> usize {
+    *[0, 3, 5, 8, 10, 13, 15, 17].get(bytes).unwrap_or(&20)
+}
+
+/// A format specification: a format type plus a width and a number of
+/// decimal places.
+///
+/// The fields are private.  Values are built either by validating an
+/// [`UncheckedSpec`] with `TryFrom`, or by adjusting one with
+/// `Spec::fixed_from`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub struct Spec {
+    // Format type.
+    format: Format,
+    // Width.
+    w: Width,
+    // Number of decimal places.
+    d: Decimals,
+}
+
+impl Spec {
+    pub fn format(self) -> Format {
+        self.format
+    }
+    pub fn w(self) -> Width {
+        self.w
+    }
+    pub fn d(self) -> Decimals {
+        self.d
+    }
+
+    pub fn fixed_from(source: &UncheckedSpec) -> Self {
+        let UncheckedSpec { format, w, d } = *source;
+        let (min, max) = format.width_range().into_inner();
+        let mut w = w.clamp(min, max);
+        if d <= format.max_decimals(Width::MAX) {
+            while d > format.max_decimals(w) {
+                w += 1;
+                assert!(w <= 40);
+            }
+        }
+        let d = d.clamp(0, format.max_decimals(w));
+        Self { format, w, d }
+    }
+
+    pub fn var_width(self) -> Width {
+        match self.format {
+            Format::A => self.w,
+            Format::AHex => self.w / 2,
+            _ => 0,
+        }
+    }
+
+    pub fn var_type(self) -> VarType {
+        self.format.var_type()
+    }
+
+    pub fn check_width_compatibility(self, variable: Option<&str>, w: Width) -> Result<(), Error> {
+        self.format.check_type_compatibility(variable, self.var_type())?;
+        let expected_width = self.var_width();
+        if w != expected_width {
+            let bad_spec = self;
+            let good_spec = if self.format == Format::A {
+                Spec { w, ..self }
+            } else {
+                Spec { w: w * 2, ..self }
+            };
+            if let Some(variable) = variable {
+                Err(Error::NamedStringVariableBadSpecWidth {
+                    variable: variable.into(),
+                    width: w,
+                    bad_spec,
+                    good_spec,
+                })
+            } else {
+                Err(Error::UnnamedStringVariableBadSpecWidth {
+                    width: w,
+                    bad_spec,
+                    good_spec,
+                })
+            }
+        } else {
+            Ok(())
+        }
+    }
+}
+
+impl Display for Spec {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "{}{}", self.format, self.w)?;
+        if self.format.takes_decimals() || self.d > 0 {
+            write!(f, ".{}", self.d)?;
+        }
+        Ok(())
+    }
+}
+
+impl TryFrom<UncheckedSpec> for Spec {
+    type Error = Error;
+
+    fn try_from(source: UncheckedSpec) -> Result<Self, Self::Error> {
+        let UncheckedSpec { format, w, d } = source;
+        let max_d = format.max_decimals(w);
+        if w % format.width_step() != 0 {
+            Err(Error::OddWidthNotAllowed(source))
+        } else if !format.width_range().contains(&w) {
+            Err(Error::BadWidth(source))
+        } else if d > max_d {
+            if format.takes_decimals() {
+                Err(Error::DecimalsNotAllowedForFormat(source))
+            } else if max_d > 0 {
+                Err(Error::TooManyDecimalsForWidth {
+                    spec: source,
+                    max_d,
+                })
+            } else {
+                Err(Error::DecimalsNotAllowedForWidth(source))
+            }
+        } else {
+            Ok(Spec { format, w, d })
+        }
+    }
+}
+
+impl TryFrom<u16> for Format {
+    type Error = Error;
+
+    fn try_from(source: u16) -> Result<Self, Self::Error> {
+        match source {
+            1 => Ok(Self::A),
+            2 => Ok(Self::AHex),
+            3 => Ok(Self::Comma),
+            4 => Ok(Self::Dollar),
+            5 => Ok(Self::F),
+            6 => Ok(Self::IB),
+            7 => Ok(Self::PIBHex),
+            8 => Ok(Self::P),
+            9 => Ok(Self::PIB),
+            10 => Ok(Self::PK),
+            11 => Ok(Self::RB),
+            12 => Ok(Self::RBHex),
+            15 => Ok(Self::Z),
+            16 => Ok(Self::N),
+            17 => Ok(Self::E),
+            20 => Ok(Self::Date),
+            21 => Ok(Self::Time),
+            22 => Ok(Self::DateTime),
+            23 => Ok(Self::ADate),
+            24 => Ok(Self::JDate),
+            25 => Ok(Self::DTime),
+            26 => Ok(Self::WkDay),
+            27 => Ok(Self::Month),
+            28 => Ok(Self::MoYr),
+            29 => Ok(Self::QYr),
+            30 => Ok(Self::WkYr),
+            31 => Ok(Self::Pct),
+            32 => Ok(Self::Dot),
+            33 => Ok(Self::CC(CC::A)),
+            34 => Ok(Self::CC(CC::B)),
+            35 => Ok(Self::CC(CC::C)),
+            36 => Ok(Self::CC(CC::D)),
+            37 => Ok(Self::CC(CC::E)),
+            38 => Ok(Self::EDate),
+            39 => Ok(Self::SDate),
+            40 => Ok(Self::MTime),
+            41 => Ok(Self::YMDHMS),
+            _ => Err(Error::UnknownFormat { value: source }),
+        }
+    }
+}
+
+/// A format specification whose width and decimals have not been validated
+/// against the format.
+///
+/// It can be decoded from a packed `u32` with `TryFrom<u32>`, and turned
+/// into a [`Spec`] with `TryFrom` (validating) or `Spec::fixed_from`
+/// (adjusting).
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub struct UncheckedSpec {
+    /// Format type.
+    pub format: Format,
+
+    /// Width, not yet checked against the format's limits.
+    pub w: Width,
+
+    /// Number of decimal places, not yet checked against the format's limits.
+    pub d: Decimals,
+}
+
+impl TryFrom<u32> for UncheckedSpec {
+    type Error = Error;
+
+    fn try_from(source: u32) -> Result<Self, Self::Error> {
+        let raw_format = (source >> 16) as u16;
+        let format = raw_format.try_into()?;
+        let w = ((source >> 8) & 0xff) as Width;
+        let d = (source & 0xff) as Decimals;
+        Ok(Self { format, w, d })
+    }
+}
+
+impl Display for UncheckedSpec {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "{}{}", self.format, self.w)?;
+        if self.format.takes_decimals() || self.d > 0 {
+            write!(f, ".{}", self.d)?;
+        }
+        Ok(())
+    }
+}