From b44494fd38aed720a165fc41a34efb8f095b6132 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 13 Aug 2023 10:25:57 -0700 Subject: [PATCH] work --- rust/src/cooked.rs | 6 +- rust/src/format.rs | 576 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 579 insertions(+), 3 deletions(-) create mode 100644 rust/src/format.rs diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index c84ffd65de..ae87b5abc2 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -6,7 +6,7 @@ use encoding_rs::Encoding; use crate::{ Error, {endian::Endian, CategoryLabels, Compression}, - format::UncheckedFormat, + format::Spec, }; pub struct Decoder { @@ -73,8 +73,8 @@ impl Decode for Header { pub struct Variable { pub width: i32, pub name: String, - pub print_format: UncheckedFormat, - pub write_format: UncheckedFormat, + pub print_format: Spec, + pub write_format: Spec, } #[derive(Clone)] diff --git a/rust/src/format.rs b/rust/src/format.rs new file mode 100644 index 0000000000..81f50ba0ba --- /dev/null +++ b/rust/src/format.rs @@ -0,0 +1,576 @@ +use std::{ + fmt::{Display, Formatter, Result as FmtResult}, + ops::RangeInclusive, +}; + +use thiserror::Error as ThisError; + +use crate::raw::VarType; + +#[derive(ThisError, Debug)] +pub enum Error { + #[error("Unknown format type {value}")] + UnknownFormat { value: u16 }, + + #[error("Output format {0} specifies width {}, but {} requires an even width.", .0.w, .0.format)] + OddWidthNotAllowed(UncheckedSpec), + + #[error("Output format {0} specifies width {}, but {} requires a width between {} and {}.", .0.w, .0.format, .0.format.min_width(), .0.format.max_width())] + BadWidth(UncheckedSpec), + + #[error("Output format {0} specifies decimal places, but {} format does not allow any decimals.", .0.format)] + DecimalsNotAllowedForFormat(UncheckedSpec), + + #[error("Output format {0} specifies {} decimal places, but with a width of {}, {} does not allow any decimal places.", .0.d, .0.w, .0.format)] + DecimalsNotAllowedForWidth(UncheckedSpec), + + #[error("Output format {spec} specifies {} decimal places but, with a width of {}, {} allows at most {max_d} decimal places.", .spec.d, .spec.w, .spec.format)] + TooManyDecimalsForWidth { + spec: UncheckedSpec, + max_d: Decimals, + }, + + #[error("String variable is not compatible with numeric format {0}.")] + UnnamedVariableNotCompatibleWithNumericFormat(Format), + + #[error("Numeric variable is not compatible with string format {0}.")] + UnnamedVariableNotCompatibleWithStringFormat(Format), + + #[error("String variable {variable} is not compatible with numeric format {format}.")] + NamedVariableNotCompatibleWithNumericFormat { variable: String, format: Format }, + + #[error("Numeric variable {variable} is not compatible with string format {format}.")] + NamedVariableNotCompatibleWithStringFormat { variable: String, format: Format }, + + #[error("String variable {variable} with width {width} is not compatible with format {bad_spec}. Use format {good_spec} instead.")] + NamedStringVariableBadSpecWidth { + variable: String, + width: Width, + bad_spec: Spec, + good_spec: Spec, + }, + + #[error("String variable with width {width} is not compatible with format {bad_spec}. Use format {good_spec} instead.")] + UnnamedStringVariableBadSpecWidth { + width: Width, + bad_spec: Spec, + good_spec: Spec, + }, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum Category { + // Numeric formats. + Basic, + Custom, + Legacy, + Binary, + Hex, + Date, + Time, + DateComponent, + + // String formats. + String, +} + +impl From for Category { + fn from(source: Format) -> Self { + match source { + Format::F | Format::Comma | Format::Dot | Format::Dollar | Format::Pct | Format::E => { + Self::Basic + } + Format::CC(_) => Self::Custom, + Format::N | Format::Z => Self::Legacy, + Format::P | Format::PK | Format::IB | Format::PIB | Format::RB => Self::Binary, + Format::PIBHex | Format::RBHex => Self::Hex, + Format::Date + | Format::ADate + | Format::EDate + | Format::JDate + | Format::SDate + | Format::QYr + | Format::MoYr + | Format::WkYr + | Format::DateTime + | Format::YMDHMS => Self::Date, + Format::MTime | Format::Time | Format::DTime => Self::Time, + Format::WkDay | Format::Month => Self::DateComponent, + Format::A | Format::AHex => Self::String, + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum CC { + A, + B, + C, + D, + E, +} + +impl Display for CC { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + let s = match self { + CC::A => "A", + CC::B => "B", + CC::C => "C", + CC::D => "D", + CC::E => "E", + }; + write!(f, "{}", s) + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum Format { + // Basic numeric formats. + F, + Comma, + Dot, + Dollar, + Pct, + E, + + // Custom currency formats. + CC(CC), + + // Legacy numeric formats. + N, + Z, + + // Binary and hexadecimal formats. + P, + PK, + IB, + PIB, + PIBHex, + RB, + RBHex, + + // Time and date formats. + Date, + ADate, + EDate, + JDate, + SDate, + QYr, + MoYr, + WkYr, + DateTime, + YMDHMS, + MTime, + Time, + DTime, + + // Date component formats. + WkDay, + Month, + + // String formats. + A, + AHex, +} + +pub const MAX_STRING: Width = 32767; + +type Width = u16; +type SignedWidth = i16; + +type Decimals = u8; + +impl Format { + pub fn max_width(self) -> Width { + match self { + Self::P | Self::PK | Self::PIBHex | Self::RBHex => 16, + Self::IB | Self::PIB | Self::RB => 8, + Self::A => MAX_STRING, + Self::AHex => MAX_STRING * 2, + _ => 40, + } + } + + pub fn min_width(self) -> Width { + match self { + // Basic numeric formats. + Self::F => 1, + Self::Comma => 1, + Self::Dot => 1, + Self::Dollar => 2, + Self::Pct => 2, + Self::E => 6, + + // Custom currency formats. + Self::CC(_) => 2, + + // Legacy numeric formats. + Self::N => 1, + Self::Z => 1, + + // Binary and hexadecimal formats. + Self::P => 1, + Self::PK => 1, + Self::IB => 1, + Self::PIB => 1, + Self::PIBHex => 2, + Self::RB => 2, + Self::RBHex => 4, + + // Time and date formats. + Self::Date => 9, + Self::ADate => 8, + Self::EDate => 8, + Self::JDate => 5, + Self::SDate => 8, + Self::QYr => 6, + Self::MoYr => 6, + Self::WkYr => 8, + Self::DateTime => 17, + Self::YMDHMS => 16, + Self::MTime => 5, + Self::Time => 5, + Self::DTime => 8, + + // Date component formats. + Self::WkDay => 2, + Self::Month => 3, + + // String formats. + Self::A => 1, + Self::AHex => 2, + } + } + + pub fn width_range(self) -> RangeInclusive { + self.min_width()..=self.max_width() + } + + pub fn max_decimals(self, width: Width) -> Decimals { + let width = width.clamp(1, 40) as SignedWidth; + let max = match self { + Self::F | Self::Comma | Self::Dot | Self::CC(_) => width - 1, + Self::Dollar | Self::Pct => width - 2, + Self::E => width - 7, + Self::N | Self::Z => width, + Self::P => width * 2 - 1, + Self::PK => width * 2, + Self::IB | Self::PIB => max_digits_for_bytes(width as usize) as SignedWidth, + Self::PIBHex => 0, + Self::RB | Self::RBHex => 16, + Self::Date + | Self::ADate + | Self::EDate + | Self::JDate + | Self::SDate + | Self::QYr + | Self::MoYr + | Self::WkYr => 0, + Self::DateTime => width - 21, + Self::YMDHMS => width - 20, + Self::MTime => width - 6, + Self::Time => width - 9, + Self::DTime => width - 12, + Self::WkDay | Self::Month | Self::A | Self::AHex => 0, + }; + max.clamp(0, 16) as Decimals + } + + pub fn takes_decimals(self) -> bool { + self.max_decimals(Width::MAX) > 0 + } + + pub fn category(self) -> Category { + self.into() + } + + pub fn width_step(self) -> Width { + if self.category() == Category::Hex || self == Self::AHex { + 2 + } else { + 1 + } + } + + pub fn clamp_width(self, width: Width) -> Width { + let (min, max) = self.width_range().into_inner(); + let width = width.clamp(min, max); + if self.width_step() == 2 { + width / 2 * 2 + } else { + width + } + } + + pub fn var_type(self) -> VarType { + match self { + Self::A | Self::AHex => VarType::String, + _ => VarType::Number, + } + } + + pub fn check_type_compatibility( + self, + variable: Option<&str>, + var_type: VarType, + ) -> Result<(), Error> { + let my_type = self.var_type(); + match (my_type, var_type) { + (VarType::Number, VarType::String) => { + if let Some(variable) = variable { + Err(Error::NamedVariableNotCompatibleWithNumericFormat { + variable: variable.into(), + format: self, + }) + } else { + Err(Error::UnnamedVariableNotCompatibleWithNumericFormat(self)) + } + } + (VarType::String, VarType::Number) => { + if let Some(variable) = variable { + Err(Error::NamedVariableNotCompatibleWithStringFormat { + variable: variable.into(), + format: self, + }) + } else { + Err(Error::UnnamedVariableNotCompatibleWithStringFormat(self)) + } + } + _ => Ok(()), + } + } +} + +impl Display for Format { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + let s = match self { + Self::F => "F", + Self::Comma => "COMMA", + Self::Dot => "DOT", + Self::Dollar => "DOLLAR", + Self::Pct => "PCT", + Self::E => "E", + Self::CC(cc) => return write!(f, "{}", cc), + Self::N => "N", + Self::Z => "Z", + Self::P => "P", + Self::PK => "PK", + Self::IB => "IB", + Self::PIB => "PIB", + Self::PIBHex => "PIBHEX", + Self::RB => "RB", + Self::RBHex => "RBHEX", + Self::Date => "DATE", + Self::ADate => "ADATE", + Self::EDate => "EDATE", + Self::JDate => "JDATE", + Self::SDate => "SDATE", + Self::QYr => "QYR", + Self::MoYr => "MOYR", + Self::WkYr => "WKYR", + Self::DateTime => "DATETIME", + Self::YMDHMS => "YMDHMS", + Self::MTime => "MTIME", + Self::Time => "TIME", + Self::DTime => "DTIME", + Self::WkDay => "WKDAY", + Self::Month => "MONTH", + Self::A => "A", + Self::AHex => "AHEX", + }; + write!(f, "{}", s) + } +} + +fn max_digits_for_bytes(bytes: usize) -> usize { + *[0, 3, 5, 8, 10, 13, 15, 17].get(bytes).unwrap_or(&20) +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct Spec { + format: Format, + w: Width, + d: Decimals, +} + +impl Spec { + pub fn format(self) -> Format { + self.format + } + pub fn w(self) -> Width { + self.w + } + pub fn d(self) -> Decimals { + self.d + } + + pub fn fixed_from(source: &UncheckedSpec) -> Self { + let UncheckedSpec { format, w, d } = *source; + let (min, max) = format.width_range().into_inner(); + let mut w = w.clamp(min, max); + if d <= format.max_decimals(Width::MAX) { + while d > format.max_decimals(w) { + w += 1; + assert!(w <= 40); + } + } + let d = d.clamp(0, format.max_decimals(w)); + Self { format, w, d } + } + + pub fn var_width(self) -> Width { + match self.format { + Format::A => self.w, + Format::AHex => self.w / 2, + _ => 0, + } + } + + pub fn var_type(self) -> VarType { + self.format.var_type() + } + + pub fn check_width_compatibility(self, variable: Option<&str>, w: Width) -> Result<(), Error> { + self.format.check_type_compatibility(variable, self.var_type())?; + let expected_width = self.var_width(); + if w != expected_width { + let bad_spec = self; + let good_spec = if self.format == Format::A { + Spec { w, ..self } + } else { + Spec { w: w * 2, ..self } + }; + if let Some(variable) = variable { + Err(Error::NamedStringVariableBadSpecWidth { + variable: variable.into(), + width: w, + bad_spec, + good_spec, + }) + } else { + Err(Error::UnnamedStringVariableBadSpecWidth { + width: w, + bad_spec, + good_spec, + }) + } + } else { + Ok(()) + } + } +} + +impl Display for Spec { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + write!(f, "{}{}", self.format, self.w)?; + if self.format.takes_decimals() || self.d > 0 { + write!(f, ".{}", self.d)?; + } + Ok(()) + } +} + +impl TryFrom for Spec { + type Error = Error; + + fn try_from(source: UncheckedSpec) -> Result { + let UncheckedSpec { format, w, d } = source; + let max_d = format.max_decimals(w); + if w % format.width_step() != 0 { + Err(Error::OddWidthNotAllowed(source)) + } else if !format.width_range().contains(&w) { + Err(Error::BadWidth(source)) + } else if d > max_d { + if format.takes_decimals() { + Err(Error::DecimalsNotAllowedForFormat(source)) + } else if max_d > 0 { + Err(Error::TooManyDecimalsForWidth { + spec: source, + max_d, + }) + } else { + Err(Error::DecimalsNotAllowedForWidth(source)) + } + } else { + Ok(Spec { format, w, d }) + } + } +} + +impl TryFrom for Format { + type Error = Error; + + fn try_from(source: u16) -> Result { + match source { + 1 => Ok(Self::A), + 2 => Ok(Self::AHex), + 3 => Ok(Self::Comma), + 4 => Ok(Self::Dollar), + 5 => Ok(Self::F), + 6 => Ok(Self::IB), + 7 => Ok(Self::PIBHex), + 8 => Ok(Self::P), + 9 => Ok(Self::PIB), + 10 => Ok(Self::PK), + 11 => Ok(Self::RB), + 12 => Ok(Self::RBHex), + 15 => Ok(Self::Z), + 16 => Ok(Self::N), + 17 => Ok(Self::E), + 20 => Ok(Self::Date), + 21 => Ok(Self::Time), + 22 => Ok(Self::DateTime), + 23 => Ok(Self::ADate), + 24 => Ok(Self::JDate), + 25 => Ok(Self::DTime), + 26 => Ok(Self::WkDay), + 27 => Ok(Self::Month), + 28 => Ok(Self::MoYr), + 29 => Ok(Self::QYr), + 30 => Ok(Self::WkYr), + 31 => Ok(Self::Pct), + 32 => Ok(Self::Dot), + 33 => Ok(Self::CC(CC::A)), + 34 => Ok(Self::CC(CC::B)), + 35 => Ok(Self::CC(CC::C)), + 36 => Ok(Self::CC(CC::D)), + 37 => Ok(Self::CC(CC::E)), + 38 => Ok(Self::EDate), + 39 => Ok(Self::SDate), + 40 => Ok(Self::MTime), + 41 => Ok(Self::YMDHMS), + _ => Err(Error::UnknownFormat { value: source }), + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct UncheckedSpec { + pub format: Format, + + pub w: Width, + + pub d: Decimals, +} + +impl TryFrom for UncheckedSpec { + type Error = Error; + + fn try_from(source: u32) -> Result { + let raw_format = (source >> 16) as u16; + let format = raw_format.try_into()?; + let w = ((source >> 8) & 0xff) as Width; + let d = (source & 0xff) as Decimals; + Ok(Self { format, w, d }) + } +} + +impl Display for UncheckedSpec { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + write!(f, "{}{}", self.format, self.w)?; + if self.format.takes_decimals() || self.d > 0 { + write!(f, ".{}", self.d)?; + } + Ok(()) + } +} -- 2.30.2