From 1420e0cba5d2dd48eb992aa48e360b87940ab891 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 25 Mar 2025 18:55:14 -0700 Subject: [PATCH] work --- rust/pspp/src/dictionary.rs | 2 +- rust/pspp/src/format/mod.rs | 76 +++++++++++++++++++++++------------ rust/pspp/src/format/parse.rs | 63 +++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 26 deletions(-) create mode 100644 rust/pspp/src/format/parse.rs diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 3cc252692c..1e0e57e70e 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -222,7 +222,7 @@ impl Hash for Value { } impl Value { - pub fn sysmis() -> Self { + pub const fn sysmis() -> Self { Self::Number(None) } diff --git a/rust/pspp/src/format/mod.rs b/rust/pspp/src/format/mod.rs index 70cebcc78b..5a345f7cfc 100644 --- a/rust/pspp/src/format/mod.rs +++ b/rust/pspp/src/format/mod.rs @@ -1,4 +1,5 @@ use std::{ + borrow::Cow, fmt::{Display, Formatter, Result as FmtResult}, ops::{Not, RangeInclusive}, str::{Chars, FromStr}, @@ -12,11 +13,12 @@ use thiserror::Error as ThisError; use unicode_width::UnicodeWidthStr; use crate::{ - dictionary::VarWidth, + dictionary::{Value, VarWidth}, raw::{self, VarType}, }; mod display; +mod parse; pub use display::DisplayValue; #[derive(ThisError, Debug)] @@ -614,6 +616,13 @@ impl Format { Ok(self) } + + pub fn default_value(&self) -> Value { + match self.var_width() { + VarWidth::Numeric => Value::sysmis(), + VarWidth::String(width) => Value::String((0..width).map(|_| 0u8).collect()), + } + } } impl Display for Format { @@ -849,7 +858,7 @@ impl StyleSet { fn new(f: impl Fn(StyleParams) -> NumberStyle) -> Self { Self(EnumMap::from_fn(f)) } - fn get(&self, settings: &Settings) -> &NumberStyle { + const fn get(&self, settings: &Settings) -> &NumberStyle { &self.0[settings.into()] } } @@ -866,20 +875,21 @@ impl Settings { } } fn number_style(&self, type_: Type) -> &NumberStyle { - static DEFAULT: LazyLock = - LazyLock::new(|| NumberStyle::new("", "", Decimal::Dot, None, false)); + static DEFAULT: NumberStyle = NumberStyle::new_static("", "", Decimal::Dot, None, false); match type_ { Type::F | Type::E => { static F: LazyLock = LazyLock::new(|| { - StyleSet::new(|p| NumberStyle::new("", "", p.decimal, None, p.leading_zero)) + StyleSet::new(|p| { + NumberStyle::new_static("", "", p.decimal, None, p.leading_zero) + }) }); &F.get(self) } Type::Comma => { static COMMA: LazyLock = LazyLock::new(|| { StyleSet::new(|p| { - NumberStyle::new("", "", p.decimal, Some(!p.decimal), p.leading_zero) + NumberStyle::new_static("", "", p.decimal, Some(!p.decimal), p.leading_zero) }) }); &COMMA.get(self) @@ -887,20 +897,22 @@ impl Settings { Type::Dot => { static DOT: LazyLock = LazyLock::new(|| { StyleSet::new(|p| { - NumberStyle::new("", "", !p.decimal, Some(p.decimal), p.leading_zero) + NumberStyle::new_static("", "", !p.decimal, Some(p.decimal), p.leading_zero) }) }); &DOT.get(self) } Type::Dollar => { static DOLLAR: LazyLock = LazyLock::new(|| { - StyleSet::new(|p| NumberStyle::new("$", "", p.decimal, Some(!p.decimal), false)) + StyleSet::new(|p| { + NumberStyle::new_static("$", "", p.decimal, Some(!p.decimal), false) + }) }); &DOLLAR.get(self) } Type::Pct => { static PCT: LazyLock = LazyLock::new(|| { - StyleSet::new(|p| NumberStyle::new("", "%", p.decimal, None, false)) + StyleSet::new(|p| NumberStyle::new_static("", "%", p.decimal, None, false)) }); &PCT.get(self) } @@ -964,26 +976,31 @@ pub struct NumberStyle { } impl NumberStyle { - fn new( - prefix: &str, - suffix: &str, + const fn new_static( + prefix: &'static str, + suffix: &'static str, decimal: Decimal, grouping: Option, leading_zero: bool, ) -> Self { - // These assertions ensure that zero is correct for `extra_bytes`. - debug_assert!(prefix.is_ascii()); - debug_assert!(suffix.is_ascii()); + let neg_prefix = Affix::new_static("-"); + let prefix = Affix::new_static(prefix); + let suffix = Affix::new_static(suffix); + let neg_suffix = Affix::new_static(""); + let extra_bytes = neg_prefix.extra_bytes() + + prefix.extra_bytes() + + suffix.extra_bytes() + + neg_suffix.extra_bytes(); Self { - neg_prefix: Affix::new("-"), - prefix: Affix::new(prefix), - suffix: Affix::new(suffix), - neg_suffix: Affix::new(""), + neg_prefix, + prefix, + suffix, + neg_suffix, decimal, grouping, leading_zero, - extra_bytes: 0, + extra_bytes, } } @@ -995,22 +1012,31 @@ impl NumberStyle { #[derive(Clone, Debug)] pub struct Affix { /// String contents of affix. - pub s: String, + pub s: Cow<'static, str>, /// Display width in columns (see [unicode_width]) pub width: usize, } impl Affix { - fn new(s: impl Into) -> Self { - let s = s.into(); + pub const fn new_static(s: &'static str) -> Self { + // [UnicodeWidthStr::width] is non-const, so we use `s.len()` instead, + // which is valid if `s` is ASCII. + assert!(s.is_ascii()); + Self { + width: s.len(), + s: Cow::Borrowed(s), + } + } + + fn new(s: String) -> Self { Self { width: s.width(), - s, + s: Cow::from(s), } } - fn extra_bytes(&self) -> usize { + const fn extra_bytes(&self) -> usize { self.s.len().checked_sub(self.width).unwrap() } } diff --git a/rust/pspp/src/format/parse.rs b/rust/pspp/src/format/parse.rs new file mode 100644 index 0000000000..d9685da96f --- /dev/null +++ b/rust/pspp/src/format/parse.rs @@ -0,0 +1,63 @@ +use crate::{ + dictionary::Value, + format::{Format, NumberStyle}, +}; +use encoding_rs::Encoding; +use thiserror::Error as ThisError; + +#[derive(Clone, Debug, ThisError)] +enum ParseError {} + +impl Format { + /// Parses `s` as this format. For string formats, `encoding` specifies the + /// output encoding. + fn parse(&self, s: &str, encoding: &'static Encoding) -> Result { + if s.is_empty() { + return Ok(self.default_value()); + } + match self.type_ { + crate::format::Type::F + | crate::format::Type::Comma + | crate::format::Type::Dot + | crate::format::Type::Dollar + | crate::format::Type::Pct + | crate::format::Type::E + | crate::format::Type::CC(_) => self.parse_number(s), + crate::format::Type::N => todo!(), + crate::format::Type::Z => todo!(), + crate::format::Type::P => todo!(), + crate::format::Type::PK => todo!(), + crate::format::Type::IB => todo!(), + crate::format::Type::PIB => todo!(), + crate::format::Type::PIBHex => todo!(), + crate::format::Type::RB => todo!(), + crate::format::Type::RBHex => todo!(), + crate::format::Type::Date => todo!(), + crate::format::Type::ADate => todo!(), + crate::format::Type::EDate => todo!(), + crate::format::Type::JDate => todo!(), + crate::format::Type::SDate => todo!(), + crate::format::Type::QYr => todo!(), + crate::format::Type::MoYr => todo!(), + crate::format::Type::WkYr => todo!(), + crate::format::Type::DateTime => todo!(), + crate::format::Type::YMDHMS => todo!(), + crate::format::Type::MTime => todo!(), + crate::format::Type::Time => todo!(), + crate::format::Type::DTime => todo!(), + crate::format::Type::WkDay => todo!(), + crate::format::Type::Month => todo!(), + crate::format::Type::A => todo!(), + crate::format::Type::AHex => todo!(), + } + } + + fn parse_number(&self, s: &str) -> Result { + let s = s.trim(); + if s.is_empty() || s == "." { + return Ok(Value::sysmis()); + } + //let style = NumberStyle + todo!() + } +} -- 2.30.2