work
author Ben Pfaff <blp@cs.stanford.edu>
Wed, 26 Mar 2025 01:55:14 +0000 (18:55 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Wed, 26 Mar 2025 01:55:14 +0000 (18:55 -0700)
rust/pspp/src/dictionary.rs
rust/pspp/src/format/mod.rs
rust/pspp/src/format/parse.rs [new file with mode: 0644]

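diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs
index 3cc252692ced66bc2d801312d24ec1a715727f6f..1e0e57e70e857c2b4256038622a72339c67bdf18 100644 (file)
--- a/rust/pspp/src/dictionary.rs
+++ b/rust/pspp/src/dictionary.rs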
@@ -222,7 +222,7 @@ impl Hash for Value {
 }
 
 impl Value {
-    pub fn sysmis() -> Self {
+    pub const fn sysmis() -> Self {
         Self::Number(None)
     }
 
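diff --git a/rust/pspp/src/format/mod.rs b/rust/pspp/src/format/mod.rs
index 70cebcc78bae2cf2e4b72d7bec85cc314437bdab..5a345f7cfc15a1b69377fbee54338e63c598f26d 100644 (file)
--- a/rust/pspp/src/format/mod.rs
+++ b/rust/pspp/src/format/mod.rs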
@@ -1,4 +1,5 @@
 use std::{
+    borrow::Cow,
     fmt::{Display, Formatter, Result as FmtResult},
     ops::{Not, RangeInclusive},
     str::{Chars, FromStr},
@@ -12,11 +13,12 @@ use thiserror::Error as ThisError;
 use unicode_width::UnicodeWidthStr;
 
 use crate::{
-    dictionary::VarWidth,
+    dictionary::{Value, VarWidth},
     raw::{self, VarType},
 };
 
 mod display;
+mod parse;
 pub use display::DisplayValue;
 
 #[derive(ThisError, Debug)]
@@ -614,6 +616,13 @@ impl Format {
 
         Ok(self)
     }
+
+    pub fn default_value(&self) -> Value {
+        match self.var_width() {
+            VarWidth::Numeric => Value::sysmis(),
+            VarWidth::String(width) => Value::String((0..width).map(|_| 0u8).collect()),
+        }
+    }
 }
 
 impl Display for Format {
@@ -849,7 +858,7 @@ impl StyleSet {
     fn new(f: impl Fn(StyleParams) -> NumberStyle) -> Self {
         Self(EnumMap::from_fn(f))
     }
-    fn get(&self, settings: &Settings) -> &NumberStyle {
+    const fn get(&self, settings: &Settings) -> &NumberStyle {
         &self.0[settings.into()]
     }
 }
@@ -866,20 +875,21 @@ impl Settings {
         }
     }
     fn number_style(&self, type_: Type) -> &NumberStyle {
-        static DEFAULT: LazyLock<NumberStyle> =
-            LazyLock::new(|| NumberStyle::new("", "", Decimal::Dot, None, false));
+        static DEFAULT: NumberStyle = NumberStyle::new_static("", "", Decimal::Dot, None, false);
 
         match type_ {
             Type::F | Type::E => {
                 static F: LazyLock<StyleSet> = LazyLock::new(|| {
-                    StyleSet::new(|p| NumberStyle::new("", "", p.decimal, None, p.leading_zero))
+                    StyleSet::new(|p| {
+                        NumberStyle::new_static("", "", p.decimal, None, p.leading_zero)
+                    })
                 });
                 &F.get(self)
             }
             Type::Comma => {
                 static COMMA: LazyLock<StyleSet> = LazyLock::new(|| {
                     StyleSet::new(|p| {
-                        NumberStyle::new("", "", p.decimal, Some(!p.decimal), p.leading_zero)
+                        NumberStyle::new_static("", "", p.decimal, Some(!p.decimal), p.leading_zero)
                     })
                 });
                 &COMMA.get(self)
@@ -887,20 +897,22 @@ impl Settings {
             Type::Dot => {
                 static DOT: LazyLock<StyleSet> = LazyLock::new(|| {
                     StyleSet::new(|p| {
-                        NumberStyle::new("", "", !p.decimal, Some(p.decimal), p.leading_zero)
+                        NumberStyle::new_static("", "", !p.decimal, Some(p.decimal), p.leading_zero)
                     })
                 });
                 &DOT.get(self)
             }
             Type::Dollar => {
                 static DOLLAR: LazyLock<StyleSet> = LazyLock::new(|| {
-                    StyleSet::new(|p| NumberStyle::new("$", "", p.decimal, Some(!p.decimal), false))
+                    StyleSet::new(|p| {
+                        NumberStyle::new_static("$", "", p.decimal, Some(!p.decimal), false)
+                    })
                 });
                 &DOLLAR.get(self)
             }
             Type::Pct => {
                 static PCT: LazyLock<StyleSet> = LazyLock::new(|| {
-                    StyleSet::new(|p| NumberStyle::new("", "%", p.decimal, None, false))
+                    StyleSet::new(|p| NumberStyle::new_static("", "%", p.decimal, None, false))
                 });
                 &PCT.get(self)
             }
@@ -964,26 +976,31 @@ pub struct NumberStyle {
 }
 
 impl NumberStyle {
-    fn new(
-        prefix: &str,
-        suffix: &str,
+    const fn new_static(
+        prefix: &'static str,
+        suffix: &'static str,
         decimal: Decimal,
         grouping: Option<Decimal>,
         leading_zero: bool,
     ) -> Self {
-        // These assertions ensure that zero is correct for `extra_bytes`.
-        debug_assert!(prefix.is_ascii());
-        debug_assert!(suffix.is_ascii());
+        let neg_prefix = Affix::new_static("-");
+        let prefix = Affix::new_static(prefix);
+        let suffix = Affix::new_static(suffix);
+        let neg_suffix = Affix::new_static("");
+        let extra_bytes = neg_prefix.extra_bytes()
+            + prefix.extra_bytes()
+            + suffix.extra_bytes()
+            + neg_suffix.extra_bytes();
 
         Self {
-            neg_prefix: Affix::new("-"),
-            prefix: Affix::new(prefix),
-            suffix: Affix::new(suffix),
-            neg_suffix: Affix::new(""),
+            neg_prefix,
+            prefix,
+            suffix,
+            neg_suffix,
             decimal,
             grouping,
             leading_zero,
-            extra_bytes: 0,
+            extra_bytes,
         }
     }
 
@@ -995,22 +1012,31 @@ impl NumberStyle {
 #[derive(Clone, Debug)]
 pub struct Affix {
     /// String contents of affix.
-    pub s: String,
+    pub s: Cow<'static, str>,
 
     /// Display width in columns (see [unicode_width])
     pub width: usize,
 }
 
 impl Affix {
-    fn new(s: impl Into<String>) -> Self {
-        let s = s.into();
+    pub const fn new_static(s: &'static str) -> Self {
+        // [UnicodeWidthStr::width] is non-const, so we use `s.len()` instead,
+        // which is valid if `s` is ASCII.
+        assert!(s.is_ascii());
+        Self {
+            width: s.len(),
+            s: Cow::Borrowed(s),
+        }
+    }
+
+    fn new(s: String) -> Self {
         Self {
             width: s.width(),
-            s,
+            s: Cow::from(s),
         }
     }
 
-    fn extra_bytes(&self) -> usize {
+    const fn extra_bytes(&self) -> usize {
         self.s.len().checked_sub(self.width).unwrap()
     }
 }
diff --git a/rust/pspp/src/format/parse.rs b/rust/pspp/src/format/parse.rs
new file mode 100644 (file)
index 0000000..d9685da
--- /dev/null
+++ b/rust/pspp/src/format/parse.rs
@@ -0,0 +1,63 @@
+use crate::{
+    dictionary::Value,
+    format::{Format, NumberStyle},
+};
+use encoding_rs::Encoding;
+use thiserror::Error as ThisError;
+
+#[derive(Clone, Debug, ThisError)]
+enum ParseError {}
+
+impl Format {
+    /// Parses `s` as this format. For string formats, `encoding` specifies the
+    /// output encoding.
+    fn parse(&self, s: &str, encoding: &'static Encoding) -> Result<Value, ParseError> {
+        if s.is_empty() {
+            return Ok(self.default_value());
+        }
+        match self.type_ {
+            crate::format::Type::F
+            | crate::format::Type::Comma
+            | crate::format::Type::Dot
+            | crate::format::Type::Dollar
+            | crate::format::Type::Pct
+            | crate::format::Type::E
+            | crate::format::Type::CC(_) => self.parse_number(s),
+            crate::format::Type::N => todo!(),
+            crate::format::Type::Z => todo!(),
+            crate::format::Type::P => todo!(),
+            crate::format::Type::PK => todo!(),
+            crate::format::Type::IB => todo!(),
+            crate::format::Type::PIB => todo!(),
+            crate::format::Type::PIBHex => todo!(),
+            crate::format::Type::RB => todo!(),
+            crate::format::Type::RBHex => todo!(),
+            crate::format::Type::Date => todo!(),
+            crate::format::Type::ADate => todo!(),
+            crate::format::Type::EDate => todo!(),
+            crate::format::Type::JDate => todo!(),
+            crate::format::Type::SDate => todo!(),
+            crate::format::Type::QYr => todo!(),
+            crate::format::Type::MoYr => todo!(),
+            crate::format::Type::WkYr => todo!(),
+            crate::format::Type::DateTime => todo!(),
+            crate::format::Type::YMDHMS => todo!(),
+            crate::format::Type::MTime => todo!(),
+            crate::format::Type::Time => todo!(),
+            crate::format::Type::DTime => todo!(),
+            crate::format::Type::WkDay => todo!(),
+            crate::format::Type::Month => todo!(),
+            crate::format::Type::A => todo!(),
+            crate::format::Type::AHex => todo!(),
+        }
+    }
+
+    fn parse_number(&self, s: &str) -> Result<Value, ParseError> {
+        let s = s.trim();
+        if s.is_empty() || s == "." {
+            return Ok(Value::sysmis());
+        }
+        //let style = NumberStyle
+        todo!()
+    }
+}