work
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 13 Dec 2025 00:34:38 +0000 (16:34 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 13 Dec 2025 00:34:38 +0000 (16:34 -0800)
rust/pspp/src/data.rs
rust/pspp/src/output/pivot.rs
rust/pspp/src/output/pivot/value.rs
rust/pspp/src/spv/read/legacy_xml.rs
rust/pspp/src/spv/write.rs

index e482841c376b25c5af36939270cd6e10bf414165..433a013cda448d27da7fa550344a14cc6f10b9cb 100644 (file)
@@ -262,6 +262,24 @@ impl ByteString {
     pub fn spaces(n: usize) -> Self {
         Self(std::iter::repeat_n(b' ', n).collect())
     }
+
+    pub fn display_hex(&self) -> HexBytes<'_> {
+        HexBytes(self.0.as_slice())
+    }
+    pub fn to_hex(&self) -> String {
+        self.display_hex().to_string()
+    }
+}
+
+pub struct HexBytes<'a>(&'a [u8]);
+
+impl<'a> Display for HexBytes<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        for byte in self.0 {
+            write!(f, "{:02X}", *byte)?;
+        }
+        Ok(())
+    }
 }
 
 impl Borrow<ByteStr> for ByteString {
index a8ccd80fc0ada8b466aa567cfdce5e4f75a8dc80..152e62c489726d075c8b7f296832b288dfcefc9a 100644 (file)
@@ -62,9 +62,7 @@ use crate::{
     format::{F40, F40_2, F40_3, Format, PCT40_1, Settings as FormatSettings},
     output::pivot::{
         look::{Look, Sizing},
-        value::{
-            BareValue, DisplayValue, IntoValueOptions, NumberValue, Value, ValueInner, ValueOptions,
-        },
+        value::{BareValue, DisplayValue, IntoValueOptions, Value, ValueOptions},
     },
     settings::{Settings, Show},
     variable::Variable,
@@ -167,14 +165,9 @@ impl PivotTable {
             Class::Residual => F40_2,
             Class::Count => F40, // XXX
         };
-        let value = Value::new(ValueInner::Number(NumberValue {
-            show: None,
-            format,
-            honor_small: class == Class::Other,
-            value: number,
-            variable: None,
-            value_label: None,
-        }));
+        let value = Value::new_number(number)
+            .with_format(format)
+            .with_honor_small(class == Class::Other);
         self.insert(data_indexes, value);
     }
 
index d5f833c17a3c26c52a8b36120411b7430c4aa8b9..bb908517011b5559ed97a07e7d1a0c4625a5d385 100644 (file)
@@ -146,23 +146,21 @@ impl Value {
     where
         B: EncodedString,
     {
-        match datum {
-            Datum::Number(number) => Self::new_number(*number),
-            Datum::String(string) => Self::new(ValueInner::String(StringValue {
-                show: None,
-                hex: false,
-                s: string.as_str().into_owned(),
-                var_name: None,
-                value_label: None,
-            })),
-        }
+        Self::new(ValueInner::Number(NumberValue::new(datum)))
     }
 
     /// Returns this value with its display format set to `format`.
     pub fn with_format(self, format: Format) -> Self {
         Self {
             inner: self.inner.with_format(format),
-            styling: self.styling,
+            ..self
+        }
+    }
+
+    pub fn with_honor_small(self, honor_small: bool) -> Self {
+        Self {
+            inner: self.inner.with_honor_small(honor_small),
+            ..self
         }
     }
 
@@ -180,12 +178,12 @@ impl Value {
         Self::new_datum(&datum.as_encoded(variable.encoding())).with_source_variable(variable)
     }
 
-    pub fn datum(&self) -> Option<Datum<&str>> {
+    pub fn datum(&self) -> Option<&Datum<WithEncoding<ByteString>>> {
         self.inner.datum()
     }
 
     pub fn new_number(number: Option<f64>) -> Self {
-        Self::new(ValueInner::Number(NumberValue::new(number)))
+        Self::new(ValueInner::Number(NumberValue::new_number(number)))
     }
 
     pub fn new_integer(x: Option<f64>) -> Self {
@@ -417,9 +415,13 @@ impl<'a> DisplayValue<'a> {
     }
 
     pub fn var_type(&self) -> VarType {
-        match self.inner {
-            ValueInner::Number(NumberValue { .. }) if self.show_label.is_none() => VarType::Numeric,
-            _ => VarType::String,
+        if let ValueInner::Number(NumberValue { datum, .. }) = &self.inner
+            && datum.is_number()
+            && self.show_label.is_none()
+        {
+            VarType::Numeric
+        } else {
+            VarType::String
         }
     }
 }
@@ -495,16 +497,13 @@ impl Debug for Value {
     }
 }
 
-/// A numeric value and how to display it.
+/// A datum and how to display it.
 #[derive(Clone, Debug, PartialEq)]
 pub struct NumberValue {
-    /// The numerical value, or `None` if it is a missing value.
-    pub value: Option<f64>,
+    /// The datum.
+    pub datum: Datum<WithEncoding<ByteString>>,
 
-    /// The display format (usually [F] or [Pct]).
-    ///
-    /// [F]: crate::format::Type::F
-    /// [Pct]: crate::format::Type::Pct
+    /// The display format.
     pub format: Format,
 
     /// Whether to show `value` or `value_label` or both.
@@ -530,10 +529,10 @@ impl Serialize for NumberValue {
         S: serde::Serializer,
     {
         if self.format.type_() == Type::F && self.variable.is_none() && self.value_label.is_none() {
-            self.value.serialize(serializer)
+            self.datum.serialize(serializer)
         } else {
             let mut s = serializer.serialize_map(None)?;
-            s.serialize_entry("value", &self.value)?;
+            s.serialize_entry("datum", &self.datum)?;
             s.serialize_entry("format", &self.format)?;
             if let Some(show) = self.show {
                 s.serialize_entry("show", &show)?;
@@ -553,9 +552,15 @@ impl Serialize for NumberValue {
 }
 
 impl NumberValue {
-    pub fn new(value: Option<f64>) -> Self {
+    pub fn new_number(number: Option<f64>) -> Self {
+        Self::new(&Datum::<&str>::Number(number))
+    }
+    pub fn new<B>(datum: &Datum<B>) -> Self
+    where
+        B: EncodedString,
+    {
         Self {
-            value,
+            datum: datum.cloned(),
             format: F8_2,
             show: None,
             honor_small: false,
@@ -566,30 +571,44 @@ impl NumberValue {
     pub fn with_format(self, format: Format) -> Self {
         Self { format, ..self }
     }
+    pub fn with_honor_small(self, honor_small: bool) -> Self {
+        Self {
+            honor_small,
+            ..self
+        }
+    }
     pub fn display<'a>(
         &self,
         display: &DisplayValue<'a>,
         f: &mut std::fmt::Formatter<'_>,
     ) -> std::fmt::Result {
         if display.show_value {
-            let format = if self.format.type_() == Type::F
-                && self.honor_small
-                && let Some(value) = self.value
-                && value != 0.0
-                && value.abs() < display.small()
-            {
-                UncheckedFormat::new(Type::E, 40, self.format.d() as u8).fix()
-            } else {
-                self.format
-            };
-            Datum::<&str>::Number(self.value)
-                .display(format)
-                .without_leading_spaces()
-                .fmt(f)?;
+            match &self.datum {
+                Datum::Number(number) => {
+                    let format = if self.format.type_() == Type::F
+                        && self.honor_small
+                        && let Some(number) = *number
+                        && number != 0.0
+                        && number.abs() < display.small()
+                    {
+                        UncheckedFormat::new(Type::E, 40, self.format.d() as u8).fix()
+                    } else {
+                        self.format
+                    };
+                    self.datum.display(format).without_leading_spaces().fmt(f)?;
+                }
+                Datum::String(s) => {
+                    if self.format.type_() == Type::AHex {
+                        write!(f, "{}", s.inner.display_hex())?;
+                    } else {
+                        f.write_str(&s.as_str())?;
+                    }
+                }
+            }
         }
         if let Some(label) = display.show_label {
             if display.show_value {
-                write!(f, " ")?;
+                f.write_char(' ')?;
             }
             f.write_str(label)?;
         }
@@ -600,14 +619,15 @@ impl NumberValue {
     where
         S: Serializer,
     {
-        if let Some(number) = self.value
+        if let Datum::Number(Some(number)) = &self.datum
+            && let number = *number
             && number.trunc() == number
             && number >= -(1i64 << 53) as f64
             && number <= (1i64 << 53) as f64
         {
             Some(number as u64).serialize(serializer)
         } else {
-            self.value.serialize(serializer)
+            self.datum.serialize(serializer)
         }
     }
 }
@@ -913,10 +933,18 @@ impl ValueInner {
         }
     }
 
-    pub fn datum(&self) -> Option<Datum<&str>> {
+    pub fn with_honor_small(self, honor_small: bool) -> Self {
+        match self {
+            ValueInner::Number(number_value) => {
+                Self::Number(number_value.with_honor_small(honor_small))
+            }
+            _ => self,
+        }
+    }
+
+    pub fn datum(&self) -> Option<&Datum<WithEncoding<ByteString>>> {
         match self {
-            ValueInner::Number(number_value) => Some(Datum::Number(number_value.value)),
-            ValueInner::String(string_value) => Some(Datum::String(&string_value.s)),
+            ValueInner::Number(datum_value) => Some(&datum_value.datum),
             _ => None,
         }
     }
index b1e077c1ee7c78bc51381f1fa74c8303d27b7139..65391c9dba1f3675f2caf0bc7071fc6fa1b31ffd 100644 (file)
@@ -31,7 +31,6 @@ use ordered_float::OrderedFloat;
 use serde::Deserialize;
 
 use crate::{
-    calendar::{date_time_to_pspp, time_to_pspp},
     data::Datum,
     format::{self, Decimal::Dot, F8_0, F40_2, Type, UncheckedFormat},
     output::pivot::{
@@ -40,7 +39,7 @@ use crate::{
             self, Area, AreaStyle, CellStyle, Color, HeadingRegion, HorzAlign, Look, RowParity,
             VertAlign,
         },
-        value::{NumberValue, Value, ValueInner},
+        value::{Value, ValueInner},
     },
     spv::read::legacy_bin::DataValue,
 };
@@ -694,10 +693,9 @@ impl Visualization {
                     }
                 }
             }
-            if let Value {
-                inner: ValueInner::Number(NumberValue { value: None, .. }),
-                styling: None,
-            } = &value
+            if let Some(datum) = value.datum()
+                && datum.is_sysmis()
+                && value.footnotes().is_empty()
             {
                 // A system-missing value without a footnote represents an empty cell.
             } else {
@@ -1863,34 +1861,13 @@ impl Style {
                             && let Ok(date_time) =
                                 NaiveDateTime::parse_from_str(&string.s, "%Y-%m-%dT%H:%M:%S%.3f")
                         {
-                            value.inner = ValueInner::Number(NumberValue {
-                                show: None,
-                                format,
-                                honor_small: false,
-                                value: Some(date_time_to_pspp(date_time)),
-                                variable: None,
-                                value_label: None,
-                            })
+                            value.inner = Value::new_date(date_time).with_format(format).inner;
                         } else if format.type_().category() == format::Category::Time
                             && let Ok(time) = NaiveTime::parse_from_str(&string.s, "%H:%M:%S%.3f")
                         {
-                            value.inner = ValueInner::Number(NumberValue {
-                                show: None,
-                                format,
-                                honor_small: false,
-                                value: Some(time_to_pspp(time)),
-                                variable: None,
-                                value_label: None,
-                            })
+                            value.inner = Value::new_time(time).with_format(format).inner;
                         } else if let Ok(number) = string.s.parse::<f64>() {
-                            value.inner = ValueInner::Number(NumberValue {
-                                show: None,
-                                format,
-                                honor_small: false,
-                                value: Some(number),
-                                variable: None,
-                                value_label: None,
-                            })
+                            value.inner = Value::new_number(Some(number)).with_format(format).inner;
                         }
                     }
                     _ => (),
index 6e49bbf5db54c793b28ccc52ce6898af57558d25..98cfcb65fa90cb904169734132d315fde92aca1b 100644 (file)
@@ -30,6 +30,7 @@ use quick_xml::{
 use zip::{ZipWriter, write::SimpleFileOptions};
 
 use crate::{
+    data::{Datum, EncodedString},
     format::{Format, Type},
     output::{
         Details, Item, Text,
@@ -1261,32 +1262,59 @@ impl BinWrite for Value {
         args: Self::Args<'_>,
     ) -> binrw::BinResult<()> {
         match &self.inner {
-            ValueInner::Number(number) => {
-                let format = SpvFormat {
-                    format: number.format,
-                    honor_small: number.honor_small,
-                };
-                if number.variable.is_some() || number.value_label.is_some() {
-                    (
-                        2u8,
-                        ValueMod::new(self),
-                        format,
-                        number.value.unwrap_or(f64::MIN),
-                        SpvString::optional(&number.variable),
-                        SpvString::optional(&number.value_label),
-                        Show::as_spv(&number.show),
-                    )
-                        .write_options(writer, endian, args)?;
-                } else {
+            ValueInner::Number(number_value) => match &number_value.datum {
+                Datum::Number(number) => {
+                    let format = SpvFormat {
+                        format: number_value.format,
+                        honor_small: number_value.honor_small,
+                    };
+                    if number_value.variable.is_some() || number_value.value_label.is_some() {
+                        (
+                            2u8,
+                            ValueMod::new(self),
+                            format,
+                            number.unwrap_or(f64::MIN),
+                            SpvString::optional(&number_value.variable),
+                            SpvString::optional(&number_value.value_label),
+                            Show::as_spv(&number_value.show),
+                        )
+                            .write_options(writer, endian, args)?;
+                    } else {
+                        (1u8, ValueMod::new(self), format, number.unwrap_or(f64::MIN))
+                            .write_options(writer, endian, args)?;
+                    }
+                }
+                Datum::String(s) => {
+                    let hex;
+                    let utf8;
+                    let (s, format) = if number_value.format.type_() == Type::AHex {
+                        hex = s.inner.to_hex();
+                        (
+                            hex.as_str(),
+                            Format::new(Type::AHex, hex.len().min(65534) as u16, 0).unwrap(),
+                        )
+                    } else {
+                        utf8 = s.as_str();
+                        (
+                            utf8.as_ref(),
+                            Format::new(Type::A, utf8.len().min(32767) as u16, 0).unwrap(),
+                        )
+                    };
                     (
-                        1u8,
+                        4u8,
                         ValueMod::new(self),
-                        format,
-                        number.value.unwrap_or(f64::MIN),
+                        SpvFormat {
+                            format,
+                            honor_small: false,
+                        },
+                        SpvString::optional(&number_value.value_label),
+                        SpvString::optional(&number_value.variable),
+                        Show::as_spv(&number_value.show),
+                        SpvString(s),
                     )
                         .write_options(writer, endian, args)?;
                 }
-            }
+            },
             ValueInner::String(string) => {
                 (
                     4u8,