From 337d86a8ef3881082622d9a24af462cada4d4eef Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 12 Dec 2025 16:34:38 -0800 Subject: [PATCH] work --- rust/pspp/src/data.rs | 18 ++++ rust/pspp/src/output/pivot.rs | 15 +--- rust/pspp/src/output/pivot/value.rs | 122 ++++++++++++++++----------- rust/pspp/src/spv/read/legacy_xml.rs | 37 ++------ rust/pspp/src/spv/write.rs | 70 ++++++++++----- 5 files changed, 153 insertions(+), 109 deletions(-) diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index e482841c37..433a013cda 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -262,6 +262,24 @@ impl ByteString { pub fn spaces(n: usize) -> Self { Self(std::iter::repeat_n(b' ', n).collect()) } + + pub fn display_hex(&self) -> HexBytes<'_> { + HexBytes(self.0.as_slice()) + } + pub fn to_hex(&self) -> String { + self.display_hex().to_string() + } +} + +pub struct HexBytes<'a>(&'a [u8]); + +impl<'a> Display for HexBytes<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + for byte in self.0 { + write!(f, "{:02X}", *byte)?; + } + Ok(()) + } } impl Borrow for ByteString { diff --git a/rust/pspp/src/output/pivot.rs b/rust/pspp/src/output/pivot.rs index a8ccd80fc0..152e62c489 100644 --- a/rust/pspp/src/output/pivot.rs +++ b/rust/pspp/src/output/pivot.rs @@ -62,9 +62,7 @@ use crate::{ format::{F40, F40_2, F40_3, Format, PCT40_1, Settings as FormatSettings}, output::pivot::{ look::{Look, Sizing}, - value::{ - BareValue, DisplayValue, IntoValueOptions, NumberValue, Value, ValueInner, ValueOptions, - }, + value::{BareValue, DisplayValue, IntoValueOptions, Value, ValueOptions}, }, settings::{Settings, Show}, variable::Variable, @@ -167,14 +165,9 @@ impl PivotTable { Class::Residual => F40_2, Class::Count => F40, // XXX }; - let value = Value::new(ValueInner::Number(NumberValue { - show: None, - format, - honor_small: class == Class::Other, - value: number, - variable: None, - value_label: None, - })); + let value = Value::new_number(number) + .with_format(format) + .with_honor_small(class == Class::Other); self.insert(data_indexes, value); } diff --git a/rust/pspp/src/output/pivot/value.rs b/rust/pspp/src/output/pivot/value.rs index d5f833c17a..bb90851701 100644 --- a/rust/pspp/src/output/pivot/value.rs +++ b/rust/pspp/src/output/pivot/value.rs @@ -146,23 +146,21 @@ impl Value { where B: EncodedString, { - match datum { - Datum::Number(number) => Self::new_number(*number), - Datum::String(string) => Self::new(ValueInner::String(StringValue { - show: None, - hex: false, - s: string.as_str().into_owned(), - var_name: None, - value_label: None, - })), - } + Self::new(ValueInner::Number(NumberValue::new(datum))) } /// Returns this value with its display format set to `format`. pub fn with_format(self, format: Format) -> Self { Self { inner: self.inner.with_format(format), - styling: self.styling, + ..self + } + } + + pub fn with_honor_small(self, honor_small: bool) -> Self { + Self { + inner: self.inner.with_honor_small(honor_small), + ..self } } @@ -180,12 +178,12 @@ impl Value { Self::new_datum(&datum.as_encoded(variable.encoding())).with_source_variable(variable) } - pub fn datum(&self) -> Option> { + pub fn datum(&self) -> Option<&Datum>> { self.inner.datum() } pub fn new_number(number: Option) -> Self { - Self::new(ValueInner::Number(NumberValue::new(number))) + Self::new(ValueInner::Number(NumberValue::new_number(number))) } pub fn new_integer(x: Option) -> Self { @@ -417,9 +415,13 @@ impl<'a> DisplayValue<'a> { } pub fn var_type(&self) -> VarType { - match self.inner { - ValueInner::Number(NumberValue { .. }) if self.show_label.is_none() => VarType::Numeric, - _ => VarType::String, + if let ValueInner::Number(NumberValue { datum, .. }) = &self.inner + && datum.is_number() + && self.show_label.is_none() + { + VarType::Numeric + } else { + VarType::String } } } @@ -495,16 +497,13 @@ impl Debug for Value { } } -/// A numeric value and how to display it. +/// A datum and how to display it. #[derive(Clone, Debug, PartialEq)] pub struct NumberValue { - /// The numerical value, or `None` if it is a missing value. - pub value: Option, + /// The datum. + pub datum: Datum>, - /// The display format (usually [F] or [Pct]). - /// - /// [F]: crate::format::Type::F - /// [Pct]: crate::format::Type::Pct + /// The display format. pub format: Format, /// Whether to show `value` or `value_label` or both. @@ -530,10 +529,10 @@ impl Serialize for NumberValue { S: serde::Serializer, { if self.format.type_() == Type::F && self.variable.is_none() && self.value_label.is_none() { - self.value.serialize(serializer) + self.datum.serialize(serializer) } else { let mut s = serializer.serialize_map(None)?; - s.serialize_entry("value", &self.value)?; + s.serialize_entry("datum", &self.datum)?; s.serialize_entry("format", &self.format)?; if let Some(show) = self.show { s.serialize_entry("show", &show)?; @@ -553,9 +552,15 @@ impl Serialize for NumberValue { } impl NumberValue { - pub fn new(value: Option) -> Self { + pub fn new_number(number: Option) -> Self { + Self::new(&Datum::<&str>::Number(number)) + } + pub fn new(datum: &Datum) -> Self + where + B: EncodedString, + { Self { - value, + datum: datum.cloned(), format: F8_2, show: None, honor_small: false, @@ -566,30 +571,44 @@ impl NumberValue { pub fn with_format(self, format: Format) -> Self { Self { format, ..self } } + pub fn with_honor_small(self, honor_small: bool) -> Self { + Self { + honor_small, + ..self + } + } pub fn display<'a>( &self, display: &DisplayValue<'a>, f: &mut std::fmt::Formatter<'_>, ) -> std::fmt::Result { if display.show_value { - let format = if self.format.type_() == Type::F - && self.honor_small - && let Some(value) = self.value - && value != 0.0 - && value.abs() < display.small() - { - UncheckedFormat::new(Type::E, 40, self.format.d() as u8).fix() - } else { - self.format - }; - Datum::<&str>::Number(self.value) - .display(format) - .without_leading_spaces() - .fmt(f)?; + match &self.datum { + Datum::Number(number) => { + let format = if self.format.type_() == Type::F + && self.honor_small + && let Some(number) = *number + && number != 0.0 + && number.abs() < display.small() + { + UncheckedFormat::new(Type::E, 40, self.format.d() as u8).fix() + } else { + self.format + }; + self.datum.display(format).without_leading_spaces().fmt(f)?; + } + Datum::String(s) => { + if self.format.type_() == Type::AHex { + write!(f, "{}", s.inner.display_hex())?; + } else { + f.write_str(&s.as_str())?; + } + } + } } if let Some(label) = display.show_label { if display.show_value { - write!(f, " ")?; + f.write_char(' ')?; } f.write_str(label)?; } @@ -600,14 +619,15 @@ impl NumberValue { where S: Serializer, { - if let Some(number) = self.value + if let Datum::Number(Some(number)) = &self.datum + && let number = *number && number.trunc() == number && number >= -(1i64 << 53) as f64 && number <= (1i64 << 53) as f64 { Some(number as u64).serialize(serializer) } else { - self.value.serialize(serializer) + self.datum.serialize(serializer) } } } @@ -913,10 +933,18 @@ impl ValueInner { } } - pub fn datum(&self) -> Option> { + pub fn with_honor_small(self, honor_small: bool) -> Self { + match self { + ValueInner::Number(number_value) => { + Self::Number(number_value.with_honor_small(honor_small)) + } + _ => self, + } + } + + pub fn datum(&self) -> Option<&Datum>> { match self { - ValueInner::Number(number_value) => Some(Datum::Number(number_value.value)), - ValueInner::String(string_value) => Some(Datum::String(&string_value.s)), + ValueInner::Number(datum_value) => Some(&datum_value.datum), _ => None, } } diff --git a/rust/pspp/src/spv/read/legacy_xml.rs b/rust/pspp/src/spv/read/legacy_xml.rs index b1e077c1ee..65391c9dba 100644 --- a/rust/pspp/src/spv/read/legacy_xml.rs +++ b/rust/pspp/src/spv/read/legacy_xml.rs @@ -31,7 +31,6 @@ use ordered_float::OrderedFloat; use serde::Deserialize; use crate::{ - calendar::{date_time_to_pspp, time_to_pspp}, data::Datum, format::{self, Decimal::Dot, F8_0, F40_2, Type, UncheckedFormat}, output::pivot::{ @@ -40,7 +39,7 @@ use crate::{ self, Area, AreaStyle, CellStyle, Color, HeadingRegion, HorzAlign, Look, RowParity, VertAlign, }, - value::{NumberValue, Value, ValueInner}, + value::{Value, ValueInner}, }, spv::read::legacy_bin::DataValue, }; @@ -694,10 +693,9 @@ impl Visualization { } } } - if let Value { - inner: ValueInner::Number(NumberValue { value: None, .. }), - styling: None, - } = &value + if let Some(datum) = value.datum() + && datum.is_sysmis() + && value.footnotes().is_empty() { // A system-missing value without a footnote represents an empty cell. } else { @@ -1863,34 +1861,13 @@ impl Style { && let Ok(date_time) = NaiveDateTime::parse_from_str(&string.s, "%Y-%m-%dT%H:%M:%S%.3f") { - value.inner = ValueInner::Number(NumberValue { - show: None, - format, - honor_small: false, - value: Some(date_time_to_pspp(date_time)), - variable: None, - value_label: None, - }) + value.inner = Value::new_date(date_time).with_format(format).inner; } else if format.type_().category() == format::Category::Time && let Ok(time) = NaiveTime::parse_from_str(&string.s, "%H:%M:%S%.3f") { - value.inner = ValueInner::Number(NumberValue { - show: None, - format, - honor_small: false, - value: Some(time_to_pspp(time)), - variable: None, - value_label: None, - }) + value.inner = Value::new_time(time).with_format(format).inner; } else if let Ok(number) = string.s.parse::() { - value.inner = ValueInner::Number(NumberValue { - show: None, - format, - honor_small: false, - value: Some(number), - variable: None, - value_label: None, - }) + value.inner = Value::new_number(Some(number)).with_format(format).inner; } } _ => (), diff --git a/rust/pspp/src/spv/write.rs b/rust/pspp/src/spv/write.rs index 6e49bbf5db..98cfcb65fa 100644 --- a/rust/pspp/src/spv/write.rs +++ b/rust/pspp/src/spv/write.rs @@ -30,6 +30,7 @@ use quick_xml::{ use zip::{ZipWriter, write::SimpleFileOptions}; use crate::{ + data::{Datum, EncodedString}, format::{Format, Type}, output::{ Details, Item, Text, @@ -1261,32 +1262,59 @@ impl BinWrite for Value { args: Self::Args<'_>, ) -> binrw::BinResult<()> { match &self.inner { - ValueInner::Number(number) => { - let format = SpvFormat { - format: number.format, - honor_small: number.honor_small, - }; - if number.variable.is_some() || number.value_label.is_some() { - ( - 2u8, - ValueMod::new(self), - format, - number.value.unwrap_or(f64::MIN), - SpvString::optional(&number.variable), - SpvString::optional(&number.value_label), - Show::as_spv(&number.show), - ) - .write_options(writer, endian, args)?; - } else { + ValueInner::Number(number_value) => match &number_value.datum { + Datum::Number(number) => { + let format = SpvFormat { + format: number_value.format, + honor_small: number_value.honor_small, + }; + if number_value.variable.is_some() || number_value.value_label.is_some() { + ( + 2u8, + ValueMod::new(self), + format, + number.unwrap_or(f64::MIN), + SpvString::optional(&number_value.variable), + SpvString::optional(&number_value.value_label), + Show::as_spv(&number_value.show), + ) + .write_options(writer, endian, args)?; + } else { + (1u8, ValueMod::new(self), format, number.unwrap_or(f64::MIN)) + .write_options(writer, endian, args)?; + } + } + Datum::String(s) => { + let hex; + let utf8; + let (s, format) = if number_value.format.type_() == Type::AHex { + hex = s.inner.to_hex(); + ( + hex.as_str(), + Format::new(Type::AHex, hex.len().min(65534) as u16, 0).unwrap(), + ) + } else { + utf8 = s.as_str(); + ( + utf8.as_ref(), + Format::new(Type::A, utf8.len().min(32767) as u16, 0).unwrap(), + ) + }; ( - 1u8, + 4u8, ValueMod::new(self), - format, - number.value.unwrap_or(f64::MIN), + SpvFormat { + format, + honor_small: false, + }, + SpvString::optional(&number_value.value_label), + SpvString::optional(&number_value.variable), + Show::as_spv(&number_value.show), + SpvString(s), ) .write_options(writer, endian, args)?; } - } + }, ValueInner::String(string) => { ( 4u8, -- 2.30.2