From 7b643f181406076753401da9ca63aeb91c78d8ae Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 28 Jul 2025 09:46:00 -0700 Subject: [PATCH] progress --- rust/pspp/src/data.rs | 67 +++++++++++++++++------------ rust/pspp/src/dictionary.rs | 10 ++--- rust/pspp/src/format/display/mod.rs | 6 +-- rust/pspp/src/output/pivot/mod.rs | 8 +++- 4 files changed, 54 insertions(+), 37 deletions(-) diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index c9b6ed6869..3f9eda1046 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -310,6 +310,29 @@ where Self::String(s) => VarWidth::String(s.len().try_into().unwrap()), } } + + pub fn borrowed<'a>(&'a self) -> EncodedDatum> { + match self { + EncodedDatum::Number(number) => EncodedDatum::Number(*number), + EncodedDatum::String(encoded_string) => EncodedDatum::String(encoded_string.borrowed()), + } + } + + /// Compares this datum and `other` for equality, ignoring trailing ASCII + /// spaces in either, if they are both strings, for the purpose of + /// comparison. + pub fn eq_ignore_trailing_spaces(&self, other: &EncodedDatum>) -> bool + where + R2: Borrow, + { + match (self.borrowed(), other.borrowed()) { + (EncodedDatum::Number(lhs), EncodedDatum::Number(rhs)) => lhs == rhs, + (EncodedDatum::String(lhs), EncodedDatum::String(rhs)) => { + lhs.eq_ignore_trailing_spaces(&rhs) + } + _ => false, + } + } } impl EncodedDatum { @@ -367,13 +390,6 @@ impl EncodedDatum { } } - /// Compares this datum and `other` for equality, ignoring trailing ASCII - /// spaces in either, if they are both strings, for the purpose of - /// comparison. - pub fn eq_ignore_trailing_spaces<'a>(&self, other: impl Into>) -> bool { - self.borrowed().eq_ignore_trailing_spaces(other.into()) - } - /// Removes trailing ASCII spaces from this datum, if it is a string. pub fn trim_end(&mut self) { match self { @@ -382,21 +398,18 @@ impl EncodedDatum { } } - pub fn borrowed<'a>(&'a self) -> EncodedDat<'a> { - match self { - EncodedDatum::Number(number) => EncodedDat::Number(*number), - EncodedDatum::String(encoded_string) => EncodedDat::String(encoded_string.borrowed()), - } - } - - pub fn quoted(&self) -> QuotedEncodedDat<'_> { - self.borrowed().quoted() + pub fn quoted(&self) -> QuotedEncodedDatum<'_> { + QuotedEncodedDatum(self.borrowed()) } } impl Display for EncodedDatum { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.borrowed()) + match self { + Self::Number(None) => write!(f, "SYSMIS"), + Self::Number(Some(number)) => number.display_plain().fmt(f), + Self::String(string) => write!(f, "{string}"), + } } } @@ -486,19 +499,19 @@ impl<'a> EncodedDat<'a> { } } - pub fn quoted(&self) -> QuotedEncodedDat<'a> { - QuotedEncodedDat(*self) + pub fn quoted(&self) -> QuotedEncodedDatum<'a> { + todo!() } } -pub struct QuotedEncodedDat<'a>(EncodedDat<'a>); +pub struct QuotedEncodedDatum<'a>(BorrowedEncodedDatum<'a>); -impl Display for QuotedEncodedDat<'_> { +impl Display for QuotedEncodedDatum<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match &self.0 { - EncodedDat::Number(None) => write!(f, "SYSMIS"), - EncodedDat::Number(Some(number)) => number.display_plain().fmt(f), - EncodedDat::String(string) => write!(f, "\"{}\"", string.as_str()), + EncodedDatum::Number(None) => write!(f, "SYSMIS"), + EncodedDatum::Number(Some(number)) => number.display_plain().fmt(f), + EncodedDatum::String(string) => write!(f, "\"{}\"", string.as_str()), } } } @@ -714,10 +727,10 @@ where } } - pub fn as_encoded<'a>(&'a self, encoding: &'static Encoding) -> EncodedDat<'a> { + pub fn as_encoded<'a>(&'a self, encoding: &'static Encoding) -> BorrowedEncodedDatum<'a> { match self { - Datum::Number(number) => EncodedDat::Number(*number), - Datum::String(raw_string) => EncodedDat::String(EncodedString { + Datum::Number(number) => EncodedDatum::Number(*number), + Datum::String(raw_string) => EncodedDatum::String(EncodedString { encoding, raw: raw_string.borrow(), }), diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 053674c2ab..411fb24447 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -40,7 +40,7 @@ use thiserror::Error as ThisError; use unicase::UniCase; use crate::{ - data::{Datum, EncodedDat, EncodedDatum, OwnedRawString}, + data::{BorrowedEncodedDatum, Datum, EncodedDatum, OwnedRawString}, format::{DisplayPlain, Format}, identifier::{ByIdentifier, HasIdentifier, Identifier}, output::pivot::{ @@ -1022,7 +1022,7 @@ impl<'a> OutputMrsets<'a> { MultipleResponseType::MultipleDichotomy { datum, .. } => { pt.insert( &[row, 2], - Value::new_datum(datum.as_encoded(self.dictionary.encoding)), + Value::new_datum(&datum.as_encoded(self.dictionary.encoding)), ); "Dichotomies" } @@ -2052,16 +2052,16 @@ impl MissingValues { } } - pub fn contains(&self, value: EncodedDat) -> bool { + pub fn contains(&self, value: BorrowedEncodedDatum<'_>) -> bool { if self .values .iter() - .any(|datum| datum.eq_ignore_trailing_spaces(value)) + .any(|datum| datum.eq_ignore_trailing_spaces(&value)) { return true; } - if let EncodedDat::Number(Some(number)) = value + if let Some(Some(number)) = value.as_number() && let Some(range) = self.range { range.contains(number) diff --git a/rust/pspp/src/format/display/mod.rs b/rust/pspp/src/format/display/mod.rs index 94098de7d9..b1626bf588 100644 --- a/rust/pspp/src/format/display/mod.rs +++ b/rust/pspp/src/format/display/mod.rs @@ -29,7 +29,7 @@ use smallvec::{Array, SmallVec}; use crate::{ calendar::{calendar_offset_to_gregorian, day_of_year, month_name, short_month_name}, - data::{EncodedDat, EncodedDatum, QuotedEncodedDat}, + data::{EncodedDat, EncodedDatum, QuotedEncodedDatum}, endian::{endian_to_smallvec, ToBytes}, format::{Category, DateTemplate, Decimal, Format, NumberStyle, Settings, TemplateItem, Type}, settings::{EndianSettings, Settings as PsppSettings}, @@ -91,7 +91,7 @@ impl EncodedDat<'_> { DisplayDatum::new(format, *self) } - pub fn display_plain(&self) -> QuotedEncodedDat<'_> { + pub fn display_plain(&self) -> QuotedEncodedDatum<'_> { self.quoted() } } @@ -105,7 +105,7 @@ impl EncodedDatum { DisplayDatum::new(format, self.borrowed()) } - pub fn display_plain(&self) -> QuotedEncodedDat<'_> { + pub fn display_plain(&self) -> QuotedEncodedDatum<'_> { self.quoted() } } diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index ed1b8ea906..ad29d5b6d4 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -43,6 +43,7 @@ //! could also be a variable name or an arbitrary text string. use std::{ + borrow::Borrow, collections::HashMap, fmt::{Debug, Display, Write}, io::Read, @@ -67,7 +68,7 @@ use thiserror::Error as ThisError; use tlo::parse_tlo; use crate::{ - data::{Datum, EncodedDat, OwnedRawString, }, + data::{BorrowedRawString, Datum, EncodedDat, EncodedDatum, EncodedString, OwnedRawString}, dictionary::{VarType, Variable}, format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat}, settings::{Settings, Show}, @@ -1859,7 +1860,10 @@ impl Value { variable_label: variable.label.clone(), })) } - pub fn new_datum(value: EncodedDat) -> Self { + pub fn new_datum(value: &EncodedDatum>) -> Self + where + R: Borrow, + { match value { EncodedDat::Number(number) => Self::new_number(number), EncodedDat::String(string) => Self::new_user_text(string.as_str()), -- 2.30.2