passes!
authorBen Pfaff <blp@cs.stanford.edu>
Tue, 29 Jul 2025 15:27:52 +0000 (08:27 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Tue, 29 Jul 2025 15:27:52 +0000 (08:27 -0700)
rust/pspp/src/data.rs
rust/pspp/src/dictionary.rs
rust/pspp/src/format/display/mod.rs
rust/pspp/src/format/display/test.rs
rust/pspp/src/main.rs
rust/pspp/src/output/pivot/mod.rs
rust/pspp/src/sys/cooked.rs
rust/pspp/src/sys/write.rs

index a16691d0b439f18a0713b5427cdb76a0a9956553..f047ce93b300ddf972a88097f1d880d93f5dc9f6 100644 (file)
@@ -274,10 +274,7 @@ where
 }
 
 mod encoded;
-pub use encoded::{
-    BorrowedEncodedDatum, BorrowedEncodedString, EncodedDatum, EncodedString, OwnedEncodedDatum,
-    OwnedEncodedString, QuotedEncodedDatum,
-};
+pub use encoded::{BorrowedEncodedString, EncodedString, OwnedEncodedString};
 
 /// A [Datum] that owns its string data (if any).
 pub type OwnedDatum = Datum<OwnedRawString>;
@@ -478,17 +475,141 @@ where
         }
     }
 
-    pub fn as_encoded<'a>(&'a self, encoding: &'static Encoding) -> BorrowedEncodedDatum<'a> {
+    pub fn as_encoded<'a>(
+        &'a self,
+        encoding: &'static Encoding,
+    ) -> Datum<BorrowedEncodedString<'a>> {
         match self {
-            Datum::Number(number) => EncodedDatum::Number(*number),
-            Datum::String(raw_string) => EncodedDatum::String(EncodedString {
-                encoding,
+            Datum::Number(number) => Datum::Number(*number),
+            Datum::String(raw_string) => Datum::String(EncodedString {
                 raw: raw_string.borrow(),
+                encoding,
             }),
         }
     }
 }
 
+impl Datum<OwnedRawString> {
+    pub fn borrowed(&self) -> BorrowedDatum {
+        match self {
+            Datum::Number(number) => Datum::Number(*number),
+            Datum::String(string) => Datum::String(Borrow::borrow(string)),
+        }
+    }
+}
+
+impl<'a> Datum<&'a BorrowedRawString> {
+    pub fn borrowed(&self) -> BorrowedDatum {
+        self.clone()
+    }
+}
+
+impl Datum<OwnedEncodedString> {
+    pub fn borrowed<'a>(&'a self) -> Datum<BorrowedEncodedString<'a>> {
+        match self {
+            Datum::Number(number) => Datum::Number(*number),
+            Datum::String(string) => Datum::String(string.borrowed()),
+        }
+    }
+}
+
+impl<'a> Datum<BorrowedEncodedString<'a>> {
+    pub fn borrowed(&self) -> Datum<BorrowedEncodedString<'a>> {
+        self.clone()
+    }
+}
+
+impl<D> Datum<D>
+where
+    D: BorrowString,
+{
+    pub fn borrowed_string<'a>(&'a self) -> Datum<D::Borrowed<'a>> {
+        match self {
+            Datum::Number(number) => Datum::Number(*number),
+            Datum::String(string) => Datum::String(string.borrow_string()),
+        }
+    }
+}
+
+pub trait BorrowString {
+    type Borrowed<'a>
+    where
+        Self: 'a;
+    fn borrow_string<'a>(&'a self) -> Self::Borrowed<'a>;
+}
+
+impl BorrowString for OwnedRawString {
+    type Borrowed<'a> = &'a BorrowedRawString;
+    fn borrow_string<'a>(&'a self) -> Self::Borrowed<'a> {
+        BorrowedRawString::new(&self.0)
+    }
+}
+
+impl BorrowString for BorrowedRawString {
+    type Borrowed<'a> = &'a BorrowedRawString;
+    fn borrow_string<'a>(&'a self) -> Self::Borrowed<'a> {
+        self.clone()
+    }
+}
+
+impl BorrowString for OwnedEncodedString {
+    type Borrowed<'a> = BorrowedEncodedString<'a>;
+    fn borrow_string<'a>(&'a self) -> Self::Borrowed<'a> {
+        BorrowedEncodedString::new(self.raw.borrowed(), self.encoding)
+    }
+}
+
+impl<'b> BorrowString for BorrowedEncodedString<'b> {
+    type Borrowed<'a>
+        = BorrowedEncodedString<'b>
+    where
+        Self: 'a;
+
+    fn borrow_string<'a>(&'a self) -> Self::Borrowed<'a> {
+        self.clone()
+    }
+}
+
+pub trait AsEncodedString: Borrow<BorrowedRawString> {
+    fn as_encoded_string<'a>(&'a self) -> BorrowedEncodedString<'a>;
+}
+
+impl AsEncodedString for OwnedEncodedString {
+    fn as_encoded_string<'a>(&'a self) -> BorrowedEncodedString<'a> {
+        self.borrowed()
+    }
+}
+
+impl<'b> AsEncodedString for BorrowedEncodedString<'b> {
+    fn as_encoded_string<'a>(&'a self) -> BorrowedEncodedString<'a> {
+        self.clone()
+    }
+}
+
+impl<B> Datum<B>
+where
+    B: AsEncodedString,
+{
+    pub fn quoted<'a>(&'a self) -> QuotedDatum<'a, B> {
+        QuotedDatum(self)
+    }
+}
+
+pub struct QuotedDatum<'a, B>(&'a Datum<B>);
+
+impl<'a, B> Display for QuotedDatum<'a, B>
+where
+    B: AsEncodedString,
+{
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match &self.0 {
+            Datum::Number(None) => write!(f, "SYSMIS"),
+            Datum::Number(Some(number)) => number.display_plain().fmt(f),
+            Datum::String(string) => write!(f, "\"{}\"", string.as_encoded_string().as_str()),
+        }
+    }
+}
+
 impl<B> Datum<B>
 where
     B: BorrowMut<OwnedRawString>,
@@ -528,10 +649,10 @@ where
 }
 
 impl Datum<OwnedRawString> {
-    pub fn with_encoding(self, encoding: &'static Encoding) -> OwnedEncodedDatum {
+    pub fn with_encoding(self, encoding: &'static Encoding) -> Datum<OwnedEncodedString> {
         match self {
-            Datum::Number(number) => EncodedDatum::Number(number),
-            Datum::String(raw_string) => EncodedDatum::String(raw_string.with_encoding(encoding)),
+            Datum::Number(number) => Datum::Number(number),
+            Datum::String(raw_string) => Datum::String(raw_string.with_encoding(encoding)),
         }
     }
 }
@@ -609,13 +730,13 @@ impl<B> Case<B>
 where
     B: Borrow<[Datum<OwnedRawString>]>,
 {
-    fn len(&self) -> usize {
+    pub fn len(&self) -> usize {
         self.data.borrow().len()
     }
 }
 
 impl IntoIterator for Case<Vec<Datum<OwnedRawString>>> {
-    type Item = OwnedEncodedDatum;
+    type Item = Datum<OwnedEncodedString>;
 
     type IntoIter = CaseVecIter;
 
@@ -633,7 +754,7 @@ pub struct CaseVecIter {
 }
 
 impl Iterator for CaseVecIter {
-    type Item = OwnedEncodedDatum;
+    type Item = Datum<OwnedEncodedString>;
 
     fn next(&mut self) -> Option<Self::Item> {
         self.iter
index 9ddd5fde4228304f872141168942240c7dfc005f..0198c3bb7e9b3c392afbe4a8349457211c458e01 100644 (file)
@@ -40,7 +40,7 @@ use thiserror::Error as ThisError;
 use unicase::UniCase;
 
 use crate::{
-    data::{BorrowedEncodedDatum, Datum, OwnedEncodedDatum, OwnedRawString},
+    data::{AsEncodedString, Datum, OwnedEncodedString, OwnedRawString},
     format::{DisplayPlain, Format},
     identifier::{ByIdentifier, HasIdentifier, Identifier},
     output::pivot::{
@@ -890,7 +890,7 @@ impl<'a> OutputValueLabels<'a> {
                     .with_show_value_label(Some(Show::Value));
                 if variable
                     .missing_values
-                    .contains(datum.as_encoded(variable.encoding()))
+                    .contains(&datum.as_encoded(variable.encoding()))
                 {
                     value.add_footnote(&missing_footnote);
                 }
@@ -1957,7 +1957,7 @@ impl Hash for ValueLabels {
 #[derive(Clone, Default, Serialize)]
 pub struct MissingValues {
     /// Individual missing values, up to 3 of them.
-    values: Vec<OwnedEncodedDatum>,
+    values: Vec<Datum<OwnedEncodedString>>,
 
     /// Optional range of missing values.
     range: Option<MissingValueRange>,
@@ -2003,7 +2003,7 @@ impl MissingValues {
     pub fn clear(&mut self) {
         *self = Self::default();
     }
-    pub fn values(&self) -> &[OwnedEncodedDatum] {
+    pub fn values(&self) -> &[Datum<OwnedEncodedString>] {
         &self.values
     }
 
@@ -2012,7 +2012,7 @@ impl MissingValues {
     }
 
     pub fn new(
-        mut values: Vec<OwnedEncodedDatum>,
+        mut values: Vec<Datum<OwnedEncodedString>>,
         range: Option<MissingValueRange>,
     ) -> Result<Self, MissingValuesError> {
         if values.len() > 3 {
@@ -2052,7 +2052,10 @@ impl MissingValues {
         }
     }
 
-    pub fn contains(&self, value: BorrowedEncodedDatum<'_>) -> bool {
+    pub fn contains<S>(&self, value: &Datum<S>) -> bool
+    where
+        S: AsEncodedString,
+    {
         if self
             .values
             .iter()
index 236ada8b8a83aa6cd2e4c6744ea345a15a428234..cf3ae72114307bf177a092db2c01db1b39778bde 100644 (file)
@@ -15,7 +15,6 @@
 // this program.  If not, see <http://www.gnu.org/licenses/>.
 
 use std::{
-    borrow::Borrow,
     cmp::min,
     fmt::{Display, Error as FmtError, Formatter, Result as FmtResult, Write as _},
     io::{Error as IoError, Write as IoWrite},
@@ -30,19 +29,17 @@ use smallvec::{Array, SmallVec};
 
 use crate::{
     calendar::{calendar_offset_to_gregorian, day_of_year, month_name, short_month_name},
-    data::{
-        BorrowedEncodedDatum, BorrowedRawString, EncodedDatum, EncodedString, QuotedEncodedDatum,
-    },
+    data::{AsEncodedString, BorrowString, Datum, QuotedDatum},
     endian::{endian_to_smallvec, ToBytes},
     format::{Category, DateTemplate, Decimal, Format, NumberStyle, Settings, TemplateItem, Type},
     settings::{EndianSettings, Settings as PsppSettings},
 };
 
-pub struct DisplayDatum<'a, 'b> {
+pub struct DisplayDatum<'b, B> {
     format: Format,
     settings: &'b Settings,
     endian: EndianSettings,
-    datum: BorrowedEncodedDatum<'a>,
+    datum: Datum<B>,
 
     /// If true, the output will remove leading and trailing spaces from numeric
     /// values, and trailing spaces from string values.  (This might make the
@@ -85,6 +82,7 @@ impl Display for DisplayPlainF64 {
     }
 }
 
+/*
 impl<R> EncodedDatum<EncodedString<R>>
 where
     R: Borrow<BorrowedRawString>,
@@ -100,20 +98,44 @@ where
     pub fn display_plain(&self) -> QuotedEncodedDatum<'_> {
         self.quoted()
     }
+}*/
+
+impl<'a, D> Datum<D>
+where
+    D: AsEncodedString + BorrowString,
+{
+    /// Returns an object that implements [Display] for printing this
+    /// [EncodedDatum] as `format`.
+    ///
+    /// [Display]: std::fmt::Display
+    pub fn display(&'a self, format: Format) -> DisplayDatum<'a, D::Borrowed<'a>>
+    where
+        D::Borrowed<'a>: AsEncodedString,
+    {
+        DisplayDatum::new(format, self.borrowed_string())
+    }
+
+    pub fn display_plain(&self) -> QuotedDatum<'_, D> {
+        self.quoted()
+    }
 }
 
-impl Display for DisplayDatum<'_, '_> {
+impl<'a, 'b, B> Display for DisplayDatum<'b, B>
+where
+    B: AsEncodedString,
+{
     fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        let number = match self.datum {
-            EncodedDatum::Number(number) => number,
-            EncodedDatum::String(string) => {
+        let number = match &self.datum {
+            Datum::Number(number) => *number,
+            Datum::String(string) => {
                 if self.format.type_() == Type::AHex {
-                    for byte in string.as_bytes() {
+                    for byte in string.as_encoded_string().as_bytes() {
                         write!(f, "{byte:02x}")?;
                     }
                 } else {
                     let quote = if self.quote_strings { "\"" } else { "" };
-                    let s = string.as_str();
+                    let s = string.as_encoded_string();
+                    let s = s.as_str();
                     let s = if self.trim_spaces {
                         s.trim_end_matches(' ')
                     } else {
@@ -164,8 +186,11 @@ impl Display for DisplayDatum<'_, '_> {
     }
 }
 
-impl<'a, 'b> DisplayDatum<'a, 'b> {
-    pub fn new(format: Format, datum: BorrowedEncodedDatum<'a>) -> Self {
+impl<'b, B> DisplayDatum<'b, B>
+where
+    B: AsEncodedString,
+{
+    pub fn new(format: Format, datum: Datum<B>) -> Self {
         let settings = PsppSettings::global();
         Self {
             format,
index c65697f9cdceca38ce00220b152ce00eb6238fb0..5ff3f0016524c53c3a019aea09ec4b2ffa658671 100644 (file)
@@ -23,7 +23,7 @@ use smallstr::SmallString;
 use smallvec::SmallVec;
 
 use crate::{
-    data::OwnedEncodedDatum,
+    data::{Datum, OwnedEncodedString},
     endian::Endian,
     format::{AbstractFormat, Epoch, Format, Settings, Type, UncheckedFormat, CC},
     lex::{scan::StringScanner, segment::Syntax, Punct, Token},
@@ -75,7 +75,7 @@ fn test(name: &str) {
                 let format: Format = format.try_into().unwrap();
                 assert_eq!(tokens.get(1), Some(&Token::Punct(Punct::Colon)));
                 let expected = tokens[2].as_string().unwrap();
-                let actual = OwnedEncodedDatum::Number(value)
+                let actual = Datum::<OwnedEncodedString>::Number(value)
                     .display(format)
                     .with_settings(&settings)
                     .with_endian(endian)
@@ -183,7 +183,7 @@ fn leading_zeros() {
         }
 
         fn test_with_settings(value: f64, expected: [&str; 2], settings: &Settings) {
-            let value = OwnedEncodedDatum::from(value);
+            let value = Datum::<OwnedEncodedString>::from(value);
             for (expected, d) in expected.into_iter().zip([2, 1].into_iter()) {
                 assert_eq!(
                     &value
@@ -214,7 +214,7 @@ fn leading_zeros() {
 fn non_ascii_cc() {
     fn test(settings: &Settings, value: f64, expected: &str) {
         assert_eq!(
-            &OwnedEncodedDatum::from(value)
+            &Datum::<OwnedEncodedString>::from(value)
                 .display(Format::new(Type::CC(CC::A), 10, 2).unwrap())
                 .with_settings(settings)
                 .to_string(),
@@ -266,7 +266,7 @@ fn test_binhex(name: &str) {
                 assert_eq!(tokens.get(1), Some(&Token::Punct(Punct::Colon)));
                 let expected = tokens[2].as_string().unwrap();
                 let mut actual = SmallVec::<[u8; 16]>::new();
-                OwnedEncodedDatum::Number(value)
+                Datum::<OwnedEncodedString>::Number(value)
                     .display(format)
                     .with_endian(endian)
                     .write(&mut actual, UTF_8)
index 2104cbfa204b78ca22274388b59630f9279ef953..8025c6a6266f62d3501f7bfe1d52f8f05abb38e5 100644 (file)
@@ -172,7 +172,7 @@ impl Convert {
                     .with_compression(self.sys_options.compression)
                     .write_file(&dictionary, output)?;
                 for case in cases {
-                    output.write_case(case?.into_iter().map(|datum| datum.into_raw()))?;
+                    output.write_case(case?)?;
                 }
             }
         }
index f55df6c1412a89750dba33a649a0996bbef4fd75..75cc9e3235dae6d27f0e0a5de0c3df17aa8dc84a 100644 (file)
@@ -43,7 +43,6 @@
 //!   could also be a variable name or an arbitrary text string.
 
 use std::{
-    borrow::Borrow,
     collections::HashMap,
     fmt::{Debug, Display, Write},
     io::Read,
@@ -68,9 +67,7 @@ use thiserror::Error as ThisError;
 use tlo::parse_tlo;
 
 use crate::{
-    data::{
-        BorrowedRawString, Datum, EncodedDatum, EncodedString, OwnedEncodedDatum, OwnedRawString,
-    },
+    data::{AsEncodedString, Datum, OwnedEncodedString, OwnedRawString},
     dictionary::{VarType, Variable},
     format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat},
     settings::{Settings, Show},
@@ -1862,13 +1859,13 @@ impl Value {
             variable_label: variable.label.clone(),
         }))
     }
-    pub fn new_datum<R>(value: &EncodedDatum<EncodedString<R>>) -> Self
+    pub fn new_datum<B>(value: &Datum<B>) -> Self
     where
-        R: Borrow<BorrowedRawString>,
+        B: AsEncodedString,
     {
         match value {
-            EncodedDatum::Number(number) => Self::new_number(*number),
-            EncodedDatum::String(string) => Self::new_user_text(string.as_str()),
+            Datum::Number(number) => Self::new_number(*number),
+            Datum::String(string) => Self::new_user_text(string.as_encoded_string().as_str()),
         }
     }
     pub fn new_variable_value(variable: &Variable, value: &Datum<OwnedRawString>) -> Self {
@@ -2214,7 +2211,7 @@ impl Display for DisplayValue<'_> {
                     write!(
                         &mut buf,
                         "{}",
-                        OwnedEncodedDatum::Number(*value).display(format)
+                        Datum::<OwnedEncodedString>::Number(*value).display(format)
                     )
                     .unwrap();
                     write!(f, "{}", buf.trim_start_matches(' '))?;
index d256183bb27903463285274eddff7c8fc7a579a2..448f2471d570d81f0da47e5a53f5896c1b725025 100644 (file)
@@ -26,7 +26,7 @@ use std::{
 use crate::{
     calendar::date_time_to_pspp,
     crypto::EncryptedFile,
-    data::{Case, Datum, EncodedDatum, OwnedRawString},
+    data::{Case, Datum, OwnedRawString},
     dictionary::{
         DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, InvalidRole, MissingValues,
         MissingValuesError, MultipleResponseType, VarWidth, Variable,
@@ -1258,7 +1258,7 @@ impl Records {
                 .map(|v| {
                     let mut value = OwnedRawString::from(v.0.as_slice());
                     value.resize(variable.width.as_string_width().unwrap());
-                    EncodedDatum::String(value.with_encoding(encoding))
+                    Datum::String(value.with_encoding(encoding))
                 })
                 .collect::<Vec<_>>();
             match MissingValues::new(values, None) {
index 7a808e699657061a182cccb66e3794a846b029f8..8fa7d114109792d2b4b3d71adbf2edc785603c9b 100644 (file)
@@ -1,5 +1,5 @@
 use std::{
-    borrow::Cow,
+    borrow::{Borrow, Cow},
     collections::HashMap,
     fmt::Write as _,
     fs::File,
@@ -17,7 +17,7 @@ use itertools::zip_eq;
 use smallvec::SmallVec;
 
 use crate::{
-    data::{Datum, OwnedEncodedDatum, OwnedRawString},
+    data::{BorrowedRawString, Datum},
     dictionary::{
         Alignment, Attributes, CategoryLabels, Dictionary, Measure, MultipleResponseType,
         ValueLabels, VarWidth,
@@ -674,7 +674,10 @@ impl BinWrite for Pad {
     }
 }
 
-impl BinWrite for Datum<OwnedRawString> {
+impl<B> BinWrite for Datum<B>
+where
+    B: Borrow<BorrowedRawString>,
+{
     type Args<'a> = ();
 
     fn write_options<W: Write + Seek>(
@@ -685,23 +688,7 @@ impl BinWrite for Datum<OwnedRawString> {
     ) -> binrw::BinResult<()> {
         match self {
             Datum::Number(number) => number.unwrap_or(f64::MIN).write_options(writer, endian, ()),
-            Datum::String(raw_string) => raw_string.0.write_options(writer, endian, ()),
-        }
-    }
-}
-
-impl BinWrite for OwnedEncodedDatum {
-    type Args<'a> = ();
-
-    fn write_options<W: Write + Seek>(
-        &self,
-        writer: &mut W,
-        endian: binrw::Endian,
-        _: (),
-    ) -> binrw::BinResult<()> {
-        match self {
-            Self::Number(number) => number.unwrap_or(f64::MIN).write_options(writer, endian, ()),
-            Self::String(raw_string) => raw_string.as_bytes().write_options(writer, endian, ()),
+            Datum::String(raw_string) => raw_string.borrow().0.write_options(writer, endian, ()),
         }
     }
 }
@@ -840,10 +827,13 @@ where
         Ok(())
     }
 
-    fn write_case_uncompressed<'c>(
+    fn write_case_uncompressed<'c, B>(
         &mut self,
-        case: impl Iterator<Item = Datum<OwnedRawString>>,
-    ) -> Result<(), BinError> {
+        case: impl Iterator<Item = Datum<B>>,
+    ) -> Result<(), BinError>
+    where
+        B: Borrow<BorrowedRawString>,
+    {
         for (var, datum) in zip_eq(self.case_vars, case) {
             match var {
                 CaseVar::Numeric => datum
@@ -852,7 +842,7 @@ where
                     .unwrap_or(f64::MIN)
                     .write_le(&mut self.inner)?,
                 CaseVar::String(encoding) => {
-                    let mut s = datum.as_string().unwrap().as_bytes();
+                    let mut s = datum.as_string().unwrap().borrow().as_bytes();
                     for segment in encoding {
                         let data;
                         (data, s) = s.split_at(segment.data_bytes);
@@ -863,10 +853,13 @@ where
         }
         Ok(())
     }
-    fn write_case_compressed<'c>(
+    fn write_case_compressed<'c, B>(
         &mut self,
-        case: impl Iterator<Item = Datum<OwnedRawString>>,
-    ) -> Result<(), BinError> {
+        case: impl Iterator<Item = Datum<B>>,
+    ) -> Result<(), BinError>
+    where
+        B: Borrow<BorrowedRawString>,
+    {
         for (var, datum) in zip_eq(self.case_vars, case) {
             match var {
                 CaseVar::Numeric => match datum.as_number().unwrap() {
@@ -885,7 +878,7 @@ where
                 },
 
                 CaseVar::String(encoding) => {
-                    let mut s = datum.as_string().unwrap().as_bytes();
+                    let mut s = datum.as_string().unwrap().borrow().as_bytes();
                     for segment in encoding {
                         let data;
                         (data, s) = s.split_at(segment.data_bytes);
@@ -989,10 +982,13 @@ where
     /// # Panic
     ///
     /// Panics if [try_finish](Self::try_finish) has been called.
-    pub fn write_case<'a>(
+    pub fn write_case<'a, B>(
         &mut self,
-        case: impl IntoIterator<Item = Datum<OwnedRawString>>,
-    ) -> Result<(), BinError> {
+        case: impl IntoIterator<Item = Datum<B>>,
+    ) -> Result<(), BinError>
+    where
+        B: Borrow<BorrowedRawString>,
+    {
         match self.inner.as_mut().unwrap() {
             Either::Left(inner) => {
                 let mut inner =