more tests
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 4 Aug 2025 17:35:41 +0000 (10:35 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 4 Aug 2025 17:35:41 +0000 (10:35 -0700)
rust/pspp/src/data.rs
rust/pspp/src/dictionary.rs
rust/pspp/src/sys/raw.rs
rust/pspp/src/sys/raw/records.rs
rust/pspp/src/sys/write.rs

index 70f8b733320362c9a3b266a1b9b9984be1b3bd38..c86e20a09074be521f27904078c9a28055751dc6 100644 (file)
@@ -202,6 +202,30 @@ impl Debug for ByteCow<'_> {
     }
 }
 
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct ByteStrArray<const N: usize>(pub [u8; N]);
+
+impl<const N: usize> Serialize for ByteStrArray<N> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        ByteStr(&self.0).serialize(serializer)
+    }
+}
+
+impl<const N: usize> RawString for ByteStrArray<N> {
+    fn raw_string_bytes(&self) -> &[u8] {
+        &self.0
+    }
+}
+
+impl<const N: usize> Debug for ByteStrArray<N> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        ByteStr(&self.0).fmt(f)
+    }
+}
+
 #[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct ByteString(pub Vec<u8>);
 
index 3ad1bf5033751d4d0e9cfc7fa7e1b5e96d3f19da..5a1d8fa772b662f6c2291d56fd9e79202239563c 100644 (file)
@@ -19,7 +19,6 @@
 use core::str;
 use std::{
     borrow::Cow,
-    cmp::Ordering,
     collections::{btree_set, BTreeMap, BTreeSet, HashMap, HashSet},
     fmt::{Debug, Display, Formatter, Result as FmtResult},
     hash::{DefaultHasher, Hash, Hasher},
@@ -91,22 +90,12 @@ impl Display for VarType {
 }
 
 /// [VarType], plus a width for [VarType::String].
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
 pub enum VarWidth {
     Numeric,
     String(u16),
 }
 
-impl PartialOrd for VarWidth {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        match (self, other) {
-            (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
-            (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
-            _ => None,
-        }
-    }
-}
-
 impl VarWidth {
     pub const MAX_STRING: u16 = 32767;
 
@@ -2068,7 +2057,7 @@ impl<'a> MissingValuesMut<'a> {
             Err(MissingValuesError::MixedTypes)
         } else if value == Datum::Number(None) {
             Err(MissingValuesError::SystemMissing)
-        } else if value.resize(self.width).is_err() {
+        } else if value.resize(self.width.min(VarWidth::String(8))).is_err() {
             Err(MissingValuesError::TooWide)
         } else {
             value.trim_end();
index 3aa3c0d7066d19f2e94910cb054db3ed5971313d..3b7334169b13d592c0b7883a87e520c9abf41e60 100644 (file)
@@ -533,6 +533,19 @@ pub enum Record {
     ),
 }
 
+impl Record {
+    pub fn as_long_string_missing_values(
+        &self,
+    ) -> Option<&LongStringMissingValueRecord<ByteString>> {
+        match self {
+            Record::LongStringMissingValues(long_string_missing_value_record) => {
+                Some(long_string_missing_value_record)
+            }
+            _ => None,
+        }
+    }
+}
+
 /// A [Record] that has been decoded to a more usable form.
 ///
 /// Some records can be understood raw, but others need to have strings decoded
@@ -1594,34 +1607,6 @@ impl Debug for UntypedDatum {
     }
 }
 
-/// An 8-byte raw string whose type and encoding are unknown.
-#[derive(Copy, Clone)]
-pub struct RawStrArray<const N: usize>(
-    /// Content.
-    pub [u8; N],
-);
-
-impl<const N: usize> From<[u8; N]> for RawStrArray<N> {
-    fn from(source: [u8; N]) -> Self {
-        Self(source)
-    }
-}
-
-impl<const N: usize> Debug for RawStrArray<N> {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        write!(f, "{:?}", ByteStr(&self.0))
-    }
-}
-
-impl<const N: usize> Serialize for RawStrArray<N> {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        ByteStr(&self.0).serialize(serializer)
-    }
-}
-
 fn skip_bytes<R: Read>(r: &mut R, mut n: usize) -> Result<(), IoError> {
     thread_local! {
         static BUF: RefCell<[u8; 256]> = RefCell::new([0u8; 256]);
index 5411e03eb5e3527e64e54e82aa8eae6f788b0806..32fceed38bfff03238acc36342f87562a6721961 100644 (file)
@@ -12,7 +12,7 @@ use std::{
 };
 
 use crate::{
-    data::{ByteString, Datum},
+    data::{ByteStrArray, ByteString, Datum},
     dictionary::{
         Alignment, Attributes, CategoryLabels, Measure, MissingValueRange, MissingValues,
         MissingValuesError, VarType, VarWidth,
@@ -23,7 +23,7 @@ use crate::{
     sys::{
         raw::{
             read_bytes, read_string, read_vec, Decoder, Error, ErrorDetails, Magic, RawDatum,
-            RawStrArray, RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails,
+            RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails,
         },
         serialize_endian, ProductVersion,
     },
@@ -854,7 +854,7 @@ where
 }
 
 /// One line in a document.
-pub type RawDocumentLine = RawStrArray<DOC_LINE_LEN>;
+pub type RawDocumentLine = ByteStrArray<DOC_LINE_LEN>;
 
 /// Length of a line in a document.  Document lines are fixed-length and
 /// padded on the right with spaces.
@@ -885,7 +885,7 @@ impl DocumentRecord<RawDocumentLine> {
             let offsets = start_offset..start_offset.saturating_add((n * DOC_LINE_LEN) as u64);
             let mut lines = Vec::with_capacity(n);
             for _ in 0..n {
-                lines.push(RawStrArray(
+                lines.push(ByteStrArray(
                     read_bytes(r).map_err(|e| Error::new(Some(offsets.clone()), e.into()))?,
                 ));
             }
@@ -1569,7 +1569,7 @@ where
     pub var_name: N,
 
     /// Missing values.
-    pub missing_values: Vec<RawStrArray<8>>,
+    pub missing_values: Vec<ByteStrArray<8>>,
 }
 
 impl LongStringMissingValues<ByteString> {
@@ -1636,7 +1636,7 @@ impl LongStringMissingValueRecord<ByteString> {
                 }
 
                 let value: [u8; 8] = read_bytes(&mut input)?;
-                missing_values.push(RawStrArray(value));
+                missing_values.push(ByteStrArray(value));
             }
             missing_value_set.push(LongStringMissingValues {
                 var_name,
@@ -2273,7 +2273,7 @@ impl Extension {
         }
     }
 
-    pub(super) fn read<R: Read + Seek>(
+    pub fn read<R: Read + Seek>(
         r: &mut R,
         endian: Endian,
         var_types: &VarTypes,
index e61456d9e6389852d8f377ddbd03580376a4d963..2297a828eb693ceecd29524fcb37ba46789703a1 100644 (file)
@@ -1208,18 +1208,21 @@ mod tests {
     use binrw::{BinRead, Endian};
     use encoding_rs::UTF_8;
     use itertools::Itertools;
+    use unicase::UniCase;
 
     use crate::{
-        data::{ByteString, Datum},
+        data::{ByteString, Datum, RawString},
         dictionary::{
-            CategoryLabels, DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary,
-            MissingValueRange, MultipleResponseType, VarWidth, Variable,
+            Alignment, CategoryLabels, DictIndexMultipleResponseSet, DictIndexVariableSet,
+            Dictionary, Measure, MissingValueRange, MultipleResponseType, VarWidth, Variable,
         },
         identifier::{ByIdentifier, Identifier},
         sys::{
             raw::{
-                records::{DocumentRecord, RawHeader, RawVariableRecord, VariableRecord},
-                Decoder,
+                records::{
+                    DocumentRecord, Extension, RawHeader, RawVariableRecord, VariableRecord,
+                },
+                Decoder, VarTypes,
             },
             write::DictionaryWriter,
             ReadOptions, WriteOptions,
@@ -1436,6 +1439,19 @@ mod tests {
                 ],
                 None,
             ),
+            (
+                VarWidth::String(10),
+                vec![
+                    Datum::String(ByteString::from("abcdeasd")),
+                    Datum::String(ByteString::from("qwioejdf")),
+                ],
+                None,
+            ),
+            (
+                VarWidth::String(11),
+                vec![Datum::String(ByteString::from("abcdeasd"))],
+                None,
+            ),
         ];
 
         for (width, values, range) in test_cases {
@@ -1453,16 +1469,17 @@ mod tests {
             }
             dictionary.add_var(variable).unwrap();
 
-            let mut raw = Vec::new();
+            // Write and check variable records.
+            let mut raw_variables = Vec::new();
             DictionaryWriter::new(
                 &WriteOptions::reproducible(None),
-                &mut Cursor::new(&mut raw),
+                &mut Cursor::new(&mut raw_variables),
                 &dictionary,
             )
             .write_variables()
             .unwrap();
 
-            let mut cursor = Cursor::new(&raw[4..]);
+            let mut cursor = Cursor::new(&raw_variables[4..]);
             let record =
                 VariableRecord::read(&mut cursor, Endian::Little, &mut |_| panic!()).unwrap();
             if !width.is_long_string() {
@@ -1471,6 +1488,47 @@ mod tests {
                 assert_eq!(&record.missing_values.values, &vec![]);
             }
             assert_eq!(&record.missing_values.range, &range);
+
+            // Write and check long string missing value record.
+            let mut raw_long_missing = Vec::new();
+            DictionaryWriter::new(
+                &WriteOptions::reproducible(None),
+                &mut Cursor::new(&mut raw_long_missing),
+                &dictionary,
+            )
+            .write_long_string_missing_values()
+            .unwrap();
+
+            if width.is_long_string() {
+                let mut cursor = Cursor::new(&raw_long_missing[4..]);
+                let record = Extension::read(
+                    &mut cursor,
+                    Endian::Little,
+                    &VarTypes::new(),
+                    &mut |_| panic!(),
+                )
+                .unwrap()
+                .unwrap()
+                .as_long_string_missing_values()
+                .unwrap()
+                .clone()
+                .decode(&mut Decoder::new(UTF_8, |_| panic!()));
+
+                assert_eq!(record.values.len(), 1);
+                assert_eq!(&record.values[0].var_name.0, &UniCase::new("var"));
+                let actual = record.values[0]
+                    .missing_values
+                    .iter()
+                    .map(|v| v.raw_string_bytes());
+                let expected = values
+                    .iter()
+                    .map(|v| v.as_string().unwrap().raw_string_bytes());
+                for (actual, expected) in actual.zip_eq(expected) {
+                    assert_eq!(actual, expected);
+                }
+            } else {
+                assert_eq!(raw_long_missing.len(), 0);
+            }
         }
     }
 
@@ -1742,4 +1800,81 @@ $e=E 11 6 choice 0  n o p
 "
         );
     }
+
+    #[test]
+    fn variable_display_parameters() {
+        let variables = [
+            (None, Alignment::Left, 10),
+            (Some(Measure::Nominal), Alignment::Right, 12),
+            (Some(Measure::Ordinal), Alignment::Center, 14),
+            (Some(Measure::Scale), Alignment::Right, 16),
+        ];
+        let mut expected = Dictionary::new(UTF_8);
+        for (index, (measure, alignment, display_width)) in variables.into_iter().enumerate() {
+            let mut variable = Variable::new(
+                Identifier::new(format!("v{index}")).unwrap(),
+                VarWidth::Numeric,
+                UTF_8,
+            );
+            variable.measure = measure;
+            variable.alignment = alignment;
+            variable.display_width = display_width;
+            expected.add_var(variable).unwrap();
+        }
+
+        let raw = WriteOptions::new()
+            .write_writer(&expected, Cursor::new(Vec::new()))
+            .unwrap()
+            .finish()
+            .unwrap()
+            .unwrap()
+            .into_inner();
+        let actual = ReadOptions::new(|_| panic!())
+            .open_reader(Cursor::new(raw))
+            .unwrap()
+            .dictionary;
+
+        fn display_parameters(
+            dictionary: &Dictionary,
+        ) -> impl Iterator<Item = (Option<Measure>, Alignment, u32)> {
+            dictionary
+                .variables
+                .iter()
+                .map(|variable| (variable.measure, variable.alignment, variable.display_width))
+        }
+        assert!(display_parameters(&expected).eq(display_parameters(&actual)));
+    }
+
+    #[test]
+    fn long_variable_names() {
+        let long_name = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@$";
+
+        let mut expected = Dictionary::new(UTF_8);
+        for name in (1..=64).map(|len| long_name[..len].to_string()) {
+            expected
+                .add_var(Variable::new(
+                    Identifier::new(name).unwrap(),
+                    VarWidth::Numeric,
+                    UTF_8,
+                ))
+                .unwrap();
+        }
+
+        let raw = WriteOptions::new()
+            .write_writer(&expected, Cursor::new(Vec::new()))
+            .unwrap()
+            .finish()
+            .unwrap()
+            .unwrap()
+            .into_inner();
+        let actual = ReadOptions::new(|_| panic!())
+            .open_reader(Cursor::new(raw))
+            .unwrap()
+            .dictionary;
+
+        fn names(dictionary: &Dictionary) -> impl Iterator<Item = &Identifier> {
+            dictionary.variables.iter().map(|variable| &variable.name)
+        }
+        assert!(names(&expected).eq(names(&actual)));
+    }
 }