From 4b3d91f780fb836a70f090bd4f7bc559fb9458ba Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 4 Aug 2025 10:35:41 -0700 Subject: [PATCH] more tests --- rust/pspp/src/data.rs | 24 +++++ rust/pspp/src/dictionary.rs | 15 +-- rust/pspp/src/sys/raw.rs | 41 +++------ rust/pspp/src/sys/raw/records.rs | 14 +-- rust/pspp/src/sys/write.rs | 151 +++++++++++++++++++++++++++++-- 5 files changed, 189 insertions(+), 56 deletions(-) diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index 70f8b73332..c86e20a090 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -202,6 +202,30 @@ impl Debug for ByteCow<'_> { } } +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ByteStrArray(pub [u8; N]); + +impl Serialize for ByteStrArray { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + ByteStr(&self.0).serialize(serializer) + } +} + +impl RawString for ByteStrArray { + fn raw_string_bytes(&self) -> &[u8] { + &self.0 + } +} + +impl Debug for ByteStrArray { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + ByteStr(&self.0).fmt(f) + } +} + #[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ByteString(pub Vec); diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 3ad1bf5033..5a1d8fa772 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -19,7 +19,6 @@ use core::str; use std::{ borrow::Cow, - cmp::Ordering, collections::{btree_set, BTreeMap, BTreeSet, HashMap, HashSet}, fmt::{Debug, Display, Formatter, Result as FmtResult}, hash::{DefaultHasher, Hash, Hasher}, @@ -91,22 +90,12 @@ impl Display for VarType { } /// [VarType], plus a width for [VarType::String]. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)] pub enum VarWidth { Numeric, String(u16), } -impl PartialOrd for VarWidth { - fn partial_cmp(&self, other: &Self) -> Option { - match (self, other) { - (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal), - (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)), - _ => None, - } - } -} - impl VarWidth { pub const MAX_STRING: u16 = 32767; @@ -2068,7 +2057,7 @@ impl<'a> MissingValuesMut<'a> { Err(MissingValuesError::MixedTypes) } else if value == Datum::Number(None) { Err(MissingValuesError::SystemMissing) - } else if value.resize(self.width).is_err() { + } else if value.resize(self.width.min(VarWidth::String(8))).is_err() { Err(MissingValuesError::TooWide) } else { value.trim_end(); diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 3aa3c0d706..3b7334169b 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -533,6 +533,19 @@ pub enum Record { ), } +impl Record { + pub fn as_long_string_missing_values( + &self, + ) -> Option<&LongStringMissingValueRecord> { + match self { + Record::LongStringMissingValues(long_string_missing_value_record) => { + Some(long_string_missing_value_record) + } + _ => None, + } + } +} + /// A [Record] that has been decoded to a more usable form. /// /// Some records can be understand raw, but others need to have strings decoded @@ -1594,34 +1607,6 @@ impl Debug for UntypedDatum { } } -/// An 8-byte raw string whose type and encoding are unknown. -#[derive(Copy, Clone)] -pub struct RawStrArray( - /// Content. - pub [u8; N], -); - -impl From<[u8; N]> for RawStrArray { - fn from(source: [u8; N]) -> Self { - Self(source) - } -} - -impl Debug for RawStrArray { - fn fmt(&self, f: &mut Formatter) -> FmtResult { - write!(f, "{:?}", ByteStr(&self.0)) - } -} - -impl Serialize for RawStrArray { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - ByteStr(&self.0).serialize(serializer) - } -} - fn skip_bytes(r: &mut R, mut n: usize) -> Result<(), IoError> { thread_local! { static BUF: RefCell<[u8; 256]> = RefCell::new([0u8; 256]); diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index 5411e03eb5..32fceed38b 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -12,7 +12,7 @@ use std::{ }; use crate::{ - data::{ByteString, Datum}, + data::{ByteStrArray, ByteString, Datum}, dictionary::{ Alignment, Attributes, CategoryLabels, Measure, MissingValueRange, MissingValues, MissingValuesError, VarType, VarWidth, @@ -23,7 +23,7 @@ use crate::{ sys::{ raw::{ read_bytes, read_string, read_vec, Decoder, Error, ErrorDetails, Magic, RawDatum, - RawStrArray, RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails, + RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails, }, serialize_endian, ProductVersion, }, @@ -854,7 +854,7 @@ where } /// One line in a document. -pub type RawDocumentLine = RawStrArray; +pub type RawDocumentLine = ByteStrArray; /// Length of a line in a document. Document lines are fixed-length and /// padded on the right with spaces. @@ -885,7 +885,7 @@ impl DocumentRecord { let offsets = start_offset..start_offset.saturating_add((n * DOC_LINE_LEN) as u64); let mut lines = Vec::with_capacity(n); for _ in 0..n { - lines.push(RawStrArray( + lines.push(ByteStrArray( read_bytes(r).map_err(|e| Error::new(Some(offsets.clone()), e.into()))?, )); } @@ -1569,7 +1569,7 @@ where pub var_name: N, /// Missing values. - pub missing_values: Vec>, + pub missing_values: Vec>, } impl LongStringMissingValues { @@ -1636,7 +1636,7 @@ impl LongStringMissingValueRecord { } let value: [u8; 8] = read_bytes(&mut input)?; - missing_values.push(RawStrArray(value)); + missing_values.push(ByteStrArray(value)); } missing_value_set.push(LongStringMissingValues { var_name, @@ -2273,7 +2273,7 @@ impl Extension { } } - pub(super) fn read( + pub fn read( r: &mut R, endian: Endian, var_types: &VarTypes, diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index e61456d9e6..2297a828eb 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -1208,18 +1208,21 @@ mod tests { use binrw::{BinRead, Endian}; use encoding_rs::UTF_8; use itertools::Itertools; + use unicase::UniCase; use crate::{ - data::{ByteString, Datum}, + data::{ByteString, Datum, RawString}, dictionary::{ - CategoryLabels, DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, - MissingValueRange, MultipleResponseType, VarWidth, Variable, + Alignment, CategoryLabels, DictIndexMultipleResponseSet, DictIndexVariableSet, + Dictionary, Measure, MissingValueRange, MultipleResponseType, VarWidth, Variable, }, identifier::{ByIdentifier, Identifier}, sys::{ raw::{ - records::{DocumentRecord, RawHeader, RawVariableRecord, VariableRecord}, - Decoder, + records::{ + DocumentRecord, Extension, RawHeader, RawVariableRecord, VariableRecord, + }, + Decoder, VarTypes, }, write::DictionaryWriter, ReadOptions, WriteOptions, @@ -1436,6 +1439,19 @@ mod tests { ], None, ), + ( + VarWidth::String(10), + vec![ + Datum::String(ByteString::from("abcdeasd")), + Datum::String(ByteString::from("qwioejdf")), + ], + None, + ), + ( + VarWidth::String(11), + vec![Datum::String(ByteString::from("abcdeasd"))], + None, + ), ]; for (width, values, range) in test_cases { @@ -1453,16 +1469,17 @@ mod tests { } dictionary.add_var(variable).unwrap(); - let mut raw = Vec::new(); + // Write and check variable records. + let mut raw_variables = Vec::new(); DictionaryWriter::new( &WriteOptions::reproducible(None), - &mut Cursor::new(&mut raw), + &mut Cursor::new(&mut raw_variables), &dictionary, ) .write_variables() .unwrap(); - let mut cursor = Cursor::new(&raw[4..]); + let mut cursor = Cursor::new(&raw_variables[4..]); let record = VariableRecord::read(&mut cursor, Endian::Little, &mut |_| panic!()).unwrap(); if !width.is_long_string() { @@ -1471,6 +1488,47 @@ mod tests { assert_eq!(&record.missing_values.values, &vec![]); } assert_eq!(&record.missing_values.range, &range); + + // Write and check long string missing value record. + let mut raw_long_missing = Vec::new(); + DictionaryWriter::new( + &WriteOptions::reproducible(None), + &mut Cursor::new(&mut raw_long_missing), + &dictionary, + ) + .write_long_string_missing_values() + .unwrap(); + + if width.is_long_string() { + let mut cursor = Cursor::new(&raw_long_missing[4..]); + let record = Extension::read( + &mut cursor, + Endian::Little, + &VarTypes::new(), + &mut |_| panic!(), + ) + .unwrap() + .unwrap() + .as_long_string_missing_values() + .unwrap() + .clone() + .decode(&mut Decoder::new(UTF_8, |_| panic!())); + + assert_eq!(record.values.len(), 1); + assert_eq!(&record.values[0].var_name.0, &UniCase::new("var")); + let actual = record.values[0] + .missing_values + .iter() + .map(|v| v.raw_string_bytes()); + let expected = values + .iter() + .map(|v| v.as_string().unwrap().raw_string_bytes()); + for (actual, expected) in actual.zip_eq(expected) { + assert_eq!(actual, expected); + } + } else { + assert_eq!(raw_long_missing.len(), 0); + } } } @@ -1742,4 +1800,81 @@ $e=E 11 6 choice 0 n o p " ); } + + #[test] + fn variable_display_parameters() { + let variables = [ + (None, Alignment::Left, 10), + (Some(Measure::Nominal), Alignment::Right, 12), + (Some(Measure::Ordinal), Alignment::Center, 14), + (Some(Measure::Scale), Alignment::Right, 16), + ]; + let mut expected = Dictionary::new(UTF_8); + for (index, (measure, alignment, display_width)) in variables.into_iter().enumerate() { + let mut variable = Variable::new( + Identifier::new(format!("v{index}")).unwrap(), + VarWidth::Numeric, + UTF_8, + ); + variable.measure = measure; + variable.alignment = alignment; + variable.display_width = display_width; + expected.add_var(variable).unwrap(); + } + + let raw = WriteOptions::new() + .write_writer(&expected, Cursor::new(Vec::new())) + .unwrap() + .finish() + .unwrap() + .unwrap() + .into_inner(); + let actual = ReadOptions::new(|_| panic!()) + .open_reader(Cursor::new(raw)) + .unwrap() + .dictionary; + + fn display_parameters( + dictionary: &Dictionary, + ) -> impl Iterator, Alignment, u32)> { + dictionary + .variables + .iter() + .map(|variable| (variable.measure, variable.alignment, variable.display_width)) + } + assert!(display_parameters(&expected).eq(display_parameters(&actual))); + } + + #[test] + fn long_variable_names() { + let long_name = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@$"; + + let mut expected = Dictionary::new(UTF_8); + for name in (1..=64).map(|len| long_name[..len].to_string()) { + expected + .add_var(Variable::new( + Identifier::new(name).unwrap(), + VarWidth::Numeric, + UTF_8, + )) + .unwrap(); + } + + let raw = WriteOptions::new() + .write_writer(&expected, Cursor::new(Vec::new())) + .unwrap() + .finish() + .unwrap() + .unwrap() + .into_inner(); + let actual = ReadOptions::new(|_| panic!()) + .open_reader(Cursor::new(raw)) + .unwrap() + .dictionary; + + fn names(dictionary: &Dictionary) -> impl Iterator { + dictionary.variables.iter().map(|variable| &variable.name) + } + assert!(names(&expected).eq(names(&actual))); + } } -- 2.30.2