Add more write tests.
authorBen Pfaff <blp@cs.stanford.edu>
Sun, 3 Aug 2025 16:36:19 +0000 (09:36 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Sun, 3 Aug 2025 16:36:19 +0000 (09:36 -0700)
rust/pspp/src/dictionary.rs
rust/pspp/src/sys/cooked.rs
rust/pspp/src/sys/raw.rs
rust/pspp/src/sys/raw/records.rs
rust/pspp/src/sys/write.rs

index d42abfafe2ae7bad6e8e9cacd552bedb4f1b6094..509ef7f8190eeafa9248e87d9e33c5492f93eb1d 100644 (file)
@@ -177,7 +177,7 @@ impl VarWidth {
 
     /// Returns true if this is a very long string width, meaning wider than 255
     /// bytes, which was the limit for old versions of SPSS.
-    pub fn is_very_long(&self) -> bool {
+    pub fn is_very_long_string(&self) -> bool {
         match *self {
             VarWidth::Numeric => false,
             VarWidth::String(width) => width > 255,
@@ -194,7 +194,7 @@ impl VarWidth {
     /// a PSPP user.  Only very long string variables have more than one
     /// segment.
     pub fn n_segments(&self) -> usize {
-        if self.is_very_long() {
+        if self.is_very_long_string() {
             self.as_string_width().unwrap().div_ceil(Self::SEGMENT_SIZE)
         } else {
             1
@@ -219,7 +219,7 @@ impl VarWidth {
     /// by a PSPP user.
     pub fn segment_alloc_width(&self, segment_idx: usize) -> usize {
         debug_assert!(segment_idx < self.n_segments());
-        debug_assert!(self.is_very_long());
+        debug_assert!(self.is_very_long_string());
 
         if segment_idx < self.n_segments() - 1 {
             255
@@ -712,7 +712,10 @@ impl Dictionary {
                                 8,
                             )
                             .or_else(|_| {
-                                Identifier::new(format!("V{}", Display26Adic::new_uppercase(self.index)))
+                                Identifier::new(format!(
+                                    "V{}",
+                                    Display26Adic::new_uppercase(self.index)
+                                ))
                             })
                             .unwrap()
                     };
@@ -2006,6 +2009,8 @@ impl<'a> MissingValuesMut<'a> {
             Err(MissingValuesError::TooMany)
         } else if value.var_type() != VarType::from(self.width) {
             Err(MissingValuesError::MixedTypes)
+        } else if value == Datum::Number(None) {
+            Err(MissingValuesError::SystemMissing)
         } else if value.resize(self.width).is_err() {
             Err(MissingValuesError::TooWide)
         } else {
@@ -2082,6 +2087,7 @@ pub enum MissingValuesError {
     TooMany,
     TooWide,
     MixedTypes,
+    SystemMissing,
 }
 
 impl From<ResizeError> for MissingValuesError {
@@ -2182,7 +2188,7 @@ impl MissingValues {
     }
 }
 
-#[derive(Copy, Clone, Debug, Serialize)]
+#[derive(Copy, Clone, Debug, Serialize, PartialEq)]
 pub enum MissingValueRange {
     In { low: f64, high: f64 },
     From { low: f64 },
index 63fca6744c6661cf3c6a78c67c4e3b598a1c0099..8609590e6cca38d3416ea1e3318dfbc8c49eee8f 100644 (file)
@@ -1271,7 +1271,9 @@ impl Records {
                 Err(MissingValuesError::TooWide) => {
                     warn(Error::MissingValuesTooWide(record.var_name.clone()))
                 }
-                Err(MissingValuesError::TooMany) | Err(MissingValuesError::MixedTypes) => {
+                Err(MissingValuesError::TooMany)
+                | Err(MissingValuesError::MixedTypes)
+                | Err(MissingValuesError::SystemMissing) => {
                     unreachable!()
                 }
             }
index 9b88153a8fa00e246284932b85394cd6391d5a8f..7dce91626cb3cfe207e38993ed983d50f40b4000 100644 (file)
@@ -616,7 +616,7 @@ impl Record {
     {
         let rec_type: u32 = endian.parse(read_bytes(reader)?);
         match rec_type {
-            2 => Ok(Some(VariableRecord::read(reader, endian, warn)?)),
+            2 => Ok(Some(Record::Variable(VariableRecord::read(reader, endian, warn)?))),
             3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
             6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
             7 => Extension::read(reader, endian, var_types, warn),
index d21a8383ae762c47a69d11d9922bbb1c88da7a2d..2a76362e677e9ff5c08855f71d38d08dc70d2548 100644 (file)
@@ -542,11 +542,7 @@ pub struct RawVariableRecord {
 
 impl VariableRecord<ByteString> {
     /// Reads a variable record from `r`.
-    pub fn read<R>(
-        r: &mut R,
-        endian: Endian,
-        warn: &mut dyn FnMut(Warning),
-    ) -> Result<Record, Error>
+    pub fn read<R>(r: &mut R, endian: Endian, warn: &mut dyn FnMut(Warning)) -> Result<Self, Error>
     where
         R: Read + Seek,
     {
@@ -595,7 +591,7 @@ impl VariableRecord<ByteString> {
 
         let end_offset = r.stream_position()?;
 
-        Ok(Record::Variable(VariableRecord {
+        Ok(Self {
             offsets: start_offset..end_offset,
             width,
             name: raw_record.name.into(),
@@ -603,7 +599,7 @@ impl VariableRecord<ByteString> {
             write_format: raw_record.write_format,
             missing_values,
             label,
-        }))
+        })
     }
 
     /// Decodes a variable record using `decoder`.
index b2a8da96b5125bb535be7a0cbd2be19967e3d515..bd880124427583ae3696b4d13fcd6c7da8958d7d 100644 (file)
@@ -327,7 +327,13 @@ where
                     )
                         .write_le(self.writer)?;
                 }
-                variable.missing_values().values().write_le(self.writer)?;
+                let pad = variable
+                    .width
+                    .as_string_width()
+                    .map_or(0, |width| 8 - width);
+                for value in variable.missing_values().values() {
+                    (value, Zeros(pad)).write_le(self.writer)?;
+                }
             }
             write_variable_continuation_records(&mut self.writer, seg0_width)?;
 
@@ -589,7 +595,7 @@ where
     fn write_very_long_strings(&mut self) -> Result<(), BinError> {
         let mut s = String::new();
         for (index, variable) in self.dictionary.variables.iter().enumerate() {
-            if variable.width.is_very_long() {
+            if variable.width.is_very_long_string() {
                 let width = variable.width.as_string_width().unwrap();
                 write!(&mut s, "{}={width:05}\0\t", &self.short_names[index][0],).unwrap();
             }
@@ -1223,20 +1229,24 @@ where
 mod tests {
     use std::io::Cursor;
 
-    use binrw::BinRead;
+    use binrw::{BinRead, Endian};
     use encoding_rs::UTF_8;
+    use hexplay::HexView;
     use itertools::Itertools;
 
     use crate::{
-        dictionary::{Dictionary, VarWidth, Variable},
+        data::{ByteString, Datum},
+        dictionary::{Dictionary, MissingValueRange, VarWidth, Variable},
         identifier::Identifier,
         sys::{
-            raw::records::{RawHeader, RawVariableRecord},
+            raw::records::{RawHeader, RawVariableRecord, VariableRecord},
             write::DictionaryWriter,
             WriteOptions,
         },
     };
 
+    /// Checks that the header record has the right nominal case size and weight
+    /// index, even with long and very long string variables.
     #[test]
     fn header() {
         for variables in [
@@ -1286,8 +1296,11 @@ mod tests {
         }
     }
 
+    /// Checks that variable records are followed by the right number of
+    /// continuation records, and that very long string variables have the right
+    /// number of segment variables.
     #[test]
-    fn variables() {
+    fn variables_widths() {
         let variables = [
             (VarWidth::Numeric, vec![0]),
             (VarWidth::String(1), vec![1]),
@@ -1355,4 +1368,130 @@ mod tests {
             }
         }
     }
+
+    /// Checks that missing values are written correctly.
+    #[test]
+    fn variables_missing_values() {
+        let test_cases = [
+            (VarWidth::Numeric, vec![Datum::Number(Some(1.0))], None),
+            (
+                VarWidth::Numeric,
+                vec![Datum::Number(Some(1.0)), Datum::Number(Some(2.0))],
+                None,
+            ),
+            (
+                VarWidth::Numeric,
+                vec![
+                    Datum::Number(Some(1.0)),
+                    Datum::Number(Some(2.0)),
+                    Datum::Number(Some(3.0)),
+                ],
+                None,
+            ),
+            (
+                VarWidth::Numeric,
+                vec![],
+                Some(MissingValueRange::In {
+                    low: 10.0,
+                    high: 20.0,
+                }),
+            ),
+            (
+                VarWidth::Numeric,
+                vec![],
+                Some(MissingValueRange::From { low: 100.0 }),
+            ),
+            (
+                VarWidth::Numeric,
+                vec![],
+                Some(MissingValueRange::To { high: 200.0 }),
+            ),
+            (
+                VarWidth::Numeric,
+                vec![Datum::Number(Some(1.0))],
+                Some(MissingValueRange::In {
+                    low: 10.0,
+                    high: 20.0,
+                }),
+            ),
+            (
+                VarWidth::Numeric,
+                vec![Datum::Number(Some(1.0))],
+                Some(MissingValueRange::From { low: 100.0 }),
+            ),
+            (
+                VarWidth::Numeric,
+                vec![Datum::Number(Some(1.0))],
+                Some(MissingValueRange::To { high: 200.0 }),
+            ),
+            (
+                VarWidth::String(5),
+                vec![Datum::String(ByteString::from("abcde"))],
+                None,
+            ),
+            (
+                VarWidth::String(5),
+                vec![
+                    Datum::String(ByteString::from("abcde")),
+                    Datum::String(ByteString::from("qwioe")),
+                ],
+                None,
+            ),
+            (
+                VarWidth::String(5),
+                vec![
+                    Datum::String(ByteString::from("abcde")),
+                    Datum::String(ByteString::from("qwioe")),
+                    Datum::String(ByteString::from("jksld")),
+                ],
+                None,
+            ),
+            (
+                VarWidth::String(9),
+                vec![
+                    Datum::String(ByteString::from("abcdeasd")),
+                    Datum::String(ByteString::from("qwioejdf")),
+                    Datum::String(ByteString::from("jksldiwe")),
+                ],
+                None,
+            ),
+        ];
+
+        for (width, values, range) in test_cases {
+            let mut dictionary = Dictionary::new(UTF_8);
+            let mut variable = Variable::new(Identifier::new("var").unwrap(), width, UTF_8);
+            variable
+                .missing_values_mut()
+                .add_values(values.iter().map(|value| value.as_encoded(UTF_8).cloned()))
+                .unwrap();
+            if let Some(range) = &range {
+                variable
+                    .missing_values_mut()
+                    .add_range(range.clone())
+                    .unwrap();
+            }
+            dictionary.add_var(variable).unwrap();
+
+            let mut raw = Vec::new();
+            DictionaryWriter::new(
+                &WriteOptions::reproducible(None),
+                &mut Cursor::new(&mut raw),
+                &dictionary,
+            )
+            .write_variables()
+            .unwrap();
+            println!("{}", HexView::new(&raw));
+
+            let mut cursor = Cursor::new(&raw[4..]);
+            let record =
+                VariableRecord::read(&mut cursor, Endian::Little, &mut |_| panic!()).unwrap();
+            dbg!(&record);
+            if !width.is_long_string() {
+                assert_eq!(&record.missing_values.values, &values);
+            } else {
+                assert_eq!(&record.missing_values.values, &vec![]);
+            }
+            assert_eq!(&record.missing_values.range, &range);
+        }
+    }
 }