test writing value labels
authorBen Pfaff <blp@cs.stanford.edu>
Sun, 3 Aug 2025 22:51:57 +0000 (15:51 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Sun, 3 Aug 2025 22:51:57 +0000 (15:51 -0700)
rust/pspp/src/dictionary.rs
rust/pspp/src/sys/write.rs

index 1cd3cbfa27d03a6f13edcbbe1d02be7d73efd7e2..a07db8dd3964a0108fd57bad08a98af34d7cab40 100644 (file)
@@ -199,9 +199,10 @@ impl VarWidth {
     }
 
     /// Returns the number of 8-byte chunks used for writing case data for a
-    /// variable with this width.  This concept does not apply to very long
-    /// string variables, which are divided into [multiple
-    /// segments](Self::n_segments) (which in turn are divided into chunks).
+    /// variable with this width.  Very long string variables (wider than 255
+    /// bytes) cannot directly be divided into chunks: they must first be
+    /// divided into multiple [segments](Self::segments), which can then be
+    /// divided into chunks.
     pub fn n_chunks(&self) -> Option<usize> {
         match *self {
             VarWidth::Numeric => Some(1),
index 5444b374d807c297a7813493cebb44765799f393..cb05364e3c487b28945a2cead2110dddca16e27f 100644 (file)
@@ -410,14 +410,22 @@ where
             // Label record.
             (3u32, value_labels.0.len() as u32).write_le(self.writer)?;
             for (datum, label) in &value_labels.0 {
+                let datum_padding = datum.width().as_string_width().map_or(0, |width| 8 - width);
                 let label = &*self.dictionary.encoding().encode(&label).0;
                 let label = if label.len() > 255 {
                     &label[..255]
                 } else {
                     label
                 };
-                let padding = (1 + label.len()).next_multiple_of(8) - (1 + label.len());
-                (datum, label.len() as u8, label, Zeros(padding)).write_le(self.writer)?;
+                let label_padding = (1 + label.len()).next_multiple_of(8) - (1 + label.len());
+                (
+                    datum,
+                    Zeros(datum_padding),
+                    label.len() as u8,
+                    label,
+                    Zeros(label_padding),
+                )
+                    .write_le(self.writer)?;
             }
 
             // Variable record.
@@ -610,7 +618,7 @@ where
         let mut cursor = Cursor::new(&mut body);
         for variable in &self.dictionary.variables {
             if variable.value_labels.is_empty() || !variable.width.is_long_string() {
-                break;
+                continue;
             }
             let name = self.dictionary.encoding().encode(&variable.name).0;
             (
@@ -1198,7 +1206,6 @@ mod tests {
 
     use binrw::{BinRead, Endian};
     use encoding_rs::UTF_8;
-    use hexplay::HexView;
     use itertools::Itertools;
 
     use crate::{
@@ -1206,9 +1213,9 @@ mod tests {
         dictionary::{Dictionary, MissingValueRange, VarWidth, Variable},
         identifier::Identifier,
         sys::{
-            raw::records::{RawHeader, RawVariableRecord, ValueLabelRecord, VariableRecord},
+            raw::records::{RawHeader, RawVariableRecord, VariableRecord},
             write::DictionaryWriter,
-            WriteOptions,
+            ReadOptions, WriteOptions,
         },
     };
 
@@ -1463,37 +1470,87 @@ mod tests {
     /// Checks that value labels are written correctly.
     #[test]
     fn variables_value_labels() {
-        let test_cases = [(
-            VarWidth::Numeric,
-            1,
-            vec![(Datum::Number(Some(1.0)), "One")],
-        )];
+        let variables = [
+            (VarWidth::Numeric, vec![(Datum::Number(Some(1.0)), "One")]),
+            (
+                VarWidth::Numeric,
+                vec![
+                    (Datum::Number(Some(1.0)), "One"),
+                    (Datum::Number(Some(2.0)), "Two"),
+                ],
+            ),
+            (
+                VarWidth::Numeric,
+                vec![
+                    (Datum::Number(Some(1.0)), "One"),
+                    (Datum::Number(Some(2.0)), "Two"),
+                    (Datum::Number(Some(3.0)), "Three"),
+                ],
+            ),
+            (
+                VarWidth::String(4),
+                vec![(Datum::String(ByteString::from("abcd")), "One")],
+            ),
+            (
+                VarWidth::String(8),
+                vec![(
+                    Datum::String(ByteString::from("abcdefgh")),
+                    "Longer variable label",
+                )],
+            ),
+            (
+                VarWidth::String(9),
+                vec![(
+                    Datum::String(ByteString::from("abcdefghi")),
+                    "Variable label for 9-byte value",
+                )],
+            ),
+            (
+                VarWidth::String(300),
+                vec![(
+                    Datum::String(ByteString::from(vec![b'x'; 300])),
+                    "Variable label for 300-byte value",
+                )],
+            ),
+        ];
 
-        for (width, n_chunks, value_labels) in test_cases {
+        for test_case in variables.iter().combinations_with_replacement(3) {
             let mut dictionary = Dictionary::new(UTF_8);
-            let mut variable = Variable::new(Identifier::new("var").unwrap(), width, UTF_8);
-            for (value, label) in &value_labels {
-                variable
-                    .value_labels
-                    .insert(value.clone(), (*label).into())
-                    .unwrap();
+            for (index, (width, value_labels)) in test_case.iter().enumerate() {
+                let mut variable = Variable::new(
+                    Identifier::new(format!("var{index}")).unwrap(),
+                    *width,
+                    UTF_8,
+                );
+                for (value, label) in value_labels {
+                    assert_eq!(
+                        variable.value_labels.insert(value.clone(), (*label).into()),
+                        None
+                    );
+                }
+                dictionary.add_var(variable).unwrap();
+            }
+            dbg!(&dictionary);
+
+            let raw = WriteOptions::new()
+                .write_writer(&dictionary, Cursor::new(Vec::new()))
+                .unwrap()
+                .finish()
+                .unwrap()
+                .unwrap()
+                .into_inner();
+            let dictionary2 = ReadOptions::new(|_| panic!())
+                .open_reader(Cursor::new(raw))
+                .unwrap()
+                .dictionary;
+
+            for (expected, actual) in dictionary
+                .variables
+                .iter()
+                .zip_eq(dictionary2.variables.iter())
+            {
+                assert_eq!(&expected.value_labels, &actual.value_labels);
             }
-            dictionary.add_var(variable).unwrap();
-
-            let mut raw = Vec::new();
-            DictionaryWriter::new(
-                &WriteOptions::reproducible(None),
-                &mut Cursor::new(&mut raw),
-                &dictionary,
-            )
-            .write_value_labels()
-            .unwrap();
-            println!("{}", HexView::new(&raw));
-
-            let mut cursor = Cursor::new(&raw[4..]);
-            //let record =
-            //ValueLabelRecord::read(&mut cursor, Endian::Little, &mut |_| panic!()).unwrap();
-            //dbg!(&record);
         }
     }
 }