some negative tests
author Ben Pfaff <blp@cs.stanford.edu>
Tue, 10 Jun 2025 14:56:13 +0000 (07:56 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Tue, 10 Jun 2025 14:56:13 +0000 (07:56 -0700)
rust/pspp/src/sys/raw.rs
rust/pspp/src/sys/test.rs
rust/pspp/src/sys/testdata/misplaced_type_4_record.expected [new file with mode: 0644]
rust/pspp/src/sys/testdata/misplaced_type_4_record.sack [new file with mode: 0644]
rust/pspp/src/sys/testdata/unknown_encoding.expected [new file with mode: 0644]
rust/pspp/src/sys/testdata/unknown_encoding.sack [new file with mode: 0644]

index acd0a47fe236ebe11933067f84e02bae281f8ffb..c10e1ac81c6f52c53bfb380606755e40a6f0ad7f 100644 (file)
@@ -166,7 +166,7 @@ pub enum Warning {
     BadEncodingName { offset: u64 },
 
     // XXX This is risky because `text` might be arbitrarily long.
-    #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
+    #[error("Text string contains invalid bytes for {encoding} encoding: {text:?}")]
     MalformedString { encoding: String, text: String },
 
     #[error("Invalid variable measurement level value {0}")]
index 4a58599d943840e7250603f950bd9797ae81b53e..afd8706e4d860d31a11c6a9721bad75bfd8d00da 100644 (file)
@@ -8,7 +8,7 @@ use crate::{
     },
     sys::{
         cooked::{decode, Headers},
-        raw::{encoding_from_headers, Decoder, Reader, Record},
+        raw::{encoding_from_headers, Decoder, Reader},
         sack::sack,
     },
 };
@@ -117,6 +117,16 @@ fn no_variables() {
     test_sysfile("no_variables");
 }
 
+#[test]
+fn unknown_encoding() {
+    test_sysfile("unknown_encoding");
+}
+
+#[test]
+fn misplaced_type_4_record() {
+    test_sysfile("misplaced_type_4_record");
+}
+
 fn test_sysfile(name: &str) {
     let input_filename = Path::new(env!("CARGO_MANIFEST_DIR"))
         .join("src/sys/testdata")
@@ -130,62 +140,68 @@ fn test_sysfile(name: &str) {
         let cursor = Cursor::new(sysfile);
         let mut warnings = Vec::new();
         let reader = Reader::new(cursor, |warning| warnings.push(warning)).unwrap();
-        let headers: Vec<Record> = reader.collect::<Result<Vec<_>, _>>().unwrap();
-        let encoding =
-            encoding_from_headers(&headers, &mut |warning| warnings.push(warning)).unwrap();
-        let mut decoder = Decoder::new(encoding, |warning| warnings.push(warning));
-        let mut decoded_records = Vec::new();
-        for header in headers {
-            decoded_records.push(header.decode(&mut decoder).unwrap());
-        }
-        drop(decoder);
-
-        let mut errors = Vec::new();
-        let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap();
-        let (dictionary, metadata) = decode(headers, encoding, |e| errors.push(e)).unwrap();
-        let (group, data) = metadata.to_pivot_rows();
-        let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data(
-            data.into_iter()
-                .enumerate()
-                .filter(|(_row, value)| !value.is_empty())
-                .map(|(row, value)| ([row], value)),
-        );
-        let (group, data) = dictionary.to_pivot_rows();
-        let dictionary_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data(
-            data.into_iter()
-                .enumerate()
-                .filter(|(_row, value)| !value.is_empty())
-                .map(|(row, value)| ([row], value)),
-        );
-        let mut output = Vec::new();
-        output.extend(
-            warnings
-                .into_iter()
-                .map(|warning| Arc::new(Item::from(Text::new_log(warning.to_string())))),
-        );
-        output.extend(
-            errors
-                .into_iter()
-                .map(|error| Arc::new(Item::from(Text::new_log(error.to_string())))),
-        );
-        output.push(Arc::new(metadata_table.into()));
-        output.push(Arc::new(dictionary_table.into()));
-        output.push(Arc::new(
-            dictionary.output_variables().to_pivot_table().into(),
-        ));
-        if let Some(pt) = dictionary.output_value_labels().to_pivot_table() {
-            output.push(Arc::new(pt.into()));
-        }
-        if let Some(pt) = dictionary.output_mrsets().to_pivot_table() {
-            output.push(Arc::new(pt.into()));
-        }
-        if let Some(pt) = dictionary.output_attributes().to_pivot_table() {
-            output.push(Arc::new(pt.into()));
-        }
-        if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() {
-            output.push(Arc::new(pt.into()));
-        }
-        let output = Item::new(Details::Group(output));
+        let output = match reader.collect() {
+            Ok(headers) => {
+                let encoding =
+                    encoding_from_headers(&headers, &mut |warning| warnings.push(warning)).unwrap();
+                let mut decoder = Decoder::new(encoding, |warning| warnings.push(warning));
+                let mut decoded_records = Vec::new();
+                for header in headers {
+                    decoded_records.push(header.decode(&mut decoder).unwrap());
+                }
+                drop(decoder);
+
+                let mut errors = Vec::new();
+                let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap();
+                let (dictionary, metadata) = decode(headers, encoding, |e| errors.push(e)).unwrap();
+                let (group, data) = metadata.to_pivot_rows();
+                let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))])
+                    .with_data(
+                        data.into_iter()
+                            .enumerate()
+                            .filter(|(_row, value)| !value.is_empty())
+                            .map(|(row, value)| ([row], value)),
+                    );
+                let (group, data) = dictionary.to_pivot_rows();
+                let dictionary_table = PivotTable::new([(Axis3::Y, Dimension::new(group))])
+                    .with_data(
+                        data.into_iter()
+                            .enumerate()
+                            .filter(|(_row, value)| !value.is_empty())
+                            .map(|(row, value)| ([row], value)),
+                    );
+                let mut output = Vec::new();
+                output.extend(
+                    warnings
+                        .into_iter()
+                        .map(|warning| Arc::new(Item::from(Text::new_log(warning.to_string())))),
+                );
+                output.extend(
+                    errors
+                        .into_iter()
+                        .map(|error| Arc::new(Item::from(Text::new_log(error.to_string())))),
+                );
+                output.push(Arc::new(metadata_table.into()));
+                output.push(Arc::new(dictionary_table.into()));
+                output.push(Arc::new(
+                    dictionary.output_variables().to_pivot_table().into(),
+                ));
+                if let Some(pt) = dictionary.output_value_labels().to_pivot_table() {
+                    output.push(Arc::new(pt.into()));
+                }
+                if let Some(pt) = dictionary.output_mrsets().to_pivot_table() {
+                    output.push(Arc::new(pt.into()));
+                }
+                if let Some(pt) = dictionary.output_attributes().to_pivot_table() {
+                    output.push(Arc::new(pt.into()));
+                }
+                if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() {
+                    output.push(Arc::new(pt.into()));
+                }
+                Item::new(Details::Group(output))
+            }
+            Err(error) => Item::new(Details::Text(Box::new(Text::new_log(error.to_string())))),
+        };
 
         assert_lines_eq(
             &expected,
diff --git a/rust/pspp/src/sys/testdata/misplaced_type_4_record.expected b/rust/pspp/src/sys/testdata/misplaced_type_4_record.expected
new file mode 100644 (file)
index 0000000..742db79
--- /dev/null
@@ -0,0 +1 @@
+At offset 0xd4, unrecognized record type 4.
diff --git a/rust/pspp/src/sys/testdata/misplaced_type_4_record.sack b/rust/pspp/src/sys/testdata/misplaced_type_4_record.sack
new file mode 100644 (file)
index 0000000..4b0674c
--- /dev/null
@@ -0,0 +1,9 @@
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Numeric variable.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# Type 4 record.
+>>4<<;
diff --git a/rust/pspp/src/sys/testdata/unknown_encoding.expected b/rust/pspp/src/sys/testdata/unknown_encoding.expected
new file mode 100644 (file)
index 0000000..a02e3ab
--- /dev/null
@@ -0,0 +1,26 @@
+This system file does not indicate its own character encoding.  For best results, specify an encoding explicitly.  Use SYSFILE INFO with ENCODING="DETECT" to analyze the possible encodings.
+
+Text string contains invalid bytes for UTF-8 encoding: "PSPP synthetic test file: ����                                  "
+
+╭──────────────────────┬────────────────────────╮
+│       Created        │    01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product        │PSPP synthetic test file│
+├──────────────────────┼────────────────────────┤
+│       Compression    │None                    │
+│       Number of Cases│                       0│
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬──────────────────────────────╮
+│Label    │PSPP synthetic test file: ����│
+│Variables│                             4│
+╰─────────┴──────────────────────────────╯
+
+╭─┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├─┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│a│       1│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│b│       2│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│c│       3│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│d│       4│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+╰─┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
diff --git a/rust/pspp/src/sys/testdata/unknown_encoding.sack b/rust/pspp/src/sys/testdata/unknown_encoding.sack
new file mode 100644 (file)
index 0000000..18194f1
--- /dev/null
@@ -0,0 +1,20 @@
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; # Layout code
+4; # Nominal case size
+0; # Not compressed
+0; # Not weighted
+0; # No cases.
+100.0; # Bias.
+"01 Jan 11"; "20:53:52";
+"PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 "";
+i8 0 *3;
+
+# Numeric variables.
+2; 0; 0; 0; 0x050800 *2; s8 "A";
+2; 0; 0; 0; 0x050800 *2; s8 "B";
+2; 0; 0; 0; 0x050800 *2; s8 "C";
+2; 0; 0; 0; 0x050800 *2; s8 "D";
+
+# Dictionary termination record.
+999; 0;