more tests
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 6 Jul 2025 19:57:14 +0000 (12:57 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 6 Jul 2025 19:57:14 +0000 (12:57 -0700)
18 files changed:
rust/pspp/src/identifier.rs
rust/pspp/src/sys/cooked.rs
rust/pspp/src/sys/raw.rs
rust/pspp/src/sys/test.rs
rust/pspp/src/sys/testdata/compressed_data.expected
rust/pspp/src/sys/testdata/compressed_data_other_bias.expected
rust/pspp/src/sys/testdata/compressed_data_zero_bias.expected
rust/pspp/src/sys/testdata/duplicate_long_variable_name.expected
rust/pspp/src/sys/testdata/fewer_data_records_than_indicated_by_file_header.expected
rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.expected
rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sack [deleted file]
rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sav [new file with mode: 0644]
rust/pspp/src/sys/testdata/invalid_long_string_missing_values.expected
rust/pspp/src/sys/testdata/invalid_long_string_missing_values.sack
rust/pspp/src/sys/testdata/more_data_records_than_indicated_by_file_header.expected [new file with mode: 0644]
rust/pspp/src/sys/testdata/more_data_records_than_indicated_by_file_header.sack [new file with mode: 0644]
rust/pspp/src/sys/testdata/value_labels.expected
rust/pspp/src/sys/testdata/value_labels.sack

index e891347f9d6c3c38089b4706aca10fda3c6b9d2b..191744a1e213abc1ab211a29a47f036867757aad 100644 (file)
@@ -104,10 +104,12 @@ pub enum Error {
     #[error("\"!\" is not a valid identifier.")]
     Bang,
 
-    #[error("\"{0}\" may not be used as an identifier because it begins with disallowed character \"{1}\".")]
+    #[error("\"{0}\" may not be used as an identifier because it begins with disallowed character {1:?}.")]
     BadFirstCharacter(String, char),
 
-    #[error("\"{0}\" may not be used as an identifier because it contains disallowed character \"{1}\".")]
+    #[error(
+        "\"{0}\" may not be used as an identifier because it contains disallowed character {1:?}."
+    )]
     BadLaterCharacter(String, char),
 
     #[error("Identifier \"{id}\" is {length} bytes in the encoding in use ({encoding}), which exceeds the {max}-byte limit.")]
index 43ad3c5f599209c185dfe9ebb0b8feb31c09addd..bf4169fc286c4ab4af25795b7573d3a821bdc9c7 100644 (file)
@@ -546,7 +546,11 @@ impl Metadata {
             creation,
             endian: header.endian,
             compression: header.compression,
-            n_cases: header.n_cases.map(|n| n as u64),
+            n_cases: headers
+                .number_of_cases
+                .as_ref()
+                .map(|record| record.n_cases)
+                .or_else(|| header.n_cases.map(|n| n as u64)),
             product,
             product_ext: headers.product_info.as_ref().map(|pe| fix_line_ends(&pe.0)),
             version: headers.integer_info.as_ref().map(|ii| ii.version),
@@ -1093,6 +1097,9 @@ pub fn decode(
     }
 
     let metadata = Metadata::decode(&headers, warn);
+    if let Some(n_cases) = metadata.n_cases {
+        cases = cases.take().map(|cases| cases.with_expected_cases(n_cases))
+    }
     Ok((dictionary, metadata, cases))
 }
 
index 1317c2d8359c45e332c5a784f8721a0f198d8ee1..f81a671ea634c7a80cd89d703cc2b5ba2ed1713b 100644 (file)
@@ -115,6 +115,9 @@ pub enum Error {
         ztrailer_len: u64,
     },
 
+    #[error("File metadata says it contains {expected} cases, but {actual} cases were read.")]
+    WrongNumberOfCases { expected: u64, actual: u64 },
+
     #[error("{0}")]
     EncodingError(EncodingError),
 }
@@ -1181,6 +1184,8 @@ pub struct Cases {
     endian: Endian,
     codes: VecDeque<u8>,
     eof: bool,
+    expected_cases: Option<u64>,
+    read_cases: u64,
 }
 
 impl Debug for Cases {
@@ -1198,7 +1203,9 @@ impl Default for Cases {
             bias: 100.0,
             endian: Endian::Little,
             codes: VecDeque::new(),
-            eof: true,
+            eof: false,
+            expected_cases: None,
+            read_cases: 0,
         }
     }
 }
@@ -1208,33 +1215,39 @@ impl Cases {
     where
         R: Read + Seek + 'static,
     {
-        let case_vars = var_types
-            .types
-            .iter()
-            .flatten()
-            .copied()
-            .map(CaseVar::new)
-            .collect::<Vec<_>>();
         Self {
             reader: if header.compression == Some(Compression::ZLib) {
                 Box::new(ZlibDecodeMultiple::new(reader))
             } else {
                 Box::new(reader)
             },
-            eof: case_vars.is_empty(),
-            case_vars,
+            eof: false,
+            case_vars: var_types
+                .types
+                .iter()
+                .flatten()
+                .copied()
+                .map(CaseVar::new)
+                .collect::<Vec<_>>(),
             compression: header.compression,
             bias: header.bias,
             endian: header.endian,
             codes: VecDeque::with_capacity(8),
+            expected_cases: None,
+            read_cases: 0,
         }
     }
 
     pub fn with_widths(self, widths: impl IntoIterator<Item = VarWidth>) -> Self {
-        let case_vars = widths.into_iter().map(CaseVar::new).collect::<Vec<_>>();
         Self {
-            eof: self.eof || case_vars.is_empty(),
-            case_vars,
+            case_vars: widths.into_iter().map(CaseVar::new).collect::<Vec<_>>(),
+            ..self
+        }
+    }
+
+    pub fn with_expected_cases(self, expected_cases: u64) -> Self {
+        Self {
+            expected_cases: Some(expected_cases),
             ..self
         }
     }
@@ -1248,7 +1261,9 @@ impl Iterator for Cases {
             return None;
         }
 
-        let retval = if self.compression.is_some() {
+        let retval = if self.case_vars.is_empty() {
+            None
+        } else if self.compression.is_some() {
             Datum::read_compressed_case(
                 &mut self.reader,
                 &self.case_vars,
@@ -1260,7 +1275,25 @@ impl Iterator for Cases {
         } else {
             Datum::read_case(&mut self.reader, &self.case_vars, self.endian).transpose()
         };
-        self.eof = matches!(retval, None | Some(Err(_)));
+        match &retval {
+            None => {
+                self.eof = true;
+                if let Some(expected_cases) = self.expected_cases
+                    && expected_cases != self.read_cases
+                {
+                    return Some(Err(Error::WrongNumberOfCases {
+                        expected: expected_cases,
+                        actual: self.read_cases,
+                    }));
+                } else {
+                    return None;
+                }
+            }
+            Some(Ok(_)) => {
+                self.read_cases += 1;
+            }
+            Some(Err(_)) => self.eof = true,
+        };
         retval
     }
 }
index aa2af100121d336446348c9b342a8d3f7888402b..255a30539a564f044bab0768cd7558faf2efd716 100644 (file)
@@ -17,489 +17,499 @@ use enum_iterator::all;
 
 #[test]
 fn variable_labels_and_missing_values() {
-    test_sysfile("variable_labels_and_missing_values");
+    test_sack_sysfile("variable_labels_and_missing_values");
 }
 
 #[test]
 fn unspecified_number_of_variable_positions() {
-    test_sysfile("unspecified_number_of_variable_positions");
+    test_sack_sysfile("unspecified_number_of_variable_positions");
 }
 
 #[test]
 fn wrong_variable_positions_but_v13() {
-    test_sysfile("wrong_variable_positions_but_v13");
+    test_sack_sysfile("wrong_variable_positions_but_v13");
 }
 
 #[test]
 fn value_labels() {
-    test_sysfile("value_labels");
+    test_sack_sysfile("value_labels");
 }
 
 #[test]
 fn documents() {
-    test_sysfile("documents");
+    test_sack_sysfile("documents");
 }
 
 #[test]
 fn empty_document_record() {
-    test_sysfile("empty_document_record");
+    test_sack_sysfile("empty_document_record");
 }
 
 #[test]
 fn variable_sets() {
-    test_sysfile("variable_sets");
+    test_sack_sysfile("variable_sets");
 }
 
 #[test]
 fn multiple_response_sets() {
-    test_sysfile("multiple_response_sets");
+    test_sack_sysfile("multiple_response_sets");
 }
 
 #[test]
 fn extra_product_info() {
     // Also checks for handling of CR-only line ends in file label and extra
     // product info.
-    test_sysfile("extra_product_info");
+    test_sack_sysfile("extra_product_info");
 }
 
 #[test]
 fn variable_display_without_width() {
-    test_sysfile("variable_display_without_width");
+    test_sack_sysfile("variable_display_without_width");
 }
 
 #[test]
 fn variable_display_with_width() {
-    test_sysfile("variable_display_with_width");
+    test_sack_sysfile("variable_display_with_width");
 }
 
 #[test]
 fn long_variable_names() {
-    test_sysfile("long_variable_names");
+    test_sack_sysfile("long_variable_names");
 }
 
 #[test]
 fn very_long_strings() {
-    test_sysfile("very_long_strings");
+    test_sack_sysfile("very_long_strings");
 }
 
 #[test]
 fn attributes() {
-    test_sysfile("attributes");
+    test_sack_sysfile("attributes");
 }
 
 #[test]
 fn variable_roles() {
-    test_sysfile("variable_roles");
+    test_sack_sysfile("variable_roles");
 }
 
 #[test]
 fn compressed_data() {
-    test_sysfile("compressed_data");
+    test_sack_sysfile("compressed_data");
 }
 
 #[test]
 fn compressed_data_zero_bias() {
-    test_sysfile("compressed_data_zero_bias");
+    test_sack_sysfile("compressed_data_zero_bias");
 }
 
 #[test]
 fn compressed_data_other_bias() {
-    test_sysfile("compressed_data_other_bias");
+    test_sack_sysfile("compressed_data_other_bias");
 }
 
 #[test]
 fn zcompressed_data() {
-    test_sysfile("zcompressed_data");
+    test_sack_sysfile("zcompressed_data");
 }
 
 #[test]
 fn no_variables() {
-    test_sysfile("no_variables");
+    test_sack_sysfile("no_variables");
 }
 
 #[test]
 fn unknown_encoding() {
-    test_sysfile("unknown_encoding");
+    test_sack_sysfile("unknown_encoding");
 }
 
 #[test]
 fn misplaced_type_4_record() {
-    test_sysfile("misplaced_type_4_record");
+    test_sack_sysfile("misplaced_type_4_record");
 }
 
 #[test]
 fn bad_record_type() {
-    test_sysfile("bad_record_type");
+    test_sack_sysfile("bad_record_type");
 }
 
 #[test]
 fn wrong_variable_positions() {
-    test_sysfile("wrong_variable_positions");
+    test_sack_sysfile("wrong_variable_positions");
 }
 
 #[test]
 fn invalid_variable_name() {
-    test_sysfile("invalid_variable_name");
+    test_sack_sysfile("invalid_variable_name");
 }
 
 #[test]
 fn invalid_label_indicator() {
-    test_sysfile("invalid_label_indicator");
+    test_sack_sysfile("invalid_label_indicator");
 }
 
 #[test]
 fn invalid_missing_indicator() {
-    test_sysfile("invalid_missing_indicator");
+    test_sack_sysfile("invalid_missing_indicator");
 }
 
 #[test]
 fn invalid_missing_indicator2() {
-    test_sysfile("invalid_missing_indicator2");
+    test_sack_sysfile("invalid_missing_indicator2");
 }
 
 #[test]
 fn missing_string_continuation() {
-    test_sysfile("missing_string_continuation");
+    test_sack_sysfile("missing_string_continuation");
 }
 
 #[test]
 fn invalid_variable_format() {
-    test_sysfile("invalid_variable_format");
+    test_sack_sysfile("invalid_variable_format");
 }
 
 #[test]
 fn invalid_long_string_missing_values() {
-    test_sysfile("invalid_long_string_missing_values");
+    test_sack_sysfile("invalid_long_string_missing_values");
 }
 
 #[test]
 fn weight_must_be_numeric() {
-    test_sysfile("weight_must_be_numeric");
+    test_sack_sysfile("weight_must_be_numeric");
 }
 
 #[test]
 fn weight_variable_bad_index() {
-    test_sysfile("weight_variable_bad_index");
+    test_sack_sysfile("weight_variable_bad_index");
 }
 
 #[test]
 fn weight_variable_continuation() {
-    test_sysfile("weight_variable_continuation");
+    test_sack_sysfile("weight_variable_continuation");
 }
 
 #[test]
 fn multiple_documents_records() {
-    test_sysfile("multiple_documents_records");
+    test_sack_sysfile("multiple_documents_records");
 }
 
 #[test]
 fn unknown_extension_record() {
-    test_sysfile("unknown_extension_record");
+    test_sack_sysfile("unknown_extension_record");
 }
 
 #[test]
 fn extension_too_large() {
-    test_sysfile("extension_too_large");
+    test_sack_sysfile("extension_too_large");
 }
 
 #[test]
 fn bad_machine_integer_info_count() {
-    test_sysfile("bad_machine_integer_info_count");
+    test_sack_sysfile("bad_machine_integer_info_count");
 }
 
 #[test]
 fn bad_machine_integer_info_float_format() {
-    test_sysfile("bad_machine_integer_info_float_format");
+    test_sack_sysfile("bad_machine_integer_info_float_format");
 }
 
 #[test]
 fn bad_machine_integer_info_endianness() {
-    test_sysfile("bad_machine_integer_info_endianness");
+    test_sack_sysfile("bad_machine_integer_info_endianness");
 }
 
 #[test]
 fn bad_machine_float_info_size() {
-    test_sysfile("bad_machine_float_info_size");
+    test_sack_sysfile("bad_machine_float_info_size");
 }
 
 #[test]
 fn wrong_special_floats() {
-    test_sysfile("wrong_special_floats");
+    test_sack_sysfile("wrong_special_floats");
 }
 
 #[test]
 fn variable_sets_unknown_variable() {
-    test_sysfile("variable_sets_unknown_variable");
+    test_sack_sysfile("variable_sets_unknown_variable");
 }
 
 #[test]
 fn multiple_response_sets_bad_name() {
-    test_sysfile("multiple_response_sets_bad_name");
+    test_sack_sysfile("multiple_response_sets_bad_name");
 }
 
 #[test]
 fn multiple_response_sets_missing_space_after_c() {
-    test_sysfile("multiple_response_sets_missing_space_after_c");
+    test_sack_sysfile("multiple_response_sets_missing_space_after_c");
 }
 
 #[test]
 fn multiple_response_sets_missing_space_after_e() {
-    test_sysfile("multiple_response_sets_missing_space_after_e");
+    test_sack_sysfile("multiple_response_sets_missing_space_after_e");
 }
 
 #[test]
 fn multiple_response_sets_missing_label_source() {
-    test_sysfile("multiple_response_sets_missing_label_source");
+    test_sack_sysfile("multiple_response_sets_missing_label_source");
 }
 
 #[test]
 fn multiple_response_sets_unexpected_label_source() {
-    test_sysfile("multiple_response_sets_unexpected_label_source");
+    test_sack_sysfile("multiple_response_sets_unexpected_label_source");
 }
 
 #[test]
 fn multiple_response_sets_bad_counted_string() {
-    test_sysfile("multiple_response_sets_bad_counted_string");
+    test_sack_sysfile("multiple_response_sets_bad_counted_string");
 }
 
 #[test]
 fn multiple_response_sets_counted_string_missing_space() {
-    test_sysfile("multiple_response_sets_counted_string_missing_space");
+    test_sack_sysfile("multiple_response_sets_counted_string_missing_space");
 }
 
 #[test]
 fn multiple_response_sets_counted_string_bad_length() {
-    test_sysfile("multiple_response_sets_counted_string_bad_length");
+    test_sack_sysfile("multiple_response_sets_counted_string_bad_length");
 }
 
 #[test]
 fn multiple_response_sets_missing_space_after_counted_string() {
-    test_sysfile("multiple_response_sets_missing_space_after_counted_string");
+    test_sack_sysfile("multiple_response_sets_missing_space_after_counted_string");
 }
 
 #[test]
 fn multiple_response_sets_missing_newline_after_variable_name() {
-    test_sysfile("multiple_response_sets_missing_newline_after_variable_name");
+    test_sack_sysfile("multiple_response_sets_missing_newline_after_variable_name");
 }
 
 #[test]
 fn multiple_response_sets_duplicate_variable_name() {
-    test_sysfile("multiple_response_sets_duplicate_variable_name");
+    test_sack_sysfile("multiple_response_sets_duplicate_variable_name");
 }
 
 #[test]
 fn mixed_variable_types_in_mrsets() {
-    test_sysfile("mixed_variable_types_in_mrsets");
+    test_sack_sysfile("mixed_variable_types_in_mrsets");
 }
 
 #[test]
 fn missing_newline_after_variable_name_in_mrsets() {
-    test_sysfile("missing_newline_after_variable_name_in_mrsets");
+    test_sack_sysfile("missing_newline_after_variable_name_in_mrsets");
 }
 
 #[test]
 fn zero_or_one_variable_in_mrset() {
-    test_sysfile("zero_or_one_variable_in_mrset");
+    test_sack_sysfile("zero_or_one_variable_in_mrset");
 }
 
 #[test]
 fn wrong_display_parameter_size() {
-    test_sysfile("wrong_display_parameter_size");
+    test_sack_sysfile("wrong_display_parameter_size");
 }
 
 #[test]
 fn wrong_display_parameter_count() {
-    test_sysfile("wrong_display_parameter_count");
+    test_sack_sysfile("wrong_display_parameter_count");
 }
 
 #[test]
 fn wrong_display_measurement_level() {
-    test_sysfile("wrong_display_measurement_level");
+    test_sack_sysfile("wrong_display_measurement_level");
 }
 
 #[test]
 fn wrong_display_alignment() {
-    test_sysfile("wrong_display_alignment");
+    test_sack_sysfile("wrong_display_alignment");
 }
 
 #[test]
 fn bad_variable_name_in_variable_value_pair() {
-    test_sysfile("bad_variable_name_in_variable_value_pair");
+    test_sack_sysfile("bad_variable_name_in_variable_value_pair");
 }
 
 #[test]
 fn duplicate_long_variable_name() {
-    test_sysfile("duplicate_long_variable_name");
+    test_sack_sysfile("duplicate_long_variable_name");
 }
 
 #[test]
 fn bad_very_long_string_length() {
-    test_sysfile("bad_very_long_string_length");
+    test_sack_sysfile("bad_very_long_string_length");
 }
 
 #[test]
 fn bad_very_long_string_segment_width() {
-    test_sysfile("bad_very_long_string_segment_width");
+    test_sack_sysfile("bad_very_long_string_segment_width");
 }
 
 #[test]
 fn too_many_value_labels() {
-    test_sysfile("too_many_value_labels");
+    test_sack_sysfile("too_many_value_labels");
 }
 
 #[test]
 fn missing_type_4_record() {
-    test_sysfile("missing_type_4_record");
+    test_sack_sysfile("missing_type_4_record");
 }
 
 #[test]
 fn value_label_with_no_associated_variables() {
-    test_sysfile("value_label_with_no_associated_variables");
+    test_sack_sysfile("value_label_with_no_associated_variables");
 }
 
 #[test]
 fn type_4_record_names_long_string_variable() {
-    test_sysfile("type_4_record_names_long_string_variable");
+    test_sack_sysfile("type_4_record_names_long_string_variable");
 }
 
 #[test]
 fn value_label_variable_indexes_must_be_in_correct_range() {
-    test_sysfile("value_label_variable_indexes_must_be_in_correct_range");
+    test_sack_sysfile("value_label_variable_indexes_must_be_in_correct_range");
 }
 
 #[test]
 fn value_label_variable_indexes_must_not_be_long_string_continuation() {
-    test_sysfile("value_label_variable_indexes_must_not_be_long_string_continuation");
+    test_sack_sysfile("value_label_variable_indexes_must_not_be_long_string_continuation");
 }
 
 #[test]
 fn variables_for_value_label_must_all_be_same_type() {
-    test_sysfile("variables_for_value_label_must_all_be_same_type");
+    test_sack_sysfile("variables_for_value_label_must_all_be_same_type");
 }
 
 #[test]
 fn duplicate_value_labels_type() {
-    test_sysfile("duplicate_value_labels_type");
+    test_sack_sysfile("duplicate_value_labels_type");
 }
 
 #[test]
 fn missing_attribute_value() {
-    test_sysfile("missing_attribute_value");
+    test_sack_sysfile("missing_attribute_value");
 }
 
 #[test]
 fn unquoted_attribute_value() {
-    test_sysfile("unquoted_attribute_value");
+    test_sack_sysfile("unquoted_attribute_value");
 }
 
 #[test]
 fn duplicate_attribute_name() {
-    test_sysfile("duplicate_attribute_name");
+    test_sack_sysfile("duplicate_attribute_name");
 }
 
 #[test]
 fn bad_variable_name_in_long_string_value_label() {
-    test_sysfile("bad_variable_name_in_long_string_value_label");
+    test_sack_sysfile("bad_variable_name_in_long_string_value_label");
 }
 
 #[test]
 fn fewer_data_records_than_indicated_by_file_header() {
-    test_sysfile("fewer_data_records_than_indicated_by_file_header");
+    test_sack_sysfile("fewer_data_records_than_indicated_by_file_header");
+}
+
+#[test]
+fn more_data_records_than_indicated_by_file_header() {
+    test_sack_sysfile("more_data_records_than_indicated_by_file_header");
 }
 
 #[test]
 fn partial_data_record_between_variables() {
-    test_sysfile("partial_data_record_between_variables");
+    test_sack_sysfile("partial_data_record_between_variables");
 }
 
 #[test]
 fn partial_data_record_within_long_string() {
-    test_sysfile("partial_data_record_within_long_string");
+    test_sack_sysfile("partial_data_record_within_long_string");
 }
 
 #[test]
 fn partial_compressed_data_record() {
-    test_sysfile("partial_compressed_data_record");
+    test_sack_sysfile("partial_compressed_data_record");
 }
 
 #[test]
 fn zcompressed_data_bad_zheader_ofs() {
-    test_sysfile("zcompressed_data_bad_zheader_ofs");
+    test_sack_sysfile("zcompressed_data_bad_zheader_ofs");
 }
 
 #[test]
 fn zcompressed_data_bad_ztrailer_ofs() {
-    test_sysfile("zcompressed_data_bad_ztrailer_ofs");
+    test_sack_sysfile("zcompressed_data_bad_ztrailer_ofs");
 }
 
 #[test]
 fn zcompressed_data_invalid_ztrailer_len() {
-    test_sysfile("zcompressed_data_invalid_ztrailer_len");
+    test_sack_sysfile("zcompressed_data_invalid_ztrailer_len");
 }
 
 #[test]
 fn zcompressed_data_wrong_ztrailer_len() {
-    test_sysfile("zcompressed_data_wrong_ztrailer_len");
+    test_sack_sysfile("zcompressed_data_wrong_ztrailer_len");
 }
 
 #[test]
 fn zcompressed_data_wrong_ztrailer_bias() {
-    test_sysfile("zcompressed_data_wrong_ztrailer_bias");
+    test_sack_sysfile("zcompressed_data_wrong_ztrailer_bias");
 }
 
 #[test]
 fn zcompressed_data_wrong_ztrailer_zero() {
-    test_sysfile("zcompressed_data_wrong_ztrailer_zero");
+    test_sack_sysfile("zcompressed_data_wrong_ztrailer_zero");
 }
 
 #[test]
 fn zcompressed_data_wrong_block_size() {
-    test_sysfile("zcompressed_data_wrong_block_size");
+    test_sack_sysfile("zcompressed_data_wrong_block_size");
 }
 
 #[test]
 fn zcompressed_data_wrong_n_blocks() {
-    test_sysfile("zcompressed_data_wrong_n_blocks");
+    test_sack_sysfile("zcompressed_data_wrong_n_blocks");
 }
 
 #[test]
 fn zcompressed_data_wrong_uncompressed_ofs() {
-    test_sysfile("zcompressed_data_wrong_uncompressed_ofs");
+    test_sack_sysfile("zcompressed_data_wrong_uncompressed_ofs");
 }
 
 #[test]
 fn zcompressed_data_wrong_compressed_ofs() {
-    test_sysfile("zcompressed_data_wrong_compressed_ofs");
+    test_sack_sysfile("zcompressed_data_wrong_compressed_ofs");
 }
 
 #[test]
 fn zcompressed_data_compressed_sizes_dont_add_up() {
-    test_sysfile("zcompressed_data_compressed_sizes_dont_add_up");
+    test_sack_sysfile("zcompressed_data_compressed_sizes_dont_add_up");
 }
 
 #[test]
 fn zcompressed_data_uncompressed_size_block_size() {
-    test_sysfile("zcompressed_data_uncompressed_size_block_size");
+    test_sack_sysfile("zcompressed_data_uncompressed_size_block_size");
 }
 
 #[test]
 fn zcompressed_data_compression_expands_data_too_much() {
-    test_sysfile("zcompressed_data_compression_expands_data_too_much");
+    test_sack_sysfile("zcompressed_data_compression_expands_data_too_much");
 }
 
 #[test]
 fn zcompressed_data_compressed_sizes_don_t_add_up() {
-    test_sysfile("zcompressed_data_compressed_sizes_don_t_add_up");
+    test_sack_sysfile("zcompressed_data_compressed_sizes_don_t_add_up");
 }
 
+/// CVE-2017-10791.
+/// See also https://bugzilla.redhat.com/show_bug.cgi?id=1467004.
+/// See also https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=866890.
+/// See also https://security-tracker.debian.org/tracker/CVE-2017-10791.
+/// Found by team OWL337, using the collAFL fuzzer.
 #[test]
 fn integer_overflows_in_long_string_missing_values() {
-    test_sysfile("integer_overflows_in_long_string_missing_values");
+    test_raw_sysfile("integer_overflows_in_long_string_missing_values");
 }
 
 #[test]
 fn null_dereference_skipping_bad_extension_record_18() {
-    test_sysfile("null_dereference_skipping_bad_extension_record_18");
+    test_sack_sysfile("null_dereference_skipping_bad_extension_record_18");
 }
 
 /// Duplicate variable name handling negative test.
@@ -508,10 +518,21 @@ fn null_dereference_skipping_bad_extension_record_18() {
 /// #41475).
 #[test]
 fn duplicate_variable_name() {
-    test_sysfile("duplicate_variable_name");
+    test_sack_sysfile("duplicate_variable_name");
+}
+
+fn test_raw_sysfile(name: &str) {
+    let input_filename = Path::new(env!("CARGO_MANIFEST_DIR"))
+        .join("src/sys/testdata")
+        .join(name)
+        .with_extension("sav");
+    let sysfile = std::fs::read(&input_filename).unwrap();
+    let expected_filename = input_filename.with_extension("expected");
+    let expected = String::from_utf8(std::fs::read(&expected_filename).unwrap()).unwrap();
+    test_sysfile(name, sysfile, &expected, &expected_filename);
 }
 
-fn test_sysfile(name: &str) {
+fn test_sack_sysfile(name: &str) {
     let input_filename = Path::new(env!("CARGO_MANIFEST_DIR"))
         .join("src/sys/testdata")
         .join(name)
@@ -528,114 +549,115 @@ fn test_sysfile(name: &str) {
             },
         );
         let sysfile = sack(&input, Some(&input_filename), endian).unwrap();
-        let cursor = Cursor::new(sysfile);
-        let mut warnings = Vec::new();
-        let mut reader = Reader::new(cursor, |warning| warnings.push(warning)).unwrap();
-        let output = match reader.headers().collect() {
-            Ok(headers) => {
-                let cases = reader.cases();
-                let encoding =
-                    encoding_from_headers(&headers, &mut |warning| warnings.push(warning)).unwrap();
-                let mut decoder = Decoder::new(encoding, |warning| warnings.push(warning));
-                let mut decoded_records = Vec::new();
-                for header in headers {
-                    decoded_records.push(header.decode(&mut decoder).unwrap());
-                }
-                drop(decoder);
-
-                let mut errors = Vec::new();
-                let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap();
-                let (dictionary, metadata, cases) =
-                    decode(headers, cases, encoding, |e| errors.push(e)).unwrap();
-                let (group, data) = metadata.to_pivot_rows();
-                let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))])
-                    .with_data(
-                        data.into_iter()
-                            .enumerate()
-                            .filter(|(_row, value)| !value.is_empty())
-                            .map(|(row, value)| ([row], value)),
-                    );
-                let (group, data) = dictionary.to_pivot_rows();
-                let dictionary_table = PivotTable::new([(Axis3::Y, Dimension::new(group))])
-                    .with_data(
-                        data.into_iter()
-                            .enumerate()
-                            .filter(|(_row, value)| !value.is_empty())
-                            .map(|(row, value)| ([row], value)),
-                    );
-                let mut output = Vec::new();
-                output.extend(
-                    warnings
-                        .into_iter()
-                        .map(|warning| Arc::new(Item::from(Text::new_log(warning.to_string())))),
-                );
-                output.extend(
-                    errors
-                        .into_iter()
-                        .map(|error| Arc::new(Item::from(Text::new_log(error.to_string())))),
-                );
-                output.push(Arc::new(metadata_table.into()));
-                output.push(Arc::new(dictionary_table.into()));
-                output.push(Arc::new(
-                    dictionary.output_variables().to_pivot_table().into(),
-                ));
-                if let Some(pt) = dictionary.output_value_labels().to_pivot_table() {
-                    output.push(Arc::new(pt.into()));
-                }
-                if let Some(pt) = dictionary.output_mrsets().to_pivot_table() {
-                    output.push(Arc::new(pt.into()));
-                }
-                if let Some(pt) = dictionary.output_attributes().to_pivot_table() {
-                    output.push(Arc::new(pt.into()));
-                }
-                if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() {
-                    output.push(Arc::new(pt.into()));
-                }
-                if let Some(cases) = cases {
-                    let variables = Group::new("Variable")
-                        .with_multiple(dictionary.variables.iter().map(|var| &**var));
-                    let mut case_numbers = Group::new("Case").with_label_shown();
-                    let mut data = Vec::new();
-                    for (case_number, case) in cases.enumerate() {
-                        match case {
-                            Ok(case) => {
-                                case_numbers.push(Value::new_integer(Some(
-                                    (case_numbers.len() + 1) as f64,
-                                )));
-                                data.push(
-                                    case.into_iter()
-                                        .map(|datum| Value::new_datum(&datum, dictionary.encoding))
-                                        .collect::<Vec<_>>(),
-                                );
-                            }
-                            Err(error) => {
-                                output.push(Arc::new(Item::from(Text::new_log(error.to_string()))));
-                            }
+        test_sysfile(name, sysfile, &expected, &expected_filename);
+    }
+}
+
+fn test_sysfile(name: &str, sysfile: Vec<u8>, expected: &str, expected_filename: &Path) {
+    let cursor = Cursor::new(sysfile);
+    let mut warnings = Vec::new();
+    let mut reader = Reader::new(cursor, |warning| warnings.push(warning)).unwrap();
+    let output = match reader.headers().collect() {
+        Ok(headers) => {
+            let cases = reader.cases();
+            let encoding =
+                encoding_from_headers(&headers, &mut |warning| warnings.push(warning)).unwrap();
+            let mut decoder = Decoder::new(encoding, |warning| warnings.push(warning));
+            let mut decoded_records = Vec::new();
+            for header in headers {
+                decoded_records.push(header.decode(&mut decoder).unwrap());
+            }
+            drop(decoder);
+
+            let mut errors = Vec::new();
+            let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap();
+            let (dictionary, metadata, cases) =
+                decode(headers, cases, encoding, |e| errors.push(e)).unwrap();
+            let (group, data) = metadata.to_pivot_rows();
+            let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data(
+                data.into_iter()
+                    .enumerate()
+                    .filter(|(_row, value)| !value.is_empty())
+                    .map(|(row, value)| ([row], value)),
+            );
+            let (group, data) = dictionary.to_pivot_rows();
+            let dictionary_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data(
+                data.into_iter()
+                    .enumerate()
+                    .filter(|(_row, value)| !value.is_empty())
+                    .map(|(row, value)| ([row], value)),
+            );
+            let mut output = Vec::new();
+            output.extend(
+                warnings
+                    .into_iter()
+                    .map(|warning| Arc::new(Item::from(Text::new_log(warning.to_string())))),
+            );
+            output.extend(
+                errors
+                    .into_iter()
+                    .map(|error| Arc::new(Item::from(Text::new_log(error.to_string())))),
+            );
+            output.push(Arc::new(metadata_table.into()));
+            output.push(Arc::new(dictionary_table.into()));
+            output.push(Arc::new(
+                dictionary.output_variables().to_pivot_table().into(),
+            ));
+            if let Some(pt) = dictionary.output_value_labels().to_pivot_table() {
+                output.push(Arc::new(pt.into()));
+            }
+            if let Some(pt) = dictionary.output_mrsets().to_pivot_table() {
+                output.push(Arc::new(pt.into()));
+            }
+            if let Some(pt) = dictionary.output_attributes().to_pivot_table() {
+                output.push(Arc::new(pt.into()));
+            }
+            if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() {
+                output.push(Arc::new(pt.into()));
+            }
+            if let Some(cases) = cases {
+                let variables = Group::new("Variable")
+                    .with_multiple(dictionary.variables.iter().map(|var| &**var));
+                let mut case_numbers = Group::new("Case").with_label_shown();
+                let mut data = Vec::new();
+                for case in cases {
+                    match case {
+                        Ok(case) => {
+                            case_numbers
+                                .push(Value::new_integer(Some((case_numbers.len() + 1) as f64)));
+                            data.push(
+                                case.into_iter()
+                                    .map(|datum| Value::new_datum(&datum, dictionary.encoding))
+                                    .collect::<Vec<_>>(),
+                            );
+                        }
+                        Err(error) => {
+                            output.push(Arc::new(Item::from(Text::new_log(error.to_string()))));
                         }
                     }
-                    if !data.is_empty() {
-                        let mut pt = PivotTable::new([
-                            (Axis3::X, Dimension::new(variables)),
-                            (Axis3::Y, Dimension::new(case_numbers)),
-                        ]);
-                        for (row_number, row) in data.into_iter().enumerate() {
-                            for (column_number, datum) in row.into_iter().enumerate() {
-                                pt.insert(&[column_number, row_number], datum);
-                            }
+                }
+                if !data.is_empty() {
+                    let mut pt = PivotTable::new([
+                        (Axis3::X, Dimension::new(variables)),
+                        (Axis3::Y, Dimension::new(case_numbers)),
+                    ]);
+                    for (row_number, row) in data.into_iter().enumerate() {
+                        for (column_number, datum) in row.into_iter().enumerate() {
+                            pt.insert(&[column_number, row_number], datum);
                         }
-                        output.push(Arc::new(pt.into()));
                     }
+                    output.push(Arc::new(pt.into()));
                 }
-                Item::new(Details::Group(output))
             }
-            Err(error) => Item::new(Details::Text(Box::new(Text::new_log(error.to_string())))),
-        };
-
-        assert_lines_eq(
-            &expected,
-            expected_filename.display(),
-            &output.to_string(),
-            "actual",
-        );
-    }
+            Item::new(Details::Group(output))
+        }
+        Err(error) => Item::new(Details::Text(Box::new(Text::new_log(error.to_string())))),
+    };
+
+    assert_lines_eq(
+        &expected,
+        expected_filename.display(),
+        &output.to_string(),
+        "actual",
+    );
 }
index f36fb5ebcee1cdeb6ab86528523bdc54c83e1d9c..3b0e1d4e2101e8f76324e7256f058f5cdbe12244 100644 (file)
 │str8 │       4│     │Nominal          │Input│    8│Left     │A8          │A8          │              │
 │str15│       5│     │Nominal          │Input│   15│Left     │A15         │A15         │              │
 ╰─────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+╭────┬──────┬──────┬────────┬────────────────┬──────────────────────────────╮
+│Case│ num1 │ num2 │  str4  │      str8      │             str15            │
+├────┼──────┼──────┼────────┼────────────────┼──────────────────────────────┤
+│1   │-99.00│   .00│        │abcdefgh        │        0123                  │
+│2   │   .  │151.00│jklm    │nopqrstu        │vwxyzABC                      │
+│3   │  1.00│  2.00│DEFG    │HIJKLMNO        │        PQRSTUV               │
+╰────┴──────┴──────┴────────┴────────────────┴──────────────────────────────╯
index f57bc54fa11d9a495004c46c9e97ca4cef625420..17e3b4157a35b66d6ea92cea2bfa93b411c52566 100644 (file)
@@ -23,3 +23,11 @@ Compression bias is 50 instead of the usual values of 0 or 100.
 │str8 │       4│     │Nominal          │Input│    8│Left     │A8          │A8          │              │
 │str15│       5│     │Nominal          │Input│   15│Left     │A15         │A15         │              │
 ╰─────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+╭────┬──────┬──────┬────────┬────────────────┬──────────────────────────────╮
+│Case│ num1 │ num2 │  str4  │      str8      │             str15            │
+├────┼──────┼──────┼────────┼────────────────┼──────────────────────────────┤
+│1   │-49.00│ 50.00│        │abcdefgh        │        0123                  │
+│2   │   .  │201.00│jklm    │nopqrstu        │vwxyzABC                      │
+│3   │ 51.00│ 52.00│DEFG    │HIJKLMNO        │        PQRSTUV               │
+╰────┴──────┴──────┴────────┴────────────────┴──────────────────────────────╯
index f36fb5ebcee1cdeb6ab86528523bdc54c83e1d9c..0a4fd1167b6f5d0c3ffa71e14801108519d12014 100644 (file)
 │str8 │       4│     │Nominal          │Input│    8│Left     │A8          │A8          │              │
 │str15│       5│     │Nominal          │Input│   15│Left     │A15         │A15         │              │
 ╰─────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+╭────┬──────┬──────┬────────┬────────────────┬──────────────────────────────╮
+│Case│ num1 │ num2 │  str4  │      str8      │             str15            │
+├────┼──────┼──────┼────────┼────────────────┼──────────────────────────────┤
+│1   │  1.00│100.00│        │abcdefgh        │        0123                  │
+│2   │   .  │251.00│jklm    │nopqrstu        │vwxyzABC                      │
+│3   │101.00│102.00│DEFG    │HIJKLMNO        │        PQRSTUV               │
+╰────┴──────┴──────┴────────┴────────────────┴──────────────────────────────╯
index de5196bcd7ec49a610aadf848e5da91facd5f5e1..14947fd8677b3cf0d252d86e6abfb3e059948ba7 100644 (file)
@@ -1,8 +1,8 @@
-Invalid name in long variable name record.  "_Invalid" may not be used as an identifier because it begins with disallowed character "_".
+Invalid name in long variable name record.  "_Invalid" may not be used as an identifier because it begins with disallowed character '_'.
 
-Invalid name in long variable name record.  "$Invalid" may not be used as an identifier because it begins with disallowed character "$".
+Invalid name in long variable name record.  "$Invalid" may not be used as an identifier because it begins with disallowed character '$'.
 
-Invalid name in long variable name record.  "#Invalid" may not be used as an identifier because it begins with disallowed character "#".
+Invalid name in long variable name record.  "#Invalid" may not be used as an identifier because it begins with disallowed character '#'.
 
 Duplicate long variable name LONGVARIABLENAME.
 
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..41eca0a2663c3807765aef485e5579154eacd29f 100644 (file)
@@ -0,0 +1,30 @@
+╭──────────────────────┬────────────────────────╮
+│       Created        │    01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product        │PSPP synthetic test file│
+├──────────────────────┼────────────────────────┤
+│       Compression    │None                    │
+│       Number of Cases│                       5│
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬─╮
+│Variables│2│
+╰─────────┴─╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│    │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│num1│       1│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│num2│       2│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+File metadata says it contains 5 cases, but 4 cases were read.
+
+╭────┬────┬────╮
+│Case│num1│num2│
+├────┼────┼────┤
+│1   │1.00│2.00│
+│2   │3.00│4.00│
+│3   │5.00│6.00│
+│4   │7.00│8.00│
+╰────┴────┴────╯
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..10f60064aaa59c281b714805cac34b2bf77c8644 100644 (file)
Binary files a/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.expected and b/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.expected differ
diff --git a/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sack b/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sack
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sav b/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sav
new file mode 100644 (file)
index 0000000..6462081
Binary files /dev/null and b/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sav differ
index 95e4106d27813911553fe4f6d75289d0ff27e89b..37499e8d596612d1e71119a8a284535c91656556 100644 (file)
@@ -17,7 +17,7 @@ Invalid long string missing value for 7-byte string variable STR4.
 │       Version        │1.2.3                   │
 ├──────────────────────┼────────────────────────┤
 │       Compression    │None                    │
-│       Number of Cases│                       1│
+│       Number of Cases│                       0│
 ╰──────────────────────┴────────────────────────╯
 
 ╭─────────┬──────────────────────────────╮
index 3b4ef337d5156eeea1a73b0dccc562c6c6c87d76..b80fd1a2cff285125635378f080b7192eafbad9d 100644 (file)
@@ -4,7 +4,7 @@
 8; # Nominal case size
 0; # Not compressed
 0; # Not weighted
-1; # 1 case.
+0; # 0 cases.
 100.0; # Bias.
 "01 Jan 11"; "20:53:52";
 "PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 "";
@@ -61,6 +61,3 @@ COUNT("STR4"); i8 1; 8; "ABCDEFGH";
 
 # Dictionary termination record.
 999; 0;
-s8 "abcd"; s8 "efgh"; s8 "ijkl"; s8 "mnop"; s8 "qrst"; s8 "uvwx";
-s16 "yzABCDEFGHI"; s16 "JKLMNOPQR"; s16 "STUVWXYZ01";
-s16 "23456789abc"; s32 "defghijklmnopqstuvwxyzABC";
diff --git a/rust/pspp/src/sys/testdata/more_data_records_than_indicated_by_file_header.expected b/rust/pspp/src/sys/testdata/more_data_records_than_indicated_by_file_header.expected
new file mode 100644 (file)
index 0000000..cbe0398
--- /dev/null
@@ -0,0 +1,32 @@
+╭──────────────────────┬────────────────────────╮
+│       Created        │    01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product        │PSPP synthetic test file│
+├──────────────────────┼────────────────────────┤
+│       Compression    │None                    │
+│       Number of Cases│                       5│
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬─╮
+│Variables│2│
+╰─────────┴─╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│    │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│num1│       1│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+│num2│       2│     │                 │Input│    8│Right    │F8.0        │F8.0        │              │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+File metadata says it contains 5 cases, but 6 cases were read.
+
+╭────┬─────┬─────╮
+│Case│ num1│ num2│
+├────┼─────┼─────┤
+│1   │ 1.00│ 2.00│
+│2   │ 3.00│ 4.00│
+│3   │ 5.00│ 6.00│
+│4   │ 7.00│ 8.00│
+│5   │ 9.00│10.00│
+│6   │11.00│12.00│
+╰────┴─────┴─────╯
diff --git a/rust/pspp/src/sys/testdata/more_data_records_than_indicated_by_file_header.sack b/rust/pspp/src/sys/testdata/more_data_records_than_indicated_by_file_header.sack
new file mode 100644 (file)
index 0000000..370865e
--- /dev/null
@@ -0,0 +1,20 @@
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 2; 0; 0; >>5<<; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Numeric variables.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+2; 0; 0; 0; 0x050800 *2; s8 "NUM2";
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# Data.
+999; 0;
+1.0; 2.0;
+3.0; 4.0;
+5.0; 6.0;
+7.0; 8.0;
+9.0; 10.0;
+# Extra record here:
+11.0; 12.0;
index c62f7006a2a6c7049c31fbaf7de623cd29af216b..54bd0ec5f099a2b8e83e16f5194ff4e507fd6c48 100644 (file)
@@ -5,7 +5,7 @@
 │       Version        │1.2.3                   │
 ├──────────────────────┼────────────────────────┤
 │       Compression    │None                    │
-│       Number of Cases│                       1│
+│       Number of Cases│                       0│
 ╰──────────────────────┴────────────────────────╯
 
 ╭─────────┬────────────────────────╮
index 8198711e1e8cdbcc49ba0aff7754d7c2cee4ca65..6d5e2cfdaf8c3e5b5a5a4bc3e2eb78005884aa39 100644 (file)
@@ -4,7 +4,7 @@
 22; # Nominal case size
 0; # Not compressed
 0; # Not weighted
-1; # 1 case.
+0; # 0 cases.
 100.0; # Bias.
 "05 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file";
 i8 0 *3;