From 16f6b4988420244e19a26eca74b1092543fd14c0 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 6 Jul 2025 12:57:14 -0700 Subject: [PATCH] more tests --- rust/pspp/src/identifier.rs | 6 +- rust/pspp/src/sys/cooked.rs | 9 +- rust/pspp/src/sys/raw.rs | 63 ++- rust/pspp/src/sys/test.rs | 428 +++++++++--------- .../src/sys/testdata/compressed_data.expected | 8 + .../compressed_data_other_bias.expected | 8 + .../compressed_data_zero_bias.expected | 8 + .../duplicate_long_variable_name.expected | 6 +- ...rds_than_indicated_by_file_header.expected | 30 ++ ...ows_in_long_string_missing_values.expected | Bin 0 -> 4089 bytes ...erflows_in_long_string_missing_values.sack | 0 ...verflows_in_long_string_missing_values.sav | Bin 0 -> 671 bytes ...nvalid_long_string_missing_values.expected | 2 +- .../invalid_long_string_missing_values.sack | 5 +- ...rds_than_indicated_by_file_header.expected | 32 ++ ...records_than_indicated_by_file_header.sack | 20 + .../src/sys/testdata/value_labels.expected | 2 +- rust/pspp/src/sys/testdata/value_labels.sack | 2 +- 18 files changed, 398 insertions(+), 231 deletions(-) delete mode 100644 rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sack create mode 100644 rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sav create mode 100644 rust/pspp/src/sys/testdata/more_data_records_than_indicated_by_file_header.expected create mode 100644 rust/pspp/src/sys/testdata/more_data_records_than_indicated_by_file_header.sack diff --git a/rust/pspp/src/identifier.rs b/rust/pspp/src/identifier.rs index e891347f9d..191744a1e2 100644 --- a/rust/pspp/src/identifier.rs +++ b/rust/pspp/src/identifier.rs @@ -104,10 +104,12 @@ pub enum Error { #[error("\"!\" is not a valid identifier.")] Bang, - #[error("\"{0}\" may not be used as an identifier because it begins with disallowed character \"{1}\".")] + #[error("\"{0}\" may not be used as an identifier because it begins with disallowed character {1:?}.")] 
BadFirstCharacter(String, char), - #[error("\"{0}\" may not be used as an identifier because it contains disallowed character \"{1}\".")] + #[error( + "\"{0}\" may not be used as an identifier because it contains disallowed character {1:?}." + )] BadLaterCharacter(String, char), #[error("Identifier \"{id}\" is {length} bytes in the encoding in use ({encoding}), which exceeds the {max}-byte limit.")] diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 43ad3c5f59..bf4169fc28 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -546,7 +546,11 @@ impl Metadata { creation, endian: header.endian, compression: header.compression, - n_cases: header.n_cases.map(|n| n as u64), + n_cases: headers + .number_of_cases + .as_ref() + .map(|record| record.n_cases) + .or_else(|| header.n_cases.map(|n| n as u64)), product, product_ext: headers.product_info.as_ref().map(|pe| fix_line_ends(&pe.0)), version: headers.integer_info.as_ref().map(|ii| ii.version), @@ -1093,6 +1097,9 @@ pub fn decode( } let metadata = Metadata::decode(&headers, warn); + if let Some(n_cases) = metadata.n_cases { + cases = cases.take().map(|cases| cases.with_expected_cases(n_cases)) + } Ok((dictionary, metadata, cases)) } diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 1317c2d835..f81a671ea6 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -115,6 +115,9 @@ pub enum Error { ztrailer_len: u64, }, + #[error("File metadata says it contains {expected} cases, but {actual} cases were read.")] + WrongNumberOfCases { expected: u64, actual: u64 }, + #[error("{0}")] EncodingError(EncodingError), } @@ -1181,6 +1184,8 @@ pub struct Cases { endian: Endian, codes: VecDeque, eof: bool, + expected_cases: Option, + read_cases: u64, } impl Debug for Cases { @@ -1198,7 +1203,9 @@ impl Default for Cases { bias: 100.0, endian: Endian::Little, codes: VecDeque::new(), - eof: true, + eof: false, + expected_cases: None, + read_cases: 0, } } } 
@@ -1208,33 +1215,39 @@ impl Cases { where R: Read + Seek + 'static, { - let case_vars = var_types - .types - .iter() - .flatten() - .copied() - .map(CaseVar::new) - .collect::>(); Self { reader: if header.compression == Some(Compression::ZLib) { Box::new(ZlibDecodeMultiple::new(reader)) } else { Box::new(reader) }, - eof: case_vars.is_empty(), - case_vars, + eof: false, + case_vars: var_types + .types + .iter() + .flatten() + .copied() + .map(CaseVar::new) + .collect::>(), compression: header.compression, bias: header.bias, endian: header.endian, codes: VecDeque::with_capacity(8), + expected_cases: None, + read_cases: 0, } } pub fn with_widths(self, widths: impl IntoIterator) -> Self { - let case_vars = widths.into_iter().map(CaseVar::new).collect::>(); Self { - eof: self.eof || case_vars.is_empty(), - case_vars, + case_vars: widths.into_iter().map(CaseVar::new).collect::>(), + ..self + } + } + + pub fn with_expected_cases(self, expected_cases: u64) -> Self { + Self { + expected_cases: Some(expected_cases), ..self } } @@ -1248,7 +1261,9 @@ impl Iterator for Cases { return None; } - let retval = if self.compression.is_some() { + let retval = if self.case_vars.is_empty() { + None + } else if self.compression.is_some() { Datum::read_compressed_case( &mut self.reader, &self.case_vars, @@ -1260,7 +1275,25 @@ impl Iterator for Cases { } else { Datum::read_case(&mut self.reader, &self.case_vars, self.endian).transpose() }; - self.eof = matches!(retval, None | Some(Err(_))); + match &retval { + None => { + self.eof = true; + if let Some(expected_cases) = self.expected_cases + && expected_cases != self.read_cases + { + return Some(Err(Error::WrongNumberOfCases { + expected: expected_cases, + actual: self.read_cases, + })); + } else { + return None; + } + } + Some(Ok(_)) => { + self.read_cases += 1; + } + Some(Err(_)) => self.eof = true, + }; retval } } diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index aa2af10012..255a30539a 100644 --- 
a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -17,489 +17,499 @@ use enum_iterator::all; #[test] fn variable_labels_and_missing_values() { - test_sysfile("variable_labels_and_missing_values"); + test_sack_sysfile("variable_labels_and_missing_values"); } #[test] fn unspecified_number_of_variable_positions() { - test_sysfile("unspecified_number_of_variable_positions"); + test_sack_sysfile("unspecified_number_of_variable_positions"); } #[test] fn wrong_variable_positions_but_v13() { - test_sysfile("wrong_variable_positions_but_v13"); + test_sack_sysfile("wrong_variable_positions_but_v13"); } #[test] fn value_labels() { - test_sysfile("value_labels"); + test_sack_sysfile("value_labels"); } #[test] fn documents() { - test_sysfile("documents"); + test_sack_sysfile("documents"); } #[test] fn empty_document_record() { - test_sysfile("empty_document_record"); + test_sack_sysfile("empty_document_record"); } #[test] fn variable_sets() { - test_sysfile("variable_sets"); + test_sack_sysfile("variable_sets"); } #[test] fn multiple_response_sets() { - test_sysfile("multiple_response_sets"); + test_sack_sysfile("multiple_response_sets"); } #[test] fn extra_product_info() { // Also checks for handling of CR-only line ends in file label and extra // product info. 
- test_sysfile("extra_product_info"); + test_sack_sysfile("extra_product_info"); } #[test] fn variable_display_without_width() { - test_sysfile("variable_display_without_width"); + test_sack_sysfile("variable_display_without_width"); } #[test] fn variable_display_with_width() { - test_sysfile("variable_display_with_width"); + test_sack_sysfile("variable_display_with_width"); } #[test] fn long_variable_names() { - test_sysfile("long_variable_names"); + test_sack_sysfile("long_variable_names"); } #[test] fn very_long_strings() { - test_sysfile("very_long_strings"); + test_sack_sysfile("very_long_strings"); } #[test] fn attributes() { - test_sysfile("attributes"); + test_sack_sysfile("attributes"); } #[test] fn variable_roles() { - test_sysfile("variable_roles"); + test_sack_sysfile("variable_roles"); } #[test] fn compressed_data() { - test_sysfile("compressed_data"); + test_sack_sysfile("compressed_data"); } #[test] fn compressed_data_zero_bias() { - test_sysfile("compressed_data_zero_bias"); + test_sack_sysfile("compressed_data_zero_bias"); } #[test] fn compressed_data_other_bias() { - test_sysfile("compressed_data_other_bias"); + test_sack_sysfile("compressed_data_other_bias"); } #[test] fn zcompressed_data() { - test_sysfile("zcompressed_data"); + test_sack_sysfile("zcompressed_data"); } #[test] fn no_variables() { - test_sysfile("no_variables"); + test_sack_sysfile("no_variables"); } #[test] fn unknown_encoding() { - test_sysfile("unknown_encoding"); + test_sack_sysfile("unknown_encoding"); } #[test] fn misplaced_type_4_record() { - test_sysfile("misplaced_type_4_record"); + test_sack_sysfile("misplaced_type_4_record"); } #[test] fn bad_record_type() { - test_sysfile("bad_record_type"); + test_sack_sysfile("bad_record_type"); } #[test] fn wrong_variable_positions() { - test_sysfile("wrong_variable_positions"); + test_sack_sysfile("wrong_variable_positions"); } #[test] fn invalid_variable_name() { - test_sysfile("invalid_variable_name"); + 
test_sack_sysfile("invalid_variable_name"); } #[test] fn invalid_label_indicator() { - test_sysfile("invalid_label_indicator"); + test_sack_sysfile("invalid_label_indicator"); } #[test] fn invalid_missing_indicator() { - test_sysfile("invalid_missing_indicator"); + test_sack_sysfile("invalid_missing_indicator"); } #[test] fn invalid_missing_indicator2() { - test_sysfile("invalid_missing_indicator2"); + test_sack_sysfile("invalid_missing_indicator2"); } #[test] fn missing_string_continuation() { - test_sysfile("missing_string_continuation"); + test_sack_sysfile("missing_string_continuation"); } #[test] fn invalid_variable_format() { - test_sysfile("invalid_variable_format"); + test_sack_sysfile("invalid_variable_format"); } #[test] fn invalid_long_string_missing_values() { - test_sysfile("invalid_long_string_missing_values"); + test_sack_sysfile("invalid_long_string_missing_values"); } #[test] fn weight_must_be_numeric() { - test_sysfile("weight_must_be_numeric"); + test_sack_sysfile("weight_must_be_numeric"); } #[test] fn weight_variable_bad_index() { - test_sysfile("weight_variable_bad_index"); + test_sack_sysfile("weight_variable_bad_index"); } #[test] fn weight_variable_continuation() { - test_sysfile("weight_variable_continuation"); + test_sack_sysfile("weight_variable_continuation"); } #[test] fn multiple_documents_records() { - test_sysfile("multiple_documents_records"); + test_sack_sysfile("multiple_documents_records"); } #[test] fn unknown_extension_record() { - test_sysfile("unknown_extension_record"); + test_sack_sysfile("unknown_extension_record"); } #[test] fn extension_too_large() { - test_sysfile("extension_too_large"); + test_sack_sysfile("extension_too_large"); } #[test] fn bad_machine_integer_info_count() { - test_sysfile("bad_machine_integer_info_count"); + test_sack_sysfile("bad_machine_integer_info_count"); } #[test] fn bad_machine_integer_info_float_format() { - test_sysfile("bad_machine_integer_info_float_format"); + 
test_sack_sysfile("bad_machine_integer_info_float_format"); } #[test] fn bad_machine_integer_info_endianness() { - test_sysfile("bad_machine_integer_info_endianness"); + test_sack_sysfile("bad_machine_integer_info_endianness"); } #[test] fn bad_machine_float_info_size() { - test_sysfile("bad_machine_float_info_size"); + test_sack_sysfile("bad_machine_float_info_size"); } #[test] fn wrong_special_floats() { - test_sysfile("wrong_special_floats"); + test_sack_sysfile("wrong_special_floats"); } #[test] fn variable_sets_unknown_variable() { - test_sysfile("variable_sets_unknown_variable"); + test_sack_sysfile("variable_sets_unknown_variable"); } #[test] fn multiple_response_sets_bad_name() { - test_sysfile("multiple_response_sets_bad_name"); + test_sack_sysfile("multiple_response_sets_bad_name"); } #[test] fn multiple_response_sets_missing_space_after_c() { - test_sysfile("multiple_response_sets_missing_space_after_c"); + test_sack_sysfile("multiple_response_sets_missing_space_after_c"); } #[test] fn multiple_response_sets_missing_space_after_e() { - test_sysfile("multiple_response_sets_missing_space_after_e"); + test_sack_sysfile("multiple_response_sets_missing_space_after_e"); } #[test] fn multiple_response_sets_missing_label_source() { - test_sysfile("multiple_response_sets_missing_label_source"); + test_sack_sysfile("multiple_response_sets_missing_label_source"); } #[test] fn multiple_response_sets_unexpected_label_source() { - test_sysfile("multiple_response_sets_unexpected_label_source"); + test_sack_sysfile("multiple_response_sets_unexpected_label_source"); } #[test] fn multiple_response_sets_bad_counted_string() { - test_sysfile("multiple_response_sets_bad_counted_string"); + test_sack_sysfile("multiple_response_sets_bad_counted_string"); } #[test] fn multiple_response_sets_counted_string_missing_space() { - test_sysfile("multiple_response_sets_counted_string_missing_space"); + test_sack_sysfile("multiple_response_sets_counted_string_missing_space"); } #[test] 
fn multiple_response_sets_counted_string_bad_length() { - test_sysfile("multiple_response_sets_counted_string_bad_length"); + test_sack_sysfile("multiple_response_sets_counted_string_bad_length"); } #[test] fn multiple_response_sets_missing_space_after_counted_string() { - test_sysfile("multiple_response_sets_missing_space_after_counted_string"); + test_sack_sysfile("multiple_response_sets_missing_space_after_counted_string"); } #[test] fn multiple_response_sets_missing_newline_after_variable_name() { - test_sysfile("multiple_response_sets_missing_newline_after_variable_name"); + test_sack_sysfile("multiple_response_sets_missing_newline_after_variable_name"); } #[test] fn multiple_response_sets_duplicate_variable_name() { - test_sysfile("multiple_response_sets_duplicate_variable_name"); + test_sack_sysfile("multiple_response_sets_duplicate_variable_name"); } #[test] fn mixed_variable_types_in_mrsets() { - test_sysfile("mixed_variable_types_in_mrsets"); + test_sack_sysfile("mixed_variable_types_in_mrsets"); } #[test] fn missing_newline_after_variable_name_in_mrsets() { - test_sysfile("missing_newline_after_variable_name_in_mrsets"); + test_sack_sysfile("missing_newline_after_variable_name_in_mrsets"); } #[test] fn zero_or_one_variable_in_mrset() { - test_sysfile("zero_or_one_variable_in_mrset"); + test_sack_sysfile("zero_or_one_variable_in_mrset"); } #[test] fn wrong_display_parameter_size() { - test_sysfile("wrong_display_parameter_size"); + test_sack_sysfile("wrong_display_parameter_size"); } #[test] fn wrong_display_parameter_count() { - test_sysfile("wrong_display_parameter_count"); + test_sack_sysfile("wrong_display_parameter_count"); } #[test] fn wrong_display_measurement_level() { - test_sysfile("wrong_display_measurement_level"); + test_sack_sysfile("wrong_display_measurement_level"); } #[test] fn wrong_display_alignment() { - test_sysfile("wrong_display_alignment"); + test_sack_sysfile("wrong_display_alignment"); } #[test] fn 
bad_variable_name_in_variable_value_pair() { - test_sysfile("bad_variable_name_in_variable_value_pair"); + test_sack_sysfile("bad_variable_name_in_variable_value_pair"); } #[test] fn duplicate_long_variable_name() { - test_sysfile("duplicate_long_variable_name"); + test_sack_sysfile("duplicate_long_variable_name"); } #[test] fn bad_very_long_string_length() { - test_sysfile("bad_very_long_string_length"); + test_sack_sysfile("bad_very_long_string_length"); } #[test] fn bad_very_long_string_segment_width() { - test_sysfile("bad_very_long_string_segment_width"); + test_sack_sysfile("bad_very_long_string_segment_width"); } #[test] fn too_many_value_labels() { - test_sysfile("too_many_value_labels"); + test_sack_sysfile("too_many_value_labels"); } #[test] fn missing_type_4_record() { - test_sysfile("missing_type_4_record"); + test_sack_sysfile("missing_type_4_record"); } #[test] fn value_label_with_no_associated_variables() { - test_sysfile("value_label_with_no_associated_variables"); + test_sack_sysfile("value_label_with_no_associated_variables"); } #[test] fn type_4_record_names_long_string_variable() { - test_sysfile("type_4_record_names_long_string_variable"); + test_sack_sysfile("type_4_record_names_long_string_variable"); } #[test] fn value_label_variable_indexes_must_be_in_correct_range() { - test_sysfile("value_label_variable_indexes_must_be_in_correct_range"); + test_sack_sysfile("value_label_variable_indexes_must_be_in_correct_range"); } #[test] fn value_label_variable_indexes_must_not_be_long_string_continuation() { - test_sysfile("value_label_variable_indexes_must_not_be_long_string_continuation"); + test_sack_sysfile("value_label_variable_indexes_must_not_be_long_string_continuation"); } #[test] fn variables_for_value_label_must_all_be_same_type() { - test_sysfile("variables_for_value_label_must_all_be_same_type"); + test_sack_sysfile("variables_for_value_label_must_all_be_same_type"); } #[test] fn duplicate_value_labels_type() { - 
test_sysfile("duplicate_value_labels_type"); + test_sack_sysfile("duplicate_value_labels_type"); } #[test] fn missing_attribute_value() { - test_sysfile("missing_attribute_value"); + test_sack_sysfile("missing_attribute_value"); } #[test] fn unquoted_attribute_value() { - test_sysfile("unquoted_attribute_value"); + test_sack_sysfile("unquoted_attribute_value"); } #[test] fn duplicate_attribute_name() { - test_sysfile("duplicate_attribute_name"); + test_sack_sysfile("duplicate_attribute_name"); } #[test] fn bad_variable_name_in_long_string_value_label() { - test_sysfile("bad_variable_name_in_long_string_value_label"); + test_sack_sysfile("bad_variable_name_in_long_string_value_label"); } #[test] fn fewer_data_records_than_indicated_by_file_header() { - test_sysfile("fewer_data_records_than_indicated_by_file_header"); + test_sack_sysfile("fewer_data_records_than_indicated_by_file_header"); +} + +#[test] +fn more_data_records_than_indicated_by_file_header() { + test_sack_sysfile("more_data_records_than_indicated_by_file_header"); } #[test] fn partial_data_record_between_variables() { - test_sysfile("partial_data_record_between_variables"); + test_sack_sysfile("partial_data_record_between_variables"); } #[test] fn partial_data_record_within_long_string() { - test_sysfile("partial_data_record_within_long_string"); + test_sack_sysfile("partial_data_record_within_long_string"); } #[test] fn partial_compressed_data_record() { - test_sysfile("partial_compressed_data_record"); + test_sack_sysfile("partial_compressed_data_record"); } #[test] fn zcompressed_data_bad_zheader_ofs() { - test_sysfile("zcompressed_data_bad_zheader_ofs"); + test_sack_sysfile("zcompressed_data_bad_zheader_ofs"); } #[test] fn zcompressed_data_bad_ztrailer_ofs() { - test_sysfile("zcompressed_data_bad_ztrailer_ofs"); + test_sack_sysfile("zcompressed_data_bad_ztrailer_ofs"); } #[test] fn zcompressed_data_invalid_ztrailer_len() { - test_sysfile("zcompressed_data_invalid_ztrailer_len"); + 
test_sack_sysfile("zcompressed_data_invalid_ztrailer_len"); } #[test] fn zcompressed_data_wrong_ztrailer_len() { - test_sysfile("zcompressed_data_wrong_ztrailer_len"); + test_sack_sysfile("zcompressed_data_wrong_ztrailer_len"); } #[test] fn zcompressed_data_wrong_ztrailer_bias() { - test_sysfile("zcompressed_data_wrong_ztrailer_bias"); + test_sack_sysfile("zcompressed_data_wrong_ztrailer_bias"); } #[test] fn zcompressed_data_wrong_ztrailer_zero() { - test_sysfile("zcompressed_data_wrong_ztrailer_zero"); + test_sack_sysfile("zcompressed_data_wrong_ztrailer_zero"); } #[test] fn zcompressed_data_wrong_block_size() { - test_sysfile("zcompressed_data_wrong_block_size"); + test_sack_sysfile("zcompressed_data_wrong_block_size"); } #[test] fn zcompressed_data_wrong_n_blocks() { - test_sysfile("zcompressed_data_wrong_n_blocks"); + test_sack_sysfile("zcompressed_data_wrong_n_blocks"); } #[test] fn zcompressed_data_wrong_uncompressed_ofs() { - test_sysfile("zcompressed_data_wrong_uncompressed_ofs"); + test_sack_sysfile("zcompressed_data_wrong_uncompressed_ofs"); } #[test] fn zcompressed_data_wrong_compressed_ofs() { - test_sysfile("zcompressed_data_wrong_compressed_ofs"); + test_sack_sysfile("zcompressed_data_wrong_compressed_ofs"); } #[test] fn zcompressed_data_compressed_sizes_dont_add_up() { - test_sysfile("zcompressed_data_compressed_sizes_dont_add_up"); + test_sack_sysfile("zcompressed_data_compressed_sizes_dont_add_up"); } #[test] fn zcompressed_data_uncompressed_size_block_size() { - test_sysfile("zcompressed_data_uncompressed_size_block_size"); + test_sack_sysfile("zcompressed_data_uncompressed_size_block_size"); } #[test] fn zcompressed_data_compression_expands_data_too_much() { - test_sysfile("zcompressed_data_compression_expands_data_too_much"); + test_sack_sysfile("zcompressed_data_compression_expands_data_too_much"); } #[test] fn zcompressed_data_compressed_sizes_don_t_add_up() { - test_sysfile("zcompressed_data_compressed_sizes_don_t_add_up"); + 
test_sack_sysfile("zcompressed_data_compressed_sizes_don_t_add_up"); } +/// CVE-2017-10791. +/// See also https://bugzilla.redhat.com/show_bug.cgi?id=1467004. +/// See also https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=866890. +/// See also https://security-tracker.debian.org/tracker/CVE-2017-10791. +/// Found by team OWL337, using the collAFL fuzzer. #[test] fn integer_overflows_in_long_string_missing_values() { - test_sysfile("integer_overflows_in_long_string_missing_values"); + test_raw_sysfile("integer_overflows_in_long_string_missing_values"); } #[test] fn null_dereference_skipping_bad_extension_record_18() { - test_sysfile("null_dereference_skipping_bad_extension_record_18"); + test_sack_sysfile("null_dereference_skipping_bad_extension_record_18"); } /// Duplicate variable name handling negative test. @@ -508,10 +518,21 @@ fn null_dereference_skipping_bad_extension_record_18() { /// #41475). #[test] fn duplicate_variable_name() { - test_sysfile("duplicate_variable_name"); + test_sack_sysfile("duplicate_variable_name"); +} + +fn test_raw_sysfile(name: &str) { + let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src/sys/testdata") + .join(name) + .with_extension("sav"); + let sysfile = std::fs::read(&input_filename).unwrap(); + let expected_filename = input_filename.with_extension("expected"); + let expected = String::from_utf8(std::fs::read(&expected_filename).unwrap()).unwrap(); + test_sysfile(name, sysfile, &expected, &expected_filename); } -fn test_sysfile(name: &str) { +fn test_sack_sysfile(name: &str) { let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) .join("src/sys/testdata") .join(name) @@ -528,114 +549,115 @@ fn test_sysfile(name: &str) { }, ); let sysfile = sack(&input, Some(&input_filename), endian).unwrap(); - let cursor = Cursor::new(sysfile); - let mut warnings = Vec::new(); - let mut reader = Reader::new(cursor, |warning| warnings.push(warning)).unwrap(); - let output = match reader.headers().collect() { - 
Ok(headers) => { - let cases = reader.cases(); - let encoding = - encoding_from_headers(&headers, &mut |warning| warnings.push(warning)).unwrap(); - let mut decoder = Decoder::new(encoding, |warning| warnings.push(warning)); - let mut decoded_records = Vec::new(); - for header in headers { - decoded_records.push(header.decode(&mut decoder).unwrap()); - } - drop(decoder); - - let mut errors = Vec::new(); - let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap(); - let (dictionary, metadata, cases) = - decode(headers, cases, encoding, |e| errors.push(e)).unwrap(); - let (group, data) = metadata.to_pivot_rows(); - let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]) - .with_data( - data.into_iter() - .enumerate() - .filter(|(_row, value)| !value.is_empty()) - .map(|(row, value)| ([row], value)), - ); - let (group, data) = dictionary.to_pivot_rows(); - let dictionary_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]) - .with_data( - data.into_iter() - .enumerate() - .filter(|(_row, value)| !value.is_empty()) - .map(|(row, value)| ([row], value)), - ); - let mut output = Vec::new(); - output.extend( - warnings - .into_iter() - .map(|warning| Arc::new(Item::from(Text::new_log(warning.to_string())))), - ); - output.extend( - errors - .into_iter() - .map(|error| Arc::new(Item::from(Text::new_log(error.to_string())))), - ); - output.push(Arc::new(metadata_table.into())); - output.push(Arc::new(dictionary_table.into())); - output.push(Arc::new( - dictionary.output_variables().to_pivot_table().into(), - )); - if let Some(pt) = dictionary.output_value_labels().to_pivot_table() { - output.push(Arc::new(pt.into())); - } - if let Some(pt) = dictionary.output_mrsets().to_pivot_table() { - output.push(Arc::new(pt.into())); - } - if let Some(pt) = dictionary.output_attributes().to_pivot_table() { - output.push(Arc::new(pt.into())); - } - if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() { - 
output.push(Arc::new(pt.into())); - } - if let Some(cases) = cases { - let variables = Group::new("Variable") - .with_multiple(dictionary.variables.iter().map(|var| &**var)); - let mut case_numbers = Group::new("Case").with_label_shown(); - let mut data = Vec::new(); - for (case_number, case) in cases.enumerate() { - match case { - Ok(case) => { - case_numbers.push(Value::new_integer(Some( - (case_numbers.len() + 1) as f64, - ))); - data.push( - case.into_iter() - .map(|datum| Value::new_datum(&datum, dictionary.encoding)) - .collect::>(), - ); - } - Err(error) => { - output.push(Arc::new(Item::from(Text::new_log(error.to_string())))); - } + test_sysfile(name, sysfile, &expected, &expected_filename); + } +} + +fn test_sysfile(name: &str, sysfile: Vec, expected: &str, expected_filename: &Path) { + let cursor = Cursor::new(sysfile); + let mut warnings = Vec::new(); + let mut reader = Reader::new(cursor, |warning| warnings.push(warning)).unwrap(); + let output = match reader.headers().collect() { + Ok(headers) => { + let cases = reader.cases(); + let encoding = + encoding_from_headers(&headers, &mut |warning| warnings.push(warning)).unwrap(); + let mut decoder = Decoder::new(encoding, |warning| warnings.push(warning)); + let mut decoded_records = Vec::new(); + for header in headers { + decoded_records.push(header.decode(&mut decoder).unwrap()); + } + drop(decoder); + + let mut errors = Vec::new(); + let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap(); + let (dictionary, metadata, cases) = + decode(headers, cases, encoding, |e| errors.push(e)).unwrap(); + let (group, data) = metadata.to_pivot_rows(); + let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data( + data.into_iter() + .enumerate() + .filter(|(_row, value)| !value.is_empty()) + .map(|(row, value)| ([row], value)), + ); + let (group, data) = dictionary.to_pivot_rows(); + let dictionary_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data( 
+ data.into_iter() + .enumerate() + .filter(|(_row, value)| !value.is_empty()) + .map(|(row, value)| ([row], value)), + ); + let mut output = Vec::new(); + output.extend( + warnings + .into_iter() + .map(|warning| Arc::new(Item::from(Text::new_log(warning.to_string())))), + ); + output.extend( + errors + .into_iter() + .map(|error| Arc::new(Item::from(Text::new_log(error.to_string())))), + ); + output.push(Arc::new(metadata_table.into())); + output.push(Arc::new(dictionary_table.into())); + output.push(Arc::new( + dictionary.output_variables().to_pivot_table().into(), + )); + if let Some(pt) = dictionary.output_value_labels().to_pivot_table() { + output.push(Arc::new(pt.into())); + } + if let Some(pt) = dictionary.output_mrsets().to_pivot_table() { + output.push(Arc::new(pt.into())); + } + if let Some(pt) = dictionary.output_attributes().to_pivot_table() { + output.push(Arc::new(pt.into())); + } + if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() { + output.push(Arc::new(pt.into())); + } + if let Some(cases) = cases { + let variables = Group::new("Variable") + .with_multiple(dictionary.variables.iter().map(|var| &**var)); + let mut case_numbers = Group::new("Case").with_label_shown(); + let mut data = Vec::new(); + for case in cases { + match case { + Ok(case) => { + case_numbers + .push(Value::new_integer(Some((case_numbers.len() + 1) as f64))); + data.push( + case.into_iter() + .map(|datum| Value::new_datum(&datum, dictionary.encoding)) + .collect::>(), + ); + } + Err(error) => { + output.push(Arc::new(Item::from(Text::new_log(error.to_string())))); } } - if !data.is_empty() { - let mut pt = PivotTable::new([ - (Axis3::X, Dimension::new(variables)), - (Axis3::Y, Dimension::new(case_numbers)), - ]); - for (row_number, row) in data.into_iter().enumerate() { - for (column_number, datum) in row.into_iter().enumerate() { - pt.insert(&[column_number, row_number], datum); - } + } + if !data.is_empty() { + let mut pt = PivotTable::new([ + (Axis3::X, 
Dimension::new(variables)), + (Axis3::Y, Dimension::new(case_numbers)), + ]); + for (row_number, row) in data.into_iter().enumerate() { + for (column_number, datum) in row.into_iter().enumerate() { + pt.insert(&[column_number, row_number], datum); } - output.push(Arc::new(pt.into())); } + output.push(Arc::new(pt.into())); } - Item::new(Details::Group(output)) } - Err(error) => Item::new(Details::Text(Box::new(Text::new_log(error.to_string())))), - }; - - assert_lines_eq( - &expected, - expected_filename.display(), - &output.to_string(), - "actual", - ); - } + Item::new(Details::Group(output)) + } + Err(error) => Item::new(Details::Text(Box::new(Text::new_log(error.to_string())))), + }; + + assert_lines_eq( + &expected, + expected_filename.display(), + &output.to_string(), + "actual", + ); } diff --git a/rust/pspp/src/sys/testdata/compressed_data.expected b/rust/pspp/src/sys/testdata/compressed_data.expected index f36fb5ebce..3b0e1d4e21 100644 --- a/rust/pspp/src/sys/testdata/compressed_data.expected +++ b/rust/pspp/src/sys/testdata/compressed_data.expected @@ -21,3 +21,11 @@ │str8 │ 4│ │Nominal │Input│ 8│Left │A8 │A8 │ │ │str15│ 5│ │Nominal │Input│ 15│Left │A15 │A15 │ │ ╰─────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭────┬──────┬──────┬────────┬────────────────┬──────────────────────────────╮ +│Case│ num1 │ num2 │ str4 │ str8 │ str15 │ +├────┼──────┼──────┼────────┼────────────────┼──────────────────────────────┤ +│1 │-99.00│ .00│ │abcdefgh │ 0123 │ +│2 │ . 
│151.00│jklm │nopqrstu │vwxyzABC │ +│3 │ 1.00│ 2.00│DEFG │HIJKLMNO │ PQRSTUV │ +╰────┴──────┴──────┴────────┴────────────────┴──────────────────────────────╯ diff --git a/rust/pspp/src/sys/testdata/compressed_data_other_bias.expected b/rust/pspp/src/sys/testdata/compressed_data_other_bias.expected index f57bc54fa1..17e3b4157a 100644 --- a/rust/pspp/src/sys/testdata/compressed_data_other_bias.expected +++ b/rust/pspp/src/sys/testdata/compressed_data_other_bias.expected @@ -23,3 +23,11 @@ Compression bias is 50 instead of the usual values of 0 or 100. │str8 │ 4│ │Nominal │Input│ 8│Left │A8 │A8 │ │ │str15│ 5│ │Nominal │Input│ 15│Left │A15 │A15 │ │ ╰─────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭────┬──────┬──────┬────────┬────────────────┬──────────────────────────────╮ +│Case│ num1 │ num2 │ str4 │ str8 │ str15 │ +├────┼──────┼──────┼────────┼────────────────┼──────────────────────────────┤ +│1 │-49.00│ 50.00│ │abcdefgh │ 0123 │ +│2 │ . │201.00│jklm │nopqrstu │vwxyzABC │ +│3 │ 51.00│ 52.00│DEFG │HIJKLMNO │ PQRSTUV │ +╰────┴──────┴──────┴────────┴────────────────┴──────────────────────────────╯ diff --git a/rust/pspp/src/sys/testdata/compressed_data_zero_bias.expected b/rust/pspp/src/sys/testdata/compressed_data_zero_bias.expected index f36fb5ebce..0a4fd1167b 100644 --- a/rust/pspp/src/sys/testdata/compressed_data_zero_bias.expected +++ b/rust/pspp/src/sys/testdata/compressed_data_zero_bias.expected @@ -21,3 +21,11 @@ │str8 │ 4│ │Nominal │Input│ 8│Left │A8 │A8 │ │ │str15│ 5│ │Nominal │Input│ 15│Left │A15 │A15 │ │ ╰─────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭────┬──────┬──────┬────────┬────────────────┬──────────────────────────────╮ +│Case│ num1 │ num2 │ str4 │ str8 │ str15 │ +├────┼──────┼──────┼────────┼────────────────┼──────────────────────────────┤ +│1 │ 1.00│100.00│ │abcdefgh │ 0123 │ +│2 │ . 
│251.00│jklm │nopqrstu │vwxyzABC │ +│3 │101.00│102.00│DEFG │HIJKLMNO │ PQRSTUV │ +╰────┴──────┴──────┴────────┴────────────────┴──────────────────────────────╯ diff --git a/rust/pspp/src/sys/testdata/duplicate_long_variable_name.expected b/rust/pspp/src/sys/testdata/duplicate_long_variable_name.expected index de5196bcd7..14947fd867 100644 --- a/rust/pspp/src/sys/testdata/duplicate_long_variable_name.expected +++ b/rust/pspp/src/sys/testdata/duplicate_long_variable_name.expected @@ -1,8 +1,8 @@ -Invalid name in long variable name record. "_Invalid" may not be used as an identifier because it begins with disallowed character "_". +Invalid name in long variable name record. "_Invalid" may not be used as an identifier because it begins with disallowed character '_'. -Invalid name in long variable name record. "$Invalid" may not be used as an identifier because it begins with disallowed character "$". +Invalid name in long variable name record. "$Invalid" may not be used as an identifier because it begins with disallowed character '$'. -Invalid name in long variable name record. "#Invalid" may not be used as an identifier because it begins with disallowed character "#". +Invalid name in long variable name record. "#Invalid" may not be used as an identifier because it begins with disallowed character '#'. Duplicate long variable name LONGVARIABLENAME. 
diff --git a/rust/pspp/src/sys/testdata/fewer_data_records_than_indicated_by_file_header.expected b/rust/pspp/src/sys/testdata/fewer_data_records_than_indicated_by_file_header.expected index e69de29bb2..41eca0a266 100644 --- a/rust/pspp/src/sys/testdata/fewer_data_records_than_indicated_by_file_header.expected +++ b/rust/pspp/src/sys/testdata/fewer_data_records_than_indicated_by_file_header.expected @@ -0,0 +1,30 @@ +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +├──────────────────────┼────────────────────────┤ +│ Compression │None │ +│ Number of Cases│ 5│ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬─╮ +│Variables│2│ +╰─────────┴─╯ + +╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +│num2│ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +File metadata says it contains 5 cases, but 4 cases were read. 
+ +╭────┬────┬────╮ +│Case│num1│num2│ +├────┼────┼────┤ +│1 │1.00│2.00│ +│2 │3.00│4.00│ +│3 │5.00│6.00│ +│4 │7.00│8.00│ +╰────┴────┴────╯ diff --git a/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.expected b/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.expected index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..10f60064aaa59c281b714805cac34b2bf77c8644 100644 GIT binary patch literal 4089 zcmdT`%Wm676wTUSap6rijX+9r;soeIQ0&5mtr(Ugqd+$!a%4?QaRkmNjpM*D|s)CYi>h7ZeLCNTo)_lH_YE zfpkiljC28 zW1Z1l8ViqRlUVZ=8N^qMEs3<$L*R9yB4v{;OkNsE)9kZm$Wgc zUl>jSWv5mx6tbSnNPs~-6zS9oG=*YiM3U$m=vU=E{62VyJTGJ_R;ZF*iA)KOjEwSl zAtK-1zQ23>(-;5WeX)mslEdw9dv|YtCifd>5}!BrZ&VS5ga*N*Z%30y{U97rID9r3 zK07#s8hrfHMcRLM;biynf92Y*GQ}(SJk#+evPGpJ&ZcyQC9iJ`K71kt7E0Lf1z65l zc6!k7#Yy67FZwr=S-zLN6Oe2Glh&W}@xSe#uWhVXYwY-33S3nx3TzuVvM9~${IaGP}IhYt5;xJ%J_HTMQ-nKwY-8HU3qlPer7bShuvGP=-- z1b3xkyTo&psOyw9fX_1pO&pXf!LRuQs$S0&b`cJlnMgMICTO3oEi;5T0qlk z7fIb=Q@_rZxLY;nHXrghuyH8}LUh}YloCQTjRel}+7GgmbiLt^!%{xtF4XlhKOKON z4}1MWyZKr$?g^ex8J|EO(DNmPXrghg5J_bT*}s#(ph}>G5dBvj2=kHe>l_xj;JHf& zPUXU#&(I&iQ}JuQ7L0p>$K|Z$fcWtDW4}o?CFSyM0Zpr2Bz1>P{W@FXZq;(PpFKia zMs0VL8@_xQz&EVz;o5HUkQZ|fd0#gNX3Jpdkjh65FHGdXOE2sNd^`CB;!4O;!D80r z@xbH6YkHc=8XPzy!{Wkd;2=vp&9D}9=#1Qj-bWfj#Ygfq8#N;>SX@LpaF8VjpRLKe zx@L{S=&^$=d0ZLQB6+xEjl$@OgDiPc88vJAfi()FLkC%MSQ*tK`S$$C8imnQ2U+s8 qGO9)L_HHWs?uEF^yY7X!*B2kR7Rfha&2C+!PX|xS2JMc*mi-IRj`X7d literal 0 HcmV?d00001 diff --git a/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sack b/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sack deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sav b/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.sav new file mode 100644 index 0000000000000000000000000000000000000000..6462081f326d2cdb2a16dbe140684188a11f454d GIT binary patch literal 671 
zcmbtR!AiqG5ZzYAR=s*((1Tt|l8qn*p#iNZ6{L}h$3S+I)R-3b;L%_7FFf`O()uQu zg&u?+eDLxzGjC?HoAdD`7+suQ%J?~scnd42Ln&q zrM1OE1%tFKCD0W@?1LSUaMTK(ZzJW&$E1?J^24DDhga9s@E>3*?b}ATcOZzfXzBs} zj(V`8uBh(L((LIV0^9jQ93r^~(wTeU3Fz>RX6hx<93uIhm-;phR;_GSUu%xZcg$Xz zNnV=o=DGL3@~|~~PT7ACtudGG>5<<; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3; + +# Numeric variables. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; +2; 0; 0; 0; 0x050800 *2; s8 "NUM2"; + +# Character encoding record. +7; 20; 1; 12; "windows-1252"; + +# Data. +999; 0; +1.0; 2.0; +3.0; 4.0; +5.0; 6.0; +7.0; 8.0; +9.0; 10.0; +# Extra record here: +11.0; 12.0; diff --git a/rust/pspp/src/sys/testdata/value_labels.expected b/rust/pspp/src/sys/testdata/value_labels.expected index c62f7006a2..54bd0ec5f0 100644 --- a/rust/pspp/src/sys/testdata/value_labels.expected +++ b/rust/pspp/src/sys/testdata/value_labels.expected @@ -5,7 +5,7 @@ │ Version │1.2.3 │ ├──────────────────────┼────────────────────────┤ │ Compression │None │ -│ Number of Cases│ 1│ +│ Number of Cases│ 0│ ╰──────────────────────┴────────────────────────╯ ╭─────────┬────────────────────────╮ diff --git a/rust/pspp/src/sys/testdata/value_labels.sack b/rust/pspp/src/sys/testdata/value_labels.sack index 8198711e1e..6d5e2cfdaf 100644 --- a/rust/pspp/src/sys/testdata/value_labels.sack +++ b/rust/pspp/src/sys/testdata/value_labels.sack @@ -4,7 +4,7 @@ 22; # Nominal case size 0; # Not compressed 0; # Not weighted -1; # 1 case. +0; # 0 cases. 100.0; # Bias. "05 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; i8 0 *3; -- 2.30.2