From af384723375f2eda155d85b50ed5a34f10a0da01 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 17 Jun 2025 13:17:39 -0700 Subject: [PATCH] more tests --- rust/pspp/src/output/text.rs | 1 - rust/pspp/src/sys/cooked.rs | 51 ++++++++++++++++--- rust/pspp/src/sys/test.rs | 32 ++++++++++++ .../bad_machine_integer_info_count.expected | 20 ++++++++ .../bad_machine_integer_info_count.sack | 15 ++++++ ...d_machine_integer_info_endianness.expected | 21 ++++++++ .../bad_machine_integer_info_endianness.sack | 15 ++++++ ...machine_integer_info_float_format.expected | 21 ++++++++ ...bad_machine_integer_info_float_format.sack | 15 ++++++ .../sys/testdata/extension_too_large.expected | 1 + .../src/sys/testdata/extension_too_large.sack | 9 ++++ .../multiple_documents_records.expected | 1 - .../unknown_extension_record.expected | 20 ++++++++ .../testdata/unknown_extension_record.sack | 15 ++++++ 14 files changed, 227 insertions(+), 10 deletions(-) create mode 100644 rust/pspp/src/sys/testdata/bad_machine_integer_info_count.expected create mode 100644 rust/pspp/src/sys/testdata/bad_machine_integer_info_count.sack create mode 100644 rust/pspp/src/sys/testdata/bad_machine_integer_info_endianness.expected create mode 100644 rust/pspp/src/sys/testdata/bad_machine_integer_info_endianness.sack create mode 100644 rust/pspp/src/sys/testdata/bad_machine_integer_info_float_format.expected create mode 100644 rust/pspp/src/sys/testdata/bad_machine_integer_info_float_format.sack create mode 100644 rust/pspp/src/sys/testdata/extension_too_large.expected create mode 100644 rust/pspp/src/sys/testdata/extension_too_large.sack create mode 100644 rust/pspp/src/sys/testdata/unknown_extension_record.expected create mode 100644 rust/pspp/src/sys/testdata/unknown_extension_record.sack diff --git a/rust/pspp/src/output/text.rs b/rust/pspp/src/output/text.rs index be67f96576..6e435a0955 100644 --- a/rust/pspp/src/output/text.rs +++ b/rust/pspp/src/output/text.rs @@ -451,7 +451,6 @@ where continue; } - let segment = &self.text[self.indexes.end..index]; let segment_width = self.text[self.indexes.end..index].width(); if self.width == 0 || self.width + segment_width <= self.max_width { // Add this segment to the current line. diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index dba0c40027..86c4ca9a17 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -1,9 +1,4 @@ -use std::{ - cell::RefCell, - collections::{BTreeMap, HashMap}, - ops::Range, - rc::Rc, -}; +use std::{cell::RefCell, collections::BTreeMap, ops::Range, rc::Rc}; use crate::{ calendar::date_time_to_pspp, @@ -211,6 +206,24 @@ pub enum Error { #[error("Long string missing values record says variable {name} has {count} missing values, but only 1 to 3 missing values are allowed.")] LongStringMissingValueInvalidCount { name: Identifier, count: usize }, + #[error("Unknown extension record with subtype {subtype} at offset {offset:#x}, consisting of {count} {size}-byte units. Please feel free to report this as a bug.")] + UnknownExtensionRecord { + offset: u64, + subtype: u32, + size: u32, + count: u32, + }, + + #[error( + "Floating-point representation indicated by system file ({0}) differs from expected (1)." + )] + UnexpectedFloatFormat(i32), + + #[error( + "Integer format indicated by system file ({actual}) differs from expected ({expected})." + )] + UnexpectedEndianess { actual: i32, expected: i32 }, + #[error("Details TBD (cooked)")] TBD, } @@ -533,8 +546,21 @@ pub fn decode( .map(trim_end_spaces) .collect(); - // XXX warn for weird integer format - // XXX warn for weird floating-point format, etc. + if let Some(integer_info) = &headers.integer_info { + let floating_point_rep = integer_info.floating_point_rep; + if floating_point_rep != 1 { + warn(Error::UnexpectedFloatFormat(floating_point_rep)) + } + + let expected = match headers.header.endian { + Endian::Big => 1, + Endian::Little => 2, + }; + let actual = integer_info.endianness; + if actual != expected { + warn(Error::UnexpectedEndianess { actual, expected }); + } + }; if let Some(nominal_case_size) = headers.header.nominal_case_size { let n_vars = headers.variable.len(); @@ -924,6 +950,15 @@ pub fn decode( dictionary.variable_sets.push(variable_set); } + for record in headers.other_extension.drain(..) { + warn(Error::UnknownExtensionRecord { + offset: record.offsets.start, + subtype: record.subtype, + size: record.size, + count: record.count, + }); + } + let metadata = Metadata::decode(&headers, warn); Ok((dictionary, metadata)) } diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index 75207ee356..3caabc81c9 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -192,6 +192,31 @@ fn multiple_documents_records() { test_sysfile("multiple_documents_records"); } +#[test] +fn unknown_extension_record() { + test_sysfile("unknown_extension_record"); +} + +#[test] +fn extension_too_large() { + test_sysfile("extension_too_large"); +} + +#[test] +fn bad_machine_integer_info_count() { + test_sysfile("bad_machine_integer_info_count"); +} + +#[test] +fn bad_machine_integer_info_float_format() { + test_sysfile("bad_machine_integer_info_float_format"); +} + +#[test] +fn bad_machine_integer_info_endianness() { + test_sysfile("bad_machine_integer_info_endianness"); +} + /// Duplicate variable name handling negative test. /// /// SPSS-generated system file can contain duplicate variable names (see bug @@ -210,6 +235,13 @@ fn test_sysfile(name: &str) { let expected_filename = input_filename.with_extension("expected"); let expected = String::from_utf8(std::fs::read(&expected_filename).unwrap()).unwrap(); for endian in all::() { + let expected = expected.replace( + "{endian}", + match endian { + Endian::Big => "1", + Endian::Little => "2", + }, + ); let sysfile = sack(&input, Some(&input_filename), endian).unwrap(); let cursor = Cursor::new(sysfile); let mut warnings = Vec::new(); diff --git a/rust/pspp/src/sys/testdata/bad_machine_integer_info_count.expected b/rust/pspp/src/sys/testdata/bad_machine_integer_info_count.expected new file mode 100644 index 0000000000..72bda6df51 --- /dev/null +++ b/rust/pspp/src/sys/testdata/bad_machine_integer_info_count.expected @@ -0,0 +1,20 @@ +At offset 0xe0, integer record has bad count 9 instead of the expected 8. + +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +├──────────────────────┼────────────────────────┤ +│ Compression │SAV │ +│ Number of Cases│Unknown │ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬─╮ +│Variables│1│ +╰─────────┴─╯ + +╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ diff --git a/rust/pspp/src/sys/testdata/bad_machine_integer_info_count.sack b/rust/pspp/src/sys/testdata/bad_machine_integer_info_count.sack new file mode 100644 index 0000000000..84e4a25619 --- /dev/null +++ b/rust/pspp/src/sys/testdata/bad_machine_integer_info_count.sack @@ -0,0 +1,15 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3; + +# Numeric variable, no label or missing values. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; + +# Machine integer info record. +7; 3; 4; >>9<<; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; >>1234<<; + +# Character encoding record. +7; 20; 1; 12; "windows-1252"; + +# End of dictionary. +999; 0; diff --git a/rust/pspp/src/sys/testdata/bad_machine_integer_info_endianness.expected b/rust/pspp/src/sys/testdata/bad_machine_integer_info_endianness.expected new file mode 100644 index 0000000000..e66463ad12 --- /dev/null +++ b/rust/pspp/src/sys/testdata/bad_machine_integer_info_endianness.expected @@ -0,0 +1,21 @@ +Integer format indicated by system file (3) differs from expected ({endian}). + +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────────┤ +│ Compression │SAV │ +│ Number of Cases│Unknown │ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬─╮ +│Variables│1│ +╰─────────┴─╯ + +╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ diff --git a/rust/pspp/src/sys/testdata/bad_machine_integer_info_endianness.sack b/rust/pspp/src/sys/testdata/bad_machine_integer_info_endianness.sack new file mode 100644 index 0000000000..22cde40a61 --- /dev/null +++ b/rust/pspp/src/sys/testdata/bad_machine_integer_info_endianness.sack @@ -0,0 +1,15 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3; + +# Numeric variable, no label or missing values. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; + +# Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; >>3<<; 1252; + +# Character encoding record. +7; 20; 1; 12; "windows-1252"; + +# End of dictionary. +999; 0; diff --git a/rust/pspp/src/sys/testdata/bad_machine_integer_info_float_format.expected b/rust/pspp/src/sys/testdata/bad_machine_integer_info_float_format.expected new file mode 100644 index 0000000000..d25902e584 --- /dev/null +++ b/rust/pspp/src/sys/testdata/bad_machine_integer_info_float_format.expected @@ -0,0 +1,21 @@ +Floating-point representation indicated by system file (2) differs from expected (1). + +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +│ Version │1.2.3 │ +├──────────────────────┼────────────────────────┤ +│ Compression │SAV │ +│ Number of Cases│Unknown │ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬─╮ +│Variables│1│ +╰─────────┴─╯ + +╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ diff --git a/rust/pspp/src/sys/testdata/bad_machine_integer_info_float_format.sack b/rust/pspp/src/sys/testdata/bad_machine_integer_info_float_format.sack new file mode 100644 index 0000000000..62d0c7b0da --- /dev/null +++ b/rust/pspp/src/sys/testdata/bad_machine_integer_info_float_format.sack @@ -0,0 +1,15 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3; + +# Numeric variable, no label or missing values. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; + +# Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; >>2<<; 1; ENDIAN; 1252; + +# Character encoding record. +7; 20; 1; 12; "windows-1252"; + +# End of dictionary. +999; 0; diff --git a/rust/pspp/src/sys/testdata/extension_too_large.expected b/rust/pspp/src/sys/testdata/extension_too_large.expected new file mode 100644 index 0000000000..c48edff61c --- /dev/null +++ b/rust/pspp/src/sys/testdata/extension_too_large.expected @@ -0,0 +1 @@ +At offset 0xd8, record type 7 subtype 3 is too large with element size 4294963200 and 4294963200 elements. diff --git a/rust/pspp/src/sys/testdata/extension_too_large.sack b/rust/pspp/src/sys/testdata/extension_too_large.sack new file mode 100644 index 0000000000..ee45f7309a --- /dev/null +++ b/rust/pspp/src/sys/testdata/extension_too_large.sack @@ -0,0 +1,9 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; 2; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3; + +# Numeric variable, no label or missing values. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; + +# Too-large extension record. +7; 3; >>0xfffff000 * 2<<; diff --git a/rust/pspp/src/sys/testdata/multiple_documents_records.expected b/rust/pspp/src/sys/testdata/multiple_documents_records.expected index f6f127ec16..0c181f3954 100644 --- a/rust/pspp/src/sys/testdata/multiple_documents_records.expected +++ b/rust/pspp/src/sys/testdata/multiple_documents_records.expected @@ -19,4 +19,3 @@ │num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ │num2│ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ ╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ - diff --git a/rust/pspp/src/sys/testdata/unknown_extension_record.expected b/rust/pspp/src/sys/testdata/unknown_extension_record.expected new file mode 100644 index 0000000000..3d4be23ca9 --- /dev/null +++ b/rust/pspp/src/sys/testdata/unknown_extension_record.expected @@ -0,0 +1,20 @@ +Unknown extension record with subtype 30 at offset 0xe0, consisting of 1 1-byte units. Please feel free to report this as a bug. + +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +├──────────────────────┼────────────────────────┤ +│ Compression │SAV │ +│ Number of Cases│Unknown │ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬─╮ +│Variables│1│ +╰─────────┴─╯ + +╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ diff --git a/rust/pspp/src/sys/testdata/unknown_extension_record.sack b/rust/pspp/src/sys/testdata/unknown_extension_record.sack new file mode 100644 index 0000000000..ce261aff3d --- /dev/null +++ b/rust/pspp/src/sys/testdata/unknown_extension_record.sack @@ -0,0 +1,15 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3; + +# Numeric variable, no label or missing values. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; + +# Unknown extension record type. +7; 30; 1; 1; i8 0; + +# Character encoding record. +7; 20; 1; 12; "windows-1252"; + +# End of dictionary. +999; 0; -- 2.30.2