continue;
}
- let segment = &self.text[self.indexes.end..index];
let segment_width = self.text[self.indexes.end..index].width();
if self.width == 0 || self.width + segment_width <= self.max_width {
// Add this segment to the current line.
-use std::{
- cell::RefCell,
- collections::{BTreeMap, HashMap},
- ops::Range,
- rc::Rc,
-};
+use std::{cell::RefCell, collections::BTreeMap, ops::Range, rc::Rc};
use crate::{
calendar::date_time_to_pspp,
#[error("Long string missing values record says variable {name} has {count} missing values, but only 1 to 3 missing values are allowed.")]
LongStringMissingValueInvalidCount { name: Identifier, count: usize },
+ /// An extension record that the reader does not recognize. It is reported
+ /// through the warning callback with the record's starting offset, its
+ /// subtype, and the element size and count carried by the record.
+ #[error("Unknown extension record with subtype {subtype} at offset {offset:#x}, consisting of {count} {size}-byte units. Please feel free to report this as a bug.")]
+ UnknownExtensionRecord {
+ offset: u64,
+ subtype: u32,
+ size: u32,
+ count: u32,
+ },
+
+ /// The integer info record's `floating_point_rep` code is something other
+ /// than 1, the only value accepted without a warning. The payload is the
+ /// code that was actually found.
+ #[error(
+ "Floating-point representation indicated by system file ({0}) differs from expected (1)."
+ )]
+ UnexpectedFloatFormat(i32),
+
+ /// The integer info record's `endianness` code (`actual`) disagrees with
+ /// the code implied by the header's byte order (`expected`: 1 for
+ /// big-endian, 2 for little-endian).
+ ///
+ /// NOTE(review): the variant name is spelled `Endianess` while the field it
+ /// reports on is `endianness`; a rename would have to update the use site
+ /// in the same change.
+ #[error(
+ "Integer format indicated by system file ({actual}) differs from expected ({expected})."
+ )]
+ UnexpectedEndianess { actual: i32, expected: i32 },
+
#[error("Details TBD (cooked)")]
TBD,
}
.map(trim_end_spaces)
.collect();
- // XXX warn for weird integer format
- // XXX warn for weird floating-point format, etc.
+ // Sanity-check the optional integer info record. Mismatches are only
+ // reported through `warn`; nothing in this block stops decoding.
+ if let Some(integer_info) = &headers.integer_info {
+ let floating_point_rep = integer_info.floating_point_rep;
+ // 1 is the only floating-point representation code accepted silently.
+ if floating_point_rep != 1 {
+ warn(Error::UnexpectedFloatFormat(floating_point_rep))
+ }
+
+ // The record's endianness code should agree with `headers.header.endian`:
+ // 1 stands for big-endian, 2 for little-endian.
+ let expected = match headers.header.endian {
+ Endian::Big => 1,
+ Endian::Little => 2,
+ };
+ let actual = integer_info.endianness;
+ if actual != expected {
+ warn(Error::UnexpectedEndianess { actual, expected });
+ }
+ };
if let Some(nominal_case_size) = headers.header.nominal_case_size {
let n_vars = headers.variable.len();
dictionary.variable_sets.push(variable_set);
}
+ // Warn once for every entry in `headers.other_extension`. The entries are
+ // drained, so the collection no longer holds them by the time `headers`
+ // is used again below.
+ for record in headers.other_extension.drain(..) {
+ warn(Error::UnknownExtensionRecord {
+ offset: record.offsets.start,
+ subtype: record.subtype,
+ size: record.size,
+ count: record.count,
+ });
+ }
+
let metadata = Metadata::decode(&headers, warn);
Ok((dictionary, metadata))
}
test_sysfile("multiple_documents_records");
}
+/// Runs the `unknown_extension_record` fixture through `test_sysfile`.
+///
+/// Judging by its name, the fixture covers an extension record whose subtype
+/// the reader does not recognize.
+#[test]
+fn unknown_extension_record() {
+ test_sysfile("unknown_extension_record");
+}
+
+/// Runs the `extension_too_large` fixture through `test_sysfile`.
+///
+/// Judging by its name, the fixture covers an extension record that declares
+/// an oversized payload.
+#[test]
+fn extension_too_large() {
+ test_sysfile("extension_too_large");
+}
+
+/// Runs the `bad_machine_integer_info_count` fixture through `test_sysfile`.
+///
+/// Judging by its name, the fixture covers a machine integer info record with
+/// an unexpected element count.
+#[test]
+fn bad_machine_integer_info_count() {
+ test_sysfile("bad_machine_integer_info_count");
+}
+
+/// Runs the `bad_machine_integer_info_float_format` fixture through
+/// `test_sysfile`.
+///
+/// Judging by its name, the fixture covers a machine integer info record with
+/// an unexpected floating-point representation code.
+#[test]
+fn bad_machine_integer_info_float_format() {
+ test_sysfile("bad_machine_integer_info_float_format");
+}
+
+/// Runs the `bad_machine_integer_info_endianness` fixture through
+/// `test_sysfile`.
+///
+/// Judging by its name, the fixture covers a machine integer info record whose
+/// endianness code disagrees with the file's byte order.
+#[test]
+fn bad_machine_integer_info_endianness() {
+ test_sysfile("bad_machine_integer_info_endianness");
+}
+
/// Duplicate variable name handling negative test.
///
/// SPSS-generated system file can contain duplicate variable names (see bug
let expected_filename = input_filename.with_extension("expected");
let expected = String::from_utf8(std::fs::read(&expected_filename).unwrap()).unwrap();
for endian in all::<Endian>() {
+ // The expected text may contain the placeholder `{endian}`. Replace it
+ // with the code for the byte order under test (1 = big, 2 = little),
+ // shadowing `expected` for this iteration only.
+ let expected = expected.replace(
+ "{endian}",
+ match endian {
+ Endian::Big => "1",
+ Endian::Little => "2",
+ },
+ );
let sysfile = sack(&input, Some(&input_filename), endian).unwrap();
let cursor = Cursor::new(sysfile);
let mut warnings = Vec::new();
--- /dev/null
+At offset 0xe0, integer record has bad count 9 instead of the expected 8.
+
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+├──────────────────────┼────────────────────────┤
+│ Compression │SAV │
+│ Number of Cases│Unknown │
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬─╮
+│Variables│1│
+╰─────────┴─╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Numeric variable, no label or missing values.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# Machine integer info record.
+7; 3; 4; >>9<<; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; >>1234<<;
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# End of dictionary.
+999; 0;
--- /dev/null
+Integer format indicated by system file (3) differs from expected ({endian}).
+
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+│ Version │1.2.3 │
+├──────────────────────┼────────────────────────┤
+│ Compression │SAV │
+│ Number of Cases│Unknown │
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬─╮
+│Variables│1│
+╰─────────┴─╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Numeric variable, no label or missing values.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# Machine integer info record.
+7; 3; 4; 8; 1; 2; 3; -1; 1; 1; >>3<<; 1252;
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# End of dictionary.
+999; 0;
--- /dev/null
+Floating-point representation indicated by system file (2) differs from expected (1).
+
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+│ Version │1.2.3 │
+├──────────────────────┼────────────────────────┤
+│ Compression │SAV │
+│ Number of Cases│Unknown │
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬─╮
+│Variables│1│
+╰─────────┴─╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Numeric variable, no label or missing values.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# Machine integer info record.
+7; 3; 4; 8; 1; 2; 3; -1; >>2<<; 1; ENDIAN; 1252;
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# End of dictionary.
+999; 0;
--- /dev/null
+At offset 0xd8, record type 7 subtype 3 is too large with element size 4294963200 and 4294963200 elements.
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 2; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Numeric variable, no label or missing values.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# Too-large extension record.
+7; 3; >>0xfffff000 * 2<<;
│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
│num2│ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
-
--- /dev/null
+Unknown extension record with subtype 30 at offset 0xe0, consisting of 1 1-byte units. Please feel free to report this as a bug.
+
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+├──────────────────────┼────────────────────────┤
+│ Compression │SAV │
+│ Number of Cases│Unknown │
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬─╮
+│Variables│1│
+╰─────────┴─╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+
+# Numeric variable, no label or missing values.
+2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+
+# Extension record with an unknown subtype (30).
+7; 30; 1; 1; i8 0;
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# End of dictionary.
+999; 0;