borrow::Cow,
cmp::Ordering,
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
- fmt::{Debug, Formatter, Result as FmtResult},
+ fmt::{Debug, Display, Formatter, Result as FmtResult},
hash::Hash,
ops::{Bound, RangeBounds, RangeInclusive},
str::FromStr,
self.as_string_width().unwrap() - segment_idx * Self::EFFECTIVE_VLS_CHUNK
}
}
+
+ pub fn display_adjective(&self) -> VarWidthAdjective {
+ VarWidthAdjective(*self)
+ }
}
impl From<VarWidth> for VarType {
}
}
+pub struct VarWidthAdjective(VarWidth);
+
+impl Display for VarWidthAdjective {
+ fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+ match self.0 {
+ VarWidth::Numeric => write!(f, "numeric"),
+ VarWidth::String(width) => write!(f, "{width}-byte string"),
+ }
+ }
+}
+
#[derive(Clone)]
pub enum Datum<S = RawString> {
Number(Option<f64>),
#[error("File header claims {expected} variable positions but {actual} were read from file.")]
WrongVariablePositions { actual: usize, expected: usize },
+ #[error("Unknown variable name \"{name}\" in long string missing value record.")]
+ LongStringMissingValueUnknownVariable { name: Identifier },
+
+ #[error("Invalid long string missing value for {} variable {name}.", width.display_adjective())]
+ LongStringMissingValueBadWdith { name: Identifier, width: VarWidth },
+
+ #[error("Long string missing values record says variable {name} has {count} missing values, but only 1 to 3 missing values are allowed.")]
+ LongStringMissingValueInvalidCount { name: Identifier, count: usize },
+
#[error("Details TBD (cooked)")]
TBD,
}
}
}
- for record in headers
+ for mut record in headers
.long_string_missing_values
.drain(..)
.flat_map(|record| record.0.into_iter())
{
let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
- warn(dbg!(Error::TBD));
+ warn(Error::LongStringMissingValueUnknownVariable {
+ name: record.var_name.clone(),
+ });
continue;
};
+ if !variable.width.is_long_string() {
+ warn(Error::LongStringMissingValueBadWdith {
+ name: record.var_name.clone(),
+ width: variable.width,
+ });
+ continue;
+ }
+ if record.missing_values.len() > 3 {
+ warn(Error::LongStringMissingValueInvalidCount {
+ name: record.var_name.clone(),
+ count: record.missing_values.len(),
+ });
+ record.missing_values.truncate(3);
+ }
let values = record
.missing_values
.into_iter()
Datum::String(value)
})
.collect::<Vec<_>>();
- dbg!(&values);
match MissingValues::new(values, None) {
Ok(missing_values) => variable.missing_values = missing_values,
- Err(MissingValuesError::TooMany) => warn(Error::TBD),
Err(MissingValuesError::TooWide) => warn(Error::TBD),
- Err(MissingValuesError::MixedTypes) => unreachable!(),
+ Err(MissingValuesError::TooMany) | Err(MissingValuesError::MixedTypes) => {
+ unreachable!()
+ }
}
}
};
impl LongStringMissingValueRecord<RawString> {
- fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+ fn parse(
+ ext: &Extension,
+ endian: Endian,
+ warn: &mut dyn FnMut(Warning),
+ ) -> Result<Record, Warning> {
ext.check_size(&LONG_STRING_MISSING_VALUE_RECORD)?;
let mut input = &ext.data[..];
let mut missing_value_set = Vec::new();
while !input.is_empty() {
let var_name = read_string(&mut input, endian)?;
+ dbg!(&var_name);
let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
let value_len: u32 = endian.parse(read_bytes(&mut input)?);
if value_len != 8 {
let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
- return Err(Warning::BadLongMissingValueLength {
+ warn(Warning::BadLongMissingValueLength {
record_offset: ext.offsets.start,
offset,
value_len,
});
+ read_vec(
+ &mut input,
+ dbg!(value_len as usize * n_missing_values as usize),
+ )?;
+ continue;
}
let mut missing_values = Vec::new();
for i in 0..n_missing_values {
- let value: [u8; 8] = read_bytes(&mut input)?;
- let numeric_value: u64 = endian.parse(value);
- let value = if i > 0 && numeric_value == 8 {
+ if i > 0 {
// Tolerate files written by old, buggy versions of PSPP
// where we believed that the value_length was repeated
// before each missing value.
- read_bytes(&mut input)?
- } else {
- value
- };
+ let mut peek = input;
+ let number: u32 = endian.parse(read_bytes(&mut peek)?);
+ if number == 8 {
+ input = peek;
+ }
+ }
+
+ let value: [u8; 8] = read_bytes(&mut input)?;
missing_values.push(RawStrArray(value));
}
missing_value_set.push(LongStringMissingValues {
11 => VarDisplayRecord::parse(&extension, var_types, endian, warn),
7 | 19 => MultipleResponseRecord::parse(&extension, endian),
21 => LongStringValueLabelRecord::parse(&extension, endian),
- 22 => LongStringMissingValueRecord::parse(&extension, endian),
+ 22 => LongStringMissingValueRecord::parse(&extension, endian, warn),
20 => EncodingRecord::parse(&extension, endian),
16 => NumberOfCasesRecord::parse(&extension, endian),
5 => RawVariableSetRecord::parse(extension),
+In long string missing values record starting at offset 0x238, value length at offset 0x2a8 is 12 instead of the expected 8.
+
+File header claims 8 variable positions but 9 were read from file.
+
+Long string missing values record says variable STR2 has 4 missing values, but only 1 to 3 missing values are allowed.
+
+Unknown variable name "Nonexistent" in long string missing value record.
+
+Invalid long string missing value for numeric variable NUM1.
+
+Invalid long string missing value for 7-byte string variable STR4.
+
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+│ Version │1.2.3 │
+├──────────────────────┼────────────────────────┤
+│ Compression │None │
+│ Number of Cases│ 1│
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬──────────────────────────────╮
+│Label │PSPP synthetic test file: ôõöø│
+│Variables│ 5│
+╰─────────┴──────────────────────────────╯
+
+╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────────────────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│ Missing Values │
+├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────────────────────────┤
+│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│str1│ 2│ │Nominal │Input│ 9│Left │A9 │A9 │ │
+│str2│ 3│ │Nominal │Input│ 10│Left │A10 │A10 │"abcdefgh"; "ijklmnop"; "qrstuvwx"│
+│str3│ 4│ │Nominal │Input│ 11│Left │A11 │A11 │"ABCDEFGH"; "IJKLMNOP" │
+│str4│ 5│ │Nominal │Input│ 7│Left │A7 │A7 │ │
+╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────────────────────────╯
# File header.
"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
2; # Layout code
-7; # Nominal case size
+8; # Nominal case size
0; # Not compressed
0; # Not weighted
1; # 1 case.
2; -1; 0; 0; 0; 0; s8 "";
2; 11; 0; 0; 0x010b00 *2; s8 "STR3";
2; -1; 0; 0; 0; 0; s8 "";
+2; 7; 0; 0; 0x010700 *2; s8 "STR4";
+2; -1; 0; 0; 0; 0; s8 "";
# Machine integer info record.
7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252;
# Buggy way that this was written in old PSPP, with a length
# before each missing value instead of just once.
COUNT("STR3"); i8 2; 8; "ABCDEFGH"; >>8<<; "IJKLMNOP";
+
+# Invalid 8-byte missing value for 7-byte string.
+COUNT("STR4"); i8 1; 8; "ABCDEFGH";
);
# Character encoding record.
│Variables│ 21│
╰─────────┴──────────────────────────────╯
-â\95â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¬â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¬â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¬â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¬â\94\80â\94\80â\94\80â\94\80â\94\80â\94¬â\94\80â\94\80â\94\80â\94\80â\94\80â\94¬â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¬â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¬â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¬â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\95®
-│ │Position│ Label │Measurement Level│ Role│Width│Alignment│Print Format│Write Format│ Missing Values │
-â\94\9câ\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¼â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¼â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¼â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¼â\94\80â\94\80â\94\80â\94\80â\94\80â\94¼â\94\80â\94\80â\94\80â\94\80â\94\80â\94¼â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¼â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¼â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¼â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94¤
-│num1 │ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
-│Numeric variable 2's label (ùúû)│ 2│Numeric variable 2's label (ùúû)│ │Input│ 8│Right │F8.0 │F8.0 │ │
-│num3 │ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │1 │
-│Another numeric variable label │ 4│Another numeric variable label │ │Input│ 8│Right │F8.0 │F8.0 │1 │
-│num5 │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │1; 2 │
-│num6 │ 6│ │ │Input│ 8│Right │F8.0 │F8.0 │1; 2; 3 │
-│num7 │ 7│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU 3 │
-│num8 │ 8│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU 3; 5 │
-│num9 │ 9│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU HIGH; -5 │
-│numàèìñò │ 10│ │ │Input│ 8│Right │F8.0 │F8.0 │LOW THRU 1; 5 │
-│str1 │ 11│ │Nominal │Input│ 4│Left │A4 │A4 │ │
-│String variable 2's label │ 12│String variable 2's label │Nominal │Input│ 4│Left │A4 │A4 │ │
-│str3 │ 13│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS" │
-│Another string variable label │ 14│Another string variable label │Nominal │Input│ 4│Left │A4 │A4 │"OTHR" │
-│str5 │ 15│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS"; "OTHR" │
-│str6 │ 16│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS"; "OTHR"; "MORE" │
-│str7 │ 17│ │Nominal │Input│ 11│Left │A11 │A11 │"first8by" │
-│str8 │ 18│ │Nominal │Input│ 9│Left │A9 │A9 │"abcdefgh " │
-│str9 │ 19│ │Nominal │Input│ 10│Left │A10 │A10 │"abcdefgh "; "01234567 " │
-│str10 │ 20│ │Nominal │Input│ 11│Left │A11 │A11 │"abcdefgh "; "01234567 "; "0 "│
-│25-byte string │ 21│25-byte string │Nominal │Input│ 25│Left │A25 │A25 │ │
-â\95°â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94´â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94´â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94´â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94´â\94\80â\94\80â\94\80â\94\80â\94\80â\94´â\94\80â\94\80â\94\80â\94\80â\94\80â\94´â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94´â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94´â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94´â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\94\80â\95¯
+╭────────────────────────────────┬────────┬────────────────────────────────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬───────────────────────────╮
+│ │Position│ Label │Measurement Level│ Role│Width│Alignment│Print Format│Write Format│ Missing Values │
+├────────────────────────────────┼────────┼────────────────────────────────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼───────────────────────────┤
+│num1 │ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│Numeric variable 2's label (ùúû)│ 2│Numeric variable 2's label (ùúû)│ │Input│ 8│Right │F8.0 │F8.0 │ │
+│num3 │ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │1 │
+│Another numeric variable label │ 4│Another numeric variable label │ │Input│ 8│Right │F8.0 │F8.0 │1 │
+│num5 │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │1; 2 │
+│num6 │ 6│ │ │Input│ 8│Right │F8.0 │F8.0 │1; 2; 3 │
+│num7 │ 7│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU 3 │
+│num8 │ 8│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU 3; 5 │
+│num9 │ 9│ │ │Input│ 8│Right │F8.0 │F8.0 │1 THRU HIGH; -5 │
+│numàèìñò │ 10│ │ │Input│ 8│Right │F8.0 │F8.0 │LOW THRU 1; 5 │
+│str1 │ 11│ │Nominal │Input│ 4│Left │A4 │A4 │ │
+│String variable 2's label │ 12│String variable 2's label │Nominal │Input│ 4│Left │A4 │A4 │ │
+│str3 │ 13│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS" │
+│Another string variable label │ 14│Another string variable label │Nominal │Input│ 4│Left │A4 │A4 │"OTHR" │
+│str5 │ 15│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS"; "OTHR" │
+│str6 │ 16│ │Nominal │Input│ 4│Left │A4 │A4 │"MISS"; "OTHR"; "MORE" │
+│str7 │ 17│ │Nominal │Input│ 11│Left │A11 │A11 │"first8by" │
+│str8 │ 18│ │Nominal │Input│ 9│Left │A9 │A9 │"abcdefgh" │
+│str9 │ 19│ │Nominal │Input│ 10│Left │A10 │A10 │"abcdefgh"; "01234567" │
+│str10 │ 20│ │Nominal │Input│ 11│Left │A11 │A11 │"abcdefgh"; "01234567"; "0"│
+│25-byte string │ 21│25-byte string │Nominal │Input│ 25│Left │A25 │A25 │ │
+╰────────────────────────────────┴────────┴────────────────────────────────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴───────────────────────────╯