From 588777734d9a7e755722bd9ef88c2ba1f977dbea Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 13 Jul 2025 13:23:42 -0700 Subject: [PATCH] warning improvements --- rust/pspp/src/identifier.rs | 23 +- rust/pspp/src/sys/cooked.rs | 6 +- rust/pspp/src/sys/raw.rs | 92 ++-- rust/pspp/src/sys/raw/records.rs | 424 +++++++++++------- .../bad_machine_float_info_size.expected | 2 +- .../bad_machine_integer_info_count.expected | 2 +- ...iable_name_in_variable_value_pair.expected | 2 +- .../compressed_data_other_bias.expected | 2 +- .../duplicate_attribute_name.expected | 4 +- .../duplicate_long_variable_name.expected | 6 +- ...ows_in_long_string_missing_values.expected | Bin 4089 -> 4131 bytes ...nvalid_long_string_missing_values.expected | 2 +- .../testdata/missing_attribute_value.expected | 4 +- ..._response_sets_bad_counted_string.expected | 2 +- ...se_sets_counted_string_bad_length.expected | 2 +- ...sets_counted_string_missing_space.expected | 2 +- ...esponse_sets_missing_label_source.expected | 2 +- ...ssing_newline_after_variable_name.expected | 2 +- ...sponse_sets_missing_space_after_c.expected | 2 +- ...issing_space_after_counted_string.expected | 2 +- ...sponse_sets_missing_space_after_e.expected | 2 +- ...onse_sets_unexpected_label_source.expected | 2 +- ..._skipping_bad_extension_record_18.expected | 2 +- .../unquoted_attribute_value.expected | 4 +- ..._indexes_must_be_in_correct_range.expected | 6 +- ...t_not_be_long_string_continuation.expected | 2 +- ...abel_with_no_associated_variables.expected | 2 +- ...value_label_must_all_be_same_type.expected | 2 +- .../testdata/wrong_display_alignment.expected | 2 +- .../wrong_display_measurement_level.expected | 2 +- .../wrong_display_parameter_count.expected | 2 +- .../wrong_display_parameter_size.expected | 2 +- ...data_uncompressed_size_block_size.expected | 2 +- 33 files changed, 361 insertions(+), 254 deletions(-) diff --git a/rust/pspp/src/identifier.rs b/rust/pspp/src/identifier.rs index b81ddfe5f3..cc9b1f213e 100644 --- a/rust/pspp/src/identifier.rs +++ b/rust/pspp/src/identifier.rs @@ -120,13 +120,13 @@ pub enum Error { #[error("\"!\" is not a valid identifier.")] Bang, - #[error("\"{0}\" may not be used as an identifier because it begins with disallowed character {1:?}.")] - BadFirstCharacter(String, char), + #[error("{string:?} may not be used as an identifier because it begins with disallowed character {c:?}.")] + BadFirstCharacter { string: String, c: char }, #[error( - "\"{0}\" may not be used as an identifier because it contains disallowed character {1:?}." + "{string:?} may not be used as an identifier because it contains disallowed character {c:?}." )] - BadLaterCharacter(String, char), + BadLaterCharacter { string: String, c: char }, #[error("Identifier \"{id}\" is {length} bytes in the encoding in use ({encoding}), which exceeds the {max}-byte limit.")] TooLong { @@ -270,11 +270,17 @@ impl Identifier { let mut i = s.chars(); let first = i.next().unwrap(); if !first.may_start_id() { - return Err(Error::BadFirstCharacter(s.into(), first)); + return Err(Error::BadFirstCharacter { + string: s.into(), + c: first, + }); } for c in i { if !c.may_continue_id() { - return Err(Error::BadLaterCharacter(s.into(), c)); + return Err(Error::BadLaterCharacter { + string: s.into(), + c, + }); } } Ok(()) @@ -305,7 +311,10 @@ impl Identifier { _ => { let s = self.0.into_inner(); let first = s.chars().next().unwrap(); - Err(Error::BadFirstCharacter(s, first)) + Err(Error::BadFirstCharacter { + string: s, + c: first, + }) } } } diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 8a00f5ddd3..2c87f74322 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -757,7 +757,7 @@ impl Headers { for record in self .multiple_response .iter() - .flat_map(|record| record.0.iter()) + .flat_map(|record| record.sets.iter()) { match MultipleResponseSet::decode(&dictionary, record, &mut warn) { Ok(mrset) => { @@ -889,7 +889,7 @@ impl Headers { for record in self .long_string_value_labels .drain(..) - .flat_map(|record| record.0.into_iter()) + .flat_map(|record| record.labels.into_iter()) { let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else { warn(Error::UnknownLongStringValueLabelVariable( @@ -914,7 +914,7 @@ impl Headers { for mut record in self .long_string_missing_values .drain(..) - .flat_map(|record| record.0.into_iter()) + .flat_map(|record| record.values.into_iter()) { let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else { warn(Error::LongStringMissingValueUnknownVariable { diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 57143033d5..bc599c5af7 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -253,65 +253,84 @@ pub enum ErrorDetails { /// /// Warnings indicate that something may be amiss, but they do not prevent /// reading further records. +#[derive(Debug)] +pub struct Warning { + /// Range of file offsets where the warning occurred. + offsets: Option>, + + /// Details of the warning. + details: WarningDetails, +} + +impl std::error::Error for Warning {} + +impl Warning { + pub fn new(offsets: Option>, details: WarningDetails) -> Self { + Self { offsets, details } + } +} + +impl Display for Warning { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + if let Some(offsets) = &self.offsets + && !offsets.is_empty() + { + if offsets.end > offsets.start.wrapping_add(1) { + write!( + f, + "Warning at file offsets {:#x} to {:#x}: ", + offsets.start, offsets.end + )?; + } else { + write!(f, "Warning at file offset {:#x}: ", offsets.start)?; + } + } + write!(f, "{}", &self.details) + } +} + #[derive(ThisError, Debug)] -pub enum Warning { +pub enum WarningDetails { #[error("Unexpected end of data inside extension record.")] UnexpectedEndOfData, #[error( - "At offset {offset:#x}, at least one valid variable index for value labels is required but none were specified." + "At least one valid variable index for value labels is required but none were specified." )] - NoVarIndexes { offset: u64 }, + NoVarIndexes, - #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", !var_type)] + #[error("The first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", !var_type)] MixedVarTypes { - offset: u64, var_type: VarType, wrong_types: Vec, }, #[error( - "At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}] or referred to string continuations: {invalid:?}" + "One or more variable indexes for value labels were not in the valid range [1,{max}] or referred to string continuations: {invalid:?}" )] - InvalidVarIndexes { - offset: u64, - max: usize, - invalid: Vec, - }, + InvalidVarIndexes { max: usize, invalid: Vec }, - #[error( - "At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}." - )] + #[error("{record} has bad size {size} bytes instead of the expected {expected_size}.")] BadRecordSize { - offset: u64, record: String, size: u32, expected_size: u32, }, - #[error( - "At offset {offset:#x}, {record} has bad count {count} instead of the expected {expected_count}." - )] + #[error("{record} has bad count {count} instead of the expected {expected_count}.")] BadRecordCount { - offset: u64, record: String, count: u32, expected_count: u32, }, #[error( - "In long string missing values record starting at offset {record_offset:#x}, value length at offset {offset:#x} is {value_len} instead of the expected 8." + "In long string missing values record, value length at offset {offset:#x} is {value_len} instead of the expected 8." )] - BadLongMissingValueLength { - record_offset: u64, - offset: u64, - value_len: u32, - }, + BadLongMissingValueLength { offset: u64, value_len: u32 }, - #[error( - "The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8." - )] - BadEncodingName { offset: u64 }, + #[error("Encoding record contains an encoding name that is not valid UTF-8.")] + BadEncodingName, // XXX This is risky because `text` might be arbitarily long. #[error("Text string contains invalid bytes for {encoding} encoding: {text:?}")] @@ -447,7 +466,7 @@ pub enum Warning { TBD, } -impl From for Warning { +impl From for WarningDetails { fn from(_source: IoError) -> Self { Self::UnexpectedEndOfData } @@ -720,7 +739,7 @@ pub fn infer_encoding( Ok(encoding) => Ok(encoding), Err(err @ EncodingError::Ebcdic) => Err(Error::new(None, ErrorDetails::EncodingError(err))), Err(err) => { - warn(Warning::EncodingError(err)); + warn(Warning::new(None, WarningDetails::EncodingError(err))); // Warn that we're using the default encoding. Ok(default_encoding()) } @@ -772,10 +791,13 @@ impl<'de> Decoder<'de> { fn decode_slice<'a>(&mut self, input: &'a [u8]) -> Cow<'a, str> { let (output, malformed) = self.encoding.decode_without_bom_handling(input); if malformed { - self.warn(Warning::MalformedString { - encoding: self.encoding.name().into(), - text: output.clone().into(), - }); + self.warn(Warning::new( + None, + WarningDetails::MalformedString { + encoding: self.encoding.name().into(), + text: output.clone().into(), + }, + )); } output } diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index 063912b89e..bf7e7cdfc8 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -21,7 +21,7 @@ use crate::{ identifier::{Error as IdError, Identifier}, sys::raw::{ read_bytes, read_string, read_vec, DecodedRecord, Decoder, Error, ErrorDetails, Magic, - RawDatum, RawStrArray, RawWidth, Record, UntypedDatum, VarTypes, Warning, + RawDatum, RawStrArray, RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails, }, }; @@ -179,7 +179,10 @@ impl HeaderRecord { let n_cases = (header.n_cases < i32::MAX as u32 / 2).then_some(header.n_cases); if header.bias != 100.0 && header.bias != 0.0 { - warn(Warning::UnexpectedBias(header.bias)); + warn(Warning::new( + Some(84..92), + WarningDetails::UnexpectedBias(header.bias), + )); } let creation_date = RawString(header.creation_date.into()); @@ -310,21 +313,29 @@ impl MissingValues { } }; - Self::read_inner(r, raw_width, individual_values, has_range, endian, warn).map_err( - |details| { - Error::new( - { - let n = individual_values + if has_range { 2 } else { 0 }; - Some(offsets.start..offsets.end + 8 * n as u64) - }, - details, - ) - }, + Self::read_inner( + r, + offsets.clone(), + raw_width, + individual_values, + has_range, + endian, + warn, ) + .map_err(|details| { + Error::new( + { + let n = individual_values + if has_range { 2 } else { 0 }; + Some(offsets.start..offsets.end + 8 * n as u64) + }, + details, + ) + }) } fn read_inner( r: &mut R, + offsets: Range, raw_width: RawWidth, individual_values: usize, has_range: bool, @@ -358,7 +369,10 @@ impl MissingValues { }); return Ok(Self::new(values, range).unwrap()); } - Ok(VarWidth::String(_)) if range.is_some() => warn(Warning::MissingValueStringRange), + Ok(VarWidth::String(_)) if range.is_some() => warn(Warning::new( + Some(offsets), + WarningDetails::MissingValueStringRange, + )), Ok(VarWidth::String(width)) => { let width = width.min(8) as usize; let values = values @@ -367,7 +381,10 @@ impl MissingValues { .collect(); return Ok(Self::new(values, None).unwrap()); } - Err(()) => warn(Warning::MissingValueContinuation), + Err(()) => warn(Warning::new( + Some(offsets), + WarningDetails::MissingValueContinuation, + )), } Ok(Self::default()) } @@ -602,18 +619,17 @@ impl ValueLabelRecord { } let n: u32 = endian.parse(read_bytes(r)?); + let n_offsets = index_offset + 4..index_offset + 8; if n > Self::MAX_INDEXES { return Err(Error::new( - Some(index_offset + 4..index_offset + 8), + Some(n_offsets), ErrorDetails::TooManyVarIndexes { n, max: Self::MAX_INDEXES, }, )); } else if n == 0 { - warn(Warning::NoVarIndexes { - offset: index_offset, - }); + warn(Warning::new(Some(n_offsets), WarningDetails::NoVarIndexes)); return Ok(None); } @@ -628,12 +644,15 @@ impl ValueLabelRecord { invalid_indexes.push(index); } } + let index_offsets = index_offset..r.stream_position()?; if !invalid_indexes.is_empty() { - warn(Warning::InvalidVarIndexes { - offset: index_offset, - max: var_types.n_values(), - invalid: invalid_indexes, - }); + warn(Warning::new( + Some(index_offsets.clone()), + WarningDetails::InvalidVarIndexes { + max: var_types.n_values(), + invalid: invalid_indexes, + }, + )); } let Some(&first_index) = dict_indexes.first() else { @@ -650,11 +669,13 @@ impl ValueLabelRecord { } }); if !wrong_type_indexes.is_empty() { - warn(Warning::MixedVarTypes { - offset: index_offset, - var_type, - wrong_types: wrong_type_indexes, - }); + warn(Warning::new( + Some(index_offsets), + WarningDetails::MixedVarTypes { + var_type, + wrong_types: wrong_type_indexes, + }, + )); } let labels = labels @@ -780,7 +801,7 @@ static INTEGER_INFO_RECORD: ExtensionRecord = ExtensionRecord { }; impl IntegerInfoRecord { - pub fn parse(ext: &Extension, endian: Endian) -> Result { + pub fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size(&INTEGER_INFO_RECORD)?; let mut input = &ext.data[..]; @@ -806,7 +827,7 @@ static FLOAT_INFO_RECORD: ExtensionRecord = ExtensionRecord { }; impl FloatInfoRecord { - pub fn parse(ext: &Extension, endian: Endian) -> Result { + pub fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size(&FLOAT_INFO_RECORD)?; let mut input = &ext.data[..]; @@ -832,7 +853,7 @@ pub struct FloatInfoRecord { pub struct RawLongNamesRecord(TextRecord); impl RawLongNamesRecord { - pub fn parse(extension: Extension) -> Result { + pub fn parse(extension: Extension) -> Result { Ok(Record::LongNames(Self(TextRecord::parse( extension, "long names record", @@ -842,7 +863,8 @@ impl RawLongNamesRecord { let input = decoder.decode(&self.0.text); let mut names = Vec::new(); for pair in input.split('\t').filter(|s| !s.is_empty()) { - if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&mut decoder.warn) + if let Some(long_name) = + LongName::parse(pair, decoder).issue_warning(&self.0.offsets, &mut decoder.warn) { names.push(long_name); } @@ -860,7 +882,7 @@ pub struct TextRecord { } impl TextRecord { - pub fn parse(extension: Extension, name: &str) -> Result { + pub fn parse(extension: Extension, name: &str) -> Result { extension.check_size(&ExtensionRecord { size: Some(1), count: None, @@ -880,17 +902,17 @@ pub struct VeryLongString { } impl VeryLongString { - fn parse(decoder: &Decoder, input: &str) -> Result { + fn parse(decoder: &Decoder, input: &str) -> Result { let Some((short_name, length)) = input.split_once('=') else { - return Err(Warning::VeryLongStringMissingDelimiter(input.into())); + return Err(WarningDetails::VeryLongStringMissingDelimiter(input.into())); }; let short_name = decoder .new_identifier(short_name) .and_then(Identifier::must_be_ordinary) - .map_err(Warning::InvalidLongStringName)?; + .map_err(WarningDetails::InvalidLongStringName)?; let length = length .parse() - .map_err(|_| Warning::VeryLongStringInvalidLength(input.into()))?; + .map_err(|_| WarningDetails::VeryLongStringInvalidLength(input.into()))?; Ok(VeryLongString { short_name, length }) } } @@ -902,7 +924,7 @@ pub struct RawVeryLongStringsRecord(TextRecord); pub struct VeryLongStringsRecord(pub Vec); impl RawVeryLongStringsRecord { - pub fn parse(extension: Extension) -> Result { + pub fn parse(extension: Extension) -> Result { Ok(Record::VeryLongStrings(Self(TextRecord::parse( extension, "very long strings record", @@ -916,8 +938,8 @@ impl RawVeryLongStringsRecord { .map(|s| s.trim_start_matches('\t')) .filter(|s| !s.is_empty()) { - if let Some(vls) = - VeryLongString::parse(decoder, tuple).issue_warning(&mut decoder.warn) + if let Some(vls) = VeryLongString::parse(decoder, tuple) + .issue_warning(&self.0.offsets, &mut decoder.warn) { very_long_strings.push(vls) } @@ -936,7 +958,7 @@ pub enum MultipleResponseType { } impl MultipleResponseType { - fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Warning> { + fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), WarningDetails> { let (mr_type, input) = match input.split_first() { Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input), Some((b'D', input)) => { @@ -955,7 +977,7 @@ impl MultipleResponseType { } else if let Some(rest) = input.strip_prefix(b" 11 ") { (CategoryLabels::VarLabels, rest) } else { - return Err(Warning::InvalidMultipleDichotomyLabelType); + return Err(WarningDetails::InvalidMultipleDichotomyLabelType); }; let (value, input) = parse_counted_string(input)?; ( @@ -963,7 +985,7 @@ impl MultipleResponseType { input, ) } - _ => return Err(Warning::InvalidMultipleResponseType), + _ => return Err(WarningDetails::InvalidMultipleResponseType), }; Ok((mr_type, input)) } @@ -982,15 +1004,15 @@ where } impl MultipleResponseSet { - fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> { + fn parse(input: &[u8]) -> Result<(Self, &[u8]), WarningDetails> { let Some(equals) = input.iter().position(|&b| b == b'=') else { - return Err(Warning::MultipleResponseSyntaxError("missing `=`")); + return Err(WarningDetails::MultipleResponseSyntaxError("missing `=`")); }; let (name, input) = input.split_at(equals); let input = input.strip_prefix(b"=").unwrap(); let (mr_type, input) = MultipleResponseType::parse(input)?; let Some(input) = input.strip_prefix(b" ") else { - return Err(Warning::MultipleResponseSyntaxError( + return Err(WarningDetails::MultipleResponseSyntaxError( "missing space after multiple response type", )); }; @@ -1000,7 +1022,7 @@ impl MultipleResponseSet { match input.split_first() { Some((b' ', rest)) => { let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else { - return Err(Warning::MultipleResponseSyntaxError( + return Err(WarningDetails::MultipleResponseSyntaxError( "missing variable name delimiter", )); }; @@ -1011,7 +1033,7 @@ impl MultipleResponseSet { input = rest; } _ => { - return Err(Warning::MultipleResponseSyntaxError( + return Err(WarningDetails::MultipleResponseSyntaxError( "missing space preceding variable name", )); } @@ -1033,14 +1055,15 @@ impl MultipleResponseSet { fn decode( &self, + offsets: &Range, decoder: &mut Decoder, - ) -> Result, Warning> { + ) -> Result, WarningDetails> { let mut short_names = Vec::with_capacity(self.short_names.len()); for short_name in self.short_names.iter() { if let Some(short_name) = decoder .decode_identifier(short_name) - .map_err(Warning::InvalidMrSetName) - .issue_warning(&mut decoder.warn) + .map_err(WarningDetails::InvalidMrSetName) + .issue_warning(offsets, &mut decoder.warn) { short_names.push(short_name); } @@ -1048,7 +1071,7 @@ impl MultipleResponseSet { Ok(MultipleResponseSet { name: decoder .decode_identifier(&self.name) - .map_err(Warning::InvalidMrSetVariableName)?, + .map_err(WarningDetails::InvalidMrSetVariableName)?, label: decoder.decode(&self.label).to_string(), mr_type: self.mr_type.clone(), short_names, @@ -1057,10 +1080,14 @@ impl MultipleResponseSet { } #[derive(Clone, Debug)] -pub struct MultipleResponseRecord(pub Vec>) +pub struct MultipleResponseRecord where I: Debug, - S: Debug; + S: Debug, +{ + pub offsets: Range, + pub sets: Vec>, +} static MULTIPLE_RESPONSE_RECORD: ExtensionRecord = ExtensionRecord { size: Some(1), @@ -1069,7 +1096,7 @@ static MULTIPLE_RESPONSE_RECORD: ExtensionRecord = ExtensionRecord { }; impl MultipleResponseRecord { - fn parse(ext: &Extension, _endian: Endian) -> Result { + fn parse(ext: &Extension, _endian: Endian) -> Result { ext.check_size(&MULTIPLE_RESPONSE_RECORD)?; let mut input = &ext.data[..]; @@ -1085,58 +1112,67 @@ impl MultipleResponseRecord { sets.push(set); input = rest; } - Ok(Record::MultipleResponse(MultipleResponseRecord(sets))) + Ok(Record::MultipleResponse(MultipleResponseRecord { + offsets: ext.offsets.clone(), + sets, + })) } } impl MultipleResponseRecord { pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord { let mut sets = Vec::new(); - for set in self.0.iter() { - if let Some(set) = set.decode(decoder).issue_warning(&mut decoder.warn) { + for set in self.sets.iter() { + if let Some(set) = set + .decode(&self.offsets, decoder) + .issue_warning(&self.offsets, &mut decoder.warn) + { sets.push(set); } } - DecodedRecord::MultipleResponse(MultipleResponseRecord(sets)) + DecodedRecord::MultipleResponse(MultipleResponseRecord { + offsets: self.offsets, + sets, + }) } } -fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> { +fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), WarningDetails> { let Some(space) = input.iter().position(|&b| b == b' ') else { - return Err(Warning::CountedStringMissingSpace); + return Err(WarningDetails::CountedStringMissingSpace); }; let Ok(length) = from_utf8(&input[..space]) else { - return Err(Warning::CountedStringInvalidUTF8); + return Err(WarningDetails::CountedStringInvalidUTF8); }; let Ok(length): Result = length.parse() else { - return Err(Warning::CountedStringInvalidLength(length.into())); + return Err(WarningDetails::CountedStringInvalidLength(length.into())); }; let Some((string, rest)) = input[space + 1..].split_at_checked(length) else { - return Err(Warning::CountedStringTooLong(length)); + return Err(WarningDetails::CountedStringTooLong(length)); }; Ok((string.into(), rest)) } impl Measure { - fn try_decode(source: u32) -> Result, Warning> { + fn try_decode(source: u32) -> Result, WarningDetails> { match source { 0 => Ok(None), 1 => Ok(Some(Measure::Nominal)), 2 => Ok(Some(Measure::Ordinal)), 3 => Ok(Some(Measure::Scale)), - _ => Err(Warning::InvalidMeasurement(source)), + _ => Err(WarningDetails::InvalidMeasurement(source)), } } } impl Alignment { - fn try_decode(source: u32) -> Result, Warning> { + fn try_decode(source: u32) -> Result, WarningDetails> { match source { 0 => Ok(Some(Alignment::Left)), 1 => Ok(Some(Alignment::Right)), 2 => Ok(Some(Alignment::Center)), - _ => Err(Warning::InvalidAlignment(source)), + _ => Err(WarningDetails::InvalidAlignment(source)), } } } @@ -1157,10 +1193,9 @@ impl VarDisplayRecord { var_types: &VarTypes, endian: Endian, warn: &mut dyn FnMut(Warning), - ) -> Result { + ) -> Result { if ext.size != 4 { - return Err(Warning::BadRecordSize { - offset: ext.offsets.start, + return Err(WarningDetails::BadRecordSize { record: String::from("variable display record"), size: ext.size, expected_size: 4, @@ -1173,7 +1208,7 @@ impl VarDisplayRecord { } else if ext.count as usize == 2 * n_vars { false } else { - return Err(Warning::InvalidVariableDisplayCount { + return Err(WarningDetails::InvalidVariableDisplayCount { count: ext.count as usize, first: 2 * n_vars, second: 3 * n_vars, @@ -1184,11 +1219,11 @@ impl VarDisplayRecord { let mut input = &ext.data[..]; for _ in 0..n_vars { let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap())) - .issue_warning(warn) + .issue_warning(&ext.offsets, warn) .flatten(); let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap())); let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap())) - .issue_warning(warn) + .issue_warning(&ext.offsets, warn) .flatten(); var_displays.push(VarDisplay { measure, @@ -1225,9 +1260,13 @@ impl LongStringMissingValues { } #[derive(Clone, Debug)] -pub struct LongStringMissingValueRecord(pub Vec>) +pub struct LongStringMissingValueRecord where - N: Debug; + N: Debug, +{ + pub offsets: Range, + pub values: Vec>, +} static LONG_STRING_MISSING_VALUE_RECORD: ExtensionRecord = ExtensionRecord { size: Some(1), @@ -1240,7 +1279,7 @@ impl LongStringMissingValueRecord { ext: &Extension, endian: Endian, warn: &mut dyn FnMut(Warning), - ) -> Result { + ) -> Result { ext.check_size(&LONG_STRING_MISSING_VALUE_RECORD)?; let mut input = &ext.data[..]; @@ -1251,11 +1290,10 @@ impl LongStringMissingValueRecord { let value_len: u32 = endian.parse(read_bytes(&mut input)?); if value_len != 8 { let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start; - warn(Warning::BadLongMissingValueLength { - record_offset: ext.offsets.start, - offset, - value_len, - }); + warn(Warning::new( + Some(ext.offsets.clone()), + WarningDetails::BadLongMissingValueLength { offset, value_len }, + )); read_vec(&mut input, value_len as usize * n_missing_values as usize)?; continue; } @@ -1281,24 +1319,30 @@ impl LongStringMissingValueRecord { }); } Ok(Record::LongStringMissingValues( - LongStringMissingValueRecord(missing_value_set), + LongStringMissingValueRecord { + offsets: ext.offsets.clone(), + values: missing_value_set, + }, )) } } impl LongStringMissingValueRecord { pub fn decode(self, decoder: &mut Decoder) -> LongStringMissingValueRecord { - let mut mvs = Vec::with_capacity(self.0.len()); - for mv in self.0.iter() { + let mut mvs = Vec::with_capacity(self.values.len()); + for mv in self.values.iter() { if let Some(mv) = mv .decode(decoder) - .map_err(Warning::InvalidLongStringMissingValueVariableName) - .issue_warning(&mut decoder.warn) + .map_err(WarningDetails::InvalidLongStringMissingValueVariableName) + .issue_warning(&self.offsets, &mut decoder.warn) { mvs.push(mv); } } - LongStringMissingValueRecord(mvs) + LongStringMissingValueRecord { + offsets: self.offsets, + values: mvs, + } } } @@ -1312,13 +1356,11 @@ static ENCODING_RECORD: ExtensionRecord = ExtensionRecord { }; impl EncodingRecord { - fn parse(ext: &Extension, _endian: Endian) -> Result { + fn parse(ext: &Extension, _endian: Endian) -> Result { ext.check_size(&ENCODING_RECORD)?; Ok(Record::Encoding(EncodingRecord( - String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName { - offset: ext.offsets.start, - })?, + String::from_utf8(ext.data.clone()).map_err(|_| WarningDetails::BadEncodingName)?, ))) } } @@ -1339,7 +1381,7 @@ static NUMBER_OF_CASES_RECORD: ExtensionRecord = ExtensionRecord { }; impl NumberOfCasesRecord { - fn parse(ext: &Extension, endian: Endian) -> Result { + fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size(&NUMBER_OF_CASES_RECORD)?; let mut input = &ext.data[..]; @@ -1354,7 +1396,7 @@ impl NumberOfCasesRecord { pub struct RawVariableSetRecord(TextRecord); impl RawVariableSetRecord { - fn parse(extension: Extension) -> Result { + fn parse(extension: Extension) -> Result { Ok(Record::VariableSets(Self(TextRecord::parse( extension, "variable sets record", @@ -1364,7 +1406,9 @@ impl RawVariableSetRecord { let mut sets = Vec::new(); let input = decoder.decode(&self.0.text); for line in input.lines() { - if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&mut decoder.warn) { + if let Some(set) = VariableSet::parse(line, decoder, &self.0.offsets) + .issue_warning(&self.0.offsets, &mut decoder.warn) + { sets.push(set) } } @@ -1379,7 +1423,7 @@ impl RawVariableSetRecord { pub struct RawProductInfoRecord(TextRecord); impl RawProductInfoRecord { - fn parse(extension: Extension) -> Result { + fn parse(extension: Extension) -> Result { Ok(Record::ProductInfo(Self(TextRecord::parse( extension, "product info record", @@ -1397,17 +1441,21 @@ pub struct Attribute { } impl Attribute { - fn parse<'a>(decoder: &mut Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> { + fn parse<'a>( + decoder: &mut Decoder, + offsets: &Range, + input: &'a str, + ) -> Result<(Attribute, &'a str), WarningDetails> { let Some((name, mut input)) = input.split_once('(') else { - return Err(Warning::AttributeMissingLParen(input.into())); + return Err(WarningDetails::AttributeMissingLParen(input.into())); }; let name = decoder .new_identifier(name) - .map_err(Warning::InvalidAttributeName)?; + .map_err(WarningDetails::InvalidAttributeName)?; let mut values = Vec::new(); loop { let Some((value, rest)) = input.split_once('\n') else { - return Err(Warning::AttributeMissingValue { + return Err(WarningDetails::AttributeMissingValue { name: name.clone(), index: values.len(), }); @@ -1418,10 +1466,13 @@ impl Attribute { { values.push(stripped.into()); } else { - decoder.warn(Warning::AttributeMissingQuotes { - name: name.clone(), - index: values.len(), - }); + decoder.warn(Warning::new( + Some(offsets.clone()), + WarningDetails::AttributeMissingQuotes { + name: name.clone(), + index: values.len(), + }, + )); values.push(value.into()); } if let Some(rest) = rest.strip_prefix(')') { @@ -1436,9 +1487,10 @@ impl Attribute { impl Attributes { fn parse<'a>( decoder: &mut Decoder, + offsets: &Range, mut input: &'a str, sentinel: Option, - ) -> Result<(Attributes, &'a str, Vec), Warning> { + ) -> Result<(Attributes, &'a str, Vec), WarningDetails> { let mut attributes = BTreeMap::new(); let mut duplicates = Vec::new(); let rest = loop { @@ -1446,7 +1498,7 @@ impl Attributes { None => break input, c if c == sentinel => break &input[1..], _ => { - let (attribute, rest) = Attribute::parse(decoder, input)?; + let (attribute, rest) = Attribute::parse(decoder, offsets, input)?; if attributes.contains_key(&attribute.name) { duplicates.push(attribute.name.clone()); } @@ -1466,7 +1518,7 @@ pub struct RawFileAttributesRecord(TextRecord); pub struct FileAttributesRecord(pub Attributes); impl RawFileAttributesRecord { - fn parse(extension: Extension) -> Result { + fn parse(extension: Extension) -> Result { Ok(Record::FileAttributes(Self(TextRecord::parse( extension, "file attributes record", @@ -1474,15 +1526,23 @@ impl RawFileAttributesRecord { } pub fn decode(self, decoder: &mut Decoder) -> FileAttributesRecord { let input = decoder.decode(&self.0.text); - match Attributes::parse(decoder, &input, None).issue_warning(&mut decoder.warn) { + match Attributes::parse(decoder, &self.0.offsets, &input, None) + .issue_warning(&self.0.offsets, &mut decoder.warn) + { Some((set, rest, duplicates)) => { if !duplicates.is_empty() { - decoder.warn(Warning::DuplicateFileAttributes { - attributes: duplicates, - }); + decoder.warn(Warning::new( + Some(self.0.offsets.clone()), + WarningDetails::DuplicateFileAttributes { + attributes: duplicates, + }, + )); } if !rest.is_empty() { - decoder.warn(dbg!(Warning::TBD)); + decoder.warn(Warning::new( + Some(self.0.offsets.clone()), + WarningDetails::TBD, + )); } FileAttributesRecord(set) } @@ -1500,21 +1560,25 @@ pub struct VarAttributes { impl VarAttributes { fn parse<'a>( decoder: &mut Decoder, + offsets: &Range, input: &'a str, - ) -> Result<(VarAttributes, &'a str), Warning> { + ) -> Result<(VarAttributes, &'a str), WarningDetails> { let Some((long_var_name, rest)) = input.split_once(':') else { - return Err(dbg!(Warning::TBD)); + return Err(dbg!(WarningDetails::TBD)); }; let long_var_name = decoder .new_identifier(long_var_name) .and_then(Identifier::must_be_ordinary) - .map_err(Warning::InvalidAttributeVariableName)?; - let (attributes, rest, duplicates) = Attributes::parse(decoder, rest, Some('/'))?; + .map_err(WarningDetails::InvalidAttributeVariableName)?; + let (attributes, rest, duplicates) = Attributes::parse(decoder, offsets, rest, Some('/'))?; if !duplicates.is_empty() { - decoder.warn(Warning::DuplicateVariableAttributes { - variable: long_var_name.clone(), - attributes: duplicates, - }); + decoder.warn(Warning::new( + Some(offsets.clone()), + WarningDetails::DuplicateVariableAttributes { + variable: long_var_name.clone(), + attributes: duplicates, + }, + )); } let var_attribute = VarAttributes { long_var_name, @@ -1531,7 +1595,7 @@ pub struct RawVariableAttributesRecord(TextRecord); pub struct VariableAttributesRecord(pub Vec); impl RawVariableAttributesRecord { - fn parse(extension: Extension) -> Result { + fn parse(extension: Extension) -> Result { Ok(Record::VariableAttributes(Self(TextRecord::parse( extension, "variable attributes record", @@ -1542,8 +1606,8 @@ impl RawVariableAttributesRecord { let mut input = decoded.as_ref(); let mut var_attribute_sets = Vec::new(); while !input.is_empty() { - let Some((var_attribute, rest)) = - VarAttributes::parse(decoder, input).issue_warning(&mut decoder.warn) + let Some((var_attribute, rest)) = VarAttributes::parse(decoder, &self.0.offsets, input) + .issue_warning(&self.0.offsets, &mut decoder.warn) else { break; }; @@ -1561,18 +1625,18 @@ pub struct LongName { } impl LongName { - fn parse(input: &str, decoder: &Decoder) -> Result { + fn parse(input: &str, decoder: &Decoder) -> Result { let Some((short_name, long_name)) = input.split_once('=') else { - return Err(Warning::LongNameMissingEquals); + return Err(WarningDetails::LongNameMissingEquals); }; let short_name = decoder .new_identifier(short_name) .and_then(Identifier::must_be_ordinary) - .map_err(Warning::InvalidShortName)?; + .map_err(WarningDetails::InvalidShortName)?; let long_name = decoder .new_identifier(long_name) .and_then(Identifier::must_be_ordinary) - .map_err(Warning::InvalidLongName)?; + .map_err(WarningDetails::InvalidLongName)?; Ok(LongName { short_name, long_name, @@ -1593,17 +1657,21 @@ pub struct VariableSet { } impl VariableSet { - fn parse(input: &str, decoder: &mut Decoder) -> Result { + fn parse( + input: &str, + decoder: &mut Decoder, + offsets: &Range, + ) -> Result { let (name, input) = input .split_once('=') - .ok_or(Warning::VariableSetMissingEquals)?; + .ok_or(WarningDetails::VariableSetMissingEquals)?; let mut vars = Vec::new(); for var in input.split_ascii_whitespace() { if let Some(identifier) = decoder .new_identifier(var) .and_then(Identifier::must_be_ordinary) - .map_err(Warning::InvalidVariableSetName) - .issue_warning(&mut decoder.warn) + .map_err(WarningDetails::InvalidVariableSetName) + .issue_warning(offsets, &mut decoder.warn) { vars.push(identifier); } @@ -1622,14 +1690,14 @@ pub struct VariableSetRecord { } trait IssueWarning { - fn issue_warning(self, warn: &mut dyn FnMut(Warning)) -> Option; + fn issue_warning(self, offsets: &Range, warn: &mut dyn FnMut(Warning)) -> Option; } -impl IssueWarning for Result { - fn issue_warning(self, warn: &mut dyn FnMut(Warning)) -> Option { +impl IssueWarning for Result { + fn issue_warning(self, offsets: &Range, warn: &mut dyn FnMut(Warning)) -> Option { match self { Ok(result) => Some(result), Err(error) => { - warn(error); + warn(Warning::new(Some(offsets.clone()), error)); None } } @@ -1654,11 +1722,10 @@ pub struct Extension { } impl Extension { - pub fn check_size(&self, expected: &ExtensionRecord) -> Result<(), Warning> { + pub fn check_size(&self, expected: &ExtensionRecord) -> Result<(), WarningDetails> { match expected.size { Some(expected_size) if self.size != expected_size => { - return Err(Warning::BadRecordSize { - offset: self.offsets.start, + return Err(WarningDetails::BadRecordSize { record: expected.name.into(), size: self.size, expected_size, @@ -1668,8 +1735,7 @@ impl Extension { } match expected.count { Some(expected_count) if self.count != expected_count => { - return Err(Warning::BadRecordCount { - offset: self.offsets.start, + return Err(WarningDetails::BadRecordCount { record: expected.name.into(), count: self.count, expected_count, @@ -1703,8 +1769,9 @@ impl Extension { let start_offset = r.stream_position()?; let data = read_vec(r, product as usize)?; let end_offset = start_offset + product as u64; + let offsets = start_offset..end_offset; let extension = Extension { - offsets: start_offset..end_offset, + offsets: offsets.clone(), subtype, size, count, @@ -1729,8 +1796,8 @@ impl Extension { }; match result { Ok(result) => Ok(Some(result)), - Err(error) => { - warn(error); + Err(details) => { + warn(Warning::new(Some(offsets), details)); Ok(None) } } @@ -1753,10 +1820,10 @@ impl LongStringValueLabels { fn decode( &self, decoder: &mut Decoder, - ) -> Result, Warning> { + ) -> Result, WarningDetails> { let var_name = decoder.decode(&self.var_name); let var_name = Identifier::from_encoding(var_name.trim_end(), decoder.encoding) - .map_err(Warning::InvalidLongStringValueLabelName)?; + .map_err(WarningDetails::InvalidLongStringValueLabelName)?; let mut labels = Vec::with_capacity(self.labels.len()); for (value, label) in self.labels.iter() { @@ -1773,10 +1840,14 @@ impl LongStringValueLabels { } #[derive(Clone, Debug)] -pub struct LongStringValueLabelRecord(pub Vec>) +pub struct LongStringValueLabelRecord where N: Debug, - S: Debug; + S: Debug, +{ + pub offsets: Range, + pub labels: Vec>, +} static LONG_STRING_VALUE_LABEL_RECORD: ExtensionRecord = ExtensionRecord { size: Some(1), @@ -1785,7 +1856,7 @@ static LONG_STRING_VALUE_LABEL_RECORD: ExtensionRecord = ExtensionRecord { }; impl LongStringValueLabelRecord { - fn parse(ext: &Extension, endian: Endian) -> Result { + fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size(&LONG_STRING_VALUE_LABEL_RECORD)?; let mut input = &ext.data[..]; @@ -1806,22 +1877,26 @@ impl LongStringValueLabelRecord { labels, }) } - Ok(Record::LongStringValueLabels(LongStringValueLabelRecord( - label_set, - ))) + Ok(Record::LongStringValueLabels(LongStringValueLabelRecord { + offsets: ext.offsets.clone(), + labels: label_set, + })) } } impl LongStringValueLabelRecord { pub fn decode(self, decoder: &mut Decoder) -> LongStringValueLabelRecord { - let mut labels = Vec::with_capacity(self.0.len()); - for label in &self.0 { + let mut labels = Vec::with_capacity(self.labels.len()); + for label in &self.labels { match label.decode(decoder) { Ok(set) => labels.push(set), - Err(error) => decoder.warn(error), + Err(error) => decoder.warn(Warning::new(Some(self.offsets.clone()), error)), } } - LongStringValueLabelRecord(labels) + LongStringValueLabelRecord { + offsets: self.offsets, + labels, + } } } @@ -1976,6 +2051,9 @@ impl ZTrailer { let mut expected_uncmp_ofs = zheader.zheader_offset; let mut expected_cmp_ofs = zheader.zheader_offset + 24; for (index, block) in blocks.iter().enumerate() { + let block_start = start_offset + 24 + 24 * index as u64; + let block_offsets = block_start..block_start + 24; + if block.uncompressed_ofs != expected_uncmp_ofs { Err(ErrorDetails::ZlibTrailerBlockWrongUncmpOfs { index, @@ -1997,31 +2075,29 @@ impl ZTrailer { } else { Ok(()) } - .map_err(|details| { - Error::new( - { - let block_start = start_offset + 24 + 24 * index as u64; - Some(block_start..block_start + 24) - }, - details, - ) - })?; + .map_err(|details| Error::new(Some(block_offsets.clone()), details))?; if index < blocks.len() - 1 { if block.uncompressed_size != block_size { - warn(Warning::ZlibTrailerBlockWrongSize { - index, - actual: block.uncompressed_size, - expected: block_size, - }); + warn(Warning::new( + Some(block_offsets), + WarningDetails::ZlibTrailerBlockWrongSize { + index, + actual: block.uncompressed_size, + expected: block_size, + }, + )); } } else { if block.uncompressed_size > block_size { - warn(Warning::ZlibTrailerBlockTooBig { - index, - actual: block.uncompressed_size, - max_expected: block_size, - }); + warn(Warning::new( + Some(block_offsets), + WarningDetails::ZlibTrailerBlockTooBig { + index, + actual: block.uncompressed_size, + max_expected: block_size, + }, + )); } } diff --git a/rust/pspp/src/sys/testdata/bad_machine_float_info_size.expected b/rust/pspp/src/sys/testdata/bad_machine_float_info_size.expected index 973ebaac59..5ebd48f370 100644 --- a/rust/pspp/src/sys/testdata/bad_machine_float_info_size.expected +++ b/rust/pspp/src/sys/testdata/bad_machine_float_info_size.expected @@ -1,4 +1,4 @@ -At offset 0xe0, floating point record has bad count 4 instead of the expected 3. +Warning at file offsets 0xe0 to 0x100: floating point record has bad count 4 instead of the expected 3. ╭──────────────────────┬────────────────────────╮ │ Created │ 01-JAN-2011 20:53:52│ diff --git a/rust/pspp/src/sys/testdata/bad_machine_integer_info_count.expected b/rust/pspp/src/sys/testdata/bad_machine_integer_info_count.expected index 72bda6df51..d08b9606ee 100644 --- a/rust/pspp/src/sys/testdata/bad_machine_integer_info_count.expected +++ b/rust/pspp/src/sys/testdata/bad_machine_integer_info_count.expected @@ -1,4 +1,4 @@ -At offset 0xe0, integer record has bad count 9 instead of the expected 8. +Warning at file offsets 0xe0 to 0x104: integer record has bad count 9 instead of the expected 8. ╭──────────────────────┬────────────────────────╮ │ Created │ 01-JAN-2011 20:53:52│ diff --git a/rust/pspp/src/sys/testdata/bad_variable_name_in_variable_value_pair.expected b/rust/pspp/src/sys/testdata/bad_variable_name_in_variable_value_pair.expected index 2c10b1cc75..1447171474 100644 --- a/rust/pspp/src/sys/testdata/bad_variable_name_in_variable_value_pair.expected +++ b/rust/pspp/src/sys/testdata/bad_variable_name_in_variable_value_pair.expected @@ -1,4 +1,4 @@ -Missing `=` separator in long variable name record. +Warning at file offsets 0xe0 to 0xe5: Missing `=` separator in long variable name record. ╭──────────────────────┬────────────────────────╮ │ Created │ 01-JAN-2011 20:53:52│ diff --git a/rust/pspp/src/sys/testdata/compressed_data_other_bias.expected b/rust/pspp/src/sys/testdata/compressed_data_other_bias.expected index a2a09ddddd..7b7f43ebd9 100644 --- a/rust/pspp/src/sys/testdata/compressed_data_other_bias.expected +++ b/rust/pspp/src/sys/testdata/compressed_data_other_bias.expected @@ -1,4 +1,4 @@ -Compression bias is 50 instead of the usual values of 0 or 100. +Warning at file offsets 0x54 to 0x5c: Compression bias is 50 instead of the usual values of 0 or 100. ╭──────────────────────┬────────────────────────╮ │ Created │ 01-JAN-2011 20:53:52│ diff --git a/rust/pspp/src/sys/testdata/duplicate_attribute_name.expected b/rust/pspp/src/sys/testdata/duplicate_attribute_name.expected index 8de0af1193..ec07279653 100644 --- a/rust/pspp/src/sys/testdata/duplicate_attribute_name.expected +++ b/rust/pspp/src/sys/testdata/duplicate_attribute_name.expected @@ -1,6 +1,6 @@ -Duplicate dataset attributes with names: Attr1. +Warning at file offsets 0xe0 to 0xfe: Duplicate dataset attributes with names: Attr1. -Duplicate attributes for variable FIRSTVAR: fred. +Warning at file offsets 0x10e to 0x12d: Duplicate attributes for variable FIRSTVAR: fred. ╭──────────────────────┬────────────────────────╮ │ Created │ 01-JAN-2011 20:53:52│ diff --git a/rust/pspp/src/sys/testdata/duplicate_long_variable_name.expected b/rust/pspp/src/sys/testdata/duplicate_long_variable_name.expected index 14947fd867..7a00f13ff3 100644 --- a/rust/pspp/src/sys/testdata/duplicate_long_variable_name.expected +++ b/rust/pspp/src/sys/testdata/duplicate_long_variable_name.expected @@ -1,8 +1,8 @@ -Invalid name in long variable name record. "_Invalid" may not be used as an identifier because it begins with disallowed character '_'. +Warning at file offsets 0x140 to 0x1aa: Invalid name in long variable name record. "_Invalid" may not be used as an identifier because it begins with disallowed character '_'. -Invalid name in long variable name record. "$Invalid" may not be used as an identifier because it begins with disallowed character '$'. +Warning at file offsets 0x140 to 0x1aa: Invalid name in long variable name record. "$Invalid" may not be used as an identifier because it begins with disallowed character '$'. -Invalid name in long variable name record. "#Invalid" may not be used as an identifier because it begins with disallowed character '#'. +Warning at file offsets 0x140 to 0x1aa: Invalid name in long variable name record. "#Invalid" may not be used as an identifier because it begins with disallowed character '#'. Duplicate long variable name LONGVARIABLENAME. diff --git a/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.expected b/rust/pspp/src/sys/testdata/integer_overflows_in_long_string_missing_values.expected index 10f60064aaa59c281b714805cac34b2bf77c8644..4dd9691195a69074db86c436397a483fb9016f23 100644 GIT binary patch delta 55 zcmew