From c94e90c936d693f1bdff02bf5c0fc0831a374e07 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 18 May 2025 09:34:16 -0700 Subject: [PATCH] work! --- ...ata-file-and-variable-attributes-record.md | 2 +- rust/doc/src/system-file/variable-record.md | 18 ++- rust/pspp/src/dictionary.rs | 39 +++++- rust/pspp/src/format/mod.rs | 2 +- rust/pspp/src/identifier.rs | 2 +- rust/pspp/src/main.rs | 2 +- rust/pspp/src/sys/cooked.rs | 124 +++++++++++------ rust/pspp/src/sys/encoding.rs | 2 +- rust/pspp/src/sys/raw.rs | 127 ++++++++++++------ rust/pspp/src/sys/test.rs | 11 +- 10 files changed, 226 insertions(+), 103 deletions(-) diff --git a/rust/doc/src/system-file/data-file-and-variable-attributes-record.md b/rust/doc/src/system-file/data-file-and-variable-attributes-record.md index fcdcfcc0f6..f7975bacbd 100644 --- a/rust/doc/src/system-file/data-file-and-variable-attributes-record.md +++ b/rust/doc/src/system-file/data-file-and-variable-attributes-record.md @@ -90,7 +90,7 @@ attribute has a single element whose values and their meanings are: |Value|Role | |----:|:----------| | 0 | Input | -| 1 | Output | +| 1 | Target | | 2 | Both | | 3 | None | | 4 | Partition | diff --git a/rust/doc/src/system-file/variable-record.md b/rust/doc/src/system-file/variable-record.md index 603cff8309..c07fde074d 100644 --- a/rust/doc/src/system-file/variable-record.md +++ b/rust/doc/src/system-file/variable-record.md @@ -11,7 +11,6 @@ string as name. A few system files have been encountered that include a variable label on dummy variable records, so readers should take care to parse dummy variable records in the same way as other variable records. - The "dictionary index" of a variable is a 1-based offset in the set of variable records, including dummy variable records for long string variables. The first variable record @@ -116,10 +115,7 @@ without any variables (thus, no data either). `n_missing_values`. Each element is interpreted as a number for numeric variables (with `HIGHEST` and `LOWEST` indicated as described in the [introduction](index.md)). For string variables of - width less than 8 bytes, elements are right-padded with spaces; for - string variables wider than 8 bytes, only the first 8 bytes of each - missing value are specified, with the remainder implicitly all - spaces. + width less than 8 bytes, elements are right-padded with spaces. For discrete missing values, each element represents one missing value. When a range is present, the first element denotes the @@ -186,3 +182,15 @@ Format types are defined as follows: A few system files have been observed in the wild with invalid `write` fields, in particular with value 0. Readers should probably treat invalid `print` or `write` fields as some default format. + +## Obsolete Treatment of Long String Missing Values + +SPSS and most versions of PSPP write missing values for string +variables wider than 8 bytes with a [Long String Missing Values +Record](long-string-missing-values-record.md). Very old versions of +PSPP instead wrote these missing values on the variables record, +writing only the first 8 bytes of each missing value, with the +remainder implicitly all spaces. Any new software should use the +[Long String Missing Values +Record](long-string-missing-values-record.md), but it might possibly +be worthwhile also to accept the old format used by PSPP. diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 6a0a1843ba..f8abe996d9 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -13,6 +13,7 @@ use encoding_rs::Encoding; use indexmap::IndexSet; use num::integer::div_ceil; use ordered_float::OrderedFloat; +use thiserror::Error as ThisError; use unicase::UniCase; use crate::{ @@ -543,6 +544,10 @@ pub enum Role { } impl Role { + /// Convert `input` to [Role]. + /// + /// This can't be `FromStr` because defining traits on `Option` + /// is not allowed. fn try_from_str(input: &str) -> Result, InvalidRole> { for (string, value) in [ ("input", Some(Role::Input)), @@ -556,7 +561,23 @@ impl Role { return Ok(value); } } - Err(InvalidRole) + Err(InvalidRole::UnknownRole(input.into())) + } + + /// Convert `integer` to [Role]. + /// + /// This can't be `TryFrom>` because defining traits on + /// `Option>` is not allowed. + fn try_from_integer(integer: i32) -> Result, InvalidRole> { + match integer { + 0 => Ok(Some(Role::Input)), + 1 => Ok(Some(Role::Target)), + 2 => Ok(Some(Role::Both)), + 4 => Ok(Some(Role::Partition)), + 5 => Ok(Some(Role::Split)), + 3 => Ok(None), + _ => Err(InvalidRole::UnknownRole(integer.to_string())), + } } } @@ -577,7 +598,14 @@ impl Attributes { } } -pub struct InvalidRole; +#[derive(Clone, Debug, ThisError, PartialEq, Eq)] +pub enum InvalidRole { + #[error("Unknown role {0:?}.")] + UnknownRole(String), + + #[error("Role attribute $@Role must have exactly one value (not {0}).")] + InvalidValues(usize), +} impl TryFrom<&Attributes> for Option { type Error = InvalidRole; @@ -586,9 +614,12 @@ impl TryFrom<&Attributes> for Option { let role = Identifier::new("$@Role").unwrap(); value.0.get(&role).map_or(Ok(None), |attribute| { if let Ok([string]) = <&[String; 1]>::try_from(attribute.as_slice()) { - Role::try_from_str(string) + match string.parse() { + Ok(integer) => Role::try_from_integer(integer), + Err(_) => Err(InvalidRole::UnknownRole(string.clone())), + } } else { - Err(InvalidRole) + Err(InvalidRole::InvalidValues(attribute.len())) } }) } diff --git a/rust/pspp/src/format/mod.rs b/rust/pspp/src/format/mod.rs index 7094dec8a4..442bf7fd4f 100644 --- a/rust/pspp/src/format/mod.rs +++ b/rust/pspp/src/format/mod.rs @@ -20,7 +20,7 @@ mod display; mod parse; pub use display::DisplayValue; -#[derive(ThisError, Debug)] +#[derive(Clone, ThisError, Debug, PartialEq, Eq)] pub enum Error { #[error("Unknown format type {value}.")] UnknownFormat { value: u16 }, diff --git a/rust/pspp/src/identifier.rs b/rust/pspp/src/identifier.rs index 032fc730d6..b3ce546bc9 100644 --- a/rust/pspp/src/identifier.rs +++ b/rust/pspp/src/identifier.rs @@ -93,7 +93,7 @@ impl IdentifierChar for char { } } -#[derive(Clone, Debug, ThisError)] +#[derive(Clone, Debug, ThisError, PartialEq, Eq)] pub enum Error { #[error("Identifier cannot be empty string.")] Empty, diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index 35c057a341..7ffe019775 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -151,7 +151,7 @@ fn dissect( for header in headers { decoded_records.push(header.decode(&decoder)?); } - let headers = Headers::new(decoded_records, &|e| eprintln!("{e}"))?; + let headers = Headers::new(decoded_records, &mut |e| eprintln!("{e}"))?; let (dictionary, metadata) = decode(headers, encoding, |e| eprintln!("{e}"))?; println!("{dictionary:#?}"); println!("{metadata:#?}"); diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 43e4aa3da5..ac7272dab0 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -26,15 +26,11 @@ use thiserror::Error as ThisError; pub use crate::sys::raw::{CategoryLabels, Compression}; -#[derive(ThisError, Debug)] +#[derive(ThisError, Clone, Debug, PartialEq, Eq)] pub enum Error { #[error("Missing header record")] MissingHeaderRecord, - // XXX this is an internal error - #[error("More than one file header record")] - DuplicateHeaderRecord, - #[error("{0}")] EncodingError(EncodingError), @@ -92,6 +88,14 @@ pub enum Error { #[error("At offset {offset:#x}, one or more variable indexes for value labels referred to long string continuation records: {indexes:?}")] LongStringContinuationIndexes { offset: u64, indexes: Vec }, + #[error("Variable index {start_index} is a {width} that should be followed by long string continuation records up to index {end_index}, but index {error_index} is not a continuation")] + MissingLongStringContinuation { + width: RawWidth, + start_index: usize, + end_index: usize, + error_index: usize, + }, + #[error( "At offsets {:#x}...{:#x}, record types 3 and 4 may not add value labels to one or more long string variables: {variables:?}", .offsets.start, .offsets.end )] @@ -167,7 +171,21 @@ pub enum Error { #[error("Text string contains invalid bytes for {encoding} encoding: {text}")] MalformedString { encoding: String, text: String }, - #[error("Details TBD")] + #[error("File contains multiple {0:?} records.")] + MoreThanOne(&'static str), + + #[error("File designates string variable {name} (index {index}) as weight variable, but weight variables must be numeric.")] + InvalidWeightVar { name: Identifier, index: u32 }, + + #[error( + "File weight variable index {index} is greater than maximum variable index {max_index}." + )] + InvalidWeightIndex { index: u32, max_index: usize }, + + #[error("{0}")] + InvalidRole(InvalidRole), + + #[error("Details TBD (cooked)")] TBD, } @@ -198,18 +216,22 @@ pub struct Headers { pub cases: Option>>, } -fn take_first(mut vec: Vec, more_than_one: F) -> Option -where - F: FnOnce(), -{ +fn take_first( + mut vec: Vec, + record_name: &'static str, + warn: &mut impl FnMut(Error), +) -> Option { if vec.len() > 1 { - more_than_one(); + warn(Error::MoreThanOne(record_name)); } vec.drain(..).next() } impl Headers { - pub fn new(headers: Vec, warn: &impl Fn(Error)) -> Result { + pub fn new( + headers: Vec, + warn: &mut impl FnMut(Error), + ) -> Result { let mut file_header = Vec::new(); let mut variable = Vec::new(); let mut value_label = Vec::new(); @@ -308,8 +330,7 @@ impl Headers { } } - let Some(file_header) = take_first(file_header, || warn(Error::DuplicateHeaderRecord)) - else { + let Some(file_header) = take_first(file_header, "file header", warn) else { return Err(Error::MissingHeaderRecord); }; @@ -318,25 +339,25 @@ impl Headers { variable, value_label, document, - integer_info: take_first(integer_info, || warn(Error::TBD)), - float_info: take_first(float_info, || warn(Error::TBD)), - var_display: take_first(var_display, || warn(Error::TBD)), + integer_info: take_first(integer_info, "integer info", warn), + float_info: take_first(float_info, "float info", warn), + var_display: take_first(var_display, "variable display", warn), multiple_response, long_string_value_labels, long_string_missing_values, - encoding: take_first(encoding, || warn(Error::TBD)), - number_of_cases: take_first(number_of_cases, || warn(Error::TBD)), + encoding: take_first(encoding, "encoding", warn), + number_of_cases: take_first(number_of_cases, "number of cases", warn), variable_sets, - product_info: take_first(product_info, || warn(Error::TBD)), + product_info: take_first(product_info, "product info", warn), long_names, very_long_strings, file_attributes, variable_attributes, other_extension, - end_of_headers: take_first(end_of_headers, || warn(Error::TBD)), - z_header: take_first(z_header, || warn(Error::TBD)), - z_trailer: take_first(z_trailer, || warn(Error::TBD)), - cases: take_first(cases, || warn(Error::TBD)), + end_of_headers: take_first(end_of_headers, "end of headers", warn), + z_header: take_first(z_header, "z_header", warn), + z_trailer: take_first(z_trailer, "z_trailer", warn), + cases: take_first(cases, "cases", warn), }) } } @@ -353,7 +374,7 @@ pub struct Metadata { } impl Metadata { - fn decode(headers: &Headers, warn: impl Fn(Error)) -> Self { + fn decode(headers: &Headers, mut warn: impl FnMut(Error)) -> Self { let header = &headers.header; let creation_date = NaiveDate::parse_from_str(&header.creation_date, "%e %b %Y") .unwrap_or_else(|_| { @@ -414,7 +435,7 @@ impl Decoder { pub fn decode( mut headers: Headers, encoding: &'static Encoding, - warn: impl Fn(Error), + mut warn: impl FnMut(Error), ) -> Result<(Dictionary, Metadata), Error> { let mut dictionary = Dictionary::new(encoding); @@ -515,7 +536,12 @@ pub fn decode( .get(index + offset) .is_none_or(|record| record.width != RawWidth::Continuation) { - warn(Error::TBD); + warn(Error::MissingLongStringContinuation { + width: input.width, + start_index: index, + end_index: index + n_values - 1, + error_index: index + offset, + }); break; } } @@ -531,10 +557,16 @@ pub fn decode( if variable.is_numeric() { dictionary.weight = Some(*dict_index); } else { - warn(Error::TBD); + warn(Error::InvalidWeightVar { + index: weight_index, + name: variable.name.clone(), + }); } } else { - warn(Error::TBD); + warn(Error::InvalidWeightIndex { + index: weight_index, + max_index: var_index_map.len(), + }); } } @@ -581,7 +613,7 @@ pub fn decode( variable.measure = Some(measure); } } else { - warn(Error::TBD); + warn(dbg!(Error::TBD)); } } } @@ -591,7 +623,7 @@ pub fn decode( .iter() .flat_map(|record| record.0.iter()) { - match MultipleResponseSet::decode(&dictionary, record, &warn) { + match MultipleResponseSet::decode(&dictionary, record, &mut warn) { Ok(mrset) => { dictionary.mrsets.insert(ByIdentifier::new(mrset)); } @@ -605,17 +637,17 @@ pub fn decode( .flat_map(|record| record.0.into_iter()) { let Some(index) = dictionary.variables.get_index_of(&record.short_name.0) else { - warn(Error::TBD); + warn(dbg!(Error::TBD)); continue; }; let width = VarWidth::String(record.length); let n_segments = width.n_segments(); if n_segments == 1 { - warn(Error::TBD); + warn(dbg!(Error::TBD)); continue; } if index + n_segments > dictionary.variables.len() { - warn(Error::TBD); + warn(dbg!(Error::TBD)); continue; } let mut short_names = Vec::with_capacity(n_segments); @@ -625,7 +657,7 @@ pub fn decode( short_names.push(segment.short_names[0].clone()); let segment_width = segment.width.as_string_width().unwrap_or(0); if segment_width.next_multiple_of(8) != alloc_width.next_multiple_of(8) { - warn(Error::TBD); + warn(dbg!(Error::TBD)); continue 'outer; } } @@ -667,7 +699,7 @@ pub fn decode( .unwrap() .short_names = vec![short_name]; } else { - warn(Error::TBD); + warn(dbg!(Error::TBD)); } } } @@ -683,7 +715,7 @@ pub fn decode( { variable.attributes.append(&mut attr_set.attributes); } else { - warn(Error::TBD); + warn(dbg!(Error::TBD)); } } @@ -692,7 +724,7 @@ pub fn decode( let variable = dictionary.variables.get_index_mut2(index).unwrap(); match variable.attributes.role() { Ok(role) => variable.role = role, - Err(InvalidRole) => warn(Error::TBD), + Err(error) => warn(Error::InvalidRole(error)), } } @@ -703,11 +735,11 @@ pub fn decode( .flat_map(|record| record.0.into_iter()) { let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else { - warn(Error::TBD); + warn(dbg!(Error::TBD)); continue; }; let Some(width) = variable.width.as_string_width() else { - warn(Error::TBD); + warn(dbg!(Error::TBD)); continue; }; for (mut value, label) in record.labels.into_iter() { @@ -725,7 +757,7 @@ pub fn decode( .flat_map(|record| record.0.into_iter()) { let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else { - warn(Error::TBD); + warn(dbg!(Error::TBD)); continue; }; let values = record @@ -752,7 +784,7 @@ pub fn decode( let mut variables = Vec::with_capacity(record.variable_names.len()); for variable_name in record.variable_names { let Some((dict_index, _)) = dictionary.variables.get_full_mut2(&variable_name.0) else { - warn(Error::TBD); + warn(dbg!(Error::TBD)); continue; }; variables.push(dict_index); @@ -776,7 +808,7 @@ impl MultipleResponseSet { fn decode( dictionary: &Dictionary, input: &raw::MultipleResponseSet, - warn: &impl Fn(Error), + warn: &mut impl FnMut(Error), ) -> Result { let mr_set_name = input.name.clone(); let mut variables = Vec::with_capacity(input.short_names.len()); @@ -844,7 +876,11 @@ fn fix_line_ends(s: &str) -> String { out } -fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Format, FormatError)) -> Format { +fn decode_format( + raw: raw::Spec, + width: VarWidth, + mut warn: impl FnMut(Format, FormatError), +) -> Format { UncheckedFormat::try_from(raw) .and_then(Format::try_from) .and_then(|x| x.check_width_compatibility(width)) diff --git a/rust/pspp/src/sys/encoding.rs b/rust/pspp/src/sys/encoding.rs index c408bf56fa..7baf427a1e 100644 --- a/rust/pspp/src/sys/encoding.rs +++ b/rust/pspp/src/sys/encoding.rs @@ -11,7 +11,7 @@ pub fn codepage_from_encoding(encoding: &str) -> Option { use thiserror::Error as ThisError; -#[derive(ThisError, Debug)] +#[derive(Clone, ThisError, Debug, PartialEq, Eq)] pub enum Error { #[error("This system file does not indicate its own character encoding. For best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")] NoEncoding, diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 3d6f548e21..fe13fe3fdb 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -207,7 +207,56 @@ pub enum Warning { #[error("{0}")] EncodingError(EncodingError), - #[error("Details TBD")] + #[error("Missing value record with range not allowed for string variable")] + MissingValueStringRange, + + #[error("Missing value record at offset {0:#x} not allowed for long string continuation")] + MissingValueContinuation(u64), + + #[error("Invalid multiple dichotomy label type")] + InvalidMultipleDichotomyLabelType, + + #[error("Invalid multiple response type")] + InvalidMultipleResponseType, + + #[error("Syntax error in multiple response record")] + MultipleResponseSyntaxError, + + #[error("Syntax error parsing counted string (missing trailing space)")] + CountedStringMissingSpace, + + #[error("Syntax error parsing counted string (invalid UTF-8)")] + CountedStringInvalidUTF8, + + #[error("Syntax error parsing counted string (invalid length {0:?})")] + CountedStringInvalidLength(String), + + #[error("Syntax error parsing counted string (length {0:?} goes past end of input)")] + CountedStringTooLong(usize), + + #[error("Variable display record contains {count} items but should contain either {first} or {second}.")] + InvalidVariableDisplayCount { + count: usize, + first: usize, + second: usize, + }, + + #[error("Very long string record missing delimiter in {0:?}.")] + VeryLongStringMissingDelimiter(String), + + #[error("Very long string record has invalid length in {0:?}.")] + VeryLongStringInvalidLength(String), + + #[error("Attribute record missing left parenthesis, in {0:?}.")] + AttributeMissingLParen(String), + + #[error("Attribute record missing new-line, in {0:?}.")] + AttributeMissingNewline(String), + + #[error("Attribute record missing quotations, in {0:?}.")] + AttributeMissingQuotes(String), + + #[error("Details TBD (raw)")] TBD, } @@ -1071,7 +1120,7 @@ fn format_name(type_: u32) -> Cow<'static, str> { .into() } -#[derive(Clone)] +#[derive(Clone, Default)] pub struct MissingValues> where S: Debug, @@ -1133,29 +1182,18 @@ where } } -impl Default for MissingValues -where - S: Debug, -{ - fn default() -> Self { - Self { - values: Vec::new(), - range: None, - } - } -} - impl MissingValues { fn read( r: &mut R, offset: u64, - width: RawWidth, + raw_width: RawWidth, code: i32, endian: Endian, warn: &dyn Fn(Warning), ) -> Result { let (individual_values, has_range) = match code { - 0..=3 => (code as usize, false), + 0 => return Ok(Self::default()), + 1..=3 => (code as usize, false), -2 => (0, true), -3 => (1, true), _ => return Err(Error::BadMissingValueCode { offset, code }), @@ -1173,7 +1211,7 @@ impl MissingValues { values.push(read_bytes::<8, _>(r)?); } - match VarWidth::try_from(width) { + match VarWidth::try_from(raw_width) { Ok(VarWidth::Numeric) => { let values = values .into_iter() @@ -1198,19 +1236,19 @@ impl MissingValues { ); return Ok(Self { values, range }); } - Ok(VarWidth::String(width)) if width <= 8 && range.is_none() => { + Ok(VarWidth::String(_)) if range.is_some() => warn(Warning::MissingValueStringRange), + Ok(VarWidth::String(width)) => { + let width = width.min(8) as usize; let values = values .into_iter() - .map(|value| Value::String(Box::from(&value[..width as usize]))) + .map(|value| Value::String(Box::from(&value[..width]))) .collect(); return Ok(Self { values, range: None, }); } - Ok(VarWidth::String(width)) if width > 8 => warn(Warning::TBD), - Ok(VarWidth::String(_)) => warn(Warning::TBD), - Err(()) => warn(Warning::TBD), + Err(()) => warn(Warning::MissingValueContinuation(offset)), } Ok(Self::default()) } @@ -1243,7 +1281,7 @@ where pub label: Option, } -#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum RawWidth { Continuation, Numeric, @@ -1903,7 +1941,7 @@ impl MultipleResponseType { } else if let Some(rest) = input.strip_prefix(b" 11 ") { (CategoryLabels::VarLabels, rest) } else { - return Err(Warning::TBD); + return Err(Warning::InvalidMultipleDichotomyLabelType); }; let (value, input) = parse_counted_string(input)?; ( @@ -1911,7 +1949,7 @@ impl MultipleResponseType { input, ) } - _ => return Err(Warning::TBD), + _ => return Err(Warning::InvalidMultipleResponseType), }; Ok((mr_type, input)) } @@ -1932,12 +1970,12 @@ where impl MultipleResponseSet { fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> { let Some(equals) = input.iter().position(|&b| b == b'=') else { - return Err(Warning::TBD); + return Err(Warning::MultipleResponseSyntaxError); }; let (name, input) = input.split_at(equals); let (mr_type, input) = MultipleResponseType::parse(input)?; let Some(input) = input.strip_prefix(b" ") else { - return Err(Warning::TBD); + return Err(Warning::MultipleResponseSyntaxError); }; let (label, mut input) = parse_counted_string(input)?; let mut vars = Vec::new(); @@ -1945,7 +1983,7 @@ impl MultipleResponseSet { match input.split_first() { Some((b' ', rest)) => { let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else { - return Err(Warning::TBD); + return Err(Warning::MultipleResponseSyntaxError); }; let (var, rest) = rest.split_at(length); if !var.is_empty() { @@ -1953,7 +1991,7 @@ impl MultipleResponseSet { } input = rest; } - _ => return Err(Warning::TBD), + _ => return Err(Warning::MultipleResponseSyntaxError), } } while input.first() == Some(&b'\n') { @@ -2035,21 +2073,18 @@ impl MultipleResponseRecord { fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> { let Some(space) = input.iter().position(|&b| b == b' ') else { - return Err(Warning::TBD); + return Err(Warning::CountedStringMissingSpace); }; let Ok(length) = from_utf8(&input[..space]) else { - return Err(Warning::TBD); + return Err(Warning::CountedStringInvalidUTF8); }; let Ok(length): Result = length.parse() else { - return Err(Warning::TBD); + return Err(Warning::CountedStringInvalidLength(length.into())); }; - let input = &input[space + 1..]; - if input.len() < length { - return Err(Warning::TBD); + let Some((string, rest)) = input[space + 1..].split_at_checked(length) else { + return Err(Warning::CountedStringTooLong(length)); }; - - let (string, rest) = input.split_at(length); Ok((string.into(), rest)) } @@ -2144,7 +2179,11 @@ impl VarDisplayRecord { } else if ext.count as usize == 2 * n_vars { false } else { - return Err(Warning::TBD); + return Err(Warning::InvalidVariableDisplayCount { + count: ext.count as usize, + first: 2 * n_vars, + second: 2 * n_vars, + }); }; let mut var_displays = Vec::new(); @@ -2365,13 +2404,15 @@ pub struct VeryLongString { impl VeryLongString { fn parse(decoder: &Decoder, input: &str) -> Result { let Some((short_name, length)) = input.split_once('=') else { - return Err(Warning::TBD); + return Err(Warning::VeryLongStringMissingDelimiter(input.into())); }; let short_name = decoder .new_identifier(short_name) .and_then(Identifier::must_be_ordinary) .map_err(Warning::InvalidLongStringName)?; - let length = length.parse().map_err(|_| Warning::TBD)?; + let length = length + .parse() + .map_err(|_| Warning::VeryLongStringInvalidLength(input.into()))?; Ok(VeryLongString { short_name, length }) } } @@ -2405,7 +2446,7 @@ pub struct Attribute { impl Attribute { fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> { let Some((name, mut input)) = input.split_once('(') else { - return Err(Warning::TBD); + return Err(Warning::AttributeMissingLParen(input.into())); }; let name = decoder .new_identifier(name) @@ -2413,7 +2454,7 @@ impl Attribute { let mut values = Vec::new(); loop { let Some((value, rest)) = input.split_once('\n') else { - return Err(Warning::TBD); + return Err(Warning::AttributeMissingNewline(input.into())); }; if let Some(stripped) = value .strip_prefix('\'') @@ -2421,7 +2462,7 @@ impl Attribute { { values.push(stripped.into()); } else { - decoder.warn(Warning::TBD); + decoder.warn(Warning::AttributeMissingQuotes(value.into())); values.push(value.into()); } if let Some(rest) = rest.strip_prefix(')') { diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index c5737b398c..2238194093 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -140,8 +140,15 @@ s16 "23456789abc"; s32 "defghijklmnopqstuvwxyzABC"; for header in headers { decoded_records.push(header.decode(&decoder).unwrap()); } - let headers = Headers::new(decoded_records, &|e| eprintln!("{e}")).unwrap(); - let (dictionary, metadata) = decode(headers, encoding, |e| eprintln!("{e}")).unwrap(); + + let mut errors = Vec::new(); + let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap(); + let (dictionary, metadata) = decode(headers, encoding, |e| errors.push(e)).unwrap(); + assert_eq!(errors, vec![]); println!("{dictionary:#?}"); + assert_eq!(metadata.endian, Endian::Big); + assert_eq!(metadata.compression, None); + assert_eq!(metadata.n_cases, Some(1)); + assert_eq!(metadata.version, Some((1, 2, 3))); println!("{metadata:#?}"); } -- 2.30.2