From ece097285d3fa0f73a08e8bb85c98ee22dfe23b0 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 14 Jul 2025 09:10:10 -0700 Subject: [PATCH] cleanup and docs --- rust/pspp/src/sys/encoding.rs | 33 ++- rust/pspp/src/sys/raw.rs | 365 +---------------------- rust/pspp/src/sys/raw/records.rs | 486 +++++++++++++++++++++++++++++-- 3 files changed, 499 insertions(+), 385 deletions(-) diff --git a/rust/pspp/src/sys/encoding.rs b/rust/pspp/src/sys/encoding.rs index 0f09f6bc5e..7ca705465d 100644 --- a/rust/pspp/src/sys/encoding.rs +++ b/rust/pspp/src/sys/encoding.rs @@ -14,6 +14,11 @@ // You should have received a copy of the GNU General Public License along with // this program. If not, see . +//! Character encodings in system files. + +// Warn about missing docs, but not for items declared with `#[cfg(test)]`. +#![cfg_attr(not(test), warn(missing_docs))] + use std::sync::LazyLock; use crate::locale_charset::locale_charset; @@ -21,6 +26,8 @@ use encoding_rs::{Encoding, UTF_8}; include!(concat!(env!("OUT_DIR"), "/encodings.rs")); +/// Returns the code page number corresponding to `encoding`, or `None` if +/// unknown. pub fn codepage_from_encoding(encoding: &str) -> Option { CODEPAGE_NAME_TO_NUMBER .get(encoding.to_ascii_lowercase().as_str()) @@ -29,27 +36,49 @@ pub fn codepage_from_encoding(encoding: &str) -> Option { use thiserror::Error as ThisError; +/// An error or warning related to encodings. #[derive(Clone, ThisError, Debug, PartialEq, Eq)] pub enum Error { + /// Warning that the system file doesn't indicate its own encoding. #[error("This system file does not indicate its own character encoding. For best results, specify an encoding explicitly. Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")] NoEncoding, + /// Unknown code page. #[error("This system file encodes text strings with unknown code page {0}.")] - UnknownCodepage(i32), + UnknownCodepage( + /// The code page number. + i32, + ), + /// Unknown encoding. 
#[error("This system file encodes text strings with unknown encoding {0}.")] - UnknownEncoding(String), + UnknownEncoding( + /// The encoding name. + String, + ), + /// EBCDIC not supported. #[error("This system file is encoded in EBCDIC, which is not supported.")] Ebcdic, } +/// Returns the default encoding to use. +/// +/// The default encoding is taken from the system or user's configured locale. pub fn default_encoding() -> &'static Encoding { static DEFAULT_ENCODING: LazyLock<&'static Encoding> = LazyLock::new(|| Encoding::for_label(locale_charset().as_bytes()).unwrap_or(UTF_8)); &DEFAULT_ENCODING } +/// Returns the character encoding to use for a system file. +/// +/// `encoding`, if any, should come from [EncodingRecord], and `character_code`, +/// if any, should come from [IntegerInfoRecord]. Returns an error if the encoding +/// to use is unclear or unspecified, or if (for EBCDIC) it is unsupported. +/// +/// [EncodingRecord]: crate::sys::raw::records::EncodingRecord +/// [IntegerInfoRecord]: crate::sys::raw::records::IntegerInfoRecord pub fn get_encoding( encoding: Option<&str>, character_code: Option, diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index b41a6fa3b3..12d224586b 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -31,21 +31,23 @@ use crate::{ sys::{ encoding::{default_encoding, get_encoding, Error as EncodingError}, raw::records::{ - Compression, DocumentRecord, EncodingRecord, Extension, FileAttributesRecord, - FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongNamesRecord, - LongStringMissingValueRecord, LongStringValueLabelRecord, MultipleResponseRecord, + AttributeWarning, Compression, DocumentRecord, EncodingRecord, Extension, + ExtensionWarning, FileAttributesRecord, FloatInfoRecord, HeaderRecord, HeaderWarning, + IntegerInfoRecord, LongNameWarning, LongNamesRecord, LongStringMissingValueRecord, + LongStringMissingValuesWarning, LongStringValueLabelRecord, + LongStringValueLabelWarning, 
MultipleResponseRecord, MultipleResponseWarning, NumberOfCasesRecord, ProductInfoRecord, RawDocumentLine, RawFileAttributesRecord, RawLongNamesRecord, RawProductInfoRecord, RawVariableAttributesRecord, - RawVariableSetRecord, RawVeryLongStringsRecord, ValueLabelRecord, VarDisplayRecord, - VariableAttributesRecord, VariableRecord, VariableSetRecord, VeryLongStringsRecord, - ZHeader, ZTrailer, + RawVariableSetRecord, RawVeryLongStringsRecord, ValueLabelRecord, ValueLabelWarning, + VarDisplayRecord, VariableAttributesRecord, VariableDisplayWarning, VariableRecord, + VariableSetRecord, VariableSetWarning, VariableWarning, VeryLongStringWarning, + VeryLongStringsRecord, ZHeader, ZTrailer, ZlibTrailerWarning, }, }, }; use encoding_rs::Encoding; use flate2::read::ZlibDecoder; -use itertools::Itertools; use smallvec::SmallVec; use std::{ borrow::Cow, @@ -492,355 +494,6 @@ impl From for WarningDetails { } } -/// A warning for a file header. -#[derive(ThisError, Debug)] -pub enum HeaderWarning { - /// Unexpected compression bias. - #[error("Compression bias is {0} instead of the usual values of 0 or 100.")] - UnexpectedBias(f64), -} - -/// Warning for a variable record. -#[derive(ThisError, Debug)] -pub enum VariableWarning { - /// Missing value record with range not allowed for string variable. - #[error("Missing value record with range not allowed for string variable.")] - MissingValueStringRange, - - /// Missing value not allowed for long string continuation. - #[error("Missing value not allowed for long string continuation")] - MissingValueContinuation, -} - -/// Warning for an extension record. -#[derive(ThisError, Debug)] -pub enum ExtensionWarning { - /// Unexpected end of data. - #[error("Unexpected end of data.")] - UnexpectedEndOfData, - - /// Invalid record size. - #[error("{record} has bad size {size} bytes instead of the expected {expected_size}.")] - BadRecordSize { - /// Name of the record. 
- record: &'static str, - /// Size of the elements in the record, in bytes. - size: u32, - /// Expected size of the elements in the record, in bytes. - expected_size: u32, - }, - - /// Invalid record count. - #[error("{record} has bad count {count} instead of the expected {expected_count}.")] - BadRecordCount { - /// Name of the record. - record: &'static str, - /// Number of elements in the record. - count: u32, - /// Expected number of elements in the record. - expected_count: u32, - }, -} - -/// Warning for a value label record. -#[derive(ThisError, Debug)] -pub enum ValueLabelWarning { - /// At least one valid variable index for value labels is required but none were specified. - #[error("At least one valid variable index is required but none were specified.")] - NoVarIndexes, - - /// Mixed variable types in value label record. - #[error("First variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", !var_type)] - MixedVarTypes { - /// Variable type. - var_type: VarType, - /// Indexes of variables with the other type. - wrong_types: Vec, - }, - - /// Value label invalid variable indexes. - #[error( - "One or more variable indexes were not in the valid range [1,{max}] or referred to string continuations: {invalid:?}" - )] - InvalidVarIndexes { - /// Maximum variable index. - max: usize, - /// Invalid variable indexes. - invalid: Vec, - }, -} - -/// Warning for a long string missing value record. -#[derive(ThisError, Debug)] -pub enum LongStringMissingValuesWarning { - /// Invalid value length. - #[error("Value length at offset {offset:#x} is {value_len} instead of the expected 8.")] - BadValueLength { - /// Offset of the value length. - offset: u64, - /// Actual value length. - value_len: u32, - }, - - /// Invalid variable name. - #[error("Invalid variable name. {0}")] - InvalidVariableName( - /// Variable name error. - IdError, - ), -} - -/// Warning for a long string value label record. 
-#[derive(ThisError, Debug)] -pub enum LongStringValueLabelWarning { - /// Invalid variable name. - #[error("Invalid variable name. {0}")] - InvalidVariableName( - /// Variable name error. - IdError, - ), -} - -/// Warning for a long variable name record. -#[derive(ThisError, Debug)] -pub enum LongNameWarning { - /// Missing `=`. - #[error("Missing `=` separator.")] - LongNameMissingEquals, - - /// Invalid short name. - #[error("Invalid short name. {0}")] - InvalidShortName( - /// Short variable name error. - IdError, - ), - - /// Invalid long name. - #[error("Invalid long name. {0}")] - InvalidLongName( - /// Long variable name error. - IdError, - ), -} - -/// Warning for a very long string variable record. -#[derive(ThisError, Debug)] -pub enum VeryLongStringWarning { - /// Invalid variable name. - #[error("Invalid variable name. {0}")] - InvalidLongStringName( - /// Variable name error. - IdError, - ), - - /// Missing delimiter. - #[error("Missing delimiter in {0:?}.")] - VeryLongStringMissingDelimiter(String), - - /// Invalid length. - #[error("Invalid length in {0:?}.")] - VeryLongStringInvalidLength( - /// Length. - String, - ), -} - -/// Warning for a multiple response set record. -#[derive(ThisError, Debug)] -pub enum MultipleResponseWarning { - /// Invalid multiple response set name. - #[error("Invalid multiple response set name. {0}")] - InvalidMrSetName( - /// Variable name error. - IdError, - ), - - /// Invalid variable name. - #[error("Invalid variable name. {0}")] - InvalidMrSetVariableName( - /// Variable name error. - IdError, - ), - - /// Invalid multiple dichotomy label type. - #[error("Invalid multiple dichotomy label type.")] - InvalidMultipleDichotomyLabelType, - - /// Invalid multiple response type. - #[error("Invalid multiple response type.")] - InvalidMultipleResponseType, - - /// Syntax error. - #[error("Syntax error ({0}).")] - MultipleResponseSyntaxError( - /// Detailed error. 
- &'static str, - ), - - /// Syntax error parsing counted string (missing trailing space). - #[error("Syntax error parsing counted string (missing trailing space).")] - CountedStringMissingSpace, - - /// Syntax error parsing counted string (invalid UTF-8). - #[error("Syntax error parsing counted string (invalid UTF-8).")] - CountedStringInvalidUTF8, - - /// Syntax error parsing counted string (invalid length). - #[error("Syntax error parsing counted string (invalid length {0:?}).")] - CountedStringInvalidLength( - /// Length. - String, - ), - - /// Syntax error parsing counted string (length goes past end of input). - #[error("Syntax error parsing counted string (length {0:?} goes past end of input).")] - CountedStringTooLong( - /// Length. - usize, - ), -} - -/// Warning for a file or variable attribute record. -#[derive(ThisError, Debug)] -pub enum AttributeWarning { - /// Invalid attribute name. - #[error("Invalid attribute name. {0}")] - InvalidAttributeName( - /// Attribute name error. - IdError, - ), - - /// Invalid variable name in attribute record. - #[error("Invalid variable name in attribute record. {0}")] - InvalidAttributeVariableName( - /// Variable name error. - IdError, - ), - - /// Attribute record missing left parenthesis. - #[error("Attribute record missing left parenthesis, in {0:?}.")] - AttributeMissingLParen( - /// Bad syntax. - String, - ), - - /// Attribute lacks value. - #[error("Attribute for {name}[{}] lacks value.", index + 1)] - AttributeMissingValue { - /// Attribute name. - name: Identifier, - /// 0-based index. - index: usize, - }, - - /// Attribute missing quotations. - #[error("Attribute for {name}[{}] missing quotations.", index + 1)] - AttributeMissingQuotes { - /// Attribute name. - name: Identifier, - /// 0-based index. - index: usize, - }, - - /// Variable attribute missing `:`. - #[error("Variable attribute missing `:`.")] - VariableAttributeMissingColon, - - /// Duplicate attributes for variable. 
- #[error("Duplicate attributes for variable {variable}: {}.", attributes.iter().join(", "))] - DuplicateVariableAttributes { - /// Variable name. - variable: Identifier, - /// Attributes with duplicates. - attributes: Vec, - }, - - /// Duplicate dataset attributes. - #[error("Duplicate dataset attributes with names: {}.", attributes.iter().join(", "))] - DuplicateFileAttributes { - /// Attributes with duplicates. - attributes: Vec, - }, - - /// File attributes record contains trailing garbage. - #[error("File attributes record contains trailing garbage.")] - FileAttributesTrailingGarbage, -} - -/// Warning for a variable display record. -#[derive(ThisError, Debug)] -pub enum VariableDisplayWarning { - /// Wrong number of variable display items. - #[error("Record contains {count} items but should contain either {first} or {second}.")] - InvalidVariableDisplayCount { - /// Actual count. - count: usize, - /// First valid count. - first: usize, - /// Second valid count. - second: usize, - }, - - /// Invalid variable measurement level value. - #[error("Invalid variable measurement level value {0}.")] - InvalidMeasurement( - /// Invalid value. - u32, - ), - - /// Invalid variable display alignment value. - #[error("Invalid variable display alignment value {0}.")] - InvalidAlignment( - /// Invalid value. - u32, - ), -} - -/// Warning for a variable sets record. -#[derive(ThisError, Debug)] -pub enum VariableSetWarning { - /// Invalid variable name. - #[error("Invalid variable name. {0}")] - InvalidVariableSetName( - /// Variable name error. - IdError, - ), - - /// Missing name delimiter. - #[error("Missing name delimiter.")] - VariableSetMissingEquals, -} - -/// Warning for a ZLIB trailer record. -#[derive(ThisError, Debug)] -pub enum ZlibTrailerWarning { - /// Wrong block size. - #[error( - "ZLIB block descriptor {index} reported block size {actual:#x}, when {expected:#x} was expected." - )] - ZlibTrailerBlockWrongSize { - /// 0-based block descriptor index. 
- index: usize, - /// Actual block size. - actual: u32, - /// Expected block size. - expected: u32, - }, - - /// Block too big. - #[error( - "ZLIB block descriptor {index} reported block size {actual:#x}, when at most {max_expected:#x} was expected." - )] - ZlibTrailerBlockTooBig { - /// 0-based block descriptor index. - index: usize, - /// Actual block size. - actual: u32, - /// Maximum expected block size. - max_expected: u32, - }, -} - /// A raw record in a system file. #[allow(missing_docs)] // Don't warn for missing docs on tuple members. #[derive(Clone, Debug)] diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index fe2588b786..d1335c23bb 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -20,16 +20,14 @@ use crate::{ endian::{Endian, Parse}, identifier::{Error as IdError, Identifier}, sys::raw::{ - read_bytes, read_string, read_vec, AttributeWarning, Decoder, Error, ErrorDetails, - ExtensionWarning, HeaderWarning, LongNameWarning, LongStringMissingValuesWarning, - LongStringValueLabelWarning, Magic, MultipleResponseWarning, RawDatum, RawStrArray, - RawWidth, Record, UntypedDatum, ValueLabelWarning, VarTypes, VariableDisplayWarning, - VariableSetWarning, VariableWarning, VeryLongStringWarning, Warning, WarningDetails, - ZlibTrailerWarning, + read_bytes, read_string, read_vec, Decoder, Error, ErrorDetails, Magic, RawDatum, + RawStrArray, RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails, }, }; use binrw::BinRead; +use itertools::Itertools; +use thiserror::Error as ThisError; /// Type of compression in a system file. #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -42,6 +40,14 @@ pub enum Compression { ZLib, } +/// A warning for a file header. +#[derive(ThisError, Debug)] +pub enum HeaderWarning { + /// Unexpected compression bias. + #[error("Compression bias is {0} instead of the usual values of 0 or 100.")] + UnexpectedBias(f64), +} + /// A file header record in a system file. 
#[derive(Clone)] pub struct HeaderRecord @@ -242,7 +248,7 @@ impl HeaderRecord { } } -/// [crate::format::Format] as represented in a system file. +/// [Format](crate::format::Format) as represented in a system file. #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct RawFormat( /// The most-significant 16 bits are the type, the next 8 bytes are the @@ -405,6 +411,18 @@ impl MissingValues { } } +/// Warning for a variable record. +#[derive(ThisError, Debug)] +pub enum VariableWarning { + /// Missing value record with range not allowed for string variable. + #[error("Missing value record with range not allowed for string variable.")] + MissingValueStringRange, + + /// Missing value not allowed for long string continuation. + #[error("Missing value not allowed for long string continuation")] + MissingValueContinuation, +} + /// A variable record in a system file. #[derive(Clone)] pub struct VariableRecord @@ -540,6 +558,34 @@ impl VariableRecord { } } +/// Warning for a value label record. +#[derive(ThisError, Debug)] +pub enum ValueLabelWarning { + /// At least one valid variable index for value labels is required but none were specified. + #[error("At least one valid variable index is required but none were specified.")] + NoVarIndexes, + + /// Mixed variable types in value label record. + #[error("First variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", !var_type)] + MixedVarTypes { + /// Variable type. + var_type: VarType, + /// Indexes of variables with the other type. + wrong_types: Vec, + }, + + /// Value label invalid variable indexes. + #[error( + "One or more variable indexes were not in the valid range [1,{max}] or referred to string continuations: {invalid:?}" + )] + InvalidVarIndexes { + /// Maximum variable index. + max: usize, + /// Invalid variable indexes. + invalid: Vec, + }, +} + /// A value and label in a system file. 
#[derive(Clone, Debug)] pub struct ValueLabel @@ -963,6 +1009,28 @@ impl TextRecord { } } +/// Warning for a very long string variable record. +#[derive(ThisError, Debug)] +pub enum VeryLongStringWarning { + /// Invalid variable name. + #[error("Invalid variable name. {0}")] + InvalidLongStringName( + /// Variable name error. + IdError, + ), + + /// Missing delimiter. + #[error("Missing delimiter in {0:?}.")] + VeryLongStringMissingDelimiter(String), + + /// Invalid length. + #[error("Invalid length in {0:?}.")] + VeryLongStringInvalidLength( + /// Length. + String, + ), +} + /// A very long string parsed from a [VeryLongStringsRecord]. #[derive(Clone, Debug)] pub struct VeryLongString { @@ -975,7 +1043,7 @@ pub struct VeryLongString { impl VeryLongString { /// Parses a [VeryLongString] from `input` using `decoder`. - fn parse(decoder: &Decoder, input: &str) -> Result { + pub fn parse(decoder: &Decoder, input: &str) -> Result { let Some((short_name, length)) = input.split_once('=') else { return Err(VeryLongStringWarning::VeryLongStringMissingDelimiter(input.into()).into()); }; @@ -1029,6 +1097,61 @@ impl RawVeryLongStringsRecord { } } +/// Warning for a multiple response set record. +#[derive(ThisError, Debug)] +pub enum MultipleResponseWarning { + /// Invalid multiple response set name. + #[error("Invalid multiple response set name. {0}")] + InvalidMrSetName( + /// Variable name error. + IdError, + ), + + /// Invalid variable name. + #[error("Invalid variable name. {0}")] + InvalidMrSetVariableName( + /// Variable name error. + IdError, + ), + + /// Invalid multiple dichotomy label type. + #[error("Invalid multiple dichotomy label type.")] + InvalidMultipleDichotomyLabelType, + + /// Invalid multiple response type. + #[error("Invalid multiple response type.")] + InvalidMultipleResponseType, + + /// Syntax error. + #[error("Syntax error ({0}).")] + MultipleResponseSyntaxError( + /// Detailed error. 
+ &'static str, + ), + + /// Syntax error parsing counted string (missing trailing space). + #[error("Syntax error parsing counted string (missing trailing space).")] + CountedStringMissingSpace, + + /// Syntax error parsing counted string (invalid UTF-8). + #[error("Syntax error parsing counted string (invalid UTF-8).")] + CountedStringInvalidUTF8, + + /// Syntax error parsing counted string (invalid length). + #[error("Syntax error parsing counted string (invalid length {0:?}).")] + CountedStringInvalidLength( + /// Length. + String, + ), + + /// Syntax error parsing counted string (length goes past end of input). + #[error("Syntax error parsing counted string (length {0:?} goes past end of input).")] + CountedStringTooLong( + /// Length. + usize, + ), +} + /// The type of a multiple-response set. #[derive(Clone, Debug)] pub enum MultipleResponseType { @@ -1197,7 +1320,7 @@ where impl MultipleResponseRecord { /// Parses a multiple-response set from `ext`. - fn parse(ext: &Extension) -> Result { + pub fn parse(ext: &Extension) -> Result { ext.check_size(Some(1), None, "multiple response set record")?; let mut input = &ext.data[..]; @@ -1256,6 +1379,35 @@ fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), WarningDetai Ok((string.into(), rest)) } +/// Warning for a variable display record. +#[derive(ThisError, Debug)] +pub enum VariableDisplayWarning { + /// Wrong number of variable display items. + #[error("Record contains {count} items but should contain either {first} or {second}.")] + InvalidVariableDisplayCount { + /// Actual count. + count: usize, + /// First valid count. + first: usize, + /// Second valid count. + second: usize, + }, + + /// Invalid variable measurement level value. + #[error("Invalid variable measurement level value {0}.")] + InvalidMeasurement( + /// Invalid value. + u32, + ), + + /// Invalid variable display alignment value. 
+ #[error("Invalid variable display alignment value {0}.")] + InvalidAlignment( + /// Invalid value. + u32, + ), +} + impl Measure { fn try_decode(source: u32) -> Result, WarningDetails> { match source { @@ -1343,6 +1495,26 @@ impl VarDisplayRecord { } } +/// Warning for a long string missing value record. +#[derive(ThisError, Debug)] +pub enum LongStringMissingValuesWarning { + /// Invalid value length. + #[error("Value length at offset {offset:#x} is {value_len} instead of the expected 8.")] + BadValueLength { + /// Offset of the value length. + offset: u64, + /// Actual value length. + value_len: u32, + }, + + /// Invalid variable name. + #[error("Invalid variable name. {0}")] + InvalidVariableName( + /// Variable name error. + IdError, + ), +} + /// Missing values for one long string variable. #[derive(Clone, Debug)] pub struct LongStringMissingValues @@ -1384,7 +1556,7 @@ where impl LongStringMissingValueRecord { /// Parses this record from `ext`. - fn parse( + pub fn parse( ext: &Extension, endian: Endian, warn: &mut dyn FnMut(Warning), @@ -1463,7 +1635,7 @@ pub struct EncodingRecord( impl EncodingRecord { /// Parses this record from `ext`. - fn parse(ext: &Extension) -> Result { + pub fn parse(ext: &Extension) -> Result { ext.check_size(Some(1), None, "encoding record")?; Ok(Record::Encoding(EncodingRecord( @@ -1483,7 +1655,8 @@ pub struct NumberOfCasesRecord { } impl NumberOfCasesRecord { - fn parse(ext: &Extension, endian: Endian) -> Result { + /// Parses a number of cases record from `ext` using `endian`. + pub fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size(Some(8), Some(2), "extended number of cases record")?; let mut input = &ext.data[..]; @@ -1494,16 +1667,35 @@ impl NumberOfCasesRecord { } } +/// Warning for a variable sets record. +#[derive(ThisError, Debug)] +pub enum VariableSetWarning { + /// Invalid variable name. + #[error("Invalid variable name. {0}")] + InvalidVariableSetName( + /// Variable name error. 
+ IdError, + ), + + /// Missing name delimiter. + #[error("Missing name delimiter.")] + VariableSetMissingEquals, +} + +/// Raw (text) version of the variable set record in a system file. #[derive(Clone, Debug)] pub struct RawVariableSetRecord(TextRecord); impl RawVariableSetRecord { - fn parse(extension: Extension) -> Result { + /// Parses the record from `extension`. + pub fn parse(extension: Extension) -> Result { Ok(Record::VariableSets(Self(TextRecord::parse( extension, "variable sets record", )?))) } + + /// Decodes the record using `decoder`. pub fn decode(self, decoder: &mut Decoder) -> VariableSetRecord { let mut sets = Vec::new(); let input = decoder.decode(&self.0.text); @@ -1521,28 +1713,106 @@ impl RawVariableSetRecord { } } +/// Raw (text) version of a product info record in a system file. #[derive(Clone, Debug)] pub struct RawProductInfoRecord(TextRecord); impl RawProductInfoRecord { - fn parse(extension: Extension) -> Result { + /// Parses the record from `extension`. + pub fn parse(extension: Extension) -> Result { Ok(Record::ProductInfo(Self(TextRecord::parse( extension, "product info record", )?))) } + + /// Decodes the record using `decoder`. pub fn decode(self, decoder: &mut Decoder) -> ProductInfoRecord { ProductInfoRecord(decoder.decode(&self.0.text).into()) } } +/// Warning for a file or variable attribute record. +#[derive(ThisError, Debug)] +pub enum AttributeWarning { + /// Invalid attribute name. + #[error("Invalid attribute name. {0}")] + InvalidAttributeName( + /// Attribute name error. + IdError, + ), + + /// Invalid variable name in attribute record. + #[error("Invalid variable name in attribute record. {0}")] + InvalidAttributeVariableName( + /// Variable name error. + IdError, + ), + + /// Attribute record missing left parenthesis. + #[error("Attribute record missing left parenthesis, in {0:?}.")] + AttributeMissingLParen( + /// Bad syntax. + String, + ), + + /// Attribute lacks value. 
+ #[error("Attribute for {name}[{}] lacks value.", index + 1)] + AttributeMissingValue { + /// Attribute name. + name: Identifier, + /// 0-based index. + index: usize, + }, + + /// Attribute missing quotations. + #[error("Attribute for {name}[{}] missing quotations.", index + 1)] + AttributeMissingQuotes { + /// Attribute name. + name: Identifier, + /// 0-based index. + index: usize, + }, + + /// Variable attribute missing `:`. + #[error("Variable attribute missing `:`.")] + VariableAttributeMissingColon, + + /// Duplicate attributes for variable. + #[error("Duplicate attributes for variable {variable}: {}.", attributes.iter().join(", "))] + DuplicateVariableAttributes { + /// Variable name. + variable: Identifier, + /// Attributes with duplicates. + attributes: Vec, + }, + + /// Duplicate dataset attributes. + #[error("Duplicate dataset attributes with names: {}.", attributes.iter().join(", "))] + DuplicateFileAttributes { + /// Attributes with duplicates. + attributes: Vec, + }, + + /// File attributes record contains trailing garbage. + #[error("File attributes record contains trailing garbage.")] + FileAttributesTrailingGarbage, +} + +/// A file or variable attribute in a system file. #[derive(Clone, Debug)] pub struct Attribute { + /// The attribute's name. pub name: Identifier, + + /// The attribute's values. pub values: Vec, } impl Attribute { + /// Parses an attribute from the beginning of `input` using `decoder`. Uses + /// `offsets` to report warnings. Returns the decoded attribute and the + /// part of `input` that remains to be parsed following the attribute. fn parse<'a>( decoder: &mut Decoder, offsets: &Range, @@ -1588,6 +1858,10 @@ impl Attribute { } impl Attributes { + /// Parses a set of variable or file attributes from `input` using + /// `decoder`. Uses `offsets` for reporting warnings. If not `None`, + /// `sentinel` terminates the attributes. Returns the attributes and the + /// part of `input` that remains after parsing the attributes. 
fn parse<'a>( decoder: &mut Decoder, offsets: &Range, @@ -1614,19 +1888,24 @@ impl Attributes { } } +/// A raw (text) file attributes record in a system file. #[derive(Clone, Debug)] pub struct RawFileAttributesRecord(TextRecord); +/// A decoded file attributes record in a system file. #[derive(Clone, Debug, Default)] pub struct FileAttributesRecord(pub Attributes); impl RawFileAttributesRecord { - fn parse(extension: Extension) -> Result { + /// Parses this record from `extension`. + pub fn parse(extension: Extension) -> Result { Ok(Record::FileAttributes(Self(TextRecord::parse( extension, "file attributes record", )?))) } + + /// Decodes this record using `decoder`. pub fn decode(self, decoder: &mut Decoder) -> FileAttributesRecord { let input = decoder.decode(&self.0.text); match Attributes::parse(decoder, &self.0.offsets, &input, None) @@ -1654,13 +1933,19 @@ impl RawFileAttributesRecord { } } +/// A set of variable attributes in a system file. #[derive(Clone, Debug)] pub struct VarAttributes { + /// The long name of the variable associated with the attributes. pub long_var_name: Identifier, + + /// The attributes. pub attributes: Attributes, } impl VarAttributes { + /// Parses a variable attribute set from `input` using `decoder`. Uses + /// `offsets` for reporting warnings. fn parse<'a>( decoder: &mut Decoder, offsets: &Range, @@ -1683,27 +1968,34 @@ impl VarAttributes { }, )); } - let var_attribute = VarAttributes { - long_var_name, - attributes, - }; - Ok((var_attribute, rest)) + Ok(( + VarAttributes { + long_var_name, + attributes, + }, + rest, + )) } } +/// A raw (text) variable attributes record in a system file. #[derive(Clone, Debug)] pub struct RawVariableAttributesRecord(TextRecord); +/// A decoded variable attributes record in a system file. #[derive(Clone, Debug)] pub struct VariableAttributesRecord(pub Vec); impl RawVariableAttributesRecord { - fn parse(extension: Extension) -> Result { + /// Parses a variable attributes record. 
+ pub fn parse(extension: Extension) -> Result { Ok(Record::VariableAttributes(Self(TextRecord::parse( extension, "variable attributes record", )?))) } + + /// Decodes a variable attributes record using `decoder`. pub fn decode(self, decoder: &mut Decoder) -> VariableAttributesRecord { let decoded = decoder.decode(&self.0.text); let mut input = decoded.as_ref(); @@ -1721,14 +2013,41 @@ impl RawVariableAttributesRecord { } } +/// Warning for a long variable name record. +#[derive(ThisError, Debug)] +pub enum LongNameWarning { + /// Missing `=`. + #[error("Missing `=` separator.")] + LongNameMissingEquals, + + /// Invalid short name. + #[error("Invalid short name. {0}")] + InvalidShortName( + /// Short variable name error. + IdError, + ), + + /// Invalid long name. + #[error("Invalid long name. {0}")] + InvalidLongName( + /// Long variable name error. + IdError, + ), +} + +/// A long variable name in a system file. #[derive(Clone, Debug)] pub struct LongName { + /// The variable's short name. pub short_name: Identifier, + + /// The variable's long name. pub long_name: Identifier, } impl LongName { - fn parse(input: &str, decoder: &Decoder) -> Result { + /// Parses a long variable name from `input` using `decoder`. + pub fn parse(input: &str, decoder: &Decoder) -> Result { let Some((short_name, long_name)) = input.split_once('=') else { return Err(LongNameWarning::LongNameMissingEquals.into()); }; @@ -1747,19 +2066,27 @@ impl LongName { } } +/// A long variable name record in a system file. #[derive(Clone, Debug)] pub struct LongNamesRecord(pub Vec); +/// A product info record in a system file. #[derive(Clone, Debug)] pub struct ProductInfoRecord(pub String); +/// A variable set in a system file. #[derive(Clone, Debug)] pub struct VariableSet { + /// Name of the variable set. pub name: String, + + /// The long variable names of the members of the set. pub variable_names: Vec, } impl VariableSet { + /// Parses a variable set from `input` using `decoder`. 
Uses `offsets` to + /// report warnings. fn parse( input: &str, decoder: &mut Decoder, @@ -1786,9 +2113,13 @@ impl VariableSet { } } +/// A variable set record in a system file. #[derive(Clone, Debug)] pub struct VariableSetRecord { + /// Range of file offsets occupied by the record. pub offsets: Range, + + /// The variable sets in the record. pub sets: Vec, } @@ -1810,8 +2141,46 @@ where } } +/// Warning for an extension record. +#[derive(ThisError, Debug)] +pub enum ExtensionWarning { + /// Unexpected end of data. + #[error("Unexpected end of data.")] + UnexpectedEndOfData, + + /// Invalid record size. + #[error("{record} has bad size {size} bytes instead of the expected {expected_size}.")] + BadRecordSize { + /// Name of the record. + record: &'static str, + /// Size of the elements in the record, in bytes. + size: u32, + /// Expected size of the elements in the record, in bytes. + expected_size: u32, + }, + + /// Invalid record count. + #[error("{record} has bad count {count} instead of the expected {expected_count}.")] + BadRecordCount { + /// Name of the record. + record: &'static str, + /// Number of elements in the record. + count: u32, + /// Expected number of elements in the record. + expected_count: u32, + }, +} + +/// An extension record in a system file. +/// +/// Most of the records in system files are "extension records". This structure +/// collects everything in an extension record for later processing. #[derive(Clone, Debug)] pub struct Extension { + /// File offsets occupied by the extension record. + /// + /// These are the offsets of the `data` portion of the record, not including + /// the header that specifies the subtype, size, and count. pub offsets: Range, /// Record subtype. @@ -1917,12 +2286,27 @@ impl Extension { } } +/// Warning for a long string value label record. +#[derive(ThisError, Debug)] +pub enum LongStringValueLabelWarning { + /// Invalid variable name. + #[error("Invalid variable name. 
{0}")] + InvalidVariableName( + /// Variable name error. + IdError, + ), +} + +/// One set of long string value labels in a system file. #[derive(Clone, Debug)] pub struct LongStringValueLabels where S: Debug, { + /// The variable being labeled. pub var_name: N, + + /// The variable's width (greater than 8, since it's a long string). pub width: u32, /// `(value, label)` pairs, where each value is `width` bytes. @@ -1930,6 +2314,7 @@ where } impl LongStringValueLabels { + /// Decodes a set of long string value labels using `decoder`. fn decode( &self, decoder: &mut Decoder, @@ -1952,17 +2337,22 @@ impl LongStringValueLabels { } } +/// A long string value labels record in a system file. #[derive(Clone, Debug)] pub struct LongStringValueLabelRecord where N: Debug, S: Debug, { + /// File offsets occupied by the record. pub offsets: Range, + + /// The labels. pub labels: Vec>, } impl LongStringValueLabelRecord { + /// Parses this record from `ext` using `endian`. fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size(Some(1), None, "long string value labels record")?; @@ -1989,9 +2379,8 @@ impl LongStringValueLabelRecord { labels: label_set, })) } -} -impl LongStringValueLabelRecord { + /// Decodes this record using `decoder`. pub fn decode(self, decoder: &mut Decoder) -> LongStringValueLabelRecord { let mut labels = Vec::with_capacity(self.labels.len()); for label in &self.labels { @@ -2007,6 +2396,7 @@ impl LongStringValueLabelRecord { } } +/// ZLIB header, for [Compression::ZLib]. #[derive(Clone, Debug)] pub struct ZHeader { /// File offset to the start of the record. @@ -2023,7 +2413,11 @@ pub struct ZHeader { } impl ZHeader { - pub fn read(r: &mut R, endian: Endian) -> Result { + /// Reads a ZLIB header from `r` using `endian`.
+ pub fn read(r: &mut R, endian: Endian) -> Result + where + R: Read + Seek, + { let offset = r.stream_position()?; let zheader_offset: u64 = endian.parse(read_bytes(r)?); let ztrailer_offset: u64 = endian.parse(read_bytes(r)?); @@ -2050,6 +2444,7 @@ impl ZHeader { } } +/// A ZLIB trailer in a system file. #[derive(Clone, Debug)] pub struct ZTrailer { /// File offset to the start of the record. @@ -2069,6 +2464,37 @@ pub struct ZTrailer { pub blocks: Vec, } +/// Warning for a ZLIB trailer record. +#[derive(ThisError, Debug)] +pub enum ZlibTrailerWarning { + /// Wrong block size. + #[error( + "ZLIB block descriptor {index} reported block size {actual:#x}, when {expected:#x} was expected." + )] + ZlibTrailerBlockWrongSize { + /// 0-based block descriptor index. + index: usize, + /// Actual block size. + actual: u32, + /// Expected block size. + expected: u32, + }, + + /// Block too big. + #[error( + "ZLIB block descriptor {index} reported block size {actual:#x}, when at most {max_expected:#x} was expected." + )] + ZlibTrailerBlockTooBig { + /// 0-based block descriptor index. + index: usize, + /// Actual block size. + actual: u32, + /// Maximum expected block size. + max_expected: u32, + }, +} + +/// A ZLIB block descriptor in a system file. #[derive(Clone, Debug)] pub struct ZBlock { /// Offset of block of data if simple compression were used. @@ -2111,13 +2537,19 @@ impl ZBlock { } impl ZTrailer { - pub fn read( + /// Reads a ZLIB trailer from `reader` using `endian`. `bias` is the + /// floating-point bias for confirmation against the trailer, and `zheader` + /// is the previously read ZLIB header. Uses `warn` to report warnings. + pub fn read( reader: &mut R, endian: Endian, bias: f64, zheader: &ZHeader, warn: &mut dyn FnMut(Warning), - ) -> Result, Error> { + ) -> Result, Error> + where + R: Read + Seek, + { let start_offset = reader.stream_position()?; if reader .seek(SeekFrom::Start(zheader.ztrailer_offset)) -- 2.30.2