From: Ben Pfaff Date: Mon, 14 Jul 2025 02:59:58 +0000 (-0700) Subject: cleanup X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6b436c5fb15622149ffd26832e96d93ff5843ebb;p=pspp cleanup --- diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 8be83dea2a..b41a6fa3b3 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -19,6 +19,9 @@ //! This module facilitates reading records from system files in all of their //! raw details. Most readers will want to use higher-level interfaces. +// Warn about missing docs, but not for items declared with `#[cfg(test)]`. +#![cfg_attr(not(test), warn(missing_docs))] + use crate::{ data::{Case, Datum, RawStr, RawString}, dictionary::{VarType, VarWidth}, @@ -74,6 +77,7 @@ pub struct Error { impl std::error::Error for Error {} impl Error { + /// Constructs an error from `offsets` and `details`. pub fn new(offsets: Option>, details: ErrorDetails) -> Self { Self { offsets, details } } @@ -107,146 +111,261 @@ impl From for Error { /// Details of an [Error]. #[derive(ThisError, Debug)] pub enum ErrorDetails { + /// Not an SPSS system file. #[error("Not an SPSS system file")] NotASystemFile, + /// Bad [Magic]. #[error("Invalid magic number {0:?}")] BadMagic([u8; 4]), + /// I/O error. #[error("I/O error ({0})")] Io(#[from] IoError), + /// Invalid SAV compression code. #[error("Invalid SAV compression code {0}")] InvalidSavCompression(u32), + /// Invalid ZSAV compression code {0}. #[error("Invalid ZSAV compression code {0}")] InvalidZsavCompression(u32), + /// Document record has document line count ({n}) greater than the maximum number {max}. #[error( "Document record has document line count ({n}) greater than the maximum number {max}." )] - BadDocumentLength { n: usize, max: usize }, + BadDocumentLength { + /// Number of document lines. + n: usize, + /// Maximum number of document lines. + max: usize, + }, + /// Unrecognized record type. 
#[error("Unrecognized record type {0}.")] BadRecordType(u32), + /// Variable width in variable record is not in the valid range -1 to 255. #[error("Variable width {0} in variable record is not in the valid range -1 to 255.")] BadVariableWidth(i32), + /// In variable record, variable label code is not 0 or 1. #[error("In variable record, variable label code {0} is not 0 or 1.")] BadVariableLabelCode(u32), + /// Missing value code is not -3, -2, 0, 1, 2, or 3. #[error("Missing value code ({0}) is not -3, -2, 0, 1, 2, or 3.")] BadMissingValueCode(i32), + /// Numeric missing value code is not -3, -2, 0, 1, 2, or 3. #[error("Numeric missing value code ({0}) is not -3, -2, 0, 1, 2, or 3.")] BadNumericMissingValueCode(i32), + /// String missing value code is not 0, 1, 2, or 3. #[error("String missing value code ({0}) is not 0, 1, 2, or 3.")] BadStringMissingValueCode(i32), + /// Number of value labels ({n}) is greater than the maximum number {max}. #[error("Number of value labels ({n}) is greater than the maximum number {max}.")] - BadNumberOfValueLabels { n: u32, max: u32 }, - - #[error( - "Following value label record, found record type {0} instead of expected type 4 for variable index record" - )] - ExpectedVarIndexRecord(u32), + BadNumberOfValueLabels { + /// Number of value labels. + n: u32, + /// Maximum number of value labels. + max: u32, + }, + /// Following value label record, found record type {0} instead of expected + /// type 4 for variable index record. + #[ + error( + "Following value label record, found record type {0} instead of expected type 4 for variable index record" + )] + ExpectedVarIndexRecord( + /// Record type. + u32, + ), + + /// Number of variables indexes for value labels ({n}) is greater than the + /// maximum number ({max}). #[error( "Number of variables indexes for value labels ({n}) is greater than the maximum number ({max})." )] - TooManyVarIndexes { n: u32, max: u32 }, + TooManyVarIndexes { + /// Number of variable indexes. 
+ n: u32, + /// Maximum number of variable indexes. + max: u32, + }, + /// Record type 7 subtype {subtype} is too large with element size {size} and {count} elements. #[error( "Record type 7 subtype {subtype} is too large with element size {size} and {count} elements." )] - ExtensionRecordTooLarge { subtype: u32, size: u32, count: u32 }, + ExtensionRecordTooLarge { + /// Subtype. + subtype: u32, + /// Element size in bytes. + size: u32, + /// Number of elements. + count: u32, + }, + /// Unexpected end of file {case_ofs} bytes into a {case_len}-byte case. #[error("Unexpected end of file {case_ofs} bytes into a {case_len}-byte case.")] - EofInCase { case_ofs: u64, case_len: usize }, + EofInCase { + /// Offset into case in bytes. + case_ofs: u64, + /// Expected case length in bytes. + case_len: usize, + }, + /// Unexpected end of file {case_ofs} bytes and {n_chunks} compression + /// chunks into a compressed case. #[error( "Unexpected end of file {case_ofs} bytes and {n_chunks} compression chunks into a compressed case." )] - EofInCompressedCase { case_ofs: u64, n_chunks: usize }, - - #[error("Data ends {case_ofs} bytes into a compressed case.")] - PartialCompressedCase { case_ofs: u64 }, - - #[error("At {0} bytes into compressed case, a string was found where a number was expected.")] - CompressedNumberExpected(u64), - - #[error("At {0} bytes into compressed case, a number was found where a string was expected.")] - CompressedStringExpected(u64), + EofInCompressedCase { + /// Offset into case in bytes. + case_ofs: u64, + /// Number of compression codes consumed. + n_chunks: usize, + }, + /// Impossible ztrailer_offset {0:#x}. #[error("Impossible ztrailer_offset {0:#x}.")] - ImpossibleZTrailerOffset(u64), + ImpossibleZTrailerOffset( + /// `ztrailer_offset` + u64, + ), + /// ZLIB header's zlib_offset is {actual:#x} instead of expected + /// {expected:#x}. 
#[error("ZLIB header's zlib_offset is {actual:#x} instead of expected {expected:#x}.")] - UnexpectedZHeaderOffset { actual: u64, expected: u64 }, + UnexpectedZHeaderOffset { + /// Actual `zlib_offset`. + actual: u64, + /// Expected `zlib_offset`. + expected: u64, + }, + /// Invalid ZLIB trailer length {0}. #[error("Invalid ZLIB trailer length {0}.")] - InvalidZTrailerLength(u64), - - #[error( + InvalidZTrailerLength( + /// ZLIB trailer length. + u64, + ), + + /// ZLIB trailer bias {actual} is not {} as expected from file header bias. + #[ + error( "ZLIB trailer bias {actual} is not {} as expected from file header bias.", DisplayPlainF64(*expected) )] - WrongZlibTrailerBias { actual: i64, expected: f64 }, + WrongZlibTrailerBias { + /// ZLIB trailer bias read from file. + actual: i64, + /// Expected ZLIB trailer bias. + expected: f64, + }, + /// ZLIB trailer \"zero\" field has nonzero value {0}. #[error("ZLIB trailer \"zero\" field has nonzero value {0}.")] - WrongZlibTrailerZero(u64), + WrongZlibTrailerZero( + /// Actual value that should have been zero. + u64, + ), + /// ZLIB trailer specifies unexpected {0}-byte block size. #[error("ZLIB trailer specifies unexpected {0}-byte block size.")] - WrongZlibTrailerBlockSize(u32), + WrongZlibTrailerBlockSize( + /// Block size read from file. + u32, + ), + /// Block count in ZLIB trailer differs from expected block count calculated + /// from trailer length. #[error( "Block count {n_blocks} in ZLIB trailer differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}." )] BadZlibTrailerNBlocks { + /// Number of blocks. n_blocks: u32, + /// Expected number of blocks. expected_n_blocks: u64, + /// ZLIB trailer length in bytes. ztrailer_len: u64, }, + /// ZLIB block descriptor reported uncompressed data offset different from + /// expected. #[error( "ZLIB block descriptor {index} reported uncompressed data offset {actual:#x}, when {expected:#x} was expected." 
)] ZlibTrailerBlockWrongUncmpOfs { + /// Block descriptor index. index: usize, + /// Actual uncompressed data offset. actual: u64, + /// Expected uncompressed data offset. expected: u64, }, + /// ZLIB block descriptor {index} reported compressed data offset + /// {actual:#x}, when {expected:#x} was expected. #[error( "ZLIB block descriptor {index} reported compressed data offset {actual:#x}, when {expected:#x} was expected." )] ZlibTrailerBlockWrongCmpOfs { + /// Block descriptor index. index: usize, + /// Actual compressed data offset. actual: u64, + /// Expected compressed data offset. expected: u64, }, + /// ZLIB block descriptor {index} reports compressed size {compressed_size} + /// and uncompressed size {uncompressed_size}. #[error( "ZLIB block descriptor {index} reports compressed size {compressed_size} and uncompressed size {uncompressed_size}." )] ZlibExpansion { + /// Block descriptor index. index: usize, + /// Compressed size. compressed_size: u32, + /// Uncompressed size. uncompressed_size: u32, }, + /// ZLIB trailer at unexpected offset. #[error( - "ZLIB trailer is at offset {zheader:#x} but {descriptors:#x} would be expected from block descriptors." + "ZLIB trailer is at offset {actual:#x} but {expected:#x} would be expected from block descriptors." )] - ZlibTrailerOffsetInconsistency { descriptors: u64, zheader: u64 }, + ZlibTrailerOffsetInconsistency { + /// Expected offset. + expected: u64, + /// Actual offset. + actual: u64, + }, + /// File metadata says it contains {expected} cases, but {actual} cases were read. #[error("File metadata says it contains {expected} cases, but {actual} cases were read.")] - WrongNumberOfCases { expected: u64, actual: u64 }, + WrongNumberOfCases { + /// Expected number of cases. + expected: u64, + /// Actual number of cases. + actual: u64, + }, + /// Encoding error. #[error("{0}")] - EncodingError(EncodingError), + EncodingError( + /// The error. 
+ #[from] + EncodingError, + ), } /// A warning reading a raw system file record. @@ -265,6 +384,7 @@ pub struct Warning { impl std::error::Error for Warning {} impl Warning { + /// Constructs a new [Warning] from `offsets` and `details`. pub fn new(offsets: Option>, details: impl Into) -> Self { Self { offsets, @@ -347,15 +467,23 @@ pub enum WarningDetails { #[error("In ZLIB trailer: {0}")] ZlibTrailer(#[from] ZlibTrailerWarning), + /// Bad encoding name. #[error("Encoding record contains an encoding name that is not valid UTF-8.")] BadEncodingName, + /// Mis-encoded bytes in string. // XXX This is risky because `text` might be arbitarily long. #[error("Text string contains invalid bytes for {encoding} encoding: {text:?}")] - MalformedString { encoding: String, text: String }, + MalformedString { + /// The encoding. + encoding: String, + /// The problematic string. + text: String, + }, + /// Encoding error. #[error("{0}")] - EncodingError(EncodingError), + EncodingError(#[from] EncodingError), } impl From for WarningDetails { @@ -364,21 +492,27 @@ impl From for WarningDetails { } } +/// A warning for a file header. #[derive(ThisError, Debug)] pub enum HeaderWarning { + /// Unexpected compression bias. #[error("Compression bias is {0} instead of the usual values of 0 or 100.")] UnexpectedBias(f64), } +/// Warning for a variable record. #[derive(ThisError, Debug)] pub enum VariableWarning { - #[error("Missing value record with range not allowed for string variable")] + /// Missing value record with range not allowed for string variable. + #[error("Missing value record with range not allowed for string variable.")] MissingValueStringRange, + /// Missing value not allowed for long string continuation. #[error("Missing value not allowed for long string continuation")] MissingValueContinuation, } +/// Warning for an extension record. #[derive(ThisError, Debug)] pub enum ExtensionWarning { /// Unexpected end of data. 
@@ -388,20 +522,27 @@ pub enum ExtensionWarning { /// Invalid record size. #[error("{record} has bad size {size} bytes instead of the expected {expected_size}.")] BadRecordSize { - record: String, + /// Name of the record. + record: &'static str, + /// Size of the elements in the record, in bytes. size: u32, + /// Expected size of the elements in the record, in bytes. expected_size: u32, }, /// Invalid record count. #[error("{record} has bad count {count} instead of the expected {expected_count}.")] BadRecordCount { - record: String, + /// Name of the record. + record: &'static str, + /// Number of elements in the record. count: u32, + /// Expected number of elements in the record. expected_count: u32, }, } +/// Warning for a value label record. #[derive(ThisError, Debug)] pub enum ValueLabelWarning { /// At least one valid variable index for value labels is required but none were specified. @@ -411,7 +552,9 @@ pub enum ValueLabelWarning { /// Mixed variable types in value label record. #[error("First variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", !var_type)] MixedVarTypes { + /// Variable type. var_type: VarType, + /// Indexes of variables with the other type. wrong_types: Vec, }, @@ -419,153 +562,281 @@ pub enum ValueLabelWarning { #[error( "One or more variable indexes were not in the valid range [1,{max}] or referred to string continuations: {invalid:?}" )] - InvalidVarIndexes { max: usize, invalid: Vec }, + InvalidVarIndexes { + /// Maximum variable index. + max: usize, + /// Invalid variable indexes. + invalid: Vec, + }, } +/// Warning for a long string missing value record. #[derive(ThisError, Debug)] pub enum LongStringMissingValuesWarning { + /// Invalid value length. #[error("Value length at offset {offset:#x} is {value_len} instead of the expected 8.")] - BadValueLength { offset: u64, value_len: u32 }, + BadValueLength { + /// Offset of the value length. 
+ offset: u64, + /// Actual value length. + value_len: u32, + }, + /// Invalid variable name. #[error("Invalid variable name. {0}")] - InvalidVariableName(IdError), + InvalidVariableName( + /// Variable name error. + IdError, + ), } +/// Warning for a long string value label record. #[derive(ThisError, Debug)] pub enum LongStringValueLabelWarning { + /// Invalid variable name. #[error("Invalid variable name. {0}")] - InvalidVariableName(IdError), + InvalidVariableName( + /// Variable name error. + IdError, + ), } +/// Warning for a long variable name record. #[derive(ThisError, Debug)] pub enum LongNameWarning { + /// Missing `=`. #[error("Missing `=` separator.")] LongNameMissingEquals, + /// Invalid short name. #[error("Invalid short name. {0}")] - InvalidShortName(IdError), + InvalidShortName( + /// Short variable name error. + IdError, + ), + /// Invalid long name. #[error("Invalid long name. {0}")] - InvalidLongName(IdError), + InvalidLongName( + /// Long variable name error. + IdError, + ), } +/// Warning for a very long string variable record. #[derive(ThisError, Debug)] pub enum VeryLongStringWarning { + /// Invalid variable name. #[error("Invalid variable name. {0}")] - InvalidLongStringName(IdError), + InvalidLongStringName( + /// Variable name error. + IdError, + ), + /// Missing delimiter. #[error("Missing delimiter in {0:?}.")] VeryLongStringMissingDelimiter(String), + /// Invalid length. #[error("Invalid length in {0:?}.")] - VeryLongStringInvalidLength(String), + VeryLongStringInvalidLength( + /// Length. + String, + ), } +/// Warning for a multiple response set record. #[derive(ThisError, Debug)] pub enum MultipleResponseWarning { + /// Invalid multiple response set name. #[error("Invalid multiple response set name. {0}")] - InvalidMrSetName(IdError), + InvalidMrSetName( + /// Variable name error. + IdError, + ), + /// Invalid variable name. #[error("Invalid variable name. 
{0}")] - InvalidMrSetVariableName(IdError), + InvalidMrSetVariableName( + /// Variable name error. + IdError, + ), + /// Invalid multiple dichotomy label type. #[error("Invalid multiple dichotomy label type.")] InvalidMultipleDichotomyLabelType, + /// Invalid multiple response type. #[error("Invalid multiple response type.")] InvalidMultipleResponseType, + /// Syntax error. #[error("Syntax error ({0}).")] - MultipleResponseSyntaxError(&'static str), + MultipleResponseSyntaxError( + /// Detailed error. + &'static str, + ), + /// Syntax error parsing counted string (missing trailing space). #[error("Syntax error parsing counted string (missing trailing space).")] CountedStringMissingSpace, + /// Syntax error parsing counted string (invalid UTF-8). #[error("Syntax error parsing counted string (invalid UTF-8).")] CountedStringInvalidUTF8, + /// Syntax error parsing counted string (invalid length). #[error("Syntax error parsing counted string (invalid length {0:?}).")] - CountedStringInvalidLength(String), + CountedStringInvalidLength( + /// Length. + String, + ), + /// Syntax error parsing counted string (length goes past end of input). #[error("Syntax error parsing counted string (length {0:?} goes past end of input).")] - CountedStringTooLong(usize), + CountedStringTooLong( + /// Length. + usize, + ), } +/// Warning for a file or variable attribute record. #[derive(ThisError, Debug)] pub enum AttributeWarning { + /// Invalid attribute name. #[error("Invalid attribute name. {0}")] - InvalidAttributeName(IdError), + InvalidAttributeName( + /// Attribute name error. + IdError, + ), + /// Invalid variable name in attribute record. #[error("Invalid variable name in attribute record. {0}")] - InvalidAttributeVariableName(IdError), + InvalidAttributeVariableName( + /// Variable name error. + IdError, + ), + /// Attribute record missing left parenthesis. 
#[error("Attribute record missing left parenthesis, in {0:?}.")] - AttributeMissingLParen(String), + AttributeMissingLParen( + /// Bad syntax. + String, + ), + /// Attribute lacks value. #[error("Attribute for {name}[{}] lacks value.", index + 1)] - AttributeMissingValue { name: Identifier, index: usize }, + AttributeMissingValue { + /// Attribute name. + name: Identifier, + /// 0-based index. + index: usize, + }, + /// Attribute missing quotations. #[error("Attribute for {name}[{}] missing quotations.", index + 1)] - AttributeMissingQuotes { name: Identifier, index: usize }, + AttributeMissingQuotes { + /// Attribute name. + name: Identifier, + /// 0-based index. + index: usize, + }, + /// Variable attribute missing `:`. #[error("Variable attribute missing `:`.")] VariableAttributeMissingColon, + /// Duplicate attributes for variable. #[error("Duplicate attributes for variable {variable}: {}.", attributes.iter().join(", "))] DuplicateVariableAttributes { + /// Variable name. variable: Identifier, + /// Attributes with duplicates. attributes: Vec, }, + /// Duplicate dataset attributes. #[error("Duplicate dataset attributes with names: {}.", attributes.iter().join(", "))] - DuplicateFileAttributes { attributes: Vec }, + DuplicateFileAttributes { + /// Attributes with duplicates. + attributes: Vec, + }, + /// File attributes record contains trailing garbage. #[error("File attributes record contains trailing garbage.")] FileAttributesTrailingGarbage, } +/// Warning for a variable display record. #[derive(ThisError, Debug)] pub enum VariableDisplayWarning { + /// Wrong number of variable display items. #[error("Record contains {count} items but should contain either {first} or {second}.")] InvalidVariableDisplayCount { + /// Actual count. count: usize, + /// First valid count. first: usize, + /// Second valid count. second: usize, }, + /// Invalid variable measurement level value. 
#[error("Invalid variable measurement level value {0}.")] - InvalidMeasurement(u32), + InvalidMeasurement( + /// Invalid value. + u32, + ), + /// Invalid variable display alignment value. #[error("Invalid variable display alignment value {0}.")] - InvalidAlignment(u32), + InvalidAlignment( + /// Invalid value. + u32, + ), } +/// Warning for a variable sets record. #[derive(ThisError, Debug)] pub enum VariableSetWarning { + /// Invalid variable name. #[error("Invalid variable name. {0}")] - InvalidVariableSetName(IdError), + InvalidVariableSetName( + /// Variable name error. + IdError, + ), + /// Missing name delimiter. #[error("Missing name delimiter.")] VariableSetMissingEquals, } +/// Warning for a ZLIB trailer record. #[derive(ThisError, Debug)] pub enum ZlibTrailerWarning { + /// Wrong block size. #[error( "ZLIB block descriptor {index} reported block size {actual:#x}, when {expected:#x} was expected." )] ZlibTrailerBlockWrongSize { + /// 0-based block descriptor index. index: usize, + /// Actual block size. actual: u32, + /// Expected block size. expected: u32, }, + /// Block too big. #[error( "ZLIB block descriptor {index} reported block size {actual:#x}, when at most {max_expected:#x} was expected." )] ZlibTrailerBlockTooBig { + /// 0-based block descriptor index. index: usize, + /// Actual block size. actual: u32, + /// Maximum expected block size. max_expected: u32, }, } @@ -765,14 +1036,16 @@ impl Record { /// Decodes this record into a [DecodedRecord] using `decoder`. 
pub fn decode(self, decoder: &mut Decoder) -> Result { Ok(match self { - Record::Header(record) => record.decode(decoder), - Record::Variable(record) => record.decode(decoder), + Record::Header(record) => DecodedRecord::Header(record.decode(decoder)), + Record::Variable(record) => DecodedRecord::Variable(record.decode(decoder)), Record::ValueLabel(record) => DecodedRecord::ValueLabel(record.decode(decoder)), - Record::Document(record) => record.decode(decoder), + Record::Document(record) => DecodedRecord::Document(record.decode(decoder)), Record::IntegerInfo(record) => DecodedRecord::IntegerInfo(record.clone()), Record::FloatInfo(record) => DecodedRecord::FloatInfo(record.clone()), Record::VarDisplay(record) => DecodedRecord::VarDisplay(record.clone()), - Record::MultipleResponse(record) => record.decode(decoder), + Record::MultipleResponse(record) => { + DecodedRecord::MultipleResponse(record.decode(decoder)) + } Record::LongStringValueLabels(record) => { DecodedRecord::LongStringValueLabels(record.decode(decoder)) } @@ -835,9 +1108,9 @@ pub fn infer_encoding( match get_encoding(encoding, character_code) { Ok(encoding) => Ok(encoding), - Err(err @ EncodingError::Ebcdic) => Err(Error::new(None, ErrorDetails::EncodingError(err))), + Err(err @ EncodingError::Ebcdic) => Err(Error::new(None, err.into())), Err(err) => { - warn(Warning::new(None, WarningDetails::EncodingError(err))); + warn(Warning::new(None, err)); // Warn that we're using the default encoding. Ok(default_encoding()) } @@ -994,8 +1267,20 @@ impl TryFrom for VarWidth { /// An 8-byte [Datum] but we don't know the string width or character encoding. #[derive(Copy, Clone)] pub enum RawDatum { - Number(Option), - String([u8; 8]), + /// Number. + Number( + /// Numeric value. + /// + /// `None` represents the system-missing value. + Option, + ), + /// String. + String( + /// String value. + /// + /// The true string width and character encoding are unknown. 
+ [u8; 8], + ), } impl Debug for RawDatum { @@ -1669,8 +1954,12 @@ impl Debug for UntypedDatum { } } +/// An `N`-byte raw string whose type and encoding are unknown. #[derive(Copy, Clone)] -pub struct RawStrArray(pub [u8; N]); +pub struct RawStrArray( + /// Content. + pub [u8; N], +); impl From<[u8; N]> for RawStrArray { fn from(source: [u8; N]) -> Self { diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index 2561d32c84..fe2588b786 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -20,23 +20,29 @@ use crate::{ endian::{Endian, Parse}, identifier::{Error as IdError, Identifier}, sys::raw::{ - read_bytes, read_string, read_vec, AttributeWarning, DecodedRecord, Decoder, Error, - ErrorDetails, ExtensionWarning, HeaderWarning, LongNameWarning, - LongStringMissingValuesWarning, LongStringValueLabelWarning, Magic, - MultipleResponseWarning, RawDatum, RawStrArray, RawWidth, Record, UntypedDatum, - ValueLabelWarning, VarTypes, VariableDisplayWarning, VariableSetWarning, VariableWarning, - VeryLongStringWarning, Warning, WarningDetails, ZlibTrailerWarning, + read_bytes, read_string, read_vec, AttributeWarning, Decoder, Error, ErrorDetails, + ExtensionWarning, HeaderWarning, LongNameWarning, LongStringMissingValuesWarning, + LongStringValueLabelWarning, Magic, MultipleResponseWarning, RawDatum, RawStrArray, + RawWidth, Record, UntypedDatum, ValueLabelWarning, VarTypes, VariableDisplayWarning, + VariableSetWarning, VariableWarning, VeryLongStringWarning, Warning, WarningDetails, + ZlibTrailerWarning, }, }; use binrw::BinRead; +/// Type of compression in a system file. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum Compression { + /// Simple bytecode-based compression. Simple, + /// [ZLIB] compression. + /// + /// [ZLIB]: https://www.zlib.net/ ZLib, } +/// A file header record in a system file. 
#[derive(Clone)] pub struct HeaderRecord where @@ -116,7 +122,11 @@ where } impl HeaderRecord { - pub fn read(r: &mut R, warn: &mut dyn FnMut(Warning)) -> Result { + /// Reads a header record from `r`, reporting any warnings via `warn`. + pub fn read(r: &mut R, warn: &mut dyn FnMut(Warning)) -> Result + where + R: Read + Seek, + { let header_bytes = read_vec(r, 176).map_err(|e| { Error::new( None, @@ -209,12 +219,13 @@ impl HeaderRecord { }) } - pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord { + /// Decodes this record with `decoder` and returns the decoded version. + pub fn decode(self, decoder: &mut Decoder) -> HeaderRecord { let eye_catcher = decoder.decode(&self.eye_catcher).to_string(); let file_label = decoder.decode(&self.file_label).to_string(); let creation_date = decoder.decode(&self.creation_date).to_string(); let creation_time = decoder.decode(&self.creation_time).to_string(); - DecodedRecord::Header(HeaderRecord { + HeaderRecord { eye_catcher, weight_index: self.weight_index, n_cases: self.n_cases, @@ -227,7 +238,7 @@ impl HeaderRecord { creation_date, creation_time, endian: self.endian, - }) + } } } @@ -293,7 +304,7 @@ fn format_name(type_: u32) -> Cow<'static, str> { } impl MissingValues { - pub fn read( + fn read( r: &mut R, offsets: Range, raw_width: RawWidth, @@ -394,6 +405,7 @@ impl MissingValues { } } +/// A variable record in a system file. #[derive(Clone)] pub struct VariableRecord where @@ -436,11 +448,15 @@ where } impl VariableRecord { - pub fn read( + /// Reads a variable record from `r`. + pub fn read( r: &mut R, endian: Endian, warn: &mut dyn FnMut(Warning), - ) -> Result { + ) -> Result + where + R: Read + Seek, + { #[derive(BinRead)] struct RawVariableRecord { width: i32, @@ -507,8 +523,9 @@ impl VariableRecord { })) } - pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord { - DecodedRecord::Variable(VariableRecord { + /// Decodes a variable record using `decoder`. 
+ pub fn decode(self, decoder: &mut Decoder) -> VariableRecord { + VariableRecord { offsets: self.offsets.clone(), width: self.width, name: decoder.decode(&self.name).to_string(), @@ -519,20 +536,27 @@ impl VariableRecord { .label .as_ref() .map(|label| decoder.decode(label).to_string()), - }) + } } } +/// A value and label in a system file. #[derive(Clone, Debug)] pub struct ValueLabel where D: Debug, S: Debug, { + /// The value being labeled. pub datum: D, + /// The label. pub label: S, } +/// A value label record in a system file. +/// +/// This represents both the type-3 and type-4 records together, since they are +/// always paired anyway. #[derive(Clone)] pub struct ValueLabelRecord where @@ -702,6 +726,7 @@ impl ValueLabelRecord { }))) } + /// Decodes a value label record using `decoder`. pub fn decode(self, decoder: &mut Decoder) -> ValueLabelRecord { let labels = self .labels @@ -725,11 +750,13 @@ impl ValueLabelRecord { } } +/// A document record in a system file. #[derive(Clone, Debug)] pub struct DocumentRecord where S: Debug, { + /// The range of file offsets occupied by the record. pub offsets: Range, /// The document, as an array of lines. Raw lines are exactly 80 bytes long @@ -737,6 +764,7 @@ where pub lines: Vec, } +/// One line in a document. pub type RawDocumentLine = RawStrArray; /// Length of a line in a document. Document lines are fixed-length and @@ -748,7 +776,11 @@ impl DocumentRecord { /// the maximum number that will fit in a 32-bit space. pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN; - pub fn read(r: &mut R, endian: Endian) -> Result { + /// Reads a document record from `r`. 
+ pub fn read(r: &mut R, endian: Endian) -> Result + where + R: Read + Seek, + { let start_offset = r.stream_position()?; let n: u32 = endian.parse(read_bytes(r)?); let n = n as usize; @@ -772,44 +804,66 @@ impl DocumentRecord { } } - pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord { - DecodedRecord::Document(DocumentRecord { + /// Decodes the document record using `decoder`. + pub fn decode(self, decoder: &mut Decoder) -> DocumentRecord { + DocumentRecord { offsets: self.offsets.clone(), lines: self .lines .iter() .map(|s| decoder.decode_slice(&s.0).to_string()) .collect(), - }) + } } } +/// Constraints on an extension record in a system file. pub struct ExtensionRecord<'a> { + /// The allowed size for elements in the extension record, or `None` to not + /// enforce a particular size. pub size: Option, + + /// The allowed number of elements in the extension record, or `None` to not + /// enforce a particular count. pub count: Option, + + /// The name of the record, for error messages. pub name: &'a str, } +/// An integer info record in a system file. #[derive(Clone, Debug)] pub struct IntegerInfoRecord { + /// File offsets occupied by the record. pub offsets: Range, + + /// Version number. + /// + /// e.g. `(1,2,3)` for version 1.2.3. pub version: (i32, i32, i32), + + /// Identifies the type of machine. + /// + /// Mostly useless. PSPP uses value -1. pub machine_code: i32, + + /// Floating point representation (1 for IEEE 754). pub floating_point_rep: i32, + + /// [Compression]. pub compression_code: i32, + + /// Endianness. pub endianness: i32, + + /// Character encoding (usually a code page number). pub character_code: i32, } -static INTEGER_INFO_RECORD: ExtensionRecord = ExtensionRecord { - size: Some(4), - count: Some(8), - name: "integer record", -}; - impl IntegerInfoRecord { + /// Parses this record from `ext`. 
pub fn parse(ext: &Extension, endian: Endian) -> Result { - ext.check_size(&INTEGER_INFO_RECORD)?; + ext.check_size(Some(4), Some(8), "integer record")?; let mut input = &ext.data[..]; let data: Vec = (0..8) @@ -827,15 +881,10 @@ impl IntegerInfoRecord { } } -static FLOAT_INFO_RECORD: ExtensionRecord = ExtensionRecord { - size: Some(8), - count: Some(3), - name: "floating point record", -}; - impl FloatInfoRecord { + /// Parses this record from `ext`. pub fn parse(ext: &Extension, endian: Endian) -> Result { - ext.check_size(&FLOAT_INFO_RECORD)?; + ext.check_size(Some(8), Some(3), "floating point record")?; let mut input = &ext.data[..]; let data: Vec = (0..3) @@ -849,23 +898,36 @@ impl FloatInfoRecord { } } +/// A floating-point info record. #[derive(Clone, Debug)] pub struct FloatInfoRecord { + /// Value used for system-missing values. pub sysmis: f64, + + /// Highest numeric value (e.g. [f64::MAX]). pub highest: f64, + + /// Smallest numeric value (e.g. -[f64::MAX]). pub lowest: f64, } +/// Long variable names record. #[derive(Clone, Debug)] -pub struct RawLongNamesRecord(TextRecord); +pub struct RawLongNamesRecord( + /// Text contents of record. + TextRecord, +); impl RawLongNamesRecord { + /// Parses this record from `extension`. pub fn parse(extension: Extension) -> Result { Ok(Record::LongNames(Self(TextRecord::parse( extension, "long names record", )?))) } + + /// Decodes this record using `decoder`. pub fn decode(self, decoder: &mut Decoder) -> LongNamesRecord { let input = decoder.decode(&self.0.text); let mut names = Vec::new(); @@ -880,8 +942,10 @@ impl RawLongNamesRecord { } } +/// An extension record whose contents are a text string. #[derive(Clone, Debug)] pub struct TextRecord { + /// Range of file offsets for this record in bytes. pub offsets: Range, /// The text content of the record. 
@@ -889,12 +953,9 @@ pub struct TextRecord { } impl TextRecord { - pub fn parse(extension: Extension, name: &str) -> Result { - extension.check_size(&ExtensionRecord { - size: Some(1), - count: None, - name, - })?; + /// Parses this record from `extension`. + pub fn parse(extension: Extension, name: &'static str) -> Result { + extension.check_size(Some(1), None, name)?; Ok(Self { offsets: extension.offsets, text: extension.data.into(), @@ -902,13 +963,18 @@ impl TextRecord { } } +/// A very long string parsed from a [VeryLongStringsRecord]. #[derive(Clone, Debug)] pub struct VeryLongString { + /// Short name of very long string variable. pub short_name: Identifier, + + /// Length of very long string variable (in `256..=32767`). pub length: u16, } impl VeryLongString { + /// Parses a [VeryLongString] from `input` using `decoder`. fn parse(decoder: &Decoder, input: &str) -> Result { let Some((short_name, length)) = input.split_once('=') else { return Err(VeryLongStringWarning::VeryLongStringMissingDelimiter(input.into()).into()); @@ -924,19 +990,27 @@ impl VeryLongString { } } +/// A very long string record as text. #[derive(Clone, Debug)] pub struct RawVeryLongStringsRecord(TextRecord); +/// A parsed very long string record. #[derive(Clone, Debug)] -pub struct VeryLongStringsRecord(pub Vec); +pub struct VeryLongStringsRecord( + /// The very long strings. + pub Vec, +); impl RawVeryLongStringsRecord { + /// Parses this record from `extension`. pub fn parse(extension: Extension) -> Result { Ok(Record::VeryLongStrings(Self(TextRecord::parse( extension, "very long strings record", )?))) } + + /// Decodes this record using `decoder`. pub fn decode(self, decoder: &mut Decoder) -> VeryLongStringsRecord { let input = decoder.decode(&self.0.text); let mut very_long_strings = Vec::new(); @@ -955,16 +1029,25 @@ impl RawVeryLongStringsRecord { } } +/// The type of a multiple-response set. #[derive(Clone, Debug)] pub enum MultipleResponseType { + /// Multiple-dichotomy set. 
MultipleDichotomy { + /// The value that is counted in the set. value: RawString, + + /// What categories are labeled. labels: CategoryLabels, }, + + /// Multiple-category set. MultipleCategory, } impl MultipleResponseType { + /// Parses a [MultipleResponseType] from `input`, returning the type and the + /// input remaining to be parsed. fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), WarningDetails> { let (mr_type, input) = match input.split_first() { Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input), @@ -998,19 +1081,26 @@ impl MultipleResponseType { } } +/// A multiple-response set in a system file. #[derive(Clone, Debug)] pub struct MultipleResponseSet where I: Debug, S: Debug, { + /// The set's name. pub name: I, + /// The set's label. pub label: S, + /// The type of multiple-response set. pub mr_type: MultipleResponseType, + /// Short names of the variables in the set. pub short_names: Vec, } impl MultipleResponseSet { + /// Parses a multiple-response set from `input`. Returns the set and the + /// input remaining to be parsed following the set. fn parse(input: &[u8]) -> Result<(Self, &[u8]), WarningDetails> { let Some(equals) = input.iter().position(|&b| b == b'=') else { return Err(MultipleResponseWarning::MultipleResponseSyntaxError("missing `=`").into()); @@ -1063,6 +1153,8 @@ impl MultipleResponseSet { )) } + /// Decodes this multiple-response set using `decoder`. `offsets` is used + /// for issuing warnings. fn decode( &self, offsets: &Range, @@ -1089,25 +1181,24 @@ impl MultipleResponseSet { } } +/// A multiple-response set record in a system file. #[derive(Clone, Debug)] pub struct MultipleResponseRecord where I: Debug, S: Debug, { + /// File offsets of the record. pub offsets: Range, + + /// The multiple-response sets. 
pub sets: Vec>, } -static MULTIPLE_RESPONSE_RECORD: ExtensionRecord = ExtensionRecord { - size: Some(1), - count: None, - name: "multiple response set record", -}; - impl MultipleResponseRecord { - fn parse(ext: &Extension, _endian: Endian) -> Result { - ext.check_size(&MULTIPLE_RESPONSE_RECORD)?; + /// Parses a multiple-response set from `ext`. + fn parse(ext: &Extension) -> Result { + ext.check_size(Some(1), None, "multiple response set record")?; let mut input = &ext.data[..]; let mut sets = Vec::new(); @@ -1130,7 +1221,8 @@ impl MultipleResponseRecord { } impl MultipleResponseRecord { - pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord { + /// Decodes this record using `decoder`. + pub fn decode(self, decoder: &mut Decoder) -> MultipleResponseRecord { let mut sets = Vec::new(); for set in self.sets.iter() { if let Some(set) = set @@ -1140,10 +1232,10 @@ impl MultipleResponseRecord { sets.push(set); } } - DecodedRecord::MultipleResponse(MultipleResponseRecord { + MultipleResponseRecord { offsets: self.offsets, sets, - }) + } } } @@ -1187,29 +1279,35 @@ impl Alignment { } } +/// Variable display settings for one variable, in a system file. #[derive(Clone, Debug)] pub struct VarDisplay { + /// Measurement level. pub measure: Option, + + /// Variable display width. pub width: Option, + + /// Variable alignment. pub alignment: Option, } +/// A variable display record in a system file. #[derive(Clone, Debug)] -pub struct VarDisplayRecord(pub Vec); +pub struct VarDisplayRecord( + /// Variable display settings for each variable. + pub Vec, +); impl VarDisplayRecord { + /// Parses a variable display record from `ext` given variable types `var_types`. 
fn parse( ext: &Extension, var_types: &VarTypes, endian: Endian, warn: &mut dyn FnMut(Warning), ) -> Result { - static VAR_DISPLAY_RECORD: ExtensionRecord = ExtensionRecord { - size: Some(4), - count: None, - name: "variable display record", - }; - ext.check_size(&VAR_DISPLAY_RECORD)?; + ext.check_size(Some(4), None, "variable display record")?; let n_vars = var_types.n_vars(); let has_width = if ext.count as usize == 3 * n_vars { @@ -1245,6 +1343,7 @@ impl VarDisplayRecord { } } +/// Missing values for one long string variable. #[derive(Clone, Debug)] pub struct LongStringMissingValues where @@ -1258,6 +1357,7 @@ where } impl LongStringMissingValues { + /// Decodes these settings using `decoder`. fn decode( &self, decoder: &mut Decoder, @@ -1269,28 +1369,27 @@ impl LongStringMissingValues { } } +/// Long string missing values record in a system file. #[derive(Clone, Debug)] pub struct LongStringMissingValueRecord where N: Debug, { + /// The record's file offsets. pub offsets: Range, + + /// The long string missing values. pub values: Vec>, } -static LONG_STRING_MISSING_VALUE_RECORD: ExtensionRecord = ExtensionRecord { - size: Some(1), - count: None, - name: "long string missing values record", -}; - impl LongStringMissingValueRecord { + /// Parses this record from `ext`. fn parse( ext: &Extension, endian: Endian, warn: &mut dyn FnMut(Warning), ) -> Result { - ext.check_size(&LONG_STRING_MISSING_VALUE_RECORD)?; + ext.check_size(Some(1), None, "long string missing values record")?; let mut input = &ext.data[..]; let mut missing_value_set = Vec::new(); @@ -1335,9 +1434,8 @@ impl LongStringMissingValueRecord { }, )) } -} -impl LongStringMissingValueRecord { + /// Decodes this record using `decoder`. 
pub fn decode(self, decoder: &mut Decoder) -> LongStringMissingValueRecord { let mut mvs = Vec::with_capacity(self.values.len()); for mv in self.values.iter() { @@ -1356,18 +1454,17 @@ impl LongStringMissingValueRecord { } } +/// A character encoding record in a system file. #[derive(Clone, Debug)] -pub struct EncodingRecord(pub String); - -static ENCODING_RECORD: ExtensionRecord = ExtensionRecord { - size: Some(1), - count: None, - name: "encoding record", -}; +pub struct EncodingRecord( + /// The encoding name. + pub String, +); impl EncodingRecord { - fn parse(ext: &Extension, _endian: Endian) -> Result { - ext.check_size(&ENCODING_RECORD)?; + /// Parses this record from `ext`. + fn parse(ext: &Extension) -> Result { + ext.check_size(Some(1), None, "encoding record")?; Ok(Record::Encoding(EncodingRecord( String::from_utf8(ext.data.clone()).map_err(|_| WarningDetails::BadEncodingName)?, @@ -1375,6 +1472,7 @@ impl EncodingRecord { } } +/// The extended number of cases record in a system file. #[derive(Clone, Debug)] pub struct NumberOfCasesRecord { /// Always observed as 1. 
@@ -1384,15 +1482,9 @@ pub struct NumberOfCasesRecord { pub n_cases: u64, } -static NUMBER_OF_CASES_RECORD: ExtensionRecord = ExtensionRecord { - size: Some(8), - count: Some(2), - name: "extended number of cases record", -}; - impl NumberOfCasesRecord { fn parse(ext: &Extension, endian: Endian) -> Result { - ext.check_size(&NUMBER_OF_CASES_RECORD)?; + ext.check_size(Some(8), Some(2), "extended number of cases record")?; let mut input = &ext.data[..]; let one = endian.parse(read_bytes(&mut input)?); @@ -1736,30 +1828,35 @@ pub struct Extension { } impl Extension { - pub fn check_size(&self, expected: &ExtensionRecord) -> Result<(), WarningDetails> { - match expected.size { - Some(expected_size) if self.size != expected_size => { - return Err(ExtensionWarning::BadRecordSize { - record: expected.name.into(), - size: self.size, - expected_size, - } - .into()); + /// Checks that this extension has `size`-byte elements and `count` elements + /// total. Uses `name` for error reporting. + pub fn check_size( + &self, + size: Option, + count: Option, + name: &'static str, + ) -> Result<(), WarningDetails> { + if let Some(expected_size) = size + && self.size != expected_size + { + Err(ExtensionWarning::BadRecordSize { + record: name, + size: self.size, + expected_size, } - _ => (), - } - match expected.count { - Some(expected_count) if self.count != expected_count => { - return Err(ExtensionWarning::BadRecordCount { - record: expected.name.into(), - count: self.count, - expected_count, - } - .into()); + .into()) + } else if let Some(expected_count) = count + && self.count != expected_count + { + Err(ExtensionWarning::BadRecordCount { + record: name, + count: self.count, + expected_count, } - _ => (), + .into()) + } else { + Ok(()) } - Ok(()) } pub(super) fn read( @@ -1797,10 +1894,10 @@ impl Extension { 3 => IntegerInfoRecord::parse(&extension, endian), 4 => FloatInfoRecord::parse(&extension, endian), 11 => VarDisplayRecord::parse(&extension, var_types, endian, warn), - 7 | 
19 => MultipleResponseRecord::parse(&extension, endian), + 7 | 19 => MultipleResponseRecord::parse(&extension), 21 => LongStringValueLabelRecord::parse(&extension, endian), 22 => LongStringMissingValueRecord::parse(&extension, endian, warn), - 20 => EncodingRecord::parse(&extension, endian), + 20 => EncodingRecord::parse(&extension), 16 => NumberOfCasesRecord::parse(&extension, endian), 5 => RawVariableSetRecord::parse(extension), 10 => RawProductInfoRecord::parse(extension), @@ -1865,15 +1962,9 @@ where pub labels: Vec>, } -static LONG_STRING_VALUE_LABEL_RECORD: ExtensionRecord = ExtensionRecord { - size: Some(1), - count: None, - name: "long string value labels record", -}; - impl LongStringValueLabelRecord { fn parse(ext: &Extension, endian: Endian) -> Result { - ext.check_size(&LONG_STRING_VALUE_LABEL_RECORD)?; + ext.check_size(Some(1), None, "long string value labels record")?; let mut input = &ext.data[..]; let mut label_set = Vec::new(); @@ -2125,8 +2216,8 @@ impl ZTrailer { return Err(Error::new( Some(start_offset..start_offset + 24 + 24 * n_blocks as u64), ErrorDetails::ZlibTrailerOffsetInconsistency { - descriptors: expected_cmp_ofs, - zheader: zheader.ztrailer_offset, + expected: expected_cmp_ofs, + actual: zheader.ztrailer_offset, }, )); }