//! This module facilitates reading records from system files in all of their
//! raw details. Most readers will want to use higher-level interfaces.
+// Warn about missing docs, but not for items declared with `#[cfg(test)]`.
+#![cfg_attr(not(test), warn(missing_docs))]
+
use crate::{
data::{Case, Datum, RawStr, RawString},
dictionary::{VarType, VarWidth},
impl std::error::Error for Error {}
impl Error {
+ /// Constructs an error from `offsets` and `details`.
pub fn new(offsets: Option<Range<u64>>, details: ErrorDetails) -> Self {
Self { offsets, details }
}
/// Details of an [Error].
#[derive(ThisError, Debug)]
pub enum ErrorDetails {
+ /// Not an SPSS system file.
#[error("Not an SPSS system file")]
NotASystemFile,
+ /// Bad [Magic].
#[error("Invalid magic number {0:?}")]
BadMagic([u8; 4]),
+ /// I/O error.
#[error("I/O error ({0})")]
Io(#[from] IoError),
+ /// Invalid SAV compression code.
#[error("Invalid SAV compression code {0}")]
InvalidSavCompression(u32),
+ /// Invalid ZSAV compression code.
#[error("Invalid ZSAV compression code {0}")]
InvalidZsavCompression(u32),
+ /// Document record has a document line count greater than the maximum number.
#[error(
"Document record has document line count ({n}) greater than the maximum number {max}."
)]
- BadDocumentLength { n: usize, max: usize },
+ BadDocumentLength {
+ /// Number of document lines.
+ n: usize,
+ /// Maximum number of document lines.
+ max: usize,
+ },
+ /// Unrecognized record type.
#[error("Unrecognized record type {0}.")]
BadRecordType(u32),
+ /// Variable width in variable record is not in the valid range -1 to 255.
#[error("Variable width {0} in variable record is not in the valid range -1 to 255.")]
BadVariableWidth(i32),
+ /// In variable record, variable label code is not 0 or 1.
#[error("In variable record, variable label code {0} is not 0 or 1.")]
BadVariableLabelCode(u32),
+ /// Missing value code is not -3, -2, 0, 1, 2, or 3.
#[error("Missing value code ({0}) is not -3, -2, 0, 1, 2, or 3.")]
BadMissingValueCode(i32),
+ /// Numeric missing value code is not -3, -2, 0, 1, 2, or 3.
#[error("Numeric missing value code ({0}) is not -3, -2, 0, 1, 2, or 3.")]
BadNumericMissingValueCode(i32),
+ /// String missing value code is not 0, 1, 2, or 3.
#[error("String missing value code ({0}) is not 0, 1, 2, or 3.")]
BadStringMissingValueCode(i32),
+ /// Number of value labels is greater than the maximum number.
#[error("Number of value labels ({n}) is greater than the maximum number {max}.")]
- BadNumberOfValueLabels { n: u32, max: u32 },
-
- #[error(
- "Following value label record, found record type {0} instead of expected type 4 for variable index record"
- )]
- ExpectedVarIndexRecord(u32),
+ BadNumberOfValueLabels {
+ /// Number of value labels.
+ n: u32,
+ /// Maximum number of value labels.
+ max: u32,
+ },
+ /// Following value label record, found a record type other than the expected
+ /// type 4 for variable index record.
+ #[
+ error(
+ "Following value label record, found record type {0} instead of expected type 4 for variable index record"
+ )]
+ ExpectedVarIndexRecord(
+ /// Record type.
+ u32,
+ ),
+
+ /// Number of variable indexes for value labels is greater than the
+ /// maximum number.
#[error(
"Number of variables indexes for value labels ({n}) is greater than the maximum number ({max})."
)]
- TooManyVarIndexes { n: u32, max: u32 },
+ TooManyVarIndexes {
+ /// Number of variable indexes.
+ n: u32,
+ /// Maximum number of variable indexes.
+ max: u32,
+ },
+ /// Record type 7 extension record is too large for its element size and count.
#[error(
"Record type 7 subtype {subtype} is too large with element size {size} and {count} elements."
)]
- ExtensionRecordTooLarge { subtype: u32, size: u32, count: u32 },
+ ExtensionRecordTooLarge {
+ /// Subtype.
+ subtype: u32,
+ /// Element size in bytes.
+ size: u32,
+ /// Number of elements.
+ count: u32,
+ },
+ /// Unexpected end of file partway through a case.
#[error("Unexpected end of file {case_ofs} bytes into a {case_len}-byte case.")]
- EofInCase { case_ofs: u64, case_len: usize },
+ EofInCase {
+ /// Offset into case in bytes.
+ case_ofs: u64,
+ /// Expected case length in bytes.
+ case_len: usize,
+ },
+ /// Unexpected end of file partway through the compression chunks of a
+ /// compressed case.
#[error(
"Unexpected end of file {case_ofs} bytes and {n_chunks} compression chunks into a compressed case."
)]
- EofInCompressedCase { case_ofs: u64, n_chunks: usize },
-
- #[error("Data ends {case_ofs} bytes into a compressed case.")]
- PartialCompressedCase { case_ofs: u64 },
-
- #[error("At {0} bytes into compressed case, a string was found where a number was expected.")]
- CompressedNumberExpected(u64),
-
- #[error("At {0} bytes into compressed case, a number was found where a string was expected.")]
- CompressedStringExpected(u64),
+ EofInCompressedCase {
+ /// Offset into case in bytes.
+ case_ofs: u64,
+ /// Number of compression codes consumed.
+ n_chunks: usize,
+ },
+ /// Impossible `ztrailer_offset`.
#[error("Impossible ztrailer_offset {0:#x}.")]
- ImpossibleZTrailerOffset(u64),
+ ImpossibleZTrailerOffset(
+ /// `ztrailer_offset`
+ u64,
+ ),
+ /// ZLIB header's `zlib_offset` is different from the offset that was
+ /// expected.
#[error("ZLIB header's zlib_offset is {actual:#x} instead of expected {expected:#x}.")]
- UnexpectedZHeaderOffset { actual: u64, expected: u64 },
+ UnexpectedZHeaderOffset {
+ /// Actual `zlib_offset`.
+ actual: u64,
+ /// Expected `zlib_offset`.
+ expected: u64,
+ },
+ /// Invalid ZLIB trailer length.
#[error("Invalid ZLIB trailer length {0}.")]
- InvalidZTrailerLength(u64),
-
- #[error(
+ InvalidZTrailerLength(
+ /// ZLIB trailer length.
+ u64,
+ ),
+
+ /// ZLIB trailer bias is not what was expected from the file header bias.
+ #[
+ error(
"ZLIB trailer bias {actual} is not {} as expected from file header bias.",
DisplayPlainF64(*expected)
)]
- WrongZlibTrailerBias { actual: i64, expected: f64 },
+ WrongZlibTrailerBias {
+ /// ZLIB trailer bias read from file.
+ actual: i64,
+ /// Expected ZLIB trailer bias.
+ expected: f64,
+ },
+ /// ZLIB trailer "zero" field has a nonzero value.
#[error("ZLIB trailer \"zero\" field has nonzero value {0}.")]
- WrongZlibTrailerZero(u64),
+ WrongZlibTrailerZero(
+ /// Actual value that should have been zero.
+ u64,
+ ),
+ /// ZLIB trailer specifies an unexpected block size.
#[error("ZLIB trailer specifies unexpected {0}-byte block size.")]
- WrongZlibTrailerBlockSize(u32),
+ WrongZlibTrailerBlockSize(
+ /// Block size read from file.
+ u32,
+ ),
+ /// Block count in ZLIB trailer differs from expected block count calculated
+ /// from trailer length.
#[error(
"Block count {n_blocks} in ZLIB trailer differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}."
)]
BadZlibTrailerNBlocks {
+ /// Number of blocks.
n_blocks: u32,
+ /// Expected number of blocks.
expected_n_blocks: u64,
+ /// ZLIB trailer length in bytes.
ztrailer_len: u64,
},
+ /// ZLIB block descriptor reported uncompressed data offset different from
+ /// expected.
#[error(
"ZLIB block descriptor {index} reported uncompressed data offset {actual:#x}, when {expected:#x} was expected."
)]
ZlibTrailerBlockWrongUncmpOfs {
+ /// Block descriptor index.
index: usize,
+ /// Actual uncompressed data offset.
actual: u64,
+ /// Expected uncompressed data offset.
expected: u64,
},
+ /// ZLIB block descriptor reported compressed data offset different from
+ /// expected.
#[error(
"ZLIB block descriptor {index} reported compressed data offset {actual:#x}, when {expected:#x} was expected."
)]
ZlibTrailerBlockWrongCmpOfs {
+ /// Block descriptor index.
index: usize,
+ /// Actual compressed data offset.
actual: u64,
+ /// Expected compressed data offset.
expected: u64,
},
+ /// ZLIB block descriptor reports an unacceptable combination of compressed
+ /// size and uncompressed size.
#[error(
"ZLIB block descriptor {index} reports compressed size {compressed_size} and uncompressed size {uncompressed_size}."
)]
ZlibExpansion {
+ /// Block descriptor index.
index: usize,
+ /// Compressed size.
compressed_size: u32,
+ /// Uncompressed size.
uncompressed_size: u32,
},
+ /// ZLIB trailer at unexpected offset.
#[error(
- "ZLIB trailer is at offset {zheader:#x} but {descriptors:#x} would be expected from block descriptors."
+ "ZLIB trailer is at offset {actual:#x} but {expected:#x} would be expected from block descriptors."
)]
- ZlibTrailerOffsetInconsistency { descriptors: u64, zheader: u64 },
+ ZlibTrailerOffsetInconsistency {
+ /// Expected offset.
+ expected: u64,
+ /// Actual offset.
+ actual: u64,
+ },
+ /// File metadata says it contains a different number of cases than were read.
#[error("File metadata says it contains {expected} cases, but {actual} cases were read.")]
- WrongNumberOfCases { expected: u64, actual: u64 },
+ WrongNumberOfCases {
+ /// Expected number of cases.
+ expected: u64,
+ /// Actual number of cases.
+ actual: u64,
+ },
+ /// Encoding error.
#[error("{0}")]
- EncodingError(EncodingError),
+ EncodingError(
+ /// The error.
+ #[from]
+ EncodingError,
+ ),
}
/// A warning reading a raw system file record.
impl std::error::Error for Warning {}
impl Warning {
+ /// Constructs a new [Warning] from `offsets` and `details`.
pub fn new(offsets: Option<Range<u64>>, details: impl Into<WarningDetails>) -> Self {
Self {
offsets,
#[error("In ZLIB trailer: {0}")]
ZlibTrailer(#[from] ZlibTrailerWarning),
+ /// Bad encoding name.
#[error("Encoding record contains an encoding name that is not valid UTF-8.")]
BadEncodingName,
+ /// Mis-encoded bytes in string.
// XXX This is risky because `text` might be arbitrarily long.
#[error("Text string contains invalid bytes for {encoding} encoding: {text:?}")]
- MalformedString { encoding: String, text: String },
+ MalformedString {
+ /// The encoding.
+ encoding: String,
+ /// The problematic string.
+ text: String,
+ },
+ /// Encoding error.
#[error("{0}")]
- EncodingError(EncodingError),
+ EncodingError(#[from] EncodingError),
}
impl From<IoError> for WarningDetails {
}
}
+/// A warning for a file header.
#[derive(ThisError, Debug)]
pub enum HeaderWarning {
+ /// Unexpected compression bias.
#[error("Compression bias is {0} instead of the usual values of 0 or 100.")]
UnexpectedBias(f64),
}
+/// Warning for a variable record.
#[derive(ThisError, Debug)]
pub enum VariableWarning {
- #[error("Missing value record with range not allowed for string variable")]
+ /// Missing value record with range not allowed for string variable.
+ #[error("Missing value record with range not allowed for string variable.")]
MissingValueStringRange,
+ /// Missing value not allowed for long string continuation.
#[error("Missing value not allowed for long string continuation")]
MissingValueContinuation,
}
+/// Warning for an extension record.
#[derive(ThisError, Debug)]
pub enum ExtensionWarning {
/// Unexpected end of data.
/// Invalid record size.
#[error("{record} has bad size {size} bytes instead of the expected {expected_size}.")]
BadRecordSize {
- record: String,
+ /// Name of the record.
+ record: &'static str,
+ /// Size of the elements in the record, in bytes.
size: u32,
+ /// Expected size of the elements in the record, in bytes.
expected_size: u32,
},
/// Invalid record count.
#[error("{record} has bad count {count} instead of the expected {expected_count}.")]
BadRecordCount {
- record: String,
+ /// Name of the record.
+ record: &'static str,
+ /// Number of elements in the record.
count: u32,
+ /// Expected number of elements in the record.
expected_count: u32,
},
}
+/// Warning for a value label record.
#[derive(ThisError, Debug)]
pub enum ValueLabelWarning {
/// At least one valid variable index for value labels is required but none were specified.
/// Mixed variable types in value label record.
#[error("First variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", !var_type)]
MixedVarTypes {
+ /// Variable type.
var_type: VarType,
+ /// Indexes of variables with the other type.
wrong_types: Vec<u32>,
},
#[error(
"One or more variable indexes were not in the valid range [1,{max}] or referred to string continuations: {invalid:?}"
)]
- InvalidVarIndexes { max: usize, invalid: Vec<u32> },
+ InvalidVarIndexes {
+ /// Maximum variable index.
+ max: usize,
+ /// Invalid variable indexes.
+ invalid: Vec<u32>,
+ },
}
+/// Warning for a long string missing value record.
#[derive(ThisError, Debug)]
pub enum LongStringMissingValuesWarning {
+ /// Invalid value length.
#[error("Value length at offset {offset:#x} is {value_len} instead of the expected 8.")]
- BadValueLength { offset: u64, value_len: u32 },
+ BadValueLength {
+ /// Offset of the value length.
+ offset: u64,
+ /// Actual value length.
+ value_len: u32,
+ },
+ /// Invalid variable name.
#[error("Invalid variable name. {0}")]
- InvalidVariableName(IdError),
+ InvalidVariableName(
+ /// Variable name error.
+ IdError,
+ ),
}
+/// Warning for a long string value label record.
#[derive(ThisError, Debug)]
pub enum LongStringValueLabelWarning {
+ /// Invalid variable name.
#[error("Invalid variable name. {0}")]
- InvalidVariableName(IdError),
+ InvalidVariableName(
+ /// Variable name error.
+ IdError,
+ ),
}
+/// Warning for a long variable name record.
#[derive(ThisError, Debug)]
pub enum LongNameWarning {
+ /// Missing `=`.
#[error("Missing `=` separator.")]
LongNameMissingEquals,
+ /// Invalid short name.
#[error("Invalid short name. {0}")]
- InvalidShortName(IdError),
+ InvalidShortName(
+ /// Short variable name error.
+ IdError,
+ ),
+ /// Invalid long name.
#[error("Invalid long name. {0}")]
- InvalidLongName(IdError),
+ InvalidLongName(
+ /// Long variable name error.
+ IdError,
+ ),
}
+/// Warning for a very long string variable record.
#[derive(ThisError, Debug)]
pub enum VeryLongStringWarning {
+ /// Invalid variable name.
#[error("Invalid variable name. {0}")]
- InvalidLongStringName(IdError),
+ InvalidLongStringName(
+ /// Variable name error.
+ IdError,
+ ),
+ /// Missing delimiter.
#[error("Missing delimiter in {0:?}.")]
VeryLongStringMissingDelimiter(String),
+ /// Invalid length.
#[error("Invalid length in {0:?}.")]
- VeryLongStringInvalidLength(String),
+ VeryLongStringInvalidLength(
+ /// Length.
+ String,
+ ),
}
+/// Warning for a multiple response set record.
#[derive(ThisError, Debug)]
pub enum MultipleResponseWarning {
+ /// Invalid multiple response set name.
#[error("Invalid multiple response set name. {0}")]
- InvalidMrSetName(IdError),
+ InvalidMrSetName(
+ /// Variable name error.
+ IdError,
+ ),
+ /// Invalid variable name.
#[error("Invalid variable name. {0}")]
- InvalidMrSetVariableName(IdError),
+ InvalidMrSetVariableName(
+ /// Variable name error.
+ IdError,
+ ),
+ /// Invalid multiple dichotomy label type.
#[error("Invalid multiple dichotomy label type.")]
InvalidMultipleDichotomyLabelType,
+ /// Invalid multiple response type.
#[error("Invalid multiple response type.")]
InvalidMultipleResponseType,
+ /// Syntax error.
#[error("Syntax error ({0}).")]
- MultipleResponseSyntaxError(&'static str),
+ MultipleResponseSyntaxError(
+ /// Detailed error.
+ &'static str,
+ ),
+ /// Syntax error parsing counted string (missing trailing space).
#[error("Syntax error parsing counted string (missing trailing space).")]
CountedStringMissingSpace,
+ /// Syntax error parsing counted string (invalid UTF-8).
#[error("Syntax error parsing counted string (invalid UTF-8).")]
CountedStringInvalidUTF8,
+ /// Syntax error parsing counted string (invalid length).
#[error("Syntax error parsing counted string (invalid length {0:?}).")]
- CountedStringInvalidLength(String),
+ CountedStringInvalidLength(
+ /// Length.
+ String,
+ ),
+ /// Syntax error parsing counted string (length goes past end of input).
#[error("Syntax error parsing counted string (length {0:?} goes past end of input).")]
- CountedStringTooLong(usize),
+ CountedStringTooLong(
+ /// Length.
+ usize,
+ ),
}
+/// Warning for a file or variable attribute record.
#[derive(ThisError, Debug)]
pub enum AttributeWarning {
+ /// Invalid attribute name.
#[error("Invalid attribute name. {0}")]
- InvalidAttributeName(IdError),
+ InvalidAttributeName(
+ /// Attribute name error.
+ IdError,
+ ),
+ /// Invalid variable name in attribute record.
#[error("Invalid variable name in attribute record. {0}")]
- InvalidAttributeVariableName(IdError),
+ InvalidAttributeVariableName(
+ /// Variable name error.
+ IdError,
+ ),
+ /// Attribute record missing left parenthesis.
#[error("Attribute record missing left parenthesis, in {0:?}.")]
- AttributeMissingLParen(String),
+ AttributeMissingLParen(
+ /// Bad syntax.
+ String,
+ ),
+ /// Attribute lacks value.
#[error("Attribute for {name}[{}] lacks value.", index + 1)]
- AttributeMissingValue { name: Identifier, index: usize },
+ AttributeMissingValue {
+ /// Attribute name.
+ name: Identifier,
+ /// 0-based index.
+ index: usize,
+ },
+ /// Attribute missing quotations.
#[error("Attribute for {name}[{}] missing quotations.", index + 1)]
- AttributeMissingQuotes { name: Identifier, index: usize },
+ AttributeMissingQuotes {
+ /// Attribute name.
+ name: Identifier,
+ /// 0-based index.
+ index: usize,
+ },
+ /// Variable attribute missing `:`.
#[error("Variable attribute missing `:`.")]
VariableAttributeMissingColon,
+ /// Duplicate attributes for variable.
#[error("Duplicate attributes for variable {variable}: {}.", attributes.iter().join(", "))]
DuplicateVariableAttributes {
+ /// Variable name.
variable: Identifier,
+ /// Attributes with duplicates.
attributes: Vec<Identifier>,
},
+ /// Duplicate dataset attributes.
#[error("Duplicate dataset attributes with names: {}.", attributes.iter().join(", "))]
- DuplicateFileAttributes { attributes: Vec<Identifier> },
+ DuplicateFileAttributes {
+ /// Attributes with duplicates.
+ attributes: Vec<Identifier>,
+ },
+ /// File attributes record contains trailing garbage.
#[error("File attributes record contains trailing garbage.")]
FileAttributesTrailingGarbage,
}
+/// Warning for a variable display record.
#[derive(ThisError, Debug)]
pub enum VariableDisplayWarning {
+ /// Wrong number of variable display items.
#[error("Record contains {count} items but should contain either {first} or {second}.")]
InvalidVariableDisplayCount {
+ /// Actual count.
count: usize,
+ /// First valid count.
first: usize,
+ /// Second valid count.
second: usize,
},
+ /// Invalid variable measurement level value.
#[error("Invalid variable measurement level value {0}.")]
- InvalidMeasurement(u32),
+ InvalidMeasurement(
+ /// Invalid value.
+ u32,
+ ),
+ /// Invalid variable display alignment value.
#[error("Invalid variable display alignment value {0}.")]
- InvalidAlignment(u32),
+ InvalidAlignment(
+ /// Invalid value.
+ u32,
+ ),
}
+/// Warning for a variable sets record.
#[derive(ThisError, Debug)]
pub enum VariableSetWarning {
+ /// Invalid variable name.
#[error("Invalid variable name. {0}")]
- InvalidVariableSetName(IdError),
+ InvalidVariableSetName(
+ /// Variable name error.
+ IdError,
+ ),
+ /// Missing name delimiter.
#[error("Missing name delimiter.")]
VariableSetMissingEquals,
}
+/// Warning for a ZLIB trailer record.
#[derive(ThisError, Debug)]
pub enum ZlibTrailerWarning {
+ /// Wrong block size.
#[error(
"ZLIB block descriptor {index} reported block size {actual:#x}, when {expected:#x} was expected."
)]
ZlibTrailerBlockWrongSize {
+ /// 0-based block descriptor index.
index: usize,
+ /// Actual block size.
actual: u32,
+ /// Expected block size.
expected: u32,
},
+ /// Block too big.
#[error(
"ZLIB block descriptor {index} reported block size {actual:#x}, when at most {max_expected:#x} was expected."
)]
ZlibTrailerBlockTooBig {
+ /// 0-based block descriptor index.
index: usize,
+ /// Actual block size.
actual: u32,
+ /// Maximum expected block size.
max_expected: u32,
},
}
/// Decodes this record into a [DecodedRecord] using `decoder`.
pub fn decode(self, decoder: &mut Decoder) -> Result<DecodedRecord, Error> {
Ok(match self {
- Record::Header(record) => record.decode(decoder),
- Record::Variable(record) => record.decode(decoder),
+ Record::Header(record) => DecodedRecord::Header(record.decode(decoder)),
+ Record::Variable(record) => DecodedRecord::Variable(record.decode(decoder)),
Record::ValueLabel(record) => DecodedRecord::ValueLabel(record.decode(decoder)),
- Record::Document(record) => record.decode(decoder),
+ Record::Document(record) => DecodedRecord::Document(record.decode(decoder)),
Record::IntegerInfo(record) => DecodedRecord::IntegerInfo(record.clone()),
Record::FloatInfo(record) => DecodedRecord::FloatInfo(record.clone()),
Record::VarDisplay(record) => DecodedRecord::VarDisplay(record.clone()),
- Record::MultipleResponse(record) => record.decode(decoder),
+ Record::MultipleResponse(record) => {
+ DecodedRecord::MultipleResponse(record.decode(decoder))
+ }
Record::LongStringValueLabels(record) => {
DecodedRecord::LongStringValueLabels(record.decode(decoder))
}
match get_encoding(encoding, character_code) {
Ok(encoding) => Ok(encoding),
- Err(err @ EncodingError::Ebcdic) => Err(Error::new(None, ErrorDetails::EncodingError(err))),
+ Err(err @ EncodingError::Ebcdic) => Err(Error::new(None, err.into())),
Err(err) => {
- warn(Warning::new(None, WarningDetails::EncodingError(err)));
+ warn(Warning::new(None, err));
// Warn that we're using the default encoding.
Ok(default_encoding())
}
/// An 8-byte [Datum] but we don't know the string width or character encoding.
#[derive(Copy, Clone)]
pub enum RawDatum {
- Number(Option<f64>),
- String([u8; 8]),
+ /// Number.
+ Number(
+ /// Numeric value.
+ ///
+ /// `None` represents the system-missing value.
+ Option<f64>,
+ ),
+ /// String.
+ String(
+ /// String value.
+ ///
+ /// The true string width and character encoding are unknown.
+ [u8; 8],
+ ),
}
impl Debug for RawDatum {
}
}
+/// An `N`-byte raw string whose type and encoding are unknown.
#[derive(Copy, Clone)]
-pub struct RawStrArray<const N: usize>(pub [u8; N]);
+pub struct RawStrArray<const N: usize>(
+ /// Content.
+ pub [u8; N],
+);
impl<const N: usize> From<[u8; N]> for RawStrArray<N> {
fn from(source: [u8; N]) -> Self {
endian::{Endian, Parse},
identifier::{Error as IdError, Identifier},
sys::raw::{
- read_bytes, read_string, read_vec, AttributeWarning, DecodedRecord, Decoder, Error,
- ErrorDetails, ExtensionWarning, HeaderWarning, LongNameWarning,
- LongStringMissingValuesWarning, LongStringValueLabelWarning, Magic,
- MultipleResponseWarning, RawDatum, RawStrArray, RawWidth, Record, UntypedDatum,
- ValueLabelWarning, VarTypes, VariableDisplayWarning, VariableSetWarning, VariableWarning,
- VeryLongStringWarning, Warning, WarningDetails, ZlibTrailerWarning,
+ read_bytes, read_string, read_vec, AttributeWarning, Decoder, Error, ErrorDetails,
+ ExtensionWarning, HeaderWarning, LongNameWarning, LongStringMissingValuesWarning,
+ LongStringValueLabelWarning, Magic, MultipleResponseWarning, RawDatum, RawStrArray,
+ RawWidth, Record, UntypedDatum, ValueLabelWarning, VarTypes, VariableDisplayWarning,
+ VariableSetWarning, VariableWarning, VeryLongStringWarning, Warning, WarningDetails,
+ ZlibTrailerWarning,
},
};
use binrw::BinRead;
+/// Type of compression in a system file.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Compression {
+ /// Simple bytecode-based compression.
Simple,
+ /// [ZLIB] compression.
+ ///
+ /// [ZLIB]: https://www.zlib.net/
ZLib,
}
+/// A file header record in a system file.
#[derive(Clone)]
pub struct HeaderRecord<S>
where
}
impl HeaderRecord<RawString> {
- pub fn read<R: Read + Seek>(r: &mut R, warn: &mut dyn FnMut(Warning)) -> Result<Self, Error> {
+ /// Reads a header record from `r`, reporting any warnings via `warn`.
+ pub fn read<R>(r: &mut R, warn: &mut dyn FnMut(Warning)) -> Result<Self, Error>
+ where
+ R: Read + Seek,
+ {
let header_bytes = read_vec(r, 176).map_err(|e| {
Error::new(
None,
})
}
- pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
+ /// Decodes this record with `decoder` and returns the decoded version.
+ pub fn decode(self, decoder: &mut Decoder) -> HeaderRecord<String> {
let eye_catcher = decoder.decode(&self.eye_catcher).to_string();
let file_label = decoder.decode(&self.file_label).to_string();
let creation_date = decoder.decode(&self.creation_date).to_string();
let creation_time = decoder.decode(&self.creation_time).to_string();
- DecodedRecord::Header(HeaderRecord {
+ HeaderRecord {
eye_catcher,
weight_index: self.weight_index,
n_cases: self.n_cases,
creation_date,
creation_time,
endian: self.endian,
- })
+ }
}
}
}
impl MissingValues {
- pub fn read<R>(
+ fn read<R>(
r: &mut R,
offsets: Range<u64>,
raw_width: RawWidth,
}
}
+/// A variable record in a system file.
#[derive(Clone)]
pub struct VariableRecord<S>
where
}
impl VariableRecord<RawString> {
- pub fn read<R: Read + Seek>(
+ /// Reads a variable record from `r`.
+ pub fn read<R>(
r: &mut R,
endian: Endian,
warn: &mut dyn FnMut(Warning),
- ) -> Result<Record, Error> {
+ ) -> Result<Record, Error>
+ where
+ R: Read + Seek,
+ {
#[derive(BinRead)]
struct RawVariableRecord {
width: i32,
}))
}
- pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
- DecodedRecord::Variable(VariableRecord {
+ /// Decodes a variable record using `decoder`.
+ pub fn decode(self, decoder: &mut Decoder) -> VariableRecord<String> {
+ VariableRecord {
offsets: self.offsets.clone(),
width: self.width,
name: decoder.decode(&self.name).to_string(),
.label
.as_ref()
.map(|label| decoder.decode(label).to_string()),
- })
+ }
}
}
+/// A value and label in a system file.
#[derive(Clone, Debug)]
pub struct ValueLabel<D, S>
where
D: Debug,
S: Debug,
{
+ /// The value being labeled.
pub datum: D,
+ /// The label.
pub label: S,
}
+/// A value label record in a system file.
+///
+/// This represents both the type-3 and type-4 records together, since they are
+/// always paired anyway.
#[derive(Clone)]
pub struct ValueLabelRecord<D, S>
where
})))
}
+ /// Decodes a value label record using `decoder`.
pub fn decode(self, decoder: &mut Decoder) -> ValueLabelRecord<RawDatum, String> {
let labels = self
.labels
}
}
+/// A document record in a system file.
#[derive(Clone, Debug)]
pub struct DocumentRecord<S>
where
S: Debug,
{
+ /// The range of file offsets occupied by the record.
pub offsets: Range<u64>,
/// The document, as an array of lines. Raw lines are exactly 80 bytes long
pub lines: Vec<S>,
}
+/// One line in a document.
pub type RawDocumentLine = RawStrArray<DOC_LINE_LEN>;
/// Length of a line in a document. Document lines are fixed-length and
/// the maximum number that will fit in a 32-bit space.
pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
- pub fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
+ /// Reads a document record from `r`.
+ pub fn read<R>(r: &mut R, endian: Endian) -> Result<Record, Error>
+ where
+ R: Read + Seek,
+ {
let start_offset = r.stream_position()?;
let n: u32 = endian.parse(read_bytes(r)?);
let n = n as usize;
}
}
- pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
- DecodedRecord::Document(DocumentRecord {
+ /// Decodes the document record using `decoder`.
+ pub fn decode(self, decoder: &mut Decoder) -> DocumentRecord<String> {
+ DocumentRecord {
offsets: self.offsets.clone(),
lines: self
.lines
.iter()
.map(|s| decoder.decode_slice(&s.0).to_string())
.collect(),
- })
+ }
}
}
+/// Constraints on an extension record in a system file.
pub struct ExtensionRecord<'a> {
+ /// The allowed size for elements in the extension record, or `None` to not
+ /// enforce a particular size.
pub size: Option<u32>,
+
+ /// The allowed number of elements in the extension record, or `None` to not
+ /// enforce a particular count.
pub count: Option<u32>,
+
+ /// The name of the record, for error messages.
pub name: &'a str,
}
+/// An integer info record in a system file.
#[derive(Clone, Debug)]
pub struct IntegerInfoRecord {
+ /// File offsets occupied by the record.
pub offsets: Range<u64>,
+
+ /// Version number.
+ ///
+ /// e.g. `(1,2,3)` for version 1.2.3.
pub version: (i32, i32, i32),
+
+ /// Identifies the type of machine.
+ ///
+ /// Mostly useless. PSPP uses value -1.
pub machine_code: i32,
+
+ /// Floating point representation (1 for IEEE 754).
pub floating_point_rep: i32,
+
+ /// [Compression].
pub compression_code: i32,
+
+ /// Endianness.
pub endianness: i32,
+
+ /// Character encoding (usually a code page number).
pub character_code: i32,
}
-static INTEGER_INFO_RECORD: ExtensionRecord = ExtensionRecord {
- size: Some(4),
- count: Some(8),
- name: "integer record",
-};
-
impl IntegerInfoRecord {
+ /// Parses this record from `ext`.
pub fn parse(ext: &Extension, endian: Endian) -> Result<Record, WarningDetails> {
- ext.check_size(&INTEGER_INFO_RECORD)?;
+ ext.check_size(Some(4), Some(8), "integer record")?;
let mut input = &ext.data[..];
let data: Vec<i32> = (0..8)
}
}
-static FLOAT_INFO_RECORD: ExtensionRecord = ExtensionRecord {
- size: Some(8),
- count: Some(3),
- name: "floating point record",
-};
-
impl FloatInfoRecord {
+ /// Parses this record from `ext`.
pub fn parse(ext: &Extension, endian: Endian) -> Result<Record, WarningDetails> {
- ext.check_size(&FLOAT_INFO_RECORD)?;
+ ext.check_size(Some(8), Some(3), "floating point record")?;
let mut input = &ext.data[..];
let data: Vec<f64> = (0..3)
}
}
+/// A floating-point info record.
#[derive(Clone, Debug)]
pub struct FloatInfoRecord {
+ /// Value used for system-missing values.
pub sysmis: f64,
+
+ /// Highest numeric value (e.g. [f64::MAX]).
pub highest: f64,
+
+ /// Smallest numeric value (e.g. -[f64::MAX]).
pub lowest: f64,
}
+/// Long variable names record.
#[derive(Clone, Debug)]
-pub struct RawLongNamesRecord(TextRecord);
+pub struct RawLongNamesRecord(
+ /// Text contents of record.
+ TextRecord,
+);
impl RawLongNamesRecord {
+ /// Parses this record from `extension`.
pub fn parse(extension: Extension) -> Result<Record, WarningDetails> {
Ok(Record::LongNames(Self(TextRecord::parse(
extension,
"long names record",
)?)))
}
+
+ /// Decodes this record using `decoder`.
pub fn decode(self, decoder: &mut Decoder) -> LongNamesRecord {
let input = decoder.decode(&self.0.text);
let mut names = Vec::new();
}
}
+/// An extension record whose contents are a text string.
#[derive(Clone, Debug)]
pub struct TextRecord {
+ /// Range of file offsets for this record in bytes.
pub offsets: Range<u64>,
/// The text content of the record.
}
impl TextRecord {
- pub fn parse(extension: Extension, name: &str) -> Result<TextRecord, WarningDetails> {
- extension.check_size(&ExtensionRecord {
- size: Some(1),
- count: None,
- name,
- })?;
+ /// Parses this record from `extension`.
+ pub fn parse(extension: Extension, name: &'static str) -> Result<TextRecord, WarningDetails> {
+ extension.check_size(Some(1), None, name)?;
Ok(Self {
offsets: extension.offsets,
text: extension.data.into(),
}
}
+/// A very long string parsed from a [VeryLongStringsRecord].
#[derive(Clone, Debug)]
pub struct VeryLongString {
+ /// Short name of very long string variable.
pub short_name: Identifier,
+
+ /// Length of very long string variable (in `256..=32767`).
pub length: u16,
}
impl VeryLongString {
+ /// Parses a [VeryLongString] from `input` using `decoder`.
fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, WarningDetails> {
let Some((short_name, length)) = input.split_once('=') else {
return Err(VeryLongStringWarning::VeryLongStringMissingDelimiter(input.into()).into());
}
}
+/// A very long string record as text.
#[derive(Clone, Debug)]
pub struct RawVeryLongStringsRecord(TextRecord);
+/// A parsed very long string record.
#[derive(Clone, Debug)]
-pub struct VeryLongStringsRecord(pub Vec<VeryLongString>);
+pub struct VeryLongStringsRecord(
+ /// The very long strings.
+ pub Vec<VeryLongString>,
+);
impl RawVeryLongStringsRecord {
+ /// Parses this record from `extension`.
pub fn parse(extension: Extension) -> Result<Record, WarningDetails> {
Ok(Record::VeryLongStrings(Self(TextRecord::parse(
extension,
"very long strings record",
)?)))
}
+
+ /// Decodes this record using `decoder`.
pub fn decode(self, decoder: &mut Decoder) -> VeryLongStringsRecord {
let input = decoder.decode(&self.0.text);
let mut very_long_strings = Vec::new();
}
}
+/// The type of a multiple-response set.
#[derive(Clone, Debug)]
pub enum MultipleResponseType {
+ /// Multiple-dichotomy set.
MultipleDichotomy {
+ /// The value that is counted in the set.
value: RawString,
+
+ /// What categories are labeled.
labels: CategoryLabels,
},
+
+ /// Multiple-category set.
MultipleCategory,
}
impl MultipleResponseType {
+ /// Parses a [MultipleResponseType] from `input`, returning the type and the
+ /// input remaining to be parsed.
fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), WarningDetails> {
let (mr_type, input) = match input.split_first() {
Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input),
}
}
+/// A multiple-response set in a system file.
#[derive(Clone, Debug)]
pub struct MultipleResponseSet<I, S>
where
I: Debug,
S: Debug,
{
+ /// The set's name.
pub name: I,
+ /// The set's label.
pub label: S,
+ /// The type of multiple-response set.
pub mr_type: MultipleResponseType,
+ /// Short names of the variables in the set.
pub short_names: Vec<I>,
}
impl MultipleResponseSet<RawString, RawString> {
+ /// Parses a multiple-response set from `input`. Returns the set and the
+ /// input remaining to be parsed following the set.
fn parse(input: &[u8]) -> Result<(Self, &[u8]), WarningDetails> {
let Some(equals) = input.iter().position(|&b| b == b'=') else {
return Err(MultipleResponseWarning::MultipleResponseSyntaxError("missing `=`").into());
))
}
+ /// Decodes this multiple-response set using `decoder`. `offsets` is used
+ /// for issuing warnings.
fn decode(
&self,
offsets: &Range<u64>,
}
}
+/// A multiple-response set record in a system file.
#[derive(Clone, Debug)]
pub struct MultipleResponseRecord<I, S>
where
I: Debug,
S: Debug,
{
+ /// File offsets of the record.
pub offsets: Range<u64>,
+
+ /// The multiple-response sets.
pub sets: Vec<MultipleResponseSet<I, S>>,
}
-static MULTIPLE_RESPONSE_RECORD: ExtensionRecord = ExtensionRecord {
- size: Some(1),
- count: None,
- name: "multiple response set record",
-};
-
impl MultipleResponseRecord<RawString, RawString> {
- fn parse(ext: &Extension, _endian: Endian) -> Result<Record, WarningDetails> {
- ext.check_size(&MULTIPLE_RESPONSE_RECORD)?;
+ /// Parses a multiple-response set record from `ext`.
+ fn parse(ext: &Extension) -> Result<Record, WarningDetails> {
+ ext.check_size(Some(1), None, "multiple response set record")?;
let mut input = &ext.data[..];
let mut sets = Vec::new();
}
impl MultipleResponseRecord<RawString, RawString> {
- pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
+ /// Decodes this record using `decoder`.
+ pub fn decode(self, decoder: &mut Decoder) -> MultipleResponseRecord<Identifier, String> {
let mut sets = Vec::new();
for set in self.sets.iter() {
if let Some(set) = set
sets.push(set);
}
}
- DecodedRecord::MultipleResponse(MultipleResponseRecord {
+ MultipleResponseRecord {
offsets: self.offsets,
sets,
- })
+ }
}
}
}
}
+/// Variable display settings for one variable, in a system file.
#[derive(Clone, Debug)]
pub struct VarDisplay {
+ /// Measurement level.
pub measure: Option<Measure>,
+
+ /// Variable display width.
pub width: Option<u32>,
+
+ /// Variable alignment.
pub alignment: Option<Alignment>,
}
+/// A variable display record in a system file.
#[derive(Clone, Debug)]
-pub struct VarDisplayRecord(pub Vec<VarDisplay>);
+pub struct VarDisplayRecord(
+ /// Variable display settings for each variable.
+ pub Vec<VarDisplay>,
+);
impl VarDisplayRecord {
+ /// Parses a variable display record from `ext` given variable types `var_types`.
fn parse(
ext: &Extension,
var_types: &VarTypes,
endian: Endian,
warn: &mut dyn FnMut(Warning),
) -> Result<Record, WarningDetails> {
- static VAR_DISPLAY_RECORD: ExtensionRecord = ExtensionRecord {
- size: Some(4),
- count: None,
- name: "variable display record",
- };
- ext.check_size(&VAR_DISPLAY_RECORD)?;
+ ext.check_size(Some(4), None, "variable display record")?;
let n_vars = var_types.n_vars();
let has_width = if ext.count as usize == 3 * n_vars {
}
}
+/// Missing values for one long string variable.
#[derive(Clone, Debug)]
pub struct LongStringMissingValues<N>
where
}
impl LongStringMissingValues<RawString> {
+ /// Decodes these missing values using `decoder`.
fn decode(
&self,
decoder: &mut Decoder,
}
}
+/// Long string missing values record in a system file.
#[derive(Clone, Debug)]
pub struct LongStringMissingValueRecord<N>
where
N: Debug,
{
+ /// The record's file offsets.
pub offsets: Range<u64>,
+
+ /// The long string missing values.
pub values: Vec<LongStringMissingValues<N>>,
}
-static LONG_STRING_MISSING_VALUE_RECORD: ExtensionRecord = ExtensionRecord {
- size: Some(1),
- count: None,
- name: "long string missing values record",
-};
-
impl LongStringMissingValueRecord<RawString> {
+ /// Parses this record from `ext`.
fn parse(
ext: &Extension,
endian: Endian,
warn: &mut dyn FnMut(Warning),
) -> Result<Record, WarningDetails> {
- ext.check_size(&LONG_STRING_MISSING_VALUE_RECORD)?;
+ ext.check_size(Some(1), None, "long string missing values record")?;
let mut input = &ext.data[..];
let mut missing_value_set = Vec::new();
},
))
}
-}
-impl LongStringMissingValueRecord<RawString> {
+ /// Decodes this record using `decoder`.
pub fn decode(self, decoder: &mut Decoder) -> LongStringMissingValueRecord<Identifier> {
let mut mvs = Vec::with_capacity(self.values.len());
for mv in self.values.iter() {
}
}
+/// A character encoding record in a system file.
#[derive(Clone, Debug)]
-pub struct EncodingRecord(pub String);
-
-static ENCODING_RECORD: ExtensionRecord = ExtensionRecord {
- size: Some(1),
- count: None,
- name: "encoding record",
-};
+pub struct EncodingRecord(
+ /// The encoding name.
+ pub String,
+);
impl EncodingRecord {
- fn parse(ext: &Extension, _endian: Endian) -> Result<Record, WarningDetails> {
- ext.check_size(&ENCODING_RECORD)?;
+ /// Parses this record from `ext`.
+ fn parse(ext: &Extension) -> Result<Record, WarningDetails> {
+ ext.check_size(Some(1), None, "encoding record")?;
Ok(Record::Encoding(EncodingRecord(
String::from_utf8(ext.data.clone()).map_err(|_| WarningDetails::BadEncodingName)?,
}
}
+/// The extended number of cases record in a system file.
#[derive(Clone, Debug)]
pub struct NumberOfCasesRecord {
/// Always observed as 1.
pub n_cases: u64,
}
-static NUMBER_OF_CASES_RECORD: ExtensionRecord = ExtensionRecord {
- size: Some(8),
- count: Some(2),
- name: "extended number of cases record",
-};
-
impl NumberOfCasesRecord {
fn parse(ext: &Extension, endian: Endian) -> Result<Record, WarningDetails> {
- ext.check_size(&NUMBER_OF_CASES_RECORD)?;
+ ext.check_size(Some(8), Some(2), "extended number of cases record")?;
let mut input = &ext.data[..];
let one = endian.parse(read_bytes(&mut input)?);
}
impl Extension {
- pub fn check_size(&self, expected: &ExtensionRecord) -> Result<(), WarningDetails> {
- match expected.size {
- Some(expected_size) if self.size != expected_size => {
- return Err(ExtensionWarning::BadRecordSize {
- record: expected.name.into(),
- size: self.size,
- expected_size,
- }
- .into());
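+ /// Checks that this extension has `size`-byte elements and `count` elements
+ /// total. Passing `None` for `size` or `count` skips that check. The
+ /// element size is checked first, so a record that fails both checks
+ /// reports only the size mismatch. Uses `name` for error reporting.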
+ pub fn check_size(
+ &self,
+ size: Option<u32>,
+ count: Option<u32>,
+ name: &'static str,
+ ) -> Result<(), WarningDetails> {
+ if let Some(expected_size) = size
+ && self.size != expected_size
+ {
+ Err(ExtensionWarning::BadRecordSize {
+ record: name,
+ size: self.size,
+ expected_size,
}
- _ => (),
- }
- match expected.count {
- Some(expected_count) if self.count != expected_count => {
- return Err(ExtensionWarning::BadRecordCount {
- record: expected.name.into(),
- count: self.count,
- expected_count,
- }
- .into());
+ .into())
+ } else if let Some(expected_count) = count
+ && self.count != expected_count
+ {
+ Err(ExtensionWarning::BadRecordCount {
+ record: name,
+ count: self.count,
+ expected_count,
}
- _ => (),
+ .into())
+ } else {
+ Ok(())
}
- Ok(())
}
pub(super) fn read<R: Read + Seek>(
3 => IntegerInfoRecord::parse(&extension, endian),
4 => FloatInfoRecord::parse(&extension, endian),
11 => VarDisplayRecord::parse(&extension, var_types, endian, warn),
- 7 | 19 => MultipleResponseRecord::parse(&extension, endian),
+ 7 | 19 => MultipleResponseRecord::parse(&extension),
21 => LongStringValueLabelRecord::parse(&extension, endian),
22 => LongStringMissingValueRecord::parse(&extension, endian, warn),
- 20 => EncodingRecord::parse(&extension, endian),
+ 20 => EncodingRecord::parse(&extension),
16 => NumberOfCasesRecord::parse(&extension, endian),
5 => RawVariableSetRecord::parse(extension),
10 => RawProductInfoRecord::parse(extension),
pub labels: Vec<LongStringValueLabels<N, S>>,
}
-static LONG_STRING_VALUE_LABEL_RECORD: ExtensionRecord = ExtensionRecord {
- size: Some(1),
- count: None,
- name: "long string value labels record",
-};
-
impl LongStringValueLabelRecord<RawString, RawString> {
fn parse(ext: &Extension, endian: Endian) -> Result<Record, WarningDetails> {
- ext.check_size(&LONG_STRING_VALUE_LABEL_RECORD)?;
+ ext.check_size(Some(1), None, "long string value labels record")?;
let mut input = &ext.data[..];
let mut label_set = Vec::new();
return Err(Error::new(
Some(start_offset..start_offset + 24 + 24 * n_blocks as u64),
ErrorDetails::ZlibTrailerOffsetInconsistency {
- descriptors: expected_cmp_ofs,
- zheader: zheader.ztrailer_offset,
+ expected: expected_cmp_ofs,
+ actual: zheader.ztrailer_offset,
},
));
}