-#![allow(unused_variables)]
-use endian::{Endian, Parse};
-use std::io::{BufReader, Error as IoError, Read};
-use thiserror::Error;
+use std::io::Error as IoError;
+use thiserror::Error as ThisError;
pub mod endian;
+pub mod raw;
+pub mod sack;
-#[derive(Error, Debug)]
+#[derive(ThisError, Debug)]
pub enum Error {
#[error("Not an SPSS system file")]
NotASystemFile,
- #[error("I/O error ({source})")]
- Io {
- #[from]
- source: IoError,
- },
+ #[error("Invalid magic number {0:?}")]
+ BadMagic([u8; 4]),
+
+ #[error("I/O error ({0})")]
+ Io(#[from] IoError),
#[error("Invalid SAV compression code {0}")]
InvalidSavCompression(u32),
#[error("Invalid ZSAV compression code {0}")]
InvalidZsavCompression(u32),
-}
-
-#[derive(Error, Debug)]
-pub enum Warning {
- #[error("Unexpected floating-point bias {0} or unrecognized floating-point format.")]
- UnexpectedBias(f64),
-}
-
-#[derive(Copy, Clone, Debug)]
-pub enum Compression {
- Simple,
- ZLib,
-}
-pub struct Reader<R: Read> {
- r: BufReader<R>,
-}
+ #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
+ BadVariableWidth { offset: u64, width: i32 },
-pub const ASCII_MAGIC: &[u8; 4] = b"$FL2";
-pub const ASCII_ZMAGIC: &[u8; 4] = b"$FL3";
-pub const EBCDIC_MAGIC: &[u8; 4] = &[0x5b, 0xc6, 0xd3, 0xf2];
+ #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
+ BadDocumentLength { offset: u64, n: u32, max: u32 },
-pub struct FileHeader {
- /// First 4 bytes of the file, one of `ASCII_MAGIC`, `ASCII_ZMAGIC`, and
- /// `EBCDIC_MAGIC`.
- pub magic: [u8; 4],
+ #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
+ BadRecordType { offset: u64, rec_type: u32 },
- /// True if `magic` indicates that this file contained zlib-compressed data.
- pub is_zsav: bool,
+ #[error("At offset {offset:#x}, variable label code ({code}) is not 0 or 1.")]
+ BadVariableLabelCode { offset: u64, code: u32 },
- /// True if `magic` indicates that this file contained EBCDIC data.
- pub is_ebcdic: bool,
+ #[error(
+ "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
+ )]
+ BadNumericMissingValueCode { offset: u64, code: i32 },
- /// 0-based variable index of the weight variable, or `None` if the file is
- /// unweighted.
- pub weight_index: Option<u32>,
+ #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
+ BadStringMissingValueCode { offset: u64, code: i32 },
- /// Number of variable positions, or `None` if the value in the file is
- /// questionably trustworthy.
- pub nominal_case_size: Option<u32>,
+ #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
+ BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
- /// `dd mmm yy` in the file's encoding.
- pub creation_date: [u8; 9],
+ #[error("At offset {offset:#x}, number of variable indexes ({n}) is greater than the maximum number ({max}).")]
+ BadNumberOfVarIndexes { offset: u64, n: u32, max: u32 },
- /// `HH:MM:SS` in the file's encoding.
- pub creation_time: [u8; 8],
+ #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
+ ExtensionRecordTooLarge {
+ offset: u64,
+ subtype: u32,
+ size: u32,
+ count: u32,
+ },
- /// Eye-catcher string, then product name, in the file's encoding. Padded
- /// on the right with spaces.
- pub eye_catcher: [u8; 60],
+ #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
+ EofInCase {
+ offset: u64,
+ case_ofs: u64,
+ case_len: usize,
+ },
- /// File label, in the file's encoding. Padded on the right with spaces.
- pub file_label: [u8; 64],
-}
+ #[error(
+ "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
+ )]
+ EofInCompressedCase { offset: u64, case_ofs: u64 },
-impl<R: Read> Reader<R> {
- pub fn new(r: R, warn: impl Fn(Warning)) -> Result<Reader<R>, Error> {
- let mut r = BufReader::new(r);
-
- let magic: [u8; 4] = read_bytes(&mut r)?;
- let (is_zsav, is_ebcdic) = match &magic {
- ASCII_MAGIC => (false, false),
- ASCII_ZMAGIC => (true, false),
- EBCDIC_MAGIC => (false, true),
- _ => return Err(Error::NotASystemFile),
- };
-
- let eye_catcher: [u8; 60] = read_bytes(&mut r)?;
- let layout_code: [u8; 4] = read_bytes(&mut r)?;
- let endianness = Endian::identify_u32(2, layout_code)
- .or_else(|| Endian::identify_u32(2, layout_code))
- .ok_or_else(|| Error::NotASystemFile)?;
-
- let nominal_case_size: u32 = endianness.parse(read_bytes(&mut r)?);
- let nominal_case_size = (nominal_case_size <= u32::MAX / 32).then_some(nominal_case_size);
-
- let compression_code: u32 = endianness.parse(read_bytes(&mut r)?);
- let compression = match (is_zsav, compression_code) {
- (false, 0) => None,
- (false, 1) => Some(Compression::Simple),
- (true, 2) => Some(Compression::ZLib),
- (false, code) => return Err(Error::InvalidSavCompression(code)),
- (true, code) => return Err(Error::InvalidZsavCompression(code)),
- };
-
- let weight_index: u32 = endianness.parse(read_bytes(&mut r)?);
- let weight_index = (weight_index > 0).then_some(weight_index - 1);
-
- let n_cases: u32 = endianness.parse(read_bytes(&mut r)?);
- let n_cases = (n_cases <= u32::MAX / 4).then_some(n_cases);
-
- let bias: f64 = endianness.parse(read_bytes(&mut r)?);
- if bias != 100.0 {
- warn(Warning::UnexpectedBias(bias))
- }
-
- let creation_date: [u8; 9] = read_bytes(&mut r)?;
- let creation_time: [u8; 8] = read_bytes(&mut r)?;
- let file_label: [u8; 64] = read_bytes(&mut r)?;
- let _: [u8; 3] = read_bytes(&mut r)?;
-
- let header = FileHeader {
- magic,
- is_zsav,
- is_ebcdic,
- weight_index,
- nominal_case_size,
- creation_date,
- creation_time,
- eye_catcher,
- file_label,
- };
-
- Ok(Reader { r })
- }
-}
+ #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
+ PartialCompressedCase { offset: u64, case_ofs: u64 },
-fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
- let mut buf = [0; N];
- r.read_exact(&mut buf)?;
- Ok(buf)
-}
+ #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
+ CompressedNumberExpected { offset: u64, case_ofs: u64 },
-/*
-fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
- while s.last() == Some(&c) {
- s.pop();
- }
- s
-}
+ #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
+ CompressedStringExpected { offset: u64, case_ofs: u64 },
-fn skip_bytes<R: Read>(r: &mut R, mut n: u64) -> Result<(), IoError> {
- let mut buf = [0; 1024];
- while n > 0 {
- let chunk = u64::min(n, buf.len() as u64);
- r.read_exact(&mut buf[0..chunk as usize])?;
- n -= chunk;
- }
- Ok(())
+ #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
+ BadZlibTrailerNBlocks {
+ offset: u64,
+ n_blocks: u32,
+ expected_n_blocks: u64,
+ ztrailer_len: u64,
+ },
}
-
-*/