#![allow(unused_variables)]
use endian::{Endian, Parse};
use num::Integer;
+use num_derive::FromPrimitive;
use std::io::{BufReader, Error as IoError, Read, Seek};
use thiserror::Error;
#[error("At offset {offset:#x}, number of variables associated with a value label ({n}) is not between 1 and the number of variables ({max}).")]
BadNumberOfValueLabelVariables { offset: u64, n: u32, max: u32 },
+
+ #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
+ ExtensionRecordTooLarge {
+ offset: u64,
+ subtype: u32,
+ size: u32,
+ count: u32,
+ },
+
+ #[error("Wrong ZLIB data header offset {zheader_offset:#x} (expected {offset:#x}).")]
+ BadZlibHeaderOffset { offset: u64, zheader_offset: u64 },
+
+ #[error("At offset {offset:#x}, impossible ZLIB trailer offset {ztrailer_offset:#x}.")]
+ BadZlibTrailerOffset { offset: u64, ztrailer_offset: u64 },
+
+ #[error("At offset {offset:#x}, impossible ZLIB trailer length {ztrailer_len}.")]
+ BadZlibTrailerLen { offset: u64, ztrailer_len: u64 },
}
#[derive(Error, Debug)]
/// Holds the buffered input together with the records that `Reader::new`
/// collected from it.
pub struct Reader<R: Read> {
r: BufReader<R>,
-
- document_record: Option<DocumentRecord>,
-
+ documents: Vec<DocumentRecord>, // every type 6 record, in file order
variables: Vec<VariableRecord>,
-
value_labels: Vec<ValueLabelRecord>,
+ extensions: Vec<ExtensionRecord>, // every type 7 record, in file order
+ zheader: Option<ZHeader>, // `Some` only when the file header has `is_zsav` set
}
/// Magic number for a regular system file.
pub file_label: [u8; 64],
}
-pub const DOC_LINE_LEN: u32 = 80;
-pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN;
-
impl<R: Read + Seek> Reader<R> {
/// Wraps `r` in a `BufReader`, reads the file header, and then reads records
/// until the type 999 terminator.
///
/// Each record begins with a `u32` type code parsed with the header's
/// endianness: 2 (variable), 3 (value label), 6 (document) and 7 (extension)
/// are collected into vectors; a free-standing 4 or any other code is an
/// error that carries the offset of the type code. After the terminator,
/// four bytes are skipped and, when `header.is_zsav` is set, the ZLIB data
/// header is read.
///
/// `warn` is only forwarded to `read_header`; repeated document records are
/// now all kept rather than reported through it.
pub fn new(r: R, warn: impl Fn(Warning)) -> Result<Reader<R>, Error> {
let mut r = BufReader::new(r);
let header = read_header(&mut r, &warn)?;
let e = header.endianness;
- let mut document_record = None;
+ let mut documents = Vec::new();
let mut variables = Vec::new();
let mut value_labels = Vec::new();
+ let mut extensions = Vec::new();
loop {
let offset = r.stream_position()?;
let rec_type: u32 = e.parse(read_bytes(&mut r)?);
match rec_type {
2 => variables.push(read_variable_record(&mut r, e)?),
3 => value_labels.push(read_value_label_record(&mut r, e, variables.len())?),
- // A Type 4 record is always immediately after a type 3 record,
- // the code for type 3 records reads the type 4 record too.
4 => return Err(Error::MisplacedType4Record(offset)), // NOTE(review): per the comment this patch removes, type 4 is consumed by the type 3 reader, so seeing one here is an error
-
- 6 => {
- let d = read_document_record(&mut r, e)?;
- if document_record.is_some() {
- warn(Warning::DuplicateDocumentRecord);
- } else {
- document_record = d;
- }
- }
- /*
- 7 => d.read_extension_record()?,
- */
+ 6 => documents.push(read_document_record(&mut r, e)?),
+ 7 => extensions.push(read_extension_record(&mut r, e)?),
999 => break,
_ => return Err(Error::BadRecordType { offset, rec_type }),
}
}
+ let _: [u8; 4] = read_bytes(&mut r)?; // discard the four bytes that follow the 999 type code
+ let zheader = match header.is_zsav {
+ true => Some(read_zheader(&mut r, e)?),
+ false => None,
+ };
Ok(Reader {
r,
- document_record,
+ documents,
variables,
value_labels,
+ extensions,
+ zheader,
})
}
}
})
}
+pub const DOC_LINE_LEN: u32 = 80; // bytes per document line; `read_document_record` reads lines as `[u8; 80]`
+pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN; // largest line count whose total byte length still fits in an `i32`
+
pub struct DocumentRecord {
/// Offset from the start of the file to the start of the record.
pub pos: u64,
/// Reads the body of a document record: a `u32` line count followed by that
/// many 80-byte lines.
///
/// A count of zero now yields a record with no lines (previously `None`).
/// A count above `DOC_MAX_LINES` fails with `Error::BadDocumentLength`, which
/// reports the offset of the count field. I/O failures propagate via `?`.
fn read_document_record<R: Read + Seek>(
r: &mut BufReader<R>,
e: Endian,
-) -> Result<Option<DocumentRecord>, Error> {
+) -> Result<DocumentRecord, Error> {
let offset = r.stream_position()?;
let n: u32 = e.parse(read_bytes(r)?);
- if n == 0 {
- Ok(None)
- } else if n > DOC_MAX_LINES {
- Err(Error::BadDocumentLength {
+ match n {
+ 0..=DOC_MAX_LINES => {
+ let pos = r.stream_position()?; // position of the first line, just past the count
+ let mut lines = Vec::with_capacity(n as usize); // NOTE(review): `n` comes straight from the file; at DOC_MAX_LINES this reserves about 2 GiB before any line is read
+ for _ in 0..n {
+ let line: [u8; 80] = read_bytes(r)?; // 80 is the same value as DOC_LINE_LEN
+ lines.push(line);
+ }
+ Ok(DocumentRecord { pos, lines })
+ }
+ _ => Err(Error::BadDocumentLength {
offset,
n,
max: DOC_MAX_LINES,
- })
- } else {
- let pos = r.stream_position()?;
- let mut lines = Vec::with_capacity(n as usize);
- for i in 0..n {
- let line: [u8; 80] = read_bytes(r)?;
- lines.push(line);
- }
- Ok(Some(DocumentRecord { pos, lines }))
+ }),
+ }
+}
+
+/// Known kinds of extension record, each tied to a fixed numeric code.
+///
+/// NOTE(review): presumably the codes are the `subtype` values of type 7
+/// records (compare `ExtensionRecord::subtype`); no conversion from a
+/// subtype is visible here, so confirm at the call site.
+#[derive(FromPrimitive)]
+enum Extension {
+    /// Info about machine integers.
+    Integer = 3,
+
+    /// Info about machine floating-point numbers.
+    Float = 4,
+
+    /// Sets of variables.
+    VarSets = 5,
+
+    /// DATE.
+    Date = 6,
+
+    /// Sets of multiple responses.
+    Mrsets = 7,
+
+    /// Data for SPSS Data Entry.
+    DataEntry = 8,
+
+    /// Additional product info text.
+    ProductInfo = 10,
+
+    /// Display parameters for variables.
+    Display = 11,
+
+    /// Long names for variables.
+    LongNames = 13,
+
+    /// Long strings.
+    LongStrings = 14,
+
+    /// Extended case count.
+    Ncases = 16,
+
+    /// Attributes of the data file.
+    FileAttrs = 17,
+
+    /// Attributes of variables.
+    VarAttrs = 18,
+
+    /// Sets of multiple responses (extended form).
+    Mrsets2 = 19,
+
+    /// Character encoding.
+    Encoding = 20,
+
+    /// Value labels attached to long strings.
+    LongLabels = 21,
+
+    /// Missing values declared for long strings.
+    LongMissing = 22,
+
+    /// "Format properties in dataview table".
+    Dataview = 24,
+}
+
+/// One extension record as produced by `read_extension_record`: its header
+/// fields plus the raw, uninterpreted payload.
+struct ExtensionRecord {
+    /// File position at which `data` begins, i.e. just past the
+    /// subtype/size/count fields (this is the value `read_extension_record`
+    /// stores).
+    offset: u64,
+
+    /// Subtype code of the record.
+    subtype: u32,
+
+    /// Byte size of a single data element.
+    size: u32,
+
+    /// How many data elements the record holds.
+    count: u32,
+
+    /// The payload: `size * count` bytes.
+    data: Vec<u8>,
+}
+
+/// Maps an extension kind to its `(size, count)` pair.
+///
+/// NOTE(review): presumably these are the element size and element count a
+/// record of that kind must have, with 0 meaning "unconstrained" — confirm
+/// against the code that consumes the pair.
+fn extension_record_size_requirements(extension: Extension) -> (u32, u32) {
+    match extension {
+        // Implemented record types with both fields given.
+        Extension::Integer => (4, 8),
+        Extension::Float => (8, 3),
+        Extension::Ncases => (8, 2),
+
+        // Implemented record types with only the first field given.
+        Extension::Display => (4, 0),
+        Extension::VarSets
+        | Extension::Mrsets
+        | Extension::ProductInfo
+        | Extension::LongNames
+        | Extension::LongStrings
+        | Extension::FileAttrs
+        | Extension::VarAttrs
+        | Extension::Mrsets2
+        | Extension::Encoding
+        | Extension::LongLabels
+        | Extension::LongMissing => (1, 0),
+
+        // Ignored record types.
+        Extension::Date | Extension::DataEntry | Extension::Dataview => (0, 0),
    }
}
+/// Reads a type 7 (extension) record whose type code the caller has already
+/// consumed: the `u32` subtype, element size and element count, followed by
+/// `size * count` bytes of payload.
+///
+/// Fails with `Error::ExtensionRecordTooLarge` when `size * count` does not
+/// fit in a `u32`; that error reports the position just past the subtype
+/// field. The returned record's `offset` is the position of the first
+/// payload byte. I/O failures propagate via `?`.
+fn read_extension_record<R: Read + Seek>(
+    r: &mut BufReader<R>,
+    e: Endian,
+) -> Result<ExtensionRecord, Error> {
+    let subtype: u32 = e.parse(read_bytes(r)?);
+
+    // Taken after the subtype has been read; used only for the overflow error.
+    let header_offset = r.stream_position()?;
+    let size: u32 = e.parse(read_bytes(r)?);
+    let count: u32 = e.parse(read_bytes(r)?);
+    let data_len = size
+        .checked_mul(count)
+        .ok_or(Error::ExtensionRecordTooLarge {
+            offset: header_offset,
+            subtype,
+            size,
+            count,
+        })?;
+
+    // Taken immediately before the payload; this is the offset that is stored.
+    let data_offset = r.stream_position()?;
+    let data = read_vec(r, data_len as usize)?;
+
+    Ok(ExtensionRecord {
+        offset: data_offset,
+        subtype,
+        size,
+        count,
+        data,
+    })
+}
+
+/// The ZLIB data header as read and validated by `read_zheader`.
+struct ZHeader {
+    /// File position at which this header starts.
+    offset: u64,
+
+    /// File position of the ZLIB data header, as stored in the file.
+    zheader_offset: u64,
+
+    /// File position of the ZLIB trailer, as stored in the file.
+    ztrailer_offset: u64,
+
+    /// Size of the ZLIB trailer, in bytes.
+    ztrailer_len: u64,
+}
+
+/// Reads the ZLIB data header — three `u64` fields — from the current
+/// position and validates them.
+///
+/// Checks are made in this order, and the first failure is returned:
+/// the stored header offset must equal the position the header was read
+/// from (`Error::BadZlibHeaderOffset`); the trailer offset must not precede
+/// that position (`Error::BadZlibTrailerOffset`); the trailer length must be
+/// a nonzero multiple of 24 (`Error::BadZlibTrailerLen`). I/O failures
+/// propagate via `?`.
+fn read_zheader<R: Read + Seek>(r: &mut BufReader<R>, e: Endian) -> Result<ZHeader, Error> {
+    // Position of the header itself; every check below is made against it.
+    let offset = r.stream_position()?;
+
+    let header_at: u64 = e.parse(read_bytes(r)?);
+    let trailer_at: u64 = e.parse(read_bytes(r)?);
+    let trailer_len: u64 = e.parse(read_bytes(r)?);
+
+    if header_at != offset {
+        Err(Error::BadZlibHeaderOffset {
+            offset,
+            zheader_offset: header_at,
+        })
+    } else if trailer_at < offset {
+        Err(Error::BadZlibTrailerOffset {
+            offset,
+            ztrailer_offset: trailer_at,
+        })
+    } else if trailer_len < 24 || trailer_len % 24 != 0 {
+        Err(Error::BadZlibTrailerLen {
+            offset,
+            ztrailer_len: trailer_len,
+        })
+    } else {
+        Ok(ZHeader {
+            offset,
+            zheader_offset: header_at,
+            ztrailer_offset: trailer_at,
+            ztrailer_len: trailer_len,
+        })
+    }
+}
+
fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
let mut buf = [0; N];
r.read_exact(&mut buf)?;