    /// Reads one record and returns it together with the state the reader
    /// should transition to afterwards, or `Ok(None)` at end of input.
    ///
    /// `self.state` itself is not mutated here; the `Iterator` impl commits
    /// the returned transition (and parks the reader in `End` on error).
    fn _next(&mut self) -> Result<Option<(Record, ReaderState)>, Error> {
        match self.state {
            // Nothing read yet: the file must begin with the fixed header.
            ReaderState::Start => {
                let header = read_header(&mut self.r)?;
                let next_state = ReaderState::Headers(header.endianness, header.compression);
                Ok(Some((Record::Header(header), next_state)))
            }
            // Header section: each record starts with a 32-bit type word.
            ReaderState::Headers(endian, compression) => {
                let rec_type: u32 = endian.parse(read_bytes(&mut self.r)?);
                let record = match rec_type {
                    2 => {
                        // Variable record; remember its width so case data
                        // can be typed later in the Data state.
                        let variable = read_variable_record(&mut self.r, endian)?;
                        self.var_types.push(VarType::from_width(variable.width));
                        Record::Variable(variable)
                    }
                    3 => Record::ValueLabel(read_value_label_record(&mut self.r, endian)?),
                    4 => Record::VarIndexes(read_var_indexes_record(&mut self.r, endian)?),
                    6 => Record::Document(read_document_record(&mut self.r, endian)?),
                    7 => Record::Extension(read_extension_record(&mut self.r, endian)?),
                    999 => {
                        // End-of-headers record: consume its 4 filler bytes.
                        let _: [u8; 4] = read_bytes(&mut self.r)?;
                        // NOTE(review): compressed case data is not handled
                        // here — any compression sends the reader to End.
                        let next_state = match compression {
                            None => ReaderState::Data(endian),
                            _ => ReaderState::End,
                        };
                        return Ok(Some((Record::EndOfHeaders, next_state)));
                    }
                    _ => {
                        return Err(Error::BadRecordType {
                            offset: self.r.stream_position()?,
                            rec_type,
                        })
                    }
                };
                Ok(Some((record, ReaderState::Headers(endian, compression))))
            }
            // Uncompressed case data: one raw value per variable per case.
            ReaderState::Data(endian) => {
                let mut values = Vec::with_capacity(self.var_types.len());
                for (i, &var_type) in self.var_types.iter().enumerate() {
                    let raw = match read_bytes(&mut self.r) {
                        Ok(raw) => raw,
                        Err(err) => {
                            // EOF before the first value of a case is the
                            // normal end of data; EOF mid-case is an error.
                            if i == 0 && err.kind() == ErrorKind::UnexpectedEof {
                                return Ok(None);
                            } else {
                                return Err(Error::Io(err));
                            }
                        }
                    };
                    values.push(Value::from_raw(var_type, raw, endian));
                }
                Ok(Some((Record::Case(values), ReaderState::Data(endian))))
            }
            ReaderState::End => Ok(None),
        }
    }
}
+
+impl<R: Read + Seek> Iterator for Reader<R> {
+ type Item = Result<Record, Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let retval = self._next();
+ match retval {
+ Ok(None) => {
+ self.state = ReaderState::End;
+ None
+ }
+ Ok(Some((record, next_state))) => {
+ self.state = next_state;
+ Some(Ok(record))
+ }
+ Err(error) => {
+ self.state = ReaderState::End;
+ Some(Err(error))
+ }
+ }
+ }
+}
+
+fn read_header<R: Read>(r: &mut R) -> Result<Header, Error> {
+ let magic: [u8; 4] = read_bytes(r)?;
+ let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
+
+ let eye_catcher: [u8; 60] = read_bytes(r)?;
+ let layout_code: [u8; 4] = read_bytes(r)?;
+ let endianness = Endian::identify_u32(2, layout_code)
+ .or_else(|| Endian::identify_u32(2, layout_code))
+ .ok_or_else(|| Error::NotASystemFile)?;
+ let layout_code = endianness.parse(layout_code);
+
+ let nominal_case_size: u32 = endianness.parse(read_bytes(r)?);
+ let nominal_case_size =
+ (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
+
+ let compression_code: u32 = endianness.parse(read_bytes(r)?);
+ let compression = match (magic, compression_code) {
+ (Magic::ZSAV, 2) => Some(Compression::ZLib),
+ (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
+ (_, 0) => None,
+ (_, 1) => Some(Compression::Simple),
+ (_, code) => return Err(Error::InvalidSavCompression(code)),
+ };
+
+ let weight_index: u32 = endianness.parse(read_bytes(r)?);
+ let weight_index = (weight_index > 0).then_some(weight_index - 1);
+
+ let n_cases: u32 = endianness.parse(read_bytes(r)?);
+ let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
+
+ let bias: f64 = endianness.parse(read_bytes(r)?);
+
+ let creation_date: [u8; 9] = read_bytes(r)?;
+ let creation_time: [u8; 8] = read_bytes(r)?;
+ let file_label: [u8; 64] = read_bytes(r)?;
+ let _: [u8; 3] = read_bytes(r)?;
+
+ Ok(Header {
+ magic,
+ layout_code,
+ nominal_case_size,
+ compression,
+ weight_index,
+ n_cases,
+ bias,
+ creation_date,
+ creation_time,
+ eye_catcher,
+ file_label,
+ endianness,
+ })
+}
+
/// A variable record (record type 2), as read by `read_variable_record`.
pub struct Variable {
    /// Offset from the start of the file to the start of the record.
    pub offset: u64,

    /// Variable width, in the range -1..=255.
    pub width: i32,

    /// Variable name, padded on the right with spaces.
    pub name: [u8; 8],

    /// Print format.
    pub print_format: u32,

    /// Write format.
    pub write_format: u32,

    /// Missing value code, one of -3, -2, 0, 1, 2, or 3.
    pub missing_value_code: i32,

    /// Raw missing values, up to 3 of them (|missing_value_code| entries).
    pub missing: Vec<[u8; 8]>,

    /// Optional variable label (raw bytes, at most 65535 of them).
    pub label: Option<Vec<u8>>,
}
+
+fn read_variable_record<R: Read + Seek>(
+ r: &mut BufReader<R>,
+ e: Endian,
+) -> Result<Variable, Error> {
+ let offset = r.stream_position()?;
+ let width: i32 = e.parse(read_bytes(r)?);
+ let has_variable_label: u32 = e.parse(read_bytes(r)?);
+ let missing_value_code: i32 = e.parse(read_bytes(r)?);
+ let print_format: u32 = e.parse(read_bytes(r)?);
+ let write_format: u32 = e.parse(read_bytes(r)?);
+ let name: [u8; 8] = read_bytes(r)?;
+
+ let label = match has_variable_label {
+ 0 => None,
+ 1 => {
+ let len: u32 = e.parse(read_bytes(r)?);
+ let read_len = len.min(65535) as usize;
+ let label = Some(read_vec(r, read_len)?);
+
+ let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
+ let _ = read_vec(r, padding_bytes as usize)?;
+
+ label
+ }
+ _ => {
+ return Err(Error::BadVariableLabelCode {
+ offset,
+ code: has_variable_label,
+ })
+ }
+ };
+
+ let mut missing = Vec::new();
+ if missing_value_code != 0 {
+ match (width, missing_value_code) {
+ (0, -3 | -2 | 1 | 2 | 3) => (),
+ (0, _) => {
+ return Err(Error::BadNumericMissingValueCode {
+ offset,
+ code: missing_value_code,
+ })
+ }
+ (_, 0..=3) => (),
+ (_, _) => {
+ return Err(Error::BadStringMissingValueCode {
+ offset,
+ code: missing_value_code,
+ })
+ }
+ }
+
+ for _ in 0..missing_value_code.abs() {
+ missing.push(read_bytes(r)?);
+ }
+ }
+
+ Ok(Variable {
+ offset,
+ width,
+ name,
+ print_format,
+ write_format,
+ missing_value_code,
+ missing,
+ label,
+ })
+}
+
/// A value-label record (record type 3), as read by `read_value_label_record`.
pub struct ValueLabel {
    /// Offset from the start of the file to the start of the record.
    pub offset: u64,

    /// The labels, as (raw 8-byte value, label text) pairs.
    pub labels: Vec<([u8; 8], Vec<u8>)>,
}
+
impl ValueLabel {
    /// Maximum number of value labels in a record — a sanity bound on the
    /// count word (each label needs at least 8 bytes for its raw value).
    pub const MAX: u32 = u32::MAX / 8;
}
+
+fn read_value_label_record<R: Read + Seek>(
+ r: &mut BufReader<R>,
+ e: Endian,
+) -> Result<ValueLabel, Error> {
+ let offset = r.stream_position()?;
+ let n: u32 = e.parse(read_bytes(r)?);
+ if n > ValueLabel::MAX {
+ return Err(Error::BadNumberOfValueLabels {
+ offset,
+ n,
+ max: ValueLabel::MAX,
+ });
+ }
+
+ let mut labels = Vec::new();
+ for _ in 0..n {
+ let value: [u8; 8] = read_bytes(r)?;
+ let label_len: u8 = e.parse(read_bytes(r)?);
+ let label_len = label_len as usize;
+ let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
+
+ let mut label = read_vec(r, padded_len)?;
+ label.truncate(label_len);
+ labels.push((value, label));
+ }
+ Ok(ValueLabel { offset, labels })
+}
+
/// A variable-indexes record (record type 4), as read by
/// `read_var_indexes_record`.
pub struct VarIndexes {
    /// Offset from the start of the file to the start of the record.
    pub offset: u64,

    /// The 0-based variable indexes, stored as the raw words from the file
    /// (no 0-/1-based adjustment is applied by the reader).
    pub var_indexes: Vec<u32>,
}
+
impl VarIndexes {
    /// Maximum number of variable indexes in a record — a sanity bound on
    /// the count word before the index vector is allocated.
    pub const MAX: u32 = u32::MAX / 8;
}
+
+fn read_var_indexes_record<R: Read + Seek>(
+ r: &mut BufReader<R>,
+ e: Endian,
+) -> Result<VarIndexes, Error> {
+ let offset = r.stream_position()?;
+ let n: u32 = e.parse(read_bytes(r)?);
+ if n > VarIndexes::MAX {
+ return Err(Error::BadNumberOfVarIndexes {
+ offset,
+ n,
+ max: VarIndexes::MAX,
+ });
+ }
+ let mut var_indexes = Vec::with_capacity(n as usize);
+ for _ in 0..n {
+ var_indexes.push(e.parse(read_bytes(r)?));
+ }
+
+ Ok(VarIndexes {
+ offset,
+ var_indexes,
+ })
+}
+
/// Number of bytes in each line of a document record.
pub const DOC_LINE_LEN: u32 = 80;

/// Maximum number of lines in a document record — a sanity bound on the
/// line-count word.
pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN;
+
/// A document record (record type 6), as read by `read_document_record`.
pub struct Document {
    /// Offset from the start of the file to the document's lines, i.e. just
    /// past the line-count word (not the start of the record itself).
    pub pos: u64,

    /// The document, as an array of 80-byte lines.
    pub lines: Vec<[u8; DOC_LINE_LEN as usize]>,
}
+
+fn read_document_record<R: Read + Seek>(
+ r: &mut BufReader<R>,
+ e: Endian,
+) -> Result<Document, Error> {
+ let offset = r.stream_position()?;
+ let n: u32 = e.parse(read_bytes(r)?);
+ match n {
+ 0..=DOC_MAX_LINES => {
+ let pos = r.stream_position()?;
+ let mut lines = Vec::with_capacity(n as usize);
+ for _ in 0..n {
+ let line: [u8; 80] = read_bytes(r)?;
+ lines.push(line);
+ }
+ Ok(Document { pos, lines })
+ }
+ _ => Err(Error::BadDocumentLength {
+ offset,
+ n,
+ max: DOC_MAX_LINES,
+ }),
+ }
+}
+
/// Known subtypes of extension (type 7) records.
///
/// The discriminants are the on-disk subtype codes; `FromPrimitive` allows
/// converting a raw subtype word back into a variant.
#[derive(FromPrimitive)]
enum ExtensionType {
    /// Machine integer info.
    Integer = 3,
    /// Machine floating-point info.
    Float = 4,
    /// Variable sets.
    VarSets = 5,
    /// DATE.
    Date = 6,
    /// Multiple response sets.
    Mrsets = 7,
    /// SPSS Data Entry.
    DataEntry = 8,
    /// Extra product info text.
    ProductInfo = 10,
    /// Variable display parameters.
    Display = 11,
    /// Long variable names.
    LongNames = 13,
    /// Long strings.
    LongStrings = 14,
    /// Extended number of cases.
    Ncases = 16,
    /// Data file attributes.
    FileAttrs = 17,
    /// Variable attributes.
    VarAttrs = 18,
    /// Multiple response sets (extended).
    Mrsets2 = 19,
    /// Character encoding.
    Encoding = 20,
    /// Value labels for long strings.
    LongLabels = 21,
    /// Missing values for long strings.
    LongMissing = 22,
    /// "Format properties in dataview table".
    Dataview = 24,
}
+
/// An extension record (record type 7), with its payload left unparsed.
pub struct Extension {
    /// Offset from the start of the file to the record's data, i.e. just
    /// past the subtype, size, and count words (see `read_extension_record`).
    pub offset: u64,

    /// Record subtype.
    pub subtype: u32,

    /// Size of each data element.
    pub size: u32,

    /// Number of data elements.
    pub count: u32,

    /// `size * count` bytes of data.
    pub data: Vec<u8>,
}
+
+fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) {
+ match extension {
+ /* Implemented record types. */
+ ExtensionType::Integer => (4, 8),
+ ExtensionType::Float => (8, 3),
+ ExtensionType::VarSets => (1, 0),
+ ExtensionType::Mrsets => (1, 0),
+ ExtensionType::ProductInfo => (1, 0),
+ ExtensionType::Display => (4, 0),
+ ExtensionType::LongNames => (1, 0),
+ ExtensionType::LongStrings => (1, 0),
+ ExtensionType::Ncases => (8, 2),
+ ExtensionType::FileAttrs => (1, 0),
+ ExtensionType::VarAttrs => (1, 0),
+ ExtensionType::Mrsets2 => (1, 0),
+ ExtensionType::Encoding => (1, 0),
+ ExtensionType::LongLabels => (1, 0),
+ ExtensionType::LongMissing => (1, 0),
+
+ /* Ignored record types. */
+ ExtensionType::Date => (0, 0),
+ ExtensionType::DataEntry => (0, 0),
+ ExtensionType::Dataview => (0, 0),
+ }
+}
+
+fn read_extension_record<R: Read + Seek>(
+ r: &mut BufReader<R>,
+ e: Endian,
+) -> Result<Extension, Error> {
+ let subtype = e.parse(read_bytes(r)?);
+ let offset = r.stream_position()?;
+ let size: u32 = e.parse(read_bytes(r)?);
+ let count = e.parse(read_bytes(r)?);
+ let Some(product) = size.checked_mul(count) else {
+ return Err(Error::ExtensionRecordTooLarge {
+ offset,
+ subtype,
+ size,
+ count,
+ });
+ };
+ let offset = r.stream_position()?;
+ let data = read_vec(r, product as usize)?;
+ Ok(Extension {
+ offset,
+ subtype,
+ size,
+ count,
+ data,
+ })
+}
+
/// The fixed header that precedes ZLIB-compressed data, as read by
/// `read_zheader`.
struct ZHeader {
    /// File offset to the start of the record.
    offset: u64,

    /// File offset to the ZLIB data header.
    zheader_offset: u64,

    /// File offset to the ZLIB trailer.
    ztrailer_offset: u64,

    /// Length of the ZLIB trailer in bytes.
    ztrailer_len: u64,
}
+
+fn read_zheader<R: Read + Seek>(r: &mut BufReader<R>, e: Endian) -> Result<ZHeader, Error> {
+ let offset = r.stream_position()?;
+ let zheader_offset: u64 = e.parse(read_bytes(r)?);
+ let ztrailer_offset: u64 = e.parse(read_bytes(r)?);
+ let ztrailer_len: u64 = e.parse(read_bytes(r)?);
+
+ Ok(ZHeader {
+ offset,
+ zheader_offset,
+ ztrailer_offset,
+ ztrailer_len,
+ })