/// A variable record, with its fields stored as they are read from the file.
///
/// Names, labels, and missing values are kept as raw bytes; no character-set
/// decoding is applied here.
pub struct Variable {
    /// Offset from the start of the file to the start of the record.
    pub offset: u64,

    /// Variable width, in the range -1..=255.
    ///
    /// `read_variable_record` treats a width of 0 as numeric when it checks
    /// the missing value code; it does not range-check the width itself.
    pub width: i32,

    /// Variable name, padded on the right with spaces.
    pub name: [u8; 8],

    /// Print format.
    pub print_format: u32,

    /// Write format.
    pub write_format: u32,

    /// Missing value code, one of -3, -2, 0, 1, 2, or 3.
    pub missing_value_code: i32,

    /// Raw missing values, up to 3 of them.
    pub missing: Vec<[u8; 8]>,

    /// Optional variable label, as raw bytes.
    pub label: Option<Vec<u8>>,
}

-fn read_variable_record<R: Read + Seek>(
- r: &mut BufReader<R>,
- e: Endian,
-) -> Result<Variable, Error> {
- let offset = r.stream_position()?;
- let width: i32 = e.parse(read_bytes(r)?);
- let has_variable_label: u32 = e.parse(read_bytes(r)?);
- let missing_value_code: i32 = e.parse(read_bytes(r)?);
- let print_format: u32 = e.parse(read_bytes(r)?);
- let write_format: u32 = e.parse(read_bytes(r)?);
- let name: [u8; 8] = read_bytes(r)?;
-
- let label = match has_variable_label {
- 0 => None,
- 1 => {
- let len: u32 = e.parse(read_bytes(r)?);
- let read_len = len.min(65535) as usize;
- let label = Some(read_vec(r, read_len)?);
-
- let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
- let _ = read_vec(r, padding_bytes as usize)?;
-
- label
- }
- _ => {
- return Err(Error::BadVariableLabelCode {
- offset,
- code: has_variable_label,
- })
- }
- };
-
- let mut missing = Vec::new();
- if missing_value_code != 0 {
- match (width, missing_value_code) {
- (0, -3 | -2 | 1 | 2 | 3) => (),
- (0, _) => {
- return Err(Error::BadNumericMissingValueCode {
- offset,
- code: missing_value_code,
- })
- }
- (_, 0..=3) => (),
- (_, _) => {
- return Err(Error::BadStringMissingValueCode {
- offset,
- code: missing_value_code,
- })
- }
- }
-
- for _ in 0..missing_value_code.abs() {
- missing.push(read_bytes(r)?);
- }
- }
-
- Ok(Variable {
- offset,
- width,
- name,
- print_format,
- write_format,
- missing_value_code,
- missing,
- label,
- })
-}
-
/// A value label record: a list of raw values, each paired with its label.
pub struct ValueLabel {
    /// Offset from the start of the file to the start of the record.
    pub offset: u64,

    /// The labels, as `(value, label)` pairs.
    ///
    /// The value is the raw 8 bytes from the file; the label is the label
    /// text as raw bytes, without padding.
    pub labels: Vec<([u8; 8], Vec<u8>)>,
}

impl ValueLabel {
    /// Maximum number of value labels in a record.
    ///
    /// `read_value_label_record` rejects records that claim more than this.
    pub const MAX: u32 = u32::MAX / 8;
}

-fn read_value_label_record<R: Read + Seek>(
- r: &mut BufReader<R>,
- e: Endian,
-) -> Result<ValueLabel, Error> {
- let offset = r.stream_position()?;
- let n: u32 = e.parse(read_bytes(r)?);
- if n > ValueLabel::MAX {
- return Err(Error::BadNumberOfValueLabels {
- offset,
- n,
- max: ValueLabel::MAX,
- });
- }
-
- let mut labels = Vec::new();
- for _ in 0..n {
- let value: [u8; 8] = read_bytes(r)?;
- let label_len: u8 = e.parse(read_bytes(r)?);
- let label_len = label_len as usize;
- let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
-
- let mut label = read_vec(r, padded_len)?;
- label.truncate(label_len);
- labels.push((value, label));
- }
- Ok(ValueLabel { offset, labels })
-}
-
/// A record holding a list of variable indexes.
pub struct VarIndexes {
    /// Offset from the start of the file to the start of the record.
    pub offset: u64,

    /// The variable indexes, exactly as stored in the file.
    ///
    /// NOTE(review): this field was previously documented as 0-based, but the
    /// reader stores the values unmodified and the system file format
    /// describes dictionary indexes as 1-based -- confirm which base callers
    /// expect.
    pub var_indexes: Vec<u32>,
}

impl VarIndexes {
    /// Maximum number of variable indexes in a record.
    ///
    /// `read_var_indexes_record` rejects records that claim more than this.
    pub const MAX: u32 = u32::MAX / 8;
}

-fn read_var_indexes_record<R: Read + Seek>(
- r: &mut BufReader<R>,
- e: Endian,
-) -> Result<VarIndexes, Error> {
- let offset = r.stream_position()?;
- let n: u32 = e.parse(read_bytes(r)?);
- if n > VarIndexes::MAX {
- return Err(Error::BadNumberOfVarIndexes {
- offset,
- n,
- max: VarIndexes::MAX,
- });
- }
- let mut var_indexes = Vec::with_capacity(n as usize);
- for _ in 0..n {
- var_indexes.push(e.parse(read_bytes(r)?));
- }
-
- Ok(VarIndexes {
- offset,
- var_indexes,
- })
-}
-
/// Length of each line in a document record, in bytes.
pub const DOC_LINE_LEN: u32 = 80;

/// Maximum number of lines in a document record, chosen so that the total
/// size in bytes (`DOC_MAX_LINES * DOC_LINE_LEN`) does not exceed `i32::MAX`.
pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN;

/// A document record: a sequence of fixed-length lines of raw bytes.
pub struct Document {
    /// Offset from the start of the file to the first document line, that is,
    /// the position just past the record's line count (this is what
    /// `read_document_record` stores).
    pub pos: u64,

    /// The document, as an array of 80-byte lines.
    pub lines: Vec<[u8; DOC_LINE_LEN as usize]>,
}

-fn read_document_record<R: Read + Seek>(
- r: &mut BufReader<R>,
- e: Endian,
-) -> Result<Document, Error> {
- let offset = r.stream_position()?;
- let n: u32 = e.parse(read_bytes(r)?);
- match n {
- 0..=DOC_MAX_LINES => {
- let pos = r.stream_position()?;
- let mut lines = Vec::with_capacity(n as usize);
- for _ in 0..n {
- let line: [u8; 80] = read_bytes(r)?;
- lines.push(line);
- }
- Ok(Document { pos, lines })
- }
- _ => Err(Error::BadDocumentLength {
- offset,
- n,
- max: DOC_MAX_LINES,
- }),
- }
-}
-
-#[derive(FromPrimitive)]
-enum ExtensionType {
- /// Machine integer info.
- Integer = 3,
- /// Machine floating-point info.
- Float = 4,
- /// Variable sets.
- VarSets = 5,
- /// DATE.
- Date = 6,
- /// Multiple response sets.
- Mrsets = 7,
- /// SPSS Data Entry.
- DataEntry = 8,
- /// Extra product info text.
- ProductInfo = 10,
- /// Variable display parameters.
- Display = 11,
- /// Long variable names.
- LongNames = 13,
- /// Long strings.
- LongStrings = 14,
- /// Extended number of cases.
- Ncases = 16,
- /// Data file attributes.
- FileAttrs = 17,
- /// Variable attributes.
- VarAttrs = 18,
- /// Multiple response sets (extended).
- Mrsets2 = 19,
- /// Character encoding.
- Encoding = 20,
- /// Value labels for long strings.
- LongLabels = 21,
- /// Missing values for long strings.
- LongMissing = 22,
- /// "Format properties in dataview table".
- Dataview = 24,
-}
-
/// An extension record: a subtype, an element size and count, and the raw
/// data bytes.
pub struct Extension {
    /// Offset from the start of the file to the start of `data`, that is, the
    /// position just past the `size` and `count` fields (this is what
    /// `read_extension_record` stores).
    pub offset: u64,

    /// Record subtype.
    pub subtype: u32,

    /// Size of each data element.
    pub size: u32,

    /// Number of data elements.
    pub count: u32,

    /// `size * count` bytes of data.
    pub data: Vec<u8>,
}

-fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) {
- match extension {
- /* Implemented record types. */
- ExtensionType::Integer => (4, 8),
- ExtensionType::Float => (8, 3),
- ExtensionType::VarSets => (1, 0),
- ExtensionType::Mrsets => (1, 0),
- ExtensionType::ProductInfo => (1, 0),
- ExtensionType::Display => (4, 0),
- ExtensionType::LongNames => (1, 0),
- ExtensionType::LongStrings => (1, 0),
- ExtensionType::Ncases => (8, 2),
- ExtensionType::FileAttrs => (1, 0),
- ExtensionType::VarAttrs => (1, 0),
- ExtensionType::Mrsets2 => (1, 0),
- ExtensionType::Encoding => (1, 0),
- ExtensionType::LongLabels => (1, 0),
- ExtensionType::LongMissing => (1, 0),
-
- /* Ignored record types. */
- ExtensionType::Date => (0, 0),
- ExtensionType::DataEntry => (0, 0),
- ExtensionType::Dataview => (0, 0),
- }
-}
-
-fn read_extension_record<R: Read + Seek>(
- r: &mut BufReader<R>,
- e: Endian,
-) -> Result<Extension, Error> {
- let subtype = e.parse(read_bytes(r)?);
- let offset = r.stream_position()?;
- let size: u32 = e.parse(read_bytes(r)?);
- let count = e.parse(read_bytes(r)?);
- let Some(product) = size.checked_mul(count) else {
- return Err(Error::ExtensionRecordTooLarge {
- offset,
- subtype,
- size,
- count,
- });
- };
- let offset = r.stream_position()?;
- let data = read_vec(r, product as usize)?;
- Ok(Extension {
- offset,
- subtype,
- size,
- count,
- data,
- })
-}
-
/// The header that describes where the ZLIB data header and trailer sit in
/// the file.
struct ZHeader {
    /// File offset to the start of the record.
    offset: u64,

    /// File offset to the ZLIB data header.
    ///
    /// `read_zheader` only accepts a value equal to `offset`.
    zheader_offset: u64,

    /// File offset to the ZLIB trailer.
    ztrailer_offset: u64,

    /// Length of the ZLIB trailer in bytes.
    ///
    /// `read_zheader` only accepts a nonzero multiple of 24.
    ztrailer_len: u64,
}

-fn read_zheader<R: Read + Seek>(r: &mut BufReader<R>, e: Endian) -> Result<ZHeader, Error> {
- let offset = r.stream_position()?;
- let zheader_offset: u64 = e.parse(read_bytes(r)?);
- let ztrailer_offset: u64 = e.parse(read_bytes(r)?);
- let ztrailer_len: u64 = e.parse(read_bytes(r)?);
-
- if zheader_offset != offset {
- return Err(Error::BadZlibHeaderOffset {
- offset,
- zheader_offset,
- });
- }
- if ztrailer_offset < offset {
- return Err(Error::BadZlibTrailerOffset {
- offset,
- ztrailer_offset,
- });
- }
- if ztrailer_len < 24 || ztrailer_len % 24 != 0 {
- return Err(Error::BadZlibTrailerLen {
- offset,
- ztrailer_len,
- });
- }
-
- Ok(ZHeader {
- offset,
- zheader_offset,
- ztrailer_offset,
- ztrailer_len,
- })
-}
-
/// Reads exactly `N` bytes from `r` and returns them as a fixed-size array.
///
/// # Errors
///
/// Returns the error from [`Read::read_exact`]; in particular, an error of
/// kind `UnexpectedEof` if fewer than `N` bytes remain.
fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
    let mut buf = [0u8; N];
    r.read_exact(&mut buf)?;
    Ok(buf)
}

/// Reads exactly `n` bytes from `r` and returns them as a vector.
///
/// `n` usually comes straight from the file being parsed, so the buffer is
/// not allocated at full size up front: a corrupt or hostile length would
/// otherwise commit an arbitrarily large allocation (or abort on capacity
/// overflow) before any data had been read.  Instead the read is limited to
/// `n` bytes and the vector grows only as data actually arrives.
///
/// # Errors
///
/// Returns any I/O error from the underlying reader, or an error of kind
/// `UnexpectedEof` if fewer than `n` bytes remain.
fn read_vec<R: Read>(r: &mut BufReader<R>, n: usize) -> Result<Vec<u8>, IoError> {
    /// Upper bound on the number of bytes reserved before any are read.
    const MAX_PREALLOC: usize = 64 * 1024;

    let mut vec = Vec::with_capacity(n.min(MAX_PREALLOC));
    let n_read = r.by_ref().take(n as u64).read_to_end(&mut vec)?;
    if n_read != n {
        return Err(IoError::from(std::io::ErrorKind::UnexpectedEof));
    }
    Ok(vec)
}

/*
fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
    while s.last() == Some(&c) {
        s.pop();
    }
    s
}

fn skip_bytes<R: Read>(r: &mut R, mut n: u64) -> Result<(), IoError> {
    let mut buf = [0; 1024];
    while n > 0 {
        let chunk = u64::min(n, buf.len() as u64);
        r.read_exact(&mut buf[0..chunk as usize])?;
        n -= chunk;
    }
    Ok(())
}

*/