#![allow(unused_variables)]
use endian::{Endian, Parse};
-use std::io::{BufReader, Error as IoError, Read};
+use num::Integer;
+use std::io::{BufReader, Error as IoError, Read, Seek};
use thiserror::Error;
pub mod endian;
#[error("Invalid ZSAV compression code {0}")]
InvalidZsavCompression(u32),
+
+ #[error("Misplaced type 4 record.")]
+ MisplacedType4Record,
+
+ #[error("Number of document lines ({n}) must be greater than 0 and less than {max}.")]
+ BadDocumentLength { n: u32, max: u32 },
+
+ #[error("Unrecognized record type {0}.")]
+ BadRecordType(u32),
+
+ #[error("Variable label indicator ({0}) is not 0 or 1.")]
+ BadVariableLabelIndicator(u32),
+
+ #[error("Numeric missing value indicator ({0}) is not -3, -2, 0, 1, 2, or 3.")]
+ BadNumericMissingValueIndicator(i32),
+
+ #[error("String missing value indicator ({0}) is not 0, 1, 2, or 3.")]
+ BadStringMissingValueIndicator(i32),
}
#[derive(Error, Debug)]
pub enum Warning {
#[error("Unexpected floating-point bias {0} or unrecognized floating-point format.")]
UnexpectedBias(f64),
+
+ #[error("Duplicate type 6 (document) record.")]
+ DuplicateDocumentRecord, // Reported by `Reader::new` when a type 6 record is read after one has already been stored; the later record is discarded.
}
#[derive(Copy, Clone, Debug)]
pub struct Reader<R: Read> {
r: BufReader<R>,
+
+ document_record: Option<DocumentRecord>, // Document record kept by `Reader::new`: the first type 6 record that has at least one line, if any.
}
+/// Magic number for a regular system file.
pub const ASCII_MAGIC: &[u8; 4] = b"$FL2";
+
+/// Magic number for a system file that contains zlib-compressed data.
pub const ASCII_ZMAGIC: &[u8; 4] = b"$FL3";
+
+/// Magic number for an EBCDIC-encoded system file. This is `$FL2` encoded in
+/// EBCDIC.
pub const EBCDIC_MAGIC: &[u8; 4] = &[0x5b, 0xc6, 0xd3, 0xf2];
pub struct FileHeader {
/// True if `magic` indicates that this file contained EBCDIC data.
pub is_ebcdic: bool,
+ /// Byte order detected from the header's layout code; `Reader::new` uses it to decode the records that follow the header.
+ pub endianness: Endian,
+
/// 0-based variable index of the weight variable, or `None` if the file is
/// unweighted.
pub weight_index: Option<u32>,
pub file_label: [u8; 64],
}
-impl<R: Read> Reader<R> {
+pub const DOC_LINE_LEN: u32 = 80; // Size in bytes of one line of a document record.
+pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN; // Largest line count a document record may declare; keeps lines * DOC_LINE_LEN within i32::MAX bytes.
+
+impl<R: Read + Seek> Reader<R> {
pub fn new(r: R, warn: impl Fn(Warning)) -> Result<Reader<R>, Error> {
let mut r = BufReader::new(r);
- let magic: [u8; 4] = read_bytes(&mut r)?;
- let (is_zsav, is_ebcdic) = match &magic {
- ASCII_MAGIC => (false, false),
- ASCII_ZMAGIC => (true, false),
- EBCDIC_MAGIC => (false, true),
- _ => return Err(Error::NotASystemFile),
- };
-
- let eye_catcher: [u8; 60] = read_bytes(&mut r)?;
- let layout_code: [u8; 4] = read_bytes(&mut r)?;
- let endianness = Endian::identify_u32(2, layout_code)
- .or_else(|| Endian::identify_u32(2, layout_code))
- .ok_or_else(|| Error::NotASystemFile)?;
-
- let nominal_case_size: u32 = endianness.parse(read_bytes(&mut r)?);
- let nominal_case_size = (nominal_case_size <= u32::MAX / 32).then_some(nominal_case_size);
-
- let compression_code: u32 = endianness.parse(read_bytes(&mut r)?);
- let compression = match (is_zsav, compression_code) {
- (false, 0) => None,
- (false, 1) => Some(Compression::Simple),
- (true, 2) => Some(Compression::ZLib),
- (false, code) => return Err(Error::InvalidSavCompression(code)),
- (true, code) => return Err(Error::InvalidZsavCompression(code)),
- };
-
- let weight_index: u32 = endianness.parse(read_bytes(&mut r)?);
- let weight_index = (weight_index > 0).then_some(weight_index - 1);
-
- let n_cases: u32 = endianness.parse(read_bytes(&mut r)?);
- let n_cases = (n_cases <= u32::MAX / 4).then_some(n_cases);
-
- let bias: f64 = endianness.parse(read_bytes(&mut r)?);
- if bias != 100.0 {
- warn(Warning::UnexpectedBias(bias))
+ let header = read_header(&mut r, &warn)?; // Parse the fixed file header first; it may report warnings through `warn`.
+ let e = header.endianness; // The record readers below decode integers with the header's byte order.
+ let mut document_record = None;
+ let mut variables = Vec::new(); // NOTE(review): filled below but not stored in the returned `Reader` — confirm this is intentional work in progress.
+ loop {
+ let rec_type: u32 = e.parse(read_bytes(&mut r)?); // Each record begins with a 32-bit type code that selects its reader.
+ match rec_type {
+ 2 => variables.push(read_variable_record(&mut r, e)?),
+ /*
+ 3 => d.read_value_label_record()?,
+ */
+ // A type 4 record always immediately follows a type 3 record, so the
+ // (not yet enabled) type 3 reader is meant to consume it; one seen here is misplaced.
+ 4 => return Err(Error::MisplacedType4Record),
+
+ 6 => {
+ let d = read_document_record(&mut r, e)?; // Always read the record so the stream stays aligned, even if it ends up discarded.
+ if document_record.is_some() {
+ warn(Warning::DuplicateDocumentRecord);
+ } else {
+ document_record = d;
+ }
+ }
+ /*
+ 7 => d.read_extension_record()?,
+ */
+ 999 => break, // Record type 999 ends the record loop.
+ _ => return Err(Error::BadRecordType(rec_type)),
+ }
+ }
+
+ Ok(Reader { r, document_record })
+ }
+}
+
+/// Reads and validates the fixed-size file header at the start of a system
+/// file.
+///
+/// Non-fatal oddities (currently only an unexpected floating-point bias) are
+/// reported through `warn` and do not stop parsing.
+///
+/// # Errors
+///
+/// Returns [`Error::NotASystemFile`] if the magic number or the layout code is
+/// not recognized, [`Error::InvalidSavCompression`] or
+/// [`Error::InvalidZsavCompression`] if the compression code does not match
+/// the magic number, and propagates any error raised while reading from `r`.
+fn read_header<R: Read>(r: &mut R, warn: impl Fn(Warning)) -> Result<FileHeader, Error> {
+    let magic: [u8; 4] = read_bytes(r)?;
+    let (is_zsav, is_ebcdic) = match &magic {
+        ASCII_MAGIC => (false, false),
+        ASCII_ZMAGIC => (true, false),
+        EBCDIC_MAGIC => (false, true),
+        _ => return Err(Error::NotASystemFile),
+    };
+
+    let eye_catcher: [u8; 60] = read_bytes(r)?;
+    let layout_code: [u8; 4] = read_bytes(r)?;
+    // The layout code is normally 2, but a few files carry 3 instead.  Its
+    // byte order tells us the endianness of every other integer in the file.
+    // (The fallback previously retried 2, which could never succeed.)
+    let endianness = Endian::identify_u32(2, layout_code)
+        .or_else(|| Endian::identify_u32(3, layout_code))
+        .ok_or(Error::NotASystemFile)?;
+
+    // Implausibly large values are treated as "unknown" rather than rejected.
+    let nominal_case_size: u32 = endianness.parse(read_bytes(r)?);
+    let nominal_case_size =
+        (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
+
+    // Validated here even though `FileHeader` does not store it yet.
+    let compression_code: u32 = endianness.parse(read_bytes(r)?);
+    let compression = match (is_zsav, compression_code) {
+        (false, 0) => None,
+        (false, 1) => Some(Compression::Simple),
+        (true, 2) => Some(Compression::ZLib),
+        (false, code) => return Err(Error::InvalidSavCompression(code)),
+        (true, code) => return Err(Error::InvalidZsavCompression(code)),
+    };
+
+    // The file stores a 1-based index, with 0 meaning "unweighted".
+    // `checked_sub` maps 0 to `None` without evaluating `0 - 1`, which
+    // `then_some(weight_index - 1)` did eagerly (a panic in debug builds).
+    let weight_index: u32 = endianness.parse(read_bytes(r)?);
+    let weight_index = weight_index.checked_sub(1);
+
+    // Parsed to advance the stream; not stored in `FileHeader` yet.
+    let n_cases: u32 = endianness.parse(read_bytes(r)?);
+    let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
+
+    let bias: f64 = endianness.parse(read_bytes(r)?);
+    if bias != 100.0 {
+        warn(Warning::UnexpectedBias(bias))
+    }
+
+    let creation_date: [u8; 9] = read_bytes(r)?;
+    let creation_time: [u8; 8] = read_bytes(r)?;
+    let file_label: [u8; 64] = read_bytes(r)?;
+    // Three bytes follow the file label; their content is ignored.
+    let _: [u8; 3] = read_bytes(r)?;
+
+    Ok(FileHeader {
+        magic,
+        is_zsav,
+        is_ebcdic,
+        endianness,
+        weight_index,
+        nominal_case_size,
+        creation_date,
+        creation_time,
+        eye_catcher,
+        file_label,
+    })
+}
+
+/// Raw contents of a type 2 (variable) record, as read from the file.
+#[derive(Clone, Debug)]
+pub struct VariableRecord {
+    /// Offset from the start of the file to the record's first field, i.e.
+    /// just past the 4-byte record type that introduces the record.
+    pub pos: u64,
+
+    /// Variable width as stored in the file.  Expected to be in the range
+    /// -1..=255, but the reader does not enforce that range.
+    pub width: i32,
+
+    /// Variable name, padded on the right with spaces.
+    pub name: [u8; 8],
+
+    /// Print format.
+    pub print_format: u32,
+
+    /// Write format.
+    pub write_format: u32,
+
+    /// Missing value code, one of -3, -2, 0, 1, 2, or 3.
+    pub missing_value_code: i32,
+
+    /// Raw missing values, up to 3 of them (`missing_value_code.abs()` entries).
+    pub missing: Vec<[u8; 8]>,
+
+    /// Optional variable label, as raw bytes.
+    pub label: Option<Vec<u8>>,
+}
+
+/// Reads the body of a type 2 (variable) record.
+///
+/// The caller must already have consumed the 4-byte record type, so the
+/// recorded `pos` is the offset of the record's first field.
+///
+/// At most 65535 bytes of a variable label are kept.  Any remaining label
+/// bytes, plus the padding that rounds the label up to a multiple of 4 bytes,
+/// are skipped so that the stream stays aligned on the next record.
+///
+/// # Errors
+///
+/// Returns [`Error::BadVariableLabelIndicator`] if the label indicator is not
+/// 0 or 1, [`Error::BadNumericMissingValueIndicator`] or
+/// [`Error::BadStringMissingValueIndicator`] for an invalid missing value
+/// code, and propagates any error raised while reading from `r`.
+fn read_variable_record<R: Read + Seek>(
+    r: &mut BufReader<R>,
+    e: Endian,
+) -> Result<VariableRecord, Error> {
+    let pos = r.stream_position()?;
+    let width: i32 = e.parse(read_bytes(r)?);
+    let has_variable_label: u32 = e.parse(read_bytes(r)?);
+    let missing_value_code: i32 = e.parse(read_bytes(r)?);
+    let print_format: u32 = e.parse(read_bytes(r)?);
+    let write_format: u32 = e.parse(read_bytes(r)?);
+    let name: [u8; 8] = read_bytes(r)?;
+
+    let label = match has_variable_label {
+        0 => None,
+        1 => {
+            let len: u32 = e.parse(read_bytes(r)?);
+            let read_len = len.min(65535);
+            let label = read_vec(r, read_len as usize)?;
+
+            // Skip whatever part of the label was not kept, plus the padding.
+            // Subtracting `read_len` (not `len`) keeps the stream in sync when
+            // the label was truncated, and rounding up in u64 cannot overflow
+            // for any u32 length.
+            let padded_len = Integer::next_multiple_of(&u64::from(len), &4);
+            let skip_len = padded_len - u64::from(read_len);
+            // Discard through a sink so a huge declared length never causes a
+            // matching allocation.
+            let skipped = std::io::copy(&mut r.by_ref().take(skip_len), &mut std::io::sink())?;
+            if skipped != skip_len {
+                return Err(IoError::from(std::io::ErrorKind::UnexpectedEof).into());
+            }
+
+            Some(label)
+        }
+        _ => return Err(Error::BadVariableLabelIndicator(has_variable_label)),
+    };
+
+    let mut missing = Vec::new();
+    if missing_value_code != 0 {
+        // A width of 0 marks a numeric variable, which may also use the
+        // negative codes; any other width only allows 1 to 3 values.
+        match (width, missing_value_code) {
+            (0, -3 | -2 | 1 | 2 | 3) => (),
+            (0, _) => return Err(Error::BadNumericMissingValueIndicator(missing_value_code)),
+            (_, 0..=3) => (),
+            (_, _) => return Err(Error::BadStringMissingValueIndicator(missing_value_code)),
}
- let creation_date: [u8; 9] = read_bytes(&mut r)?;
- let creation_time: [u8; 8] = read_bytes(&mut r)?;
- let file_label: [u8; 64] = read_bytes(&mut r)?;
- let _: [u8; 3] = read_bytes(&mut r)?;
-
- let header = FileHeader {
- magic,
- is_zsav,
- is_ebcdic,
- weight_index,
- nominal_case_size,
- creation_date,
- creation_time,
- eye_catcher,
- file_label,
- };
-
- Ok(Reader { r })
+        // The magnitude of the code is the number of 8-byte values stored.
+        for _ in 0..missing_value_code.abs() {
+            missing.push(read_bytes(r)?);
+        }
+    }
+
+    Ok(VariableRecord {
+        pos,
+        width,
+        name,
+        print_format,
+        write_format,
+        missing_value_code,
+        missing,
+        label,
+    })
+}
+
+/// Raw contents of a type 6 (document) record.
+#[derive(Clone, Debug)]
+pub struct DocumentRecord {
+    /// Offset from the start of the file to the first document line, i.e.
+    /// just past the record's type and line-count fields.
+    pub pos: u64,
+
+    /// The document, as an array of 80-byte lines.
+    pub lines: Vec<[u8; DOC_LINE_LEN as usize]>,
+}
+
+/// Reads the body of a type 6 (document) record.
+///
+/// The caller must already have consumed the 4-byte record type.  Returns
+/// `Ok(None)` when the record declares zero lines.
+///
+/// # Errors
+///
+/// Returns [`Error::BadDocumentLength`] if the declared line count exceeds
+/// [`DOC_MAX_LINES`], and propagates any error raised while reading from `r`.
+fn read_document_record<R: Read + Seek>(
+    r: &mut BufReader<R>,
+    e: Endian,
+) -> Result<Option<DocumentRecord>, Error> {
+    // The line count comes from the file and may be far larger than the data
+    // that actually follows, so only a bounded amount is reserved up front.
+    const MAX_PREALLOC_LINES: usize = 1024;
+
+    let n: u32 = e.parse(read_bytes(r)?);
+    if n == 0 {
+        Ok(None)
+    } else if n > DOC_MAX_LINES {
+        Err(Error::BadDocumentLength {
+            n,
+            max: DOC_MAX_LINES,
+        })
+    } else {
+        let pos = r.stream_position()?;
+        let mut lines = Vec::with_capacity((n as usize).min(MAX_PREALLOC_LINES));
+        for _ in 0..n {
+            let line: [u8; DOC_LINE_LEN as usize] = read_bytes(r)?;
+            lines.push(line);
+        }
+        Ok(Some(DocumentRecord { pos, lines }))
}
}
Ok(buf)
}
+/// Reads exactly `n` bytes from `r` into a freshly allocated vector, failing
+/// with the underlying I/O error if fewer than `n` bytes are available.
+fn read_vec<R: Read>(r: &mut BufReader<R>, n: usize) -> Result<Vec<u8>, IoError> {
+    let mut bytes: Vec<u8> = Vec::new();
+    bytes.resize(n, 0);
+    match r.read_exact(bytes.as_mut_slice()) {
+        Ok(()) => Ok(bytes),
+        Err(error) => Err(error),
+    }
+}
+
/*
fn trim_end(mut s: Vec<u8>, c: u8) -> Vec<u8> {
while s.last() == Some(&c) {