-}
-
-#[derive(Error, Debug)]
-pub enum Warning {
- #[error("Unexpected floating-point bias {0} or unrecognized floating-point format.")]
- UnexpectedBias(f64),
-
- #[error("Duplicate type 6 (document) record.")]
- DuplicateDocumentRecord,
-}
-
/// Compression scheme applied to the case data of a system file.
///
/// `PartialEq`/`Eq` are derived so that callers (and tests) can compare
/// compression kinds directly instead of pattern matching.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Compression {
    /// Byte-code compression, decoded by `read_compressed_data`.
    Simple,
    /// Zlib-compressed data.
    ZLib,
}
-
-pub enum Record {
- Header(Header),
- Document(Document),
- Variable(Variable),
- ValueLabel(ValueLabel),
- VarIndexes(VarIndexes),
- Extension(Extension),
- EndOfHeaders,
- ZHeader(ZHeader),
- ZTrailer(ZTrailer),
- Case(Vec<Value>),
-}
-
-pub struct Header {
- /// Magic number.
- pub magic: Magic,
-
- /// Eye-catcher string, product name, in the file's encoding. Padded
- /// on the right with spaces.
- pub eye_catcher: [u8; 60],
-
- /// Layout code, normally either 2 or 3.
- pub layout_code: u32,
-
- /// Number of variable positions, or `None` if the value in the file is
- /// questionably trustworthy.
- pub nominal_case_size: Option<u32>,
-
- /// Compression type, if any,
- pub compression: Option<Compression>,
-
- /// 0-based variable index of the weight variable, or `None` if the file is
- /// unweighted.
- pub weight_index: Option<u32>,
-
- /// Claimed number of cases, if known.
- pub n_cases: Option<u32>,
-
- /// Compression bias, usually 100.0.
- pub bias: f64,
-
- /// `dd mmm yy` in the file's encoding.
- pub creation_date: [u8; 9],
-
- /// `HH:MM:SS` in the file's encoding.
- pub creation_time: [u8; 8],
-
- /// File label, in the file's encoding. Padded on the right with spaces.
- pub file_label: [u8; 64],
-
- /// Endianness of the data in the file header.
- pub endianness: Endian,
-}
-
/// The 4-byte magic number at the very start of a system file.
///
/// `Debug` is derived so that a `Magic` can appear in diagnostics and in
/// `assert_eq!` failures.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Magic([u8; 4]);

impl Magic {
    /// Magic number for a regular system file.
    pub const SAV: Magic = Magic(*b"$FL2");

    /// Magic number for a system file that contains zlib-compressed data.
    pub const ZSAV: Magic = Magic(*b"$FL3");

    /// Magic number for an EBCDIC-encoded system file.  This is `$FL2`
    /// encoded in EBCDIC.
    pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]);
}
-
-impl TryFrom<[u8; 4]> for Magic {
- type Error = Error;
-
- fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
- let magic = Magic(value);
- match magic {
- Magic::SAV | Magic::ZSAV | Magic::EBCDIC => Ok(magic),
- _ => Err(Error::BadMagic(value)),
- }
- }
-}
-
/// Basic type of a variable position: numeric or string.
///
/// `Debug` is derived so the type can be printed in diagnostics and used
/// with `assert_eq!`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VarType {
    /// A numeric variable.
    Number,
    /// A string variable.
    String,
}

impl VarType {
    /// Classifies a variable by the `width` from its variable record:
    /// width 0 is numeric, every other width is treated as a string.
    fn from_width(width: i32) -> VarType {
        if width == 0 {
            VarType::Number
        } else {
            VarType::String
        }
    }
}
-
-pub struct Reader<R: Read + Seek> {
- r: BufReader<R>,
- var_types: Vec<VarType>,
- state: ReaderState,
-}
-
-trait State {
- fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
-}
-
/// Initial state: nothing has been read from the stream yet.
struct Start<R>
where
    R: Read + Seek,
{
    /// Buffered input stream.
    r: BufReader<R>,
}
-
-impl<R: Read + Seek + 'static> State for Start<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let header = read_header(&mut self.r)?;
- Ok(Some((Record::Header(header), self)))
- }
-}
-
-struct Headers<R: Read + Seek> {
- reader: BufReader<R>,
- endian: Endian,
- compression: Option<Compression>,
- var_types: Vec<VarType>,
-}
-
-impl<R: Read + Seek + 'static> State for Headers<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let rec_type: u32 = self.endian.parse(read_bytes(&mut self.reader)?);
- let record = match rec_type {
- 2 => {
- let variable = read_variable_record(&mut self.reader, self.endian)?;
- self.var_types.push(VarType::from_width(variable.width));
- Record::Variable(variable)
- }
- 3 => Record::ValueLabel(read_value_label_record(&mut self.reader, self.endian)?),
- 4 => Record::VarIndexes(read_var_indexes_record(&mut self.reader, self.endian)?),
- 6 => Record::Document(read_document_record(&mut self.reader, self.endian)?),
- 7 => Record::Extension(read_extension_record(&mut self.reader, self.endian)?),
- 999 => {
- let _: [u8; 4] = read_bytes(&mut self.reader)?;
- let next_state: Box<dyn State> = match self.compression {
- None => Box::new(Data {
- reader: self.reader,
- endian: self.endian,
- var_types: self.var_types,
- }),
- Some(Compression::Simple) => Box::new(CompressedData {
- reader: self.reader,
- endian: self.endian,
- var_types: self.var_types,
- codes: VecDeque::new(),
- }),
- Some(Compression::ZLib) => Box::new(ZlibData {
- reader: ZlibDecodeMultiple::new(self.reader),
- endian: self.endian,
- var_types: self.var_types,
- codes: VecDeque::new(),
- }),
- };
- return Ok(Some((Record::EndOfHeaders, next_state)));
- }
- _ => {
- return Err(Error::BadRecordType {
- offset: self.reader.stream_position()?,
- rec_type,
- })
- }
- };
- Ok(Some((record, self)))
- }
-}
-
-struct Data<R: Read + Seek> {
- reader: BufReader<R>,
- endian: Endian,
- var_types: Vec<VarType>,
-}
-
-impl<R: Read + Seek + 'static> State for Data<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let case_start = self.reader.stream_position()?;
- let mut values = Vec::with_capacity(self.var_types.len());
- for (i, &var_type) in self.var_types.iter().enumerate() {
- let Some(raw) = try_read_bytes(&mut self.reader)? else {
- if i == 0 {
- return Ok(None);
- } else {
- let offset = self.reader.stream_position()?;
- return Err(Error::EofInCase {
- offset,
- case_ofs: offset - case_start,
- case_len: self.var_types.len() * 8,
- });
- }
- };
- values.push(Value::from_raw(var_type, raw, self.endian));
- }
- Ok(Some((Record::Case(values), self)))
- }
-}
-
-struct CompressedData<R: Read + Seek> {
- reader: BufReader<R>,
- endian: Endian,
- var_types: Vec<VarType>,
- codes: VecDeque<u8>,
-}
-
-fn read_compressed_data<R>(
- reader: &mut R,
- endian: Endian,
- var_types: &Vec<VarType>,
- codes: &mut VecDeque<u8>,
-) -> Result<Option<Record>, Error>
-where
- R: Read + Seek,
-{
- let case_start = reader.stream_position()?;
- let mut values = Vec::with_capacity(var_types.len());
- let bias = 100.0; // XXX
- for (i, &var_type) in var_types.iter().enumerate() {
- let value = loop {
- let Some(code) = codes.pop_front() else {
- let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
- if i == 0 {
- return Ok(None);
- } else {
- let offset = reader.stream_position()?;
- return Err(Error::EofInCompressedCase {
- offset,
- case_ofs: offset - case_start,
- });
- }
- };
- codes.extend(new_codes.into_iter());
- continue;
- };
- match code {
- 0 => (),
- 1..=251 => match var_type {
- VarType::Number => break Value::Number(Some(code as f64 - bias)),
- VarType::String => break Value::String(endian.to_bytes(code as f64 - bias)),
- },
- 252 => {
- if i == 0 {
- return Ok(None);
- } else {
- let offset = reader.stream_position()?;
- return Err(Error::PartialCompressedCase {
- offset,
- case_ofs: offset - case_start,
- });
- }
- }
- 253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
- 254 => match var_type {
- VarType::String => break Value::String(*b" "), // XXX EBCDIC
- VarType::Number => {
- return Err(Error::CompressedStringExpected {
- offset: case_start,
- case_ofs: reader.stream_position()? - case_start,
- })
- }
- },
- 255 => match var_type {
- VarType::Number => break Value::Number(None),
- VarType::String => {
- return Err(Error::CompressedNumberExpected {
- offset: case_start,
- case_ofs: reader.stream_position()? - case_start,
- })
- }
- },
- }
- };
- values.push(value);
- }
- Ok(Some(Record::Case(values)))
-}
-
-impl<R: Read + Seek + 'static> State for CompressedData<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- match read_compressed_data(
- &mut self.reader,
- self.endian,
- &self.var_types,
- &mut self.codes,
- )? {
- None => Ok(None),
- Some(record) => Ok(Some((record, self))),
- }
- }
-}
-
-struct ZlibData<R: Read + Seek> {
- reader: ZlibDecodeMultiple<R>,
- endian: Endian,
- var_types: Vec<VarType>,
- codes: VecDeque<u8>,
-}
-
-impl<R: Read + Seek + 'static> State for ZlibData<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- match read_compressed_data(
- &mut self.reader,
- self.endian,
- &self.var_types,
- &mut self.codes,
- )? {
- None => Ok(None),
- Some(record) => Ok(Some((record, self))),
- }
- }
-}
-
-struct ZlibDecodeMultiple<R>
-where
- R: Read + Seek,
-{
- reader: Option<ZlibDecoder<BufReader<R>>>,
-}
-
-impl<R> ZlibDecodeMultiple<R>
-where
- R: Read + Seek,
-{
- fn new(reader: BufReader<R>) -> ZlibDecodeMultiple<R> {
- ZlibDecodeMultiple {
- reader: Some(ZlibDecoder::new(reader)),
- }
- }
-}
-
-impl<R> Read for ZlibDecodeMultiple<R>
-where
- R: Read + Seek,
-{
- fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
- loop {
- match self.reader.as_mut().unwrap().read(buf)? {
- 0 => {
- let inner = self.reader.take().unwrap().into_inner();
- self.reader = Some(ZlibDecoder::new(inner));
- }
- n => return Ok(n),
- };
- }
- }
-}
-
-impl<R> Seek for ZlibDecodeMultiple<R>
-where
- R: Read + Seek,
-{
- fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
- unimplemented!();
- }
-}
-
-/*
-impl<R> BufRead for ZlibDecodeMultiple<R>
-where
- R: Read + Seek,
-{
- fn fill_buf(&mut self) -> Result<&[u8], IoError> {
- self.reader.as_mut().unwrap().fill_buf()
- }
- fn consume(&mut self, amt: usize) {
- self.reader.as_mut().unwrap().consume(amt)
- }
-}*/
-
-enum ReaderState {
- Start,
- Headers(Endian, Option<Compression>),
- Data(Endian),
- CompressedData(Endian, VecDeque<u8>),
- ZHeader(Endian),
- ZTrailer {
- endian: Endian,
- ztrailer_ofs: u64,
- ztrailer_len: u64,
- },
- //ZData,
- End,
-}
-
/// One 8-byte value from a case.
///
/// `Debug` and `PartialEq` are derived so values can be printed in
/// diagnostics and compared in tests.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Value {
    /// A number, or `None` for a missing value.
    Number(Option<f64>),
    /// The 8 raw bytes of a string value.
    String([u8; 8]),
}
-
-impl Value {
- pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
- match var_type {
- VarType::String => Value::String(raw),
- VarType::Number => {
- let number: f64 = endian.parse(raw);
- Value::Number((number != -f64::MAX).then_some(number))
- }
- }
- }
-}
-
-impl<R: Read + Seek> Reader<R> {
- pub fn new(r: R) -> Result<Reader<R>, Error> {
- Ok(Reader {
- r: BufReader::new(r),
- var_types: Vec::new(),
- state: ReaderState::Start,
- })
- }
- fn _next(&mut self) -> Result<Option<Record>, Error> {
- match self.state {
- ReaderState::Start => {
- let header = read_header(&mut self.r)?;
- self.state = ReaderState::Headers(header.endianness, header.compression);
- Ok(Some(Record::Header(header)))
- }
- ReaderState::Headers(endian, compression) => {
- let rec_type: u32 = endian.parse(read_bytes(&mut self.r)?);
- let record = match rec_type {
- 2 => {
- let variable = read_variable_record(&mut self.r, endian)?;
- self.var_types.push(VarType::from_width(variable.width));
- Record::Variable(variable)
- }
- 3 => Record::ValueLabel(read_value_label_record(&mut self.r, endian)?),
- 4 => Record::VarIndexes(read_var_indexes_record(&mut self.r, endian)?),
- 6 => Record::Document(read_document_record(&mut self.r, endian)?),
- 7 => Record::Extension(read_extension_record(&mut self.r, endian)?),
- 999 => {
- let _: [u8; 4] = read_bytes(&mut self.r)?;
- self.state = match compression {
- None => ReaderState::Data(endian),
- Some(Compression::Simple) => {
- ReaderState::CompressedData(endian, VecDeque::new())
- }
- Some(Compression::ZLib) => ReaderState::ZHeader(endian),
- };
- return Ok(Some(Record::EndOfHeaders));
- }
- _ => {
- return Err(Error::BadRecordType {
- offset: self.r.stream_position()?,
- rec_type,
- })
- }
- };
- Ok(Some(record))
- }
- ReaderState::Data(endian) => {
- let case_start = self.r.stream_position()?;
- let mut values = Vec::with_capacity(self.var_types.len());
- for (i, &var_type) in self.var_types.iter().enumerate() {
- let Some(raw) = try_read_bytes(&mut self.r)? else {
- if i == 0 {
- return Ok(None);
- } else {
- let offset = self.r.stream_position()?;
- return Err(Error::EofInCase {
- offset,
- case_ofs: offset - case_start,
- case_len: self.var_types.len() * 8,
- });
- }
- };
- values.push(Value::from_raw(var_type, raw, endian));
- }
- Ok(Some(Record::Case(values)))
- }
- ReaderState::CompressedData(endian, ref mut codes) => {
- let case_start = self.r.stream_position()?;
- let mut values = Vec::with_capacity(self.var_types.len());
- let bias = 100.0; // XXX
- for (i, &var_type) in self.var_types.iter().enumerate() {
- let value = loop {
- let Some(code) = codes.pop_front() else {
- let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.r)?
- else {
- if i == 0 {
- return Ok(None);
- } else {
- let offset = self.r.stream_position()?;
- return Err(Error::EofInCompressedCase {
- offset,
- case_ofs: offset - case_start,
- });
- }
- };
- codes.extend(new_codes.into_iter());
- continue;
- };
- match code {
- 0 => (),
- 1..=251 => match var_type {
- VarType::Number => break Value::Number(Some(code as f64 - bias)),
- VarType::String => {
- break Value::String(endian.to_bytes(code as f64 - bias))
- }
- },
- 252 => {
- if i == 0 {
- return Ok(None);
- } else {
- let offset = self.r.stream_position()?;
- return Err(Error::PartialCompressedCase {
- offset,
- case_ofs: offset - case_start,
- });
- }
- }
- 253 => {
- break Value::from_raw(var_type, read_bytes(&mut self.r)?, endian)
- }
- 254 => match var_type {
- VarType::String => break Value::String(*b" "), // XXX EBCDIC
- VarType::Number => {
- return Err(Error::CompressedStringExpected {
- offset: case_start,
- case_ofs: self.r.stream_position()? - case_start,
- })
- }
- },
- 255 => match var_type {
- VarType::Number => break Value::Number(None),
- VarType::String => {
- return Err(Error::CompressedNumberExpected {
- offset: case_start,
- case_ofs: self.r.stream_position()? - case_start,
- })
- }
- },
- }
- };
- values.push(value);
- }
- Ok(Some(Record::Case(values)))
- }
- ReaderState::ZHeader(endian) => {
- let zheader = read_zheader(&mut self.r, endian)?;
- self.state = ReaderState::ZTrailer {
- endian,
- ztrailer_ofs: zheader.ztrailer_offset,
- ztrailer_len: zheader.ztrailer_len,
- };
- Ok(Some(Record::ZHeader(zheader)))
- }
- ReaderState::ZTrailer {
- endian,
- ztrailer_ofs,
- ztrailer_len,
- } => {
- //self.state = ReaderState::ZData;
- match read_ztrailer(&mut self.r, endian, ztrailer_ofs, ztrailer_len)? {
- Some(ztrailer) => Ok(Some(Record::ZTrailer(ztrailer))),
- None => self._next(),
- }
- }
- /*
- ReaderState::ZData(zlib_decoder) => {
- let zlib_decoder = zlib_decoder.unwrap_or_else(
- },
- */
- ReaderState::End => Ok(None),
- }
- }
-}
-
-impl<R: Read + Seek> Iterator for Reader<R> {
- type Item = Result<Record, Error>;
-
- fn next(&mut self) -> Option<Self::Item> {
- let retval = self._next();
- match retval {
- Ok(None) => {
- self.state = ReaderState::End;
- None
- }
- Ok(Some(record)) => Some(Ok(record)),
- Err(error) => {
- self.state = ReaderState::End;
- Some(Err(error))
- }
- }
- }
-}
-
-fn read_header<R: Read>(r: &mut R) -> Result<Header, Error> {
- let magic: [u8; 4] = read_bytes(r)?;
- let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;