#![allow(unused_variables)]
use endian::{Endian, Parse, ToBytes};
-use flate2::bufread::ZlibDecoder;
+use flate2::read::ZlibDecoder;
use num::Integer;
use num_derive::FromPrimitive;
use std::{
collections::VecDeque,
- io::{BufReader, Error as IoError, Read, Seek, SeekFrom},
+ io::{Error as IoError, Read, Seek, SeekFrom},
+ iter::FusedIterator,
};
use thiserror::Error;
pub file_label: [u8; 64],
/// Endianness of the data in the file header.
- pub endianness: Endian,
+ pub endian: Endian,
}
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
}
}
-pub struct Reader<R: Read + Seek> {
- r: BufReader<R>,
- var_types: Vec<VarType>,
- state: ReaderState,
-}
-
trait State {
fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
}
struct Start<R: Read + Seek> {
- r: BufReader<R>,
+ reader: R, // Input that `Start::read` passes to `read_header`; held directly, without a `BufReader` wrapper.
+}
+
+struct CommonState<R: Read + Seek> { // Fields shared by the `Headers`, `Data` and `CompressedData` states.
+ reader: R, // Input source; moved along from one state to the next.
+ endian: Endian, // Copied from `header.endian` in `Start::read`.
+ bias: f64, // Copied from `header.bias`; subtracted from compression codes 1..=251.
+ compression: Option<Compression>, // Copied from `header.compression`; selects the data state after record type 999.
+ var_types: Vec<VarType>, // One entry is pushed for every variable record (record type 2).
}
impl<R: Read + Seek + 'static> State for Start<R> {
fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let header = read_header(&mut self.r)?;
- Ok(Some((Record::Header(header), self)))
+ let header = read_header(&mut self.reader)?; // A failure to read the header is returned to the caller as-is.
+ let next_state = Headers(CommonState { // The reader moves into the `Headers` state, seeded from the header.
+ reader: self.reader,
+ endian: header.endian,
+ bias: header.bias,
+ compression: header.compression,
+ var_types: Vec::new(), // Starts empty; `Headers::read` pushes onto it.
+ });
+ Ok(Some((Record::Header(header), Box::new(next_state)))) // Yield the header record together with the state to run next.
}
}
-struct Headers<R: Read + Seek> {
- reader: BufReader<R>,
- endian: Endian,
- compression: Option<Compression>,
- var_types: Vec<VarType>,
-}
+struct Headers<R: Read + Seek>(CommonState<R>); // State that parses records by type code until record type 999 hands over to a data state.
impl<R: Read + Seek + 'static> State for Headers<R> {
fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let rec_type: u32 = self.endian.parse(read_bytes(&mut self.reader)?);
+ let endian = self.0.endian;
+ let rec_type: u32 = endian.parse(read_bytes(&mut self.0.reader)?);
let record = match rec_type {
2 => {
- let variable = read_variable_record(&mut self.reader, self.endian)?;
- self.var_types.push(VarType::from_width(variable.width));
+ let variable = read_variable_record(&mut self.0.reader, endian)?;
+ self.0.var_types.push(VarType::from_width(variable.width));
Record::Variable(variable)
}
- 3 => Record::ValueLabel(read_value_label_record(&mut self.reader, self.endian)?),
- 4 => Record::VarIndexes(read_var_indexes_record(&mut self.reader, self.endian)?),
- 6 => Record::Document(read_document_record(&mut self.reader, self.endian)?),
- 7 => Record::Extension(read_extension_record(&mut self.reader, self.endian)?),
+ 3 => Record::ValueLabel(read_value_label_record(&mut self.0.reader, endian)?),
+ 4 => Record::VarIndexes(read_var_indexes_record(&mut self.0.reader, endian)?),
+ 6 => Record::Document(read_document_record(&mut self.0.reader, endian)?),
+ 7 => Record::Extension(read_extension_record(&mut self.0.reader, endian)?),
999 => {
- let _: [u8; 4] = read_bytes(&mut self.reader)?;
- let next_state: Box<dyn State> = match self.compression {
- None => Box::new(Data {
- reader: self.reader,
- endian: self.endian,
- var_types: self.var_types,
- }),
- Some(Compression::Simple) => Box::new(CompressedData {
- reader: self.reader,
- endian: self.endian,
- var_types: self.var_types,
- codes: VecDeque::new(),
- }),
- Some(Compression::ZLib) => Box::new(CompressedData {
- reader: ZlibDecodeMultiple::new(self.reader),
- endian: self.endian,
- var_types: self.var_types,
- codes: VecDeque::new(),
- }),
+ let _: [u8; 4] = read_bytes(&mut self.0.reader)?;
+ let next_state: Box<dyn State> = match self.0.compression {
+ None => Box::new(Data(self.0)),
+ Some(Compression::Simple) => Box::new(CompressedData::new(self.0)),
+ Some(Compression::ZLib) => Box::new(CompressedData::new(CommonState {
+ reader: ZlibDecodeMultiple::new(self.0.reader),
+ endian: self.0.endian,
+ bias: self.0.bias,
+ compression: self.0.compression,
+ var_types: self.0.var_types
+ })),
};
return Ok(Some((Record::EndOfHeaders, next_state)));
}
_ => {
return Err(Error::BadRecordType {
- offset: self.reader.stream_position()?,
+ offset: self.0.reader.stream_position()?,
rec_type,
})
}
}
}
-struct Data<R: Read + Seek> {
- reader: BufReader<R>,
- endian: Endian,
- var_types: Vec<VarType>,
-}
+struct Data<R: Read + Seek>(CommonState<R>); // State used when the header's `compression` is `None`; each `read` yields one case.
impl<R: Read + Seek + 'static> State for Data<R> {
fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let case_start = self.reader.stream_position()?;
- let mut values = Vec::with_capacity(self.var_types.len());
- for (i, &var_type) in self.var_types.iter().enumerate() {
- let Some(raw) = try_read_bytes(&mut self.reader)? else {
+ let case_start = self.0.reader.stream_position()?; // Offset where this case begins; only used to build `case_ofs` in errors.
+ let mut values = Vec::with_capacity(self.0.var_types.len()); // A case holds one value per recorded variable type.
+ for (i, &var_type) in self.0.var_types.iter().enumerate() {
+ let Some(raw) = try_read_bytes(&mut self.0.reader)? else { // `None` presumably signals end of input -- TODO confirm in `try_read_bytes`.
if i == 0 {
return Ok(None);
} else {
- let offset = self.reader.stream_position()?;
+ let offset = self.0.reader.stream_position()?; // Input ran out part-way through a case: report where.
return Err(Error::EofInCase {
offset,
case_ofs: offset - case_start,
- case_len: self.var_types.len() * 8,
+ case_len: self.0.var_types.len() * 8, // Expected case size, counting 8 bytes per variable.
});
}
};
- values.push(Value::from_raw(var_type, raw, self.endian));
+ values.push(Value::from_raw(var_type, raw, self.0.endian)); // Decode the raw bytes using the file's endianness.
}
Ok(Some((Record::Case(values), self)))
}
}
struct CompressedData<R: Read + Seek> {
- reader: R,
- endian: Endian,
- var_types: Vec<VarType>,
+ common: CommonState<R>, // Reader, endianness, bias and variable types carried over from the `Headers` state.
codes: VecDeque<u8>,
}
+impl<R: Read + Seek + 'static> CompressedData<R> {
+    /// Builds the compressed-data state around `common`, starting with an
+    /// empty queue of pending compression codes.
+    fn new(common: CommonState<R>) -> Self {
+        let codes = VecDeque::new();
+        Self { common, codes }
+    }
+}
+
impl<R: Read + Seek + 'static> State for CompressedData<R> {
fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let case_start = self.reader.stream_position()?;
- let mut values = Vec::with_capacity(self.var_types.len());
- let bias = 100.0; // XXX
- for (i, &var_type) in self.var_types.iter().enumerate() {
+ let case_start = self.common.reader.stream_position()?;
+ let mut values = Vec::with_capacity(self.common.var_types.len());
+ for (i, &var_type) in self.common.var_types.iter().enumerate() {
let value = loop {
let Some(code) = self.codes.pop_front() else {
- let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.reader)? else {
+ let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.common.reader)?
+ else {
if i == 0 {
return Ok(None);
} else {
- let offset = self.reader.stream_position()?;
+ let offset = self.common.reader.stream_position()?;
return Err(Error::EofInCompressedCase {
offset,
case_ofs: offset - case_start,
match code {
0 => (),
1..=251 => match var_type {
- VarType::Number => break Value::Number(Some(code as f64 - bias)),
- VarType::String => break Value::String(self.endian.to_bytes(code as f64 - bias)),
+ VarType::Number => break Value::Number(Some(code as f64 - self.common.bias)),
+ VarType::String => {
+ break Value::String(self.common.endian.to_bytes(code as f64 - self.common.bias))
+ }
},
252 => {
if i == 0 {
return Ok(None);
} else {
- let offset = self.reader.stream_position()?;
+ let offset = self.common.reader.stream_position()?;
return Err(Error::PartialCompressedCase {
offset,
case_ofs: offset - case_start,
});
}
}
- 253 => break Value::from_raw(var_type, read_bytes(&mut self.reader)?, self.endian),
+ 253 => {
+ break Value::from_raw(
+ var_type,
+ read_bytes(&mut self.common.reader)?,
+ self.common.endian,
+ )
+ }
254 => match var_type {
VarType::String => break Value::String(*b" "), // XXX EBCDIC
VarType::Number => {
return Err(Error::CompressedStringExpected {
offset: case_start,
- case_ofs: self.reader.stream_position()? - case_start,
+ case_ofs: self.common.reader.stream_position()? - case_start,
})
}
},
VarType::String => {
return Err(Error::CompressedNumberExpected {
offset: case_start,
- case_ofs: self.reader.stream_position()? - case_start,
+ case_ofs: self.common.reader.stream_position()? - case_start,
})
}
},
where
R: Read + Seek,
{
- reader: Option<ZlibDecoder<BufReader<R>>>,
+ reader: Option<ZlibDecoder<R>>,
}
impl<R> ZlibDecodeMultiple<R>
where
R: Read + Seek,
{
- fn new(reader: BufReader<R>) -> ZlibDecodeMultiple<R> {
+ fn new(reader: R) -> ZlibDecodeMultiple<R> { // Takes the reader directly and starts with a single `ZlibDecoder` wrapped around it.
ZlibDecodeMultiple {
reader: Some(ZlibDecoder::new(reader)),
}
}
}
-/*
-impl<R> BufRead for ZlibDecodeMultiple<R>
-where
- R: Read + Seek,
-{
- fn fill_buf(&mut self) -> Result<&[u8], IoError> {
- self.reader.as_mut().unwrap().fill_buf()
- }
- fn consume(&mut self, amt: usize) {
- self.reader.as_mut().unwrap().consume(amt)
- }
-}*/
-
-enum ReaderState {
- Start,
- Headers(Endian, Option<Compression>),
- Data(Endian),
- CompressedData(Endian, VecDeque<u8>),
- ZHeader(Endian),
- ZTrailer {
- endian: Endian,
- ztrailer_ofs: u64,
- ztrailer_len: u64,
- },
- //ZData,
- End,
-}
-
#[derive(Copy, Clone)]
pub enum Value {
Number(Option<f64>),
}
}
-impl<R: Read + Seek> Reader<R> {
- pub fn new(r: R) -> Result<Reader<R>, Error> {
+pub struct Reader { // Iterator over the records of a file, driven by a chain of boxed `State` objects.
+ state: Option<Box<dyn State>>, // State to run on the next call to `next`; `None` once iteration has ended or failed.
+}
+
+impl Reader {
+ pub fn new<R: Read + Seek + 'static>(reader: R) -> Result<Reader, Error> { // Nothing is read here; the first `next` call reads the header. NOTE(review): this always returns `Ok`.
Ok(Reader {
- r: BufReader::new(r),
- var_types: Vec::new(),
- state: ReaderState::Start,
+ state: Some(Box::new(Start { reader })), // `reader` is stored as given, with no `BufReader` wrapper; pass an already-buffered source if buffering is wanted.
})
}
- fn _next(&mut self) -> Result<Option<Record>, Error> {
- match self.state {
- ReaderState::Start => {
- let header = read_header(&mut self.r)?;
- self.state = ReaderState::Headers(header.endianness, header.compression);
- Ok(Some(Record::Header(header)))
- }
- ReaderState::Headers(endian, compression) => {
- let rec_type: u32 = endian.parse(read_bytes(&mut self.r)?);
- let record = match rec_type {
- 2 => {
- let variable = read_variable_record(&mut self.r, endian)?;
- self.var_types.push(VarType::from_width(variable.width));
- Record::Variable(variable)
- }
- 3 => Record::ValueLabel(read_value_label_record(&mut self.r, endian)?),
- 4 => Record::VarIndexes(read_var_indexes_record(&mut self.r, endian)?),
- 6 => Record::Document(read_document_record(&mut self.r, endian)?),
- 7 => Record::Extension(read_extension_record(&mut self.r, endian)?),
- 999 => {
- let _: [u8; 4] = read_bytes(&mut self.r)?;
- self.state = match compression {
- None => ReaderState::Data(endian),
- Some(Compression::Simple) => {
- ReaderState::CompressedData(endian, VecDeque::new())
- }
- Some(Compression::ZLib) => ReaderState::ZHeader(endian),
- };
- return Ok(Some(Record::EndOfHeaders));
- }
- _ => {
- return Err(Error::BadRecordType {
- offset: self.r.stream_position()?,
- rec_type,
- })
- }
- };
- Ok(Some(record))
- }
- ReaderState::Data(endian) => {
- let case_start = self.r.stream_position()?;
- let mut values = Vec::with_capacity(self.var_types.len());
- for (i, &var_type) in self.var_types.iter().enumerate() {
- let Some(raw) = try_read_bytes(&mut self.r)? else {
- if i == 0 {
- return Ok(None);
- } else {
- let offset = self.r.stream_position()?;
- return Err(Error::EofInCase {
- offset,
- case_ofs: offset - case_start,
- case_len: self.var_types.len() * 8,
- });
- }
- };
- values.push(Value::from_raw(var_type, raw, endian));
- }
- Ok(Some(Record::Case(values)))
- }
- ReaderState::CompressedData(endian, ref mut codes) => {
- let case_start = self.r.stream_position()?;
- let mut values = Vec::with_capacity(self.var_types.len());
- let bias = 100.0; // XXX
- for (i, &var_type) in self.var_types.iter().enumerate() {
- let value = loop {
- let Some(code) = codes.pop_front() else {
- let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.r)?
- else {
- if i == 0 {
- return Ok(None);
- } else {
- let offset = self.r.stream_position()?;
- return Err(Error::EofInCompressedCase {
- offset,
- case_ofs: offset - case_start,
- });
- }
- };
- codes.extend(new_codes.into_iter());
- continue;
- };
- match code {
- 0 => (),
- 1..=251 => match var_type {
- VarType::Number => break Value::Number(Some(code as f64 - bias)),
- VarType::String => {
- break Value::String(endian.to_bytes(code as f64 - bias))
- }
- },
- 252 => {
- if i == 0 {
- return Ok(None);
- } else {
- let offset = self.r.stream_position()?;
- return Err(Error::PartialCompressedCase {
- offset,
- case_ofs: offset - case_start,
- });
- }
- }
- 253 => {
- break Value::from_raw(var_type, read_bytes(&mut self.r)?, endian)
- }
- 254 => match var_type {
- VarType::String => break Value::String(*b" "), // XXX EBCDIC
- VarType::Number => {
- return Err(Error::CompressedStringExpected {
- offset: case_start,
- case_ofs: self.r.stream_position()? - case_start,
- })
- }
- },
- 255 => match var_type {
- VarType::Number => break Value::Number(None),
- VarType::String => {
- return Err(Error::CompressedNumberExpected {
- offset: case_start,
- case_ofs: self.r.stream_position()? - case_start,
- })
- }
- },
- }
- };
- values.push(value);
- }
- Ok(Some(Record::Case(values)))
- }
- ReaderState::ZHeader(endian) => {
- let zheader = read_zheader(&mut self.r, endian)?;
- self.state = ReaderState::ZTrailer {
- endian,
- ztrailer_ofs: zheader.ztrailer_offset,
- ztrailer_len: zheader.ztrailer_len,
- };
- Ok(Some(Record::ZHeader(zheader)))
- }
- ReaderState::ZTrailer {
- endian,
- ztrailer_ofs,
- ztrailer_len,
- } => {
- //self.state = ReaderState::ZData;
- match read_ztrailer(&mut self.r, endian, ztrailer_ofs, ztrailer_len)? {
- Some(ztrailer) => Ok(Some(Record::ZTrailer(ztrailer))),
- None => self._next(),
- }
- }
- /*
- ReaderState::ZData(zlib_decoder) => {
- let zlib_decoder = zlib_decoder.unwrap_or_else(
- },
- */
- ReaderState::End => Ok(None),
- }
- }
}
-impl<R: Read + Seek> Iterator for Reader<R> {
+impl Iterator for Reader {
type Item = Result<Record, Error>;
fn next(&mut self) -> Option<Self::Item> {
- let retval = self._next();
- match retval {
- Ok(None) => {
- self.state = ReaderState::End;
- None
- }
- Ok(Some(record)) => Some(Ok(record)),
- Err(error) => {
- self.state = ReaderState::End;
- Some(Err(error))
+        // `take()` leaves `state` as `None`; it is only put back when a record
+        // was produced, so end-of-data and errors both finish the iteration.
+        match self.state.take()?.read() {
+            Ok(Some((record, following))) => {
+                self.state = Some(following);
+                Some(Ok(record))
}
+            Ok(None) => None,
+            Err(error) => Some(Err(error)),
}
}
}
+impl FusedIterator for Reader {} // Sound because `next` takes `state` and restores it only after yielding a record, so every later call returns `None`.
+
fn read_header<R: Read>(r: &mut R) -> Result<Header, Error> {
let magic: [u8; 4] = read_bytes(r)?;
let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
let eye_catcher: [u8; 60] = read_bytes(r)?;
let layout_code: [u8; 4] = read_bytes(r)?;
- let endianness = Endian::identify_u32(2, layout_code)
+ let endian = Endian::identify_u32(2, layout_code)
.or_else(|| Endian::identify_u32(2, layout_code))
.ok_or_else(|| Error::NotASystemFile)?;
- let layout_code = endianness.parse(layout_code);
+ let layout_code = endian.parse(layout_code);
- let nominal_case_size: u32 = endianness.parse(read_bytes(r)?);
+ let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
let nominal_case_size =
(nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
- let compression_code: u32 = endianness.parse(read_bytes(r)?);
+ let compression_code: u32 = endian.parse(read_bytes(r)?);
let compression = match (magic, compression_code) {
(Magic::ZSAV, 2) => Some(Compression::ZLib),
(Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
(_, code) => return Err(Error::InvalidSavCompression(code)),
};
- let weight_index: u32 = endianness.parse(read_bytes(r)?);
+ let weight_index: u32 = endian.parse(read_bytes(r)?);
let weight_index = (weight_index > 0).then_some(weight_index - 1);
- let n_cases: u32 = endianness.parse(read_bytes(r)?);
+ let n_cases: u32 = endian.parse(read_bytes(r)?);
let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
- let bias: f64 = endianness.parse(read_bytes(r)?);
+ let bias: f64 = endian.parse(read_bytes(r)?);
let creation_date: [u8; 9] = read_bytes(r)?;
let creation_time: [u8; 8] = read_bytes(r)?;
creation_time,
eye_catcher,
file_label,
- endianness,
+ endian,
})
}
pub label: Option<Vec<u8>>,
}
-fn read_variable_record<R: Read + Seek>(
- r: &mut BufReader<R>,
- endian: Endian,
-) -> Result<Variable, Error> {
+fn read_variable_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Variable, Error> {
let offset = r.stream_position()?;
let width: i32 = endian.parse(read_bytes(r)?);
let has_variable_label: u32 = endian.parse(read_bytes(r)?);
pub const MAX: u32 = u32::MAX / 8;
}
-fn read_value_label_record<R: Read + Seek>(
- r: &mut BufReader<R>,
- endian: Endian,
-) -> Result<ValueLabel, Error> {
+fn read_value_label_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ValueLabel, Error> {
let offset = r.stream_position()?;
let n: u32 = endian.parse(read_bytes(r)?);
if n > ValueLabel::MAX {
pub const MAX: u32 = u32::MAX / 8;
}
-fn read_var_indexes_record<R: Read + Seek>(
- r: &mut BufReader<R>,
- endian: Endian,
-) -> Result<VarIndexes, Error> {
+fn read_var_indexes_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<VarIndexes, Error> {
let offset = r.stream_position()?;
let n: u32 = endian.parse(read_bytes(r)?);
if n > VarIndexes::MAX {
pub lines: Vec<[u8; DOC_LINE_LEN as usize]>,
}
-fn read_document_record<R: Read + Seek>(
- r: &mut BufReader<R>,
- endian: Endian,
-) -> Result<Document, Error> {
+fn read_document_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Document, Error> {
let offset = r.stream_position()?;
let n: u32 = endian.parse(read_bytes(r)?);
match n {
}
}
-fn read_extension_record<R: Read + Seek>(
- r: &mut BufReader<R>,
- endian: Endian,
-) -> Result<Extension, Error> {
+fn read_extension_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Extension, Error> {
let subtype = endian.parse(read_bytes(r)?);
let offset = r.stream_position()?;
let size: u32 = endian.parse(read_bytes(r)?);
pub ztrailer_len: u64,
}
-fn read_zheader<R: Read + Seek>(r: &mut BufReader<R>, endian: Endian) -> Result<ZHeader, Error> {
+fn read_zheader<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
let offset = r.stream_position()?;
let zheader_offset: u64 = endian.parse(read_bytes(r)?);
let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
}
fn read_ztrailer<R: Read + Seek>(
- r: &mut BufReader<R>,
+ r: &mut R,
endian: Endian,
ztrailer_ofs: u64,
ztrailer_len: u64,
Ok(buf)
}
-fn read_vec<R: Read>(r: &mut BufReader<R>, n: usize) -> Result<Vec<u8>, IoError> {
+fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
let mut vec = vec![0; n];
r.read_exact(&mut vec)?;
Ok(vec)