#![allow(unused_variables)]
use endian::{Endian, Parse, ToBytes};
-//use flate2::bufread::ZlibDecoder;
+use flate2::bufread::ZlibDecoder;
use num::Integer;
use num_derive::FromPrimitive;
use std::{
CompressedStringExpected { offset: u64, case_ofs: u64 },
#[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
- BadZlibTrailerNBlocks { offset: u64, n_blocks: u32, expected_n_blocks: u64, ztrailer_len: u64 }
+ BadZlibTrailerNBlocks {
+ offset: u64,
+ n_blocks: u32,
+ expected_n_blocks: u64,
+ ztrailer_len: u64,
+ },
}
#[derive(Error, Debug)]
}
}
-pub struct Reader<R: Read> {
+pub struct Reader<R: Read + Seek> {
r: BufReader<R>,
var_types: Vec<VarType>,
state: ReaderState,
}
+/// One stage of reading a file, expressed as a state that is consumed by each read.
+trait State {
+ /// Attempts to read the next record.
+ ///
+ /// Takes ownership of the boxed state. On success returns either `None` (no further
+ /// record) or the record together with the state on which to call `read` next.
+ fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
+}
+
+/// Initial state: nothing has been read from the input yet.
+struct Start<R: Read + Seek> {
+ /// Buffered input from which the file header is read.
+ r: BufReader<R>,
+}
+
+impl<R: Read + Seek + 'static> State for Start<R> {
+ /// Reads the file header with `read_header` and returns it as `Record::Header`.
+ fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+ let header = read_header(&mut self.r)?;
+ // NOTE(review): the state handed back is this same `Start`, so the next call
+ // would run `read_header` again. Presumably this should transition to `Headers`,
+ // built from values carried by the header -- confirm.
+ Ok(Some((Record::Header(header), self)))
+ }
+}
+
+/// State for reading the records that precede the data: each read yields one record
+/// until the end-of-headers record (type 999) is seen.
+struct Headers<R: Read + Seek> {
+ /// Buffered input positioned at the next record.
+ reader: BufReader<R>,
+ /// Byte order used to parse the record type and passed to the record readers.
+ endian: Endian,
+ /// Selects the data state entered after the end of headers: `None` gives `Data`,
+ /// `Simple` gives `CompressedData`, `ZLib` gives `ZlibData`.
+ compression: Option<Compression>,
+ /// One entry appended per variable record read; handed on to the data state.
+ var_types: Vec<VarType>,
+}
+
+impl<R: Read + Seek + 'static> State for Headers<R> {
+    /// Reads one header-section record, dispatching on its numeric record type.
+    ///
+    /// Types 2, 3, 4, 6 and 7 produce a variable, value label, variable index,
+    /// document or extension record and stay in this state. Each variable record
+    /// also appends its type to `var_types`. Type 999 skips the following four
+    /// bytes, yields `Record::EndOfHeaders`, and moves to the data state chosen by
+    /// `compression`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::BadRecordType` for any other record type, and propagates
+    /// errors from the individual record readers.
+    fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+        let rec_type: u32 = self.endian.parse(read_bytes(&mut self.reader)?);
+
+        // End of headers: consume the trailing four bytes, then hand every field
+        // over to whichever data state matches the compression scheme.
+        if rec_type == 999 {
+            let _: [u8; 4] = read_bytes(&mut self.reader)?;
+            let Headers {
+                reader,
+                endian,
+                compression,
+                var_types,
+            } = *self;
+            let data_state: Box<dyn State> = match compression {
+                None => Box::new(Data {
+                    reader,
+                    endian,
+                    var_types,
+                }),
+                Some(Compression::Simple) => Box::new(CompressedData {
+                    reader,
+                    endian,
+                    var_types,
+                    codes: VecDeque::new(),
+                }),
+                Some(Compression::ZLib) => Box::new(ZlibData {
+                    reader: ZlibDecodeMultiple::new(reader),
+                    endian,
+                    var_types,
+                    codes: VecDeque::new(),
+                }),
+            };
+            return Ok(Some((Record::EndOfHeaders, data_state)));
+        }
+
+        let endian = self.endian;
+        let record = match rec_type {
+            2 => {
+                let var_rec = read_variable_record(&mut self.reader, endian)?;
+                self.var_types.push(VarType::from_width(var_rec.width));
+                Record::Variable(var_rec)
+            }
+            3 => Record::ValueLabel(read_value_label_record(&mut self.reader, endian)?),
+            4 => Record::VarIndexes(read_var_indexes_record(&mut self.reader, endian)?),
+            6 => Record::Document(read_document_record(&mut self.reader, endian)?),
+            7 => Record::Extension(read_extension_record(&mut self.reader, endian)?),
+            _ => {
+                let offset = self.reader.stream_position()?;
+                return Err(Error::BadRecordType { offset, rec_type });
+            }
+        };
+        Ok(Some((record, self)))
+    }
+}
+
+/// State for reading cases from data entered when no compression is in use.
+struct Data<R: Read + Seek> {
+ /// Buffered input positioned at the next case.
+ reader: BufReader<R>,
+ /// Byte order passed to `Value::from_raw` when decoding each raw value.
+ endian: Endian,
+ /// One entry per value in a case; one raw value is read for each entry.
+ var_types: Vec<VarType>,
+}
+
+impl<R: Read + Seek + 'static> State for Data<R> {
+    /// Reads one case: one raw value per entry in `var_types`, decoded with
+    /// `Value::from_raw`, and stays in this state.
+    ///
+    /// Returns `Ok(None)` when `try_read_bytes` reports no data for the first value
+    /// of a case.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::EofInCase` when the data runs out after at least one value
+    /// of the case has been read.
+    fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+        let case_start = self.reader.stream_position()?;
+        let n_vars = self.var_types.len();
+        let mut case = Vec::with_capacity(n_vars);
+        for (index, &var_type) in self.var_types.iter().enumerate() {
+            match try_read_bytes(&mut self.reader)? {
+                Some(raw) => case.push(Value::from_raw(var_type, raw, self.endian)),
+                // Running dry on a case boundary is a clean end of data.
+                None if index == 0 => return Ok(None),
+                None => {
+                    let offset = self.reader.stream_position()?;
+                    return Err(Error::EofInCase {
+                        offset,
+                        case_ofs: offset - case_start,
+                        case_len: n_vars * 8,
+                    });
+                }
+            }
+        }
+        Ok(Some((Record::Case(case), self)))
+    }
+}
+
+/// State for reading cases from data entered for `Compression::Simple`.
+struct CompressedData<R: Read + Seek> {
+ /// Buffered input positioned at the next unread compressed data.
+ reader: BufReader<R>,
+ /// Byte order passed to `read_compressed_data`.
+ endian: Endian,
+ /// One entry per value in a case.
+ var_types: Vec<VarType>,
+ /// Compression codes already read from the input but not yet consumed; kept
+ /// across calls because `read_compressed_data` refills it eight codes at a time.
+ codes: VecDeque<u8>,
+}
+
+/// Reads one case from code-compressed data.
+///
+/// Each value is introduced by a one-byte code taken from `codes`. When `codes` is
+/// empty it is refilled with the next eight bytes from `reader`. Codes mean:
+///
+/// * `0`: ignored; the next code is examined.
+/// * `1..=251`: the value is `code - bias`.
+/// * `252`: end of data.
+/// * `253`: the value follows literally in the input and is read with `read_bytes`.
+/// * `254`: a string value built from the space byte literal below.
+/// * `255`: a numeric value of `None`.
+///
+/// `var_types` is taken as a slice so callers can pass any contiguous sequence; a
+/// `&Vec<VarType>` argument still coerces.
+///
+/// Returns `Ok(None)` when the data ends (no more code bytes, or code 252) before
+/// the first value of a case, otherwise `Ok(Some(Record::Case(..)))`.
+///
+/// # Errors
+///
+/// * `Error::EofInCompressedCase` if the code bytes run out partway through a case.
+/// * `Error::PartialCompressedCase` if code 252 appears partway through a case.
+/// * `Error::CompressedStringExpected` if code 254 is used for a numeric variable.
+/// * `Error::CompressedNumberExpected` if code 255 is used for a string variable.
+fn read_compressed_data<R>(
+    reader: &mut R,
+    endian: Endian,
+    var_types: &[VarType],
+    codes: &mut VecDeque<u8>,
+) -> Result<Option<Record>, Error>
+where
+    R: Read + Seek,
+{
+    let case_start = reader.stream_position()?;
+    let mut values = Vec::with_capacity(var_types.len());
+    // NOTE(review): hard-coded bias; presumably this should come from the file
+    // header -- confirm.
+    let bias = 100.0; // XXX
+    for (i, &var_type) in var_types.iter().enumerate() {
+        let value = loop {
+            let Some(code) = codes.pop_front() else {
+                // Out of codes: fetch the next group of eight, or stop if the
+                // input has none left.
+                let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
+                    if i == 0 {
+                        return Ok(None);
+                    } else {
+                        let offset = reader.stream_position()?;
+                        return Err(Error::EofInCompressedCase {
+                            offset,
+                            case_ofs: offset - case_start,
+                        });
+                    }
+                };
+                codes.extend(new_codes);
+                continue;
+            };
+            match code {
+                0 => (),
+                1..=251 => match var_type {
+                    VarType::Number => break Value::Number(Some(code as f64 - bias)),
+                    VarType::String => break Value::String(endian.to_bytes(code as f64 - bias)),
+                },
+                252 => {
+                    if i == 0 {
+                        return Ok(None);
+                    } else {
+                        let offset = reader.stream_position()?;
+                        return Err(Error::PartialCompressedCase {
+                            offset,
+                            case_ofs: offset - case_start,
+                        });
+                    }
+                }
+                253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
+                254 => match var_type {
+                    // NOTE(review): literal reproduced exactly as found; confirm its
+                    // length matches the byte array that `Value::String` holds.
+                    VarType::String => break Value::String(*b" "), // XXX EBCDIC
+                    VarType::Number => {
+                        return Err(Error::CompressedStringExpected {
+                            offset: case_start,
+                            case_ofs: reader.stream_position()? - case_start,
+                        })
+                    }
+                },
+                255 => match var_type {
+                    VarType::Number => break Value::Number(None),
+                    VarType::String => {
+                        return Err(Error::CompressedNumberExpected {
+                            offset: case_start,
+                            case_ofs: reader.stream_position()? - case_start,
+                        })
+                    }
+                },
+            }
+        };
+        values.push(value);
+    }
+    Ok(Some(Record::Case(values)))
+}
+
+impl<R: Read + Seek + 'static> State for CompressedData<R> {
+    /// Reads the next case via `read_compressed_data` and stays in this state.
+    ///
+    /// Returns `Ok(None)` when `read_compressed_data` reports no further case, and
+    /// propagates its errors.
+    fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+        let case = read_compressed_data(
+            &mut self.reader,
+            self.endian,
+            &self.var_types,
+            &mut self.codes,
+        )?;
+        Ok(case.map(|record| (record, self as Box<dyn State>)))
+    }
+}
+
+/// State for reading cases from data entered for `Compression::ZLib`.
+struct ZlibData<R: Read + Seek> {
+ /// Decompressing wrapper around the input; cases are decoded from its output.
+ reader: ZlibDecodeMultiple<R>,
+ /// Byte order passed to `read_compressed_data`.
+ endian: Endian,
+ /// One entry per value in a case.
+ var_types: Vec<VarType>,
+ /// Compression codes already read from the decompressed stream but not yet
+ /// consumed; kept across calls.
+ codes: VecDeque<u8>,
+}
+
+impl<R: Read + Seek + 'static> State for ZlibData<R> {
+    /// Reads the next case from the decompressed stream via `read_compressed_data`
+    /// and stays in this state.
+    ///
+    /// Returns `Ok(None)` when `read_compressed_data` reports no further case, and
+    /// propagates its errors.
+    fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+        let case = read_compressed_data(
+            &mut self.reader,
+            self.endian,
+            &self.var_types,
+            &mut self.codes,
+        )?;
+        Ok(case.map(|record| (record, self as Box<dyn State>)))
+    }
+}
+
+/// Reader that decompresses a sequence of zlib streams from one underlying reader,
+/// starting a fresh decoder whenever the current one stops producing output.
+struct ZlibDecodeMultiple<R>
+where
+ R: Read + Seek,
+{
+ /// Decoder for the current zlib stream. It is an `Option` only so that it can be
+ /// taken apart and rebuilt around the same underlying reader; it is set back to
+ /// `Some` before `read` returns.
+ reader: Option<ZlibDecoder<BufReader<R>>>,
+}
+
+impl<R> ZlibDecodeMultiple<R>
+where
+    R: Read + Seek,
+{
+    /// Wraps `reader` in a zlib decoder for the first compressed stream.
+    fn new(reader: BufReader<R>) -> Self {
+        let first_stream = ZlibDecoder::new(reader);
+        Self {
+            reader: Some(first_stream),
+        }
+    }
+}
+
+impl<R> Read for ZlibDecodeMultiple<R>
+where
+    R: Read + Seek,
+{
+    /// Reads decompressed bytes into `buf`, moving on to the next zlib stream when
+    /// the current one produces no more output.
+    ///
+    /// Returns `Ok(0)` when `buf` is empty, or when the current stream has ended and
+    /// the underlying reader has no bytes left. Without those checks a zero-length
+    /// read would restart the decoder and retry forever.
+    ///
+    /// # Errors
+    ///
+    /// Propagates errors from the decoder and from the underlying reader.
+    /// NOTE(review): any bytes that follow the last zlib stream are handed to a new
+    /// decoder -- confirm that callers stop reading before trailing non-zlib data.
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
+        // A zero-length destination always reads zero bytes; that must not be
+        // mistaken for the end of a stream.
+        if buf.is_empty() {
+            return Ok(0);
+        }
+        loop {
+            let decoder = self
+                .reader
+                .as_mut()
+                .expect("decoder is restored before every return");
+            match decoder.read(buf)? {
+                0 => {
+                    let mut inner = self
+                        .reader
+                        .take()
+                        .expect("decoder is restored before every return")
+                        .into_inner();
+                    // Peek (without consuming) to see whether another stream can
+                    // follow. The decoder is put back before the result is checked
+                    // so that an I/O error here does not leave `self.reader` empty.
+                    let exhausted =
+                        std::io::BufRead::fill_buf(&mut inner).map(|pending| pending.is_empty());
+                    self.reader = Some(ZlibDecoder::new(inner));
+                    if exhausted? {
+                        return Ok(0);
+                    }
+                }
+                n => return Ok(n),
+            }
+        }
+    }
+}
+
+impl<R> Seek for ZlibDecodeMultiple<R>
+where
+    R: Read + Seek,
+{
+    /// Supports position queries only.
+    ///
+    /// `SeekFrom::Current(0)`, which is what `stream_position()` issues, returns the
+    /// current position of the underlying (compressed) reader, so callers that only
+    /// need an offset for error reporting no longer hit a panic. The value is an
+    /// offset in the compressed input, not in the decompressed data.
+    ///
+    /// # Errors
+    ///
+    /// Every other seek request fails with `ErrorKind::Unsupported`, because the
+    /// decompressed stream cannot be repositioned.
+    fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
+        match pos {
+            SeekFrom::Current(0) => self
+                .reader
+                .as_mut()
+                .expect("decoder is restored before every return")
+                .get_mut()
+                .stream_position(),
+            _ => Err(IoError::new(
+                std::io::ErrorKind::Unsupported,
+                "cannot seek within zlib-compressed data",
+            )),
+        }
+    }
+}
+
+/*
+impl<R> BufRead for ZlibDecodeMultiple<R>
+where
+ R: Read + Seek,
+{
+ fn fill_buf(&mut self) -> Result<&[u8], IoError> {
+ self.reader.as_mut().unwrap().fill_buf()
+ }
+ fn consume(&mut self, amt: usize) {
+ self.reader.as_mut().unwrap().consume(amt)
+ }
+}*/
+
enum ReaderState {
Start,
Headers(Endian, Option<Compression>),
Data(Endian),
CompressedData(Endian, VecDeque<u8>),
ZHeader(Endian),
- ZTrailer { endian: Endian, ztrailer_ofs: u64, ztrailer_len: u64 },
+ ZTrailer {
+ endian: Endian,
+ ztrailer_ofs: u64,
+ ztrailer_len: u64,
+ },
//ZData,
End,
}
return Ok(None);
} else {
let offset = self.r.stream_position()?;
- return Err(Error::EofInCase { offset, case_ofs: offset - case_start, case_len: self.var_types.len() * 8});
+ return Err(Error::EofInCase {
+ offset,
+ case_ofs: offset - case_start,
+ case_len: self.var_types.len() * 8,
+ });
}
};
values.push(Value::from_raw(var_type, raw, endian));
for (i, &var_type) in self.var_types.iter().enumerate() {
let value = loop {
let Some(code) = codes.pop_front() else {
- let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.r)? else {
+ let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.r)?
+ else {
if i == 0 {
return Ok(None);
} else {
let offset = self.r.stream_position()?;
- return Err(Error::EofInCompressedCase { offset, case_ofs: offset - case_start});
+ return Err(Error::EofInCompressedCase {
+ offset,
+ case_ofs: offset - case_start,
+ });
}
};
codes.extend(new_codes.into_iter());
}
ReaderState::ZHeader(endian) => {
let zheader = read_zheader(&mut self.r, endian)?;
- self.state = ReaderState::ZTrailer { endian, ztrailer_ofs: zheader.ztrailer_offset, ztrailer_len: zheader.ztrailer_len};
+ self.state = ReaderState::ZTrailer {
+ endian,
+ ztrailer_ofs: zheader.ztrailer_offset,
+ ztrailer_len: zheader.ztrailer_len,
+ };
Ok(Some(Record::ZHeader(zheader)))
}
- ReaderState::ZTrailer { endian, ztrailer_ofs, ztrailer_len } => {
+ ReaderState::ZTrailer {
+ endian,
+ ztrailer_ofs,
+ ztrailer_len,
+ } => {
//self.state = ReaderState::ZData;
match read_ztrailer(&mut self.r, endian, ztrailer_ofs, ztrailer_len)? {
- Some(ztrailer) => {
- Ok(Some(Record::ZTrailer(ztrailer)))
- },
- None => self._next()
+ Some(ztrailer) => Ok(Some(Record::ZTrailer(ztrailer))),
+ None => self._next(),
}
}
-/*
- ReaderState::ZData(zlib_decoder) => {
- let zlib_decoder = zlib_decoder.unwrap_or_else(
- },
-*/
+ /*
+ ReaderState::ZData(zlib_decoder) => {
+ let zlib_decoder = zlib_decoder.unwrap_or_else(
+ },
+ */
ReaderState::End => Ok(None),
}
}
pub compressed_size: u32,
}
-fn read_ztrailer<R: Read + Seek>(r: &mut BufReader<R>, endian: Endian, ztrailer_ofs: u64, ztrailer_len: u64) -> Result<Option<ZTrailer>, Error> {
+fn read_ztrailer<R: Read + Seek>(
+ r: &mut BufReader<R>,
+ endian: Endian,
+ ztrailer_ofs: u64,
+ ztrailer_len: u64,
+) -> Result<Option<ZTrailer>, Error> {
let start_offset = r.stream_position()?;
if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
- return Ok(None)
+ return Ok(None);
}
let int_bias = endian.parse(read_bytes(r)?);
let zero = endian.parse(read_bytes(r)?);
let n_blocks: u32 = endian.parse(read_bytes(r)?);
let expected_n_blocks = (ztrailer_len - 24) / 24;
if n_blocks as u64 != expected_n_blocks {
- return Err(Error::BadZlibTrailerNBlocks { offset: ztrailer_ofs, n_blocks, expected_n_blocks, ztrailer_len })
+ return Err(Error::BadZlibTrailerNBlocks {
+ offset: ztrailer_ofs,
+ n_blocks,
+ expected_n_blocks,
+ ztrailer_len,
+ });
}
let mut blocks = Vec::with_capacity(n_blocks as usize);
for _ in 0..n_blocks {
let compressed_ofs = endian.parse(read_bytes(r)?);
let uncompressed_size = endian.parse(read_bytes(r)?);
let compressed_size = endian.parse(read_bytes(r)?);
- blocks.push(ZBlock { uncompressed_ofs, compressed_ofs, uncompressed_size, compressed_size });
+ blocks.push(ZBlock {
+ uncompressed_ofs,
+ compressed_ofs,
+ uncompressed_size,
+ compressed_size,
+ });
}
r.seek(SeekFrom::Start(start_offset))?;
- Ok(Some(ZTrailer { offset: ztrailer_ofs, int_bias, zero, block_size, blocks }))
+ Ok(Some(ZTrailer {
+ offset: ztrailer_ofs,
+ int_bias,
+ zero,
+ block_size,
+ blocks,
+ }))
}
fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {