use flate2::read::ZlibDecoder;
use num::Integer;
-use num_derive::FromPrimitive;
use std::{
collections::VecDeque,
io::{Error as IoError, Read, Seek, SeekFrom},
iter::FusedIterator,
};
+use self::state::State;
+
#[derive(Copy, Clone, Debug)]
pub enum Compression {
Simple,
ValueLabel(ValueLabel),
VarIndexes(VarIndexes),
Extension(Extension),
- EndOfHeaders,
+ EndOfHeaders(u32),
ZHeader(ZHeader),
ZTrailer(ZTrailer),
Case(Vec<Value>),
}
+impl Record {
+ /// Reads the next record from `reader`.
+ ///
+ /// A `u32` record type code is parsed with `endian` and selects which
+ /// record reader handles the rest of the record.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`Error::BadRecordType`] when the type code is not one of the
+ /// values matched below, and propagates any error from the selected reader.
+ fn read<R: Read + Seek>(reader: &mut R, endian: Endian) -> Result<Record, Error> {
+ let rec_type: u32 = endian.parse(read_bytes(reader)?);
+ match rec_type {
+ 2 => Ok(Record::Variable(Variable::read(reader, endian)?)),
+ 3 => Ok(Record::ValueLabel(ValueLabel::read(reader, endian)?)),
+ 4 => Ok(Record::VarIndexes(VarIndexes::read(reader, endian)?)),
+ 6 => Ok(Record::Document(Document::read(reader, endian)?)),
+ 7 => Ok(Record::Extension(Extension::read(reader, endian)?)),
+ // Type 999 is followed by one more `u32`, kept as the variant's payload.
+ 999 => Ok(Record::EndOfHeaders(endian.parse(read_bytes(reader)?))),
+ _ => Err(Error::BadRecordType {
+ // Position reported is the one after the type code was consumed.
+ offset: reader.stream_position()?,
+ rec_type,
+ }),
+ }
+ }
+}
+
pub struct Header {
/// Magic number.
pub magic: Magic,
pub endian: Endian,
}
+impl Header {
+    /// Reads the file header record from `r`.
+    ///
+    /// The magic number is validated first.  The byte order is then detected
+    /// from the layout code field, and every later multi-byte field is parsed
+    /// with that byte order.
+    ///
+    /// `nominal_case_size` and `n_cases` are stored as `None` when the raw
+    /// value falls outside the accepted range.  A raw `weight_index` of zero is
+    /// stored as `None`; any other value is stored minus one.
+    ///
+    /// # Errors
+    ///
+    /// * [`Error::NotASystemFile`] if the magic number is not recognized or
+    ///   the layout code matches neither byte order.
+    /// * [`Error::InvalidZsavCompression`] or [`Error::InvalidSavCompression`]
+    ///   if the compression code is not valid for the magic number.
+    /// * Any error reported while reading bytes from `r`.
+    fn read<R: Read>(r: &mut R) -> Result<Header, Error> {
+        let magic: [u8; 4] = read_bytes(r)?;
+        let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
+
+        let eye_catcher: [u8; 60] = read_bytes(r)?;
+        let layout_code: [u8; 4] = read_bytes(r)?;
+        // The layout code is normally 2, but files using 3 also exist.  The
+        // fallback used to repeat the test for 2, which made it dead code.
+        let endian = Endian::identify_u32(2, layout_code)
+            .or_else(|| Endian::identify_u32(3, layout_code))
+            .ok_or(Error::NotASystemFile)?;
+        let layout_code = endian.parse(layout_code);
+
+        let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
+        let nominal_case_size =
+            (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
+
+        let compression_code: u32 = endian.parse(read_bytes(r)?);
+        let compression = match (magic, compression_code) {
+            (Magic::ZSAV, 2) => Some(Compression::ZLib),
+            (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
+            (_, 0) => None,
+            (_, 1) => Some(Compression::Simple),
+            (_, code) => return Err(Error::InvalidSavCompression(code)),
+        };
+
+        let weight_index: u32 = endian.parse(read_bytes(r)?);
+        // `then_some(weight_index - 1)` evaluated the subtraction even for
+        // zero, which overflows; `checked_sub` yields `None` for zero instead.
+        let weight_index = weight_index.checked_sub(1);
+
+        let n_cases: u32 = endian.parse(read_bytes(r)?);
+        let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
+
+        let bias: f64 = endian.parse(read_bytes(r)?);
+
+        let creation_date: [u8; 9] = read_bytes(r)?;
+        let creation_time: [u8; 8] = read_bytes(r)?;
+        let file_label: [u8; 64] = read_bytes(r)?;
+        // Three trailing bytes are read and discarded.
+        let _: [u8; 3] = read_bytes(r)?;
+
+        Ok(Header {
+            magic,
+            layout_code,
+            nominal_case_size,
+            compression,
+            weight_index,
+            n_cases,
+            bias,
+            creation_date,
+            creation_time,
+            eye_catcher,
+            file_label,
+            endian,
+        })
+    }
+}
+
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Magic([u8; 4]);
}
}
-trait State {
- #[allow(clippy::type_complexity)]
- fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
-}
+mod state {
+ use super::{
+ Compression, Error, Header, Record, Value, VarType, Variable, ZHeader, ZTrailer,
+ ZlibDecodeMultiple,
+ };
+ use crate::endian::Endian;
+ use std::{
+ collections::VecDeque,
+ io::{Read, Seek},
+ };
-struct Start<R: Read + Seek> {
- reader: R,
-}
+ /// One step of the reader's state machine.
+ pub trait State {
+ /// Consumes the current state and reads one record.
+ ///
+ /// On success returns the record together with the state to continue
+ /// from, or `None` when there is nothing more to read.
+ #[allow(clippy::type_complexity)]
+ fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
+ }
-struct CommonState<R: Read + Seek> {
- reader: R,
- endian: Endian,
- bias: f64,
- compression: Option<Compression>,
- var_types: Vec<VarType>,
-}
+ /// Initial state: owns the reader before anything has been read from it.
+ struct Start<R: Read + Seek> {
+ reader: R,
+ }
-impl<R: Read + Seek + 'static> State for Start<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let header = read_header(&mut self.reader)?;
- let next_state = Headers(CommonState {
- reader: self.reader,
- endian: header.endian,
- bias: header.bias,
- compression: header.compression,
- var_types: Vec::new(),
- });
- Ok(Some((Record::Header(header), Box::new(next_state))))
+ /// Builds the initial state for `reader`.  Nothing is read here.
+ pub fn new<R: Read + Seek + 'static>(reader: R) -> Box<dyn State> {
+ Box::new(Start { reader })
+ }
+
+ /// Data handed from state to state once the file header has been read.
+ ///
+ /// `endian`, `bias` and `compression` are copied from the header, and
+ /// `var_types` gains one entry per variable record seen.
+ struct CommonState<R: Read + Seek> {
+ reader: R,
+ endian: Endian,
+ bias: f64,
+ compression: Option<Compression>,
+ var_types: Vec<VarType>,
}
-}
-struct Headers<R: Read + Seek>(CommonState<R>);
+ impl<R: Read + Seek + 'static> State for Start<R> {
+ /// Reads the file header and moves on to the [`Headers`] state, seeding
+ /// the shared state from the header's fields.
+ fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+ let header = Header::read(&mut self.reader)?;
+ let next_state = Headers(CommonState {
+ reader: self.reader,
+ endian: header.endian,
+ bias: header.bias,
+ compression: header.compression,
+ var_types: Vec::new(),
+ });
+ Ok(Some((Record::Header(header), Box::new(next_state))))
+ }
+ }
-impl<R: Read + Seek + 'static> State for Headers<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let endian = self.0.endian;
- let rec_type: u32 = endian.parse(read_bytes(&mut self.0.reader)?);
- let record = match rec_type {
- 2 => {
- let variable = read_variable_record(&mut self.0.reader, endian)?;
- self.0.var_types.push(VarType::from_width(variable.width));
- Record::Variable(variable)
- }
- 3 => Record::ValueLabel(read_value_label_record(&mut self.0.reader, endian)?),
- 4 => Record::VarIndexes(read_var_indexes_record(&mut self.0.reader, endian)?),
- 6 => Record::Document(read_document_record(&mut self.0.reader, endian)?),
- 7 => Record::Extension(read_extension_record(&mut self.0.reader, endian)?),
- 999 => {
- let _: [u8; 4] = read_bytes(&mut self.0.reader)?;
- let next_state: Box<dyn State> = match self.0.compression {
- None => Box::new(Data(self.0)),
- Some(Compression::Simple) => Box::new(CompressedData::new(self.0)),
- Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)),
- };
- return Ok(Some((Record::EndOfHeaders, next_state)));
- }
- _ => {
- return Err(Error::BadRecordType {
- offset: self.0.reader.stream_position()?,
- rec_type,
- })
+ /// State that reads records with [`Record::read`] until the end-of-headers
+ /// record has been seen.
+ struct Headers<R: Read + Seek>(CommonState<R>);
+
+ impl<R: Read + Seek + 'static> State for Headers<R> {
+ /// Reads one record and stays in this state, except that an
+ /// end-of-headers record switches to the state chosen by the header's
+ /// compression setting.
+ fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+ let record = Record::read(&mut self.0.reader, self.0.endian)?;
+ match record {
+ // Remember each variable's type; the case readers take this list.
+ Record::Variable(Variable { width, .. }) => {
+ self.0.var_types.push(VarType::from_width(width));
+ }
+ Record::EndOfHeaders(_) => {
+ let next_state: Box<dyn State> = match self.0.compression {
+ None => Box::new(Data(self.0)),
+ Some(Compression::Simple) => Box::new(CompressedData::new(self.0)),
+ Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)),
+ };
+ return Ok(Some((record, next_state)));
+ }
+ _ => (),
+ };
+ Ok(Some((record, self)))
+ }
+ }
+
+    /// State entered after the end-of-headers record when the header selects
+    /// ZLib compression.
+    struct ZlibHeader<R: Read + Seek>(CommonState<R>);
+
+    impl<R: Read + Seek + 'static> State for ZlibHeader<R> {
+        /// Reads the ZLIB header and hands off to the [`ZlibTrailer`] state.
+        ///
+        /// Returning `self` as the next state, as this used to do, made the
+        /// state machine read one ZLIB header after another and never reach
+        /// the trailer or the data.
+        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+            let zheader = ZHeader::read(&mut self.0.reader, self.0.endian)?;
+            // The trailer state needs the header's offsets while the record
+            // itself is handed to the caller, so copy the fields one by one
+            // (this does not rely on `ZHeader` implementing `Clone`).
+            let for_trailer = ZHeader {
+                offset: zheader.offset,
+                zheader_offset: zheader.zheader_offset,
+                ztrailer_offset: zheader.ztrailer_offset,
+                ztrailer_len: zheader.ztrailer_len,
+            };
+            let next_state: Box<dyn State> = Box::new(ZlibTrailer(self.0, for_trailer));
+            Ok(Some((Record::ZHeader(zheader), next_state)))
+        }
+    }
+
+ /// State that reads the ZLIB trailer described by the stored [`ZHeader`].
+ struct ZlibTrailer<R: Read + Seek>(CommonState<R>, ZHeader);
+
+ impl<R: Read + Seek + 'static> State for ZlibTrailer<R> {
+ /// Reads the trailer using the offset and length from the stored header,
+ /// then continues with compressed cases read through a
+ /// `ZlibDecodeMultiple` wrapped around the reader.
+ ///
+ /// When `ZTrailer::read` returns `None`, no trailer record is emitted and
+ /// the first case is read right away.
+ fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+ let retval = ZTrailer::read(
+ &mut self.0.reader,
+ self.0.endian,
+ self.1.ztrailer_offset,
+ self.1.ztrailer_len,
+ )?;
+ let next_state = Box::new(CompressedData::new(CommonState {
+ reader: ZlibDecodeMultiple::new(self.0.reader),
+ endian: self.0.endian,
+ bias: self.0.bias,
+ compression: self.0.compression,
+ var_types: self.0.var_types,
+ }));
+ match retval {
+ None => next_state.read(),
+ Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state))),
}
- };
- Ok(Some((record, self)))
+ }
}
-}
-struct ZlibHeader<R: Read + Seek>(CommonState<R>);
+ /// State that reads cases with [`Value::read_case`].
+ struct Data<R: Read + Seek>(CommonState<R>);
+
+ impl<R: Read + Seek + 'static> State for Data<R> {
+ /// Reads one case and stays in this state; returns `None` once
+ /// `Value::read_case` reports that there are no more cases.
+ fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+ match Value::read_case(&mut self.0.reader, &self.0.var_types, self.0.endian)? {
+ None => Ok(None),
+ Some(values) => Ok(Some((Record::Case(values), self))),
+ }
+ }
+ }
-impl<R: Read + Seek + 'static> State for ZlibHeader<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let zheader = read_zheader(&mut self.0.reader, self.0.endian)?;
- Ok(Some((Record::ZHeader(zheader), self)))
+ /// State that reads cases with [`Value::read_compressed_case`].
+ ///
+ /// `codes` is the queue of not-yet-consumed codes that is passed to every
+ /// call, so leftovers from one case carry over to the next.
+ struct CompressedData<R: Read + Seek> {
+ common: CommonState<R>,
+ codes: VecDeque<u8>,
}
-}
-struct ZlibTrailer<R: Read + Seek>(CommonState<R>, ZHeader);
+ impl<R: Read + Seek + 'static> CompressedData<R> {
+ /// Wraps `common` with an empty code queue.
+ fn new(common: CommonState<R>) -> CompressedData<R> {
+ CompressedData {
+ common,
+ codes: VecDeque::new(),
+ }
+ }
+ }
-impl<R: Read + Seek + 'static> State for ZlibTrailer<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let retval = read_ztrailer(&mut self.0.reader, self.0.endian, self.1.ztrailer_offset, self.1.ztrailer_len)?;
- let next_state = Box::new(CompressedData::new(CommonState {
- reader: ZlibDecodeMultiple::new(self.0.reader),
- endian: self.0.endian,
- bias: self.0.bias,
- compression: self.0.compression,
- var_types: self.0.var_types
- }));
- match retval {
- None => next_state.read(),
- Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state)))
- }
+ impl<R: Read + Seek + 'static> State for CompressedData<R> {
+ /// Reads one case and stays in this state; returns `None` once
+ /// `Value::read_compressed_case` reports that there are no more cases.
+ fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+ match Value::read_compressed_case(
+ &mut self.common.reader,
+ &self.common.var_types,
+ &mut self.codes,
+ self.common.endian,
+ self.common.bias,
+ )? {
+ None => Ok(None),
+ Some(values) => Ok(Some((Record::Case(values), self))),
+ }
+ }
}
}
-struct Data<R: Read + Seek>(CommonState<R>);
+/// A single 8-byte value from a case.
+#[derive(Copy, Clone)]
+pub enum Value {
+ /// A numeric value; `None` when the raw number equals `-f64::MAX`.
+ Number(Option<f64>),
+ /// The raw 8 bytes of a string value.
+ String([u8; 8]),
+}
-impl<R: Read + Seek + 'static> State for Data<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let case_start = self.0.reader.stream_position()?;
- let mut values = Vec::with_capacity(self.0.var_types.len());
- for (i, &var_type) in self.0.var_types.iter().enumerate() {
- let Some(raw) = try_read_bytes(&mut self.0.reader)? else {
+impl Value {
+ /// Interprets the 8 bytes in `raw` according to `var_type`.
+ ///
+ /// String bytes are kept as-is.  Numbers are parsed with `endian`, and a
+ /// result equal to `-f64::MAX` becomes `Value::Number(None)`.
+ pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
+ match var_type {
+ VarType::String => Value::String(raw),
+ VarType::Number => {
+ let number: f64 = endian.parse(raw);
+ Value::Number((number != -f64::MAX).then_some(number))
+ }
+ }
+ }
+
+ /// Reads one case: one 8-byte value per entry of `var_types`.
+ ///
+ /// Returns `Ok(None)` when `try_read_bytes` yields nothing for the first
+ /// value of the case.  If that happens for a later value, the case is
+ /// incomplete and [`Error::EofInCase`] is returned.
+ fn read_case<R: Read + Seek>(
+ reader: &mut R,
+ var_types: &[VarType],
+ endian: Endian,
+ ) -> Result<Option<Vec<Value>>, Error> {
+ let case_start = reader.stream_position()?;
+ let mut values = Vec::with_capacity(var_types.len());
+ for (i, &var_type) in var_types.iter().enumerate() {
+ let Some(raw) = try_read_bytes(reader)? else {
if i == 0 {
return Ok(None);
} else {
- let offset = self.0.reader.stream_position()?;
+ let offset = reader.stream_position()?;
return Err(Error::EofInCase {
offset,
case_ofs: offset - case_start,
- case_len: self.0.var_types.len() * 8,
+ case_len: var_types.len() * 8,
});
}
};
- values.push(Value::from_raw(var_type, raw, self.0.endian));
+ values.push(Value::from_raw(var_type, raw, endian));
}
- Ok(Some((Record::Case(values), self)))
+ Ok(Some(values))
}
-}
-struct CompressedData<R: Read + Seek> {
- common: CommonState<R>,
- codes: VecDeque<u8>,
-}
-
-impl<R: Read + Seek + 'static> CompressedData<R> {
- fn new(common: CommonState<R>) -> CompressedData<R> {
- CompressedData { common, codes: VecDeque::new() }
- }
-}
-
-impl<R: Read + Seek + 'static> State for CompressedData<R> {
- fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
- let case_start = self.common.reader.stream_position()?;
- let mut values = Vec::with_capacity(self.common.var_types.len());
- for (i, &var_type) in self.common.var_types.iter().enumerate() {
+ fn read_compressed_case<R: Read + Seek>(
+ reader: &mut R,
+ var_types: &[VarType],
+ codes: &mut VecDeque<u8>,
+ endian: Endian,
+ bias: f64,
+ ) -> Result<Option<Vec<Value>>, Error> {
+ let case_start = reader.stream_position()?;
+ let mut values = Vec::with_capacity(var_types.len());
+ for (i, &var_type) in var_types.iter().enumerate() {
let value = loop {
- let Some(code) = self.codes.pop_front() else {
- let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.common.reader)?
- else {
+ let Some(code) = codes.pop_front() else {
+ let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
if i == 0 {
return Ok(None);
} else {
- let offset = self.common.reader.stream_position()?;
+ let offset = reader.stream_position()?;
return Err(Error::EofInCompressedCase {
offset,
case_ofs: offset - case_start,
});
}
};
- self.codes.extend(new_codes.into_iter());
+ codes.extend(new_codes.into_iter());
continue;
};
match code {
0 => (),
1..=251 => match var_type {
- VarType::Number => break Value::Number(Some(code as f64 - self.common.bias)),
+ VarType::Number => break Value::Number(Some(code as f64 - bias)),
VarType::String => {
- break Value::String(self.common.endian.to_bytes(code as f64 - self.common.bias))
+ break Value::String(endian.to_bytes(code as f64 - bias))
}
},
252 => {
if i == 0 {
return Ok(None);
} else {
- let offset = self.common.reader.stream_position()?;
+ let offset = reader.stream_position()?;
return Err(Error::PartialCompressedCase {
offset,
case_ofs: offset - case_start,
});
}
}
- 253 => {
- break Value::from_raw(
- var_type,
- read_bytes(&mut self.common.reader)?,
- self.common.endian,
- )
- }
+ 253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
254 => match var_type {
VarType::String => break Value::String(*b" "), // XXX EBCDIC
VarType::Number => {
return Err(Error::CompressedStringExpected {
offset: case_start,
- case_ofs: self.common.reader.stream_position()? - case_start,
+ case_ofs: reader.stream_position()? - case_start,
})
}
},
VarType::String => {
return Err(Error::CompressedNumberExpected {
offset: case_start,
- case_ofs: self.common.reader.stream_position()? - case_start,
+ case_ofs: reader.stream_position()? - case_start,
})
}
},
};
values.push(value);
}
- Ok(Some((Record::Case(values), self)))
+ Ok(Some(values))
}
}
}
}
-#[derive(Copy, Clone)]
-pub enum Value {
- Number(Option<f64>),
- String([u8; 8]),
-}
-
-impl Value {
- pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
- match var_type {
- VarType::String => Value::String(raw),
- VarType::Number => {
- let number: f64 = endian.parse(raw);
- Value::Number((number != -f64::MAX).then_some(number))
- }
- }
- }
-}
-
pub struct Reader {
state: Option<Box<dyn State>>,
}
impl Reader {
+ /// Creates a reader over `reader`.
+ ///
+ /// Only the initial state is built here; nothing is read from `reader`,
+ /// and this body always returns `Ok`.
pub fn new<R: Read + Seek + 'static>(reader: R) -> Result<Reader, Error> {
Ok(Reader {
- state: Some(Box::new(Start { reader })),
+ state: Some(state::new(reader)),
})
}
}
impl FusedIterator for Reader {}
-fn read_header<R: Read>(r: &mut R) -> Result<Header, Error> {
- let magic: [u8; 4] = read_bytes(r)?;
- let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
-
- let eye_catcher: [u8; 60] = read_bytes(r)?;
- let layout_code: [u8; 4] = read_bytes(r)?;
- let endian = Endian::identify_u32(2, layout_code)
- .or_else(|| Endian::identify_u32(2, layout_code))
- .ok_or_else(|| Error::NotASystemFile)?;
- let layout_code = endian.parse(layout_code);
-
- let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
- let nominal_case_size =
- (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
-
- let compression_code: u32 = endian.parse(read_bytes(r)?);
- let compression = match (magic, compression_code) {
- (Magic::ZSAV, 2) => Some(Compression::ZLib),
- (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
- (_, 0) => None,
- (_, 1) => Some(Compression::Simple),
- (_, code) => return Err(Error::InvalidSavCompression(code)),
- };
-
- let weight_index: u32 = endian.parse(read_bytes(r)?);
- let weight_index = (weight_index > 0).then_some(weight_index - 1);
-
- let n_cases: u32 = endian.parse(read_bytes(r)?);
- let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
-
- let bias: f64 = endian.parse(read_bytes(r)?);
-
- let creation_date: [u8; 9] = read_bytes(r)?;
- let creation_time: [u8; 8] = read_bytes(r)?;
- let file_label: [u8; 64] = read_bytes(r)?;
- let _: [u8; 3] = read_bytes(r)?;
-
- Ok(Header {
- magic,
- layout_code,
- nominal_case_size,
- compression,
- weight_index,
- n_cases,
- bias,
- creation_date,
- creation_time,
- eye_catcher,
- file_label,
- endian,
- })
-}
-
pub struct Variable {
/// Offset from the start of the file to the start of the record.
pub offset: u64,
pub label: Option<Vec<u8>>,
}
-fn read_variable_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Variable, Error> {
- let offset = r.stream_position()?;
- let width: i32 = endian.parse(read_bytes(r)?);
- let has_variable_label: u32 = endian.parse(read_bytes(r)?);
- let missing_value_code: i32 = endian.parse(read_bytes(r)?);
- let print_format: u32 = endian.parse(read_bytes(r)?);
- let write_format: u32 = endian.parse(read_bytes(r)?);
- let name: [u8; 8] = read_bytes(r)?;
+impl Variable {
+ /// Reads a variable record from `r`, parsing multi-byte fields with `endian`.
+ ///
+ /// The record holds the width, label flag, missing-value code, print and
+ /// write formats and an 8-byte name, followed by an optional label and the
+ /// missing values.  At most 65535 bytes of the label are kept; the rest of
+ /// the label and its padding to a multiple of 4 bytes are skipped.
+ ///
+ /// # Errors
+ ///
+ /// * [`Error::BadVariableLabelCode`] if the label flag is neither 0 nor 1.
+ /// * [`Error::BadNumericMissingValueCode`] or
+ ///   [`Error::BadStringMissingValueCode`] if the missing-value code is not
+ ///   valid for the width (width 0 takes the numeric rules).
+ /// * Any error reported while reading from or seeking in `r`.
+ fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Variable, Error> {
+ let offset = r.stream_position()?;
+ let width: i32 = endian.parse(read_bytes(r)?);
+ let has_variable_label: u32 = endian.parse(read_bytes(r)?);
+ let missing_value_code: i32 = endian.parse(read_bytes(r)?);
+ let print_format: u32 = endian.parse(read_bytes(r)?);
+ let write_format: u32 = endian.parse(read_bytes(r)?);
+ let name: [u8; 8] = read_bytes(r)?;
- let label = match has_variable_label {
- 0 => None,
- 1 => {
- let len: u32 = endian.parse(read_bytes(r)?);
- let read_len = len.min(65535) as usize;
- let label = Some(read_vec(r, read_len)?);
+ let label = match has_variable_label {
+ 0 => None,
+ 1 => {
+ let len: u32 = endian.parse(read_bytes(r)?);
+ let read_len = len.min(65535) as usize;
+ let label = Some(read_vec(r, read_len)?);
- let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
- let _ = read_vec(r, padding_bytes as usize)?;
-
- label
- }
- _ => {
- return Err(Error::BadVariableLabelCode {
- offset,
- code: has_variable_label,
- })
- }
- };
+ // Only `read_len` bytes were consumed above, so skip the unread
+ // remainder of an over-long label together with the padding to a
+ // multiple of 4.  Skipping the padding alone left the stream in
+ // the middle of the label.  The arithmetic is done in `u64` so
+ // that rounding up cannot overflow.
+ let padded_len = Integer::next_multiple_of(&u64::from(len), &4);
+ let skip = padded_len - read_len as u64;
+ // `skip` is at most `u32::MAX + 3`, so the cast cannot truncate.
+ r.seek(SeekFrom::Current(skip as i64))?;
- let mut missing = Vec::new();
- if missing_value_code != 0 {
- match (width, missing_value_code) {
- (0, -3 | -2 | 1 | 2 | 3) => (),
- (0, _) => {
- return Err(Error::BadNumericMissingValueCode {
- offset,
- code: missing_value_code,
- })
+ label
}
- (_, 0..=3) => (),
- (_, _) => {
- return Err(Error::BadStringMissingValueCode {
+ _ => {
+ return Err(Error::BadVariableLabelCode {
offset,
- code: missing_value_code,
+ code: has_variable_label,
})
}
- }
+ };
+
+ let mut missing = Vec::new();
+ if missing_value_code != 0 {
+ match (width, missing_value_code) {
+ (0, -3 | -2 | 1 | 2 | 3) => (),
+ (0, _) => {
+ return Err(Error::BadNumericMissingValueCode {
+ offset,
+ code: missing_value_code,
+ })
+ }
+ (_, 0..=3) => (),
+ (_, _) => {
+ return Err(Error::BadStringMissingValueCode {
+ offset,
+ code: missing_value_code,
+ })
+ }
+ }
- for _ in 0..missing_value_code.abs() {
- missing.push(read_bytes(r)?);
+ // The magnitude of the code is the number of values that follow.
+ for _ in 0..missing_value_code.abs() {
+ missing.push(read_bytes(r)?);
+ }
}
- }
- Ok(Variable {
- offset,
- width,
- name,
- print_format,
- write_format,
- missing_value_code,
- missing,
- label,
- })
+ Ok(Variable {
+ offset,
+ width,
+ name,
+ print_format,
+ write_format,
+ missing_value_code,
+ missing,
+ label,
+ })
+ }
}
pub struct ValueLabel {
impl ValueLabel {
/// Maximum number of value labels in a record.
pub const MAX: u32 = u32::MAX / 8;
-}
-fn read_value_label_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ValueLabel, Error> {
- let offset = r.stream_position()?;
- let n: u32 = endian.parse(read_bytes(r)?);
- if n > ValueLabel::MAX {
- return Err(Error::BadNumberOfValueLabels {
- offset,
- n,
- max: ValueLabel::MAX,
- });
- }
+ /// Reads a value label record from `r`, parsing multi-byte fields with
+ /// `endian`.
+ ///
+ /// The record is a count followed by that many entries, each an 8-byte
+ /// value, a 1-byte label length and the label text.  Length byte and text
+ /// together are padded to a multiple of 8 bytes.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`Error::BadNumberOfValueLabels`] if the count exceeds
+ /// [`ValueLabel::MAX`], and propagates any error from reading `r`.
+ fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ValueLabel, Error> {
+ let offset = r.stream_position()?;
+ let n: u32 = endian.parse(read_bytes(r)?);
+ if n > ValueLabel::MAX {
+ return Err(Error::BadNumberOfValueLabels {
+ offset,
+ n,
+ max: ValueLabel::MAX,
+ });
+ }
- let mut labels = Vec::new();
- for _ in 0..n {
- let value: [u8; 8] = read_bytes(r)?;
- let label_len: u8 = endian.parse(read_bytes(r)?);
- let label_len = label_len as usize;
- let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
+ let mut labels = Vec::new();
+ for _ in 0..n {
+ let value: [u8; 8] = read_bytes(r)?;
+ let label_len: u8 = endian.parse(read_bytes(r)?);
+ let label_len = label_len as usize;
+ let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
- let mut label = read_vec(r, padded_len)?;
- label.truncate(label_len);
- labels.push((value, label));
+ // `padded_len` counts the length byte that was already consumed, so
+ // only `padded_len - 1` bytes remain.  Reading `padded_len` bytes
+ // swallowed the first byte of whatever follows.  `padded_len` is at
+ // least 8, so the subtraction cannot underflow.
+ let mut label = read_vec(r, padded_len - 1)?;
+ label.truncate(label_len);
+ labels.push((value, label));
+ }
+ Ok(ValueLabel { offset, labels })
}
- Ok(ValueLabel { offset, labels })
}
pub struct VarIndexes {
impl VarIndexes {
/// Maximum number of variable indexes in a record.
pub const MAX: u32 = u32::MAX / 8;
-}
-fn read_var_indexes_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<VarIndexes, Error> {
- let offset = r.stream_position()?;
- let n: u32 = endian.parse(read_bytes(r)?);
- if n > VarIndexes::MAX {
- return Err(Error::BadNumberOfVarIndexes {
+ /// Reads a variable index record from `r`: a `u32` count followed by that
+ /// many indexes, all parsed with `endian`.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`Error::BadNumberOfVarIndexes`] if the count exceeds
+ /// [`VarIndexes::MAX`], and propagates any error from reading `r`.
+ fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<VarIndexes, Error> {
+ let offset = r.stream_position()?;
+ let n: u32 = endian.parse(read_bytes(r)?);
+ if n > VarIndexes::MAX {
+ return Err(Error::BadNumberOfVarIndexes {
+ offset,
+ n,
+ max: VarIndexes::MAX,
+ });
+ }
+ let mut var_indexes = Vec::with_capacity(n as usize);
+ for _ in 0..n {
+ var_indexes.push(endian.parse(read_bytes(r)?));
+ }
+
+ Ok(VarIndexes {
offset,
- n,
- max: VarIndexes::MAX,
- });
- }
- let mut var_indexes = Vec::with_capacity(n as usize);
- for _ in 0..n {
- var_indexes.push(endian.parse(read_bytes(r)?));
+ var_indexes,
+ })
}
-
- Ok(VarIndexes {
- offset,
- var_indexes,
- })
}
-pub const DOC_LINE_LEN: u32 = 80;
-pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN;
-
pub struct Document {
/// Offset from the start of the file to the start of the record.
pub pos: u64,
- pub lines: Vec<[u8; DOC_LINE_LEN as usize]>,
+ pub lines: Vec<[u8; Document::LINE_LEN as usize]>,
}
-fn read_document_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Document, Error> {
- let offset = r.stream_position()?;
- let n: u32 = endian.parse(read_bytes(r)?);
- match n {
- 0..=DOC_MAX_LINES => {
- let pos = r.stream_position()?;
- let mut lines = Vec::with_capacity(n as usize);
- for _ in 0..n {
- let line: [u8; 80] = read_bytes(r)?;
- lines.push(line);
+impl Document {
+ /// Length of a line in a document. Document lines are fixed-length and
+ /// padded on the right with spaces.
+ pub const LINE_LEN: u32 = 80;
+
+ /// Maximum number of lines we will accept in a document. This is simply
+ /// the maximum number that will fit in a 32-bit space.
+ // The free constants `DOC_LINE_LEN` and `DOC_MAX_LINES` are removed by
+ // this change, so everything below refers to the associated constants.
+ pub const MAX_LINES: u32 = i32::MAX as u32 / Document::LINE_LEN;
+
+ /// Reads a document record from `r`: a `u32` line count parsed with
+ /// `endian`, followed by that many fixed-length lines.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`Error::BadDocumentLength`] if the line count exceeds
+ /// [`Document::MAX_LINES`], and propagates any error from reading `r`.
+ fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Document, Error> {
+ let offset = r.stream_position()?;
+ let n: u32 = endian.parse(read_bytes(r)?);
+ match n {
+ 0..=Document::MAX_LINES => {
+ let pos = r.stream_position()?;
+ let mut lines = Vec::with_capacity(n as usize);
+ for _ in 0..n {
+ let line: [u8; Document::LINE_LEN as usize] = read_bytes(r)?;
+ lines.push(line);
+ }
+ Ok(Document { pos, lines })
}
- Ok(Document { pos, lines })
+ _ => Err(Error::BadDocumentLength {
+ offset,
+ n,
+ max: Document::MAX_LINES,
+ }),
}
- _ => Err(Error::BadDocumentLength {
- offset,
- n,
- max: DOC_MAX_LINES,
- }),
}
}
+/*
#[derive(FromPrimitive)]
enum ExtensionType {
/// Machine integer info.
/// "Format properties in dataview table".
Dataview = 24,
}
+ */
pub struct Extension {
/// Offset from the start of the file to the start of the record.
}
*/
-fn read_extension_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Extension, Error> {
- let subtype = endian.parse(read_bytes(r)?);
- let offset = r.stream_position()?;
- let size: u32 = endian.parse(read_bytes(r)?);
- let count = endian.parse(read_bytes(r)?);
- let Some(product) = size.checked_mul(count) else {
- return Err(Error::ExtensionRecordTooLarge {
+impl Extension {
+ /// Reads an extension record from `r`: subtype, element size and element
+ /// count (all parsed with `endian`), then `size * count` bytes of data.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`Error::ExtensionRecordTooLarge`] if `size * count` overflows
+ /// a `u32`, and propagates any error from reading `r`.
+ fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Extension, Error> {
+ let subtype = endian.parse(read_bytes(r)?);
+ // This first `offset` (taken after the subtype) is only used in the
+ // error below; it is shadowed before the record is built.
+ let offset = r.stream_position()?;
+ let size: u32 = endian.parse(read_bytes(r)?);
+ let count = endian.parse(read_bytes(r)?);
+ let Some(product) = size.checked_mul(count) else {
+ return Err(Error::ExtensionRecordTooLarge {
+ offset,
+ subtype,
+ size,
+ count,
+ });
+ };
+ // The offset stored in the record is the position where the data starts.
+ let offset = r.stream_position()?;
+ let data = read_vec(r, product as usize)?;
+ Ok(Extension {
offset,
subtype,
size,
count,
- });
- };
- let offset = r.stream_position()?;
- let data = read_vec(r, product as usize)?;
- Ok(Extension {
- offset,
- subtype,
- size,
- count,
- data,
- })
+ data,
+ })
+ }
}
pub struct ZHeader {
pub ztrailer_len: u64,
}
-fn read_zheader<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
- let offset = r.stream_position()?;
- let zheader_offset: u64 = endian.parse(read_bytes(r)?);
- let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
- let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
+impl ZHeader {
+ /// Reads the ZLIB header from `r`: three `u64` fields parsed with `endian`.
+ /// `offset` records the stream position at which the header starts.
+ ///
+ /// # Errors
+ ///
+ /// Propagates any error from reading `r`.
+ fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
+ let offset = r.stream_position()?;
+ let zheader_offset: u64 = endian.parse(read_bytes(r)?);
+ let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
+ let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
- Ok(ZHeader {
- offset,
- zheader_offset,
- ztrailer_offset,
- ztrailer_len,
- })
+ Ok(ZHeader {
+ offset,
+ zheader_offset,
+ ztrailer_offset,
+ ztrailer_len,
+ })
+ }
}
pub struct ZTrailer {
pub compressed_size: u32,
}
-fn read_ztrailer<R: Read + Seek>(
- r: &mut R,
- endian: Endian,
- ztrailer_ofs: u64,
- ztrailer_len: u64,
-) -> Result<Option<ZTrailer>, Error> {
- let start_offset = r.stream_position()?;
- if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
- return Ok(None);
- }
- let int_bias = endian.parse(read_bytes(r)?);
- let zero = endian.parse(read_bytes(r)?);
- let block_size = endian.parse(read_bytes(r)?);
- let n_blocks: u32 = endian.parse(read_bytes(r)?);
- let expected_n_blocks = (ztrailer_len - 24) / 24;
- if n_blocks as u64 != expected_n_blocks {
- return Err(Error::BadZlibTrailerNBlocks {
+impl ZTrailer {
+ /// Reads the ZLIB trailer located at `ztrailer_ofs` and then seeks back to
+ /// the position `r` had on entry.
+ ///
+ /// Returns `Ok(None)` if seeking to `ztrailer_ofs` fails.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`Error::BadZlibTrailerNBlocks`] if the block count stored in
+ /// the trailer does not match the count implied by `ztrailer_len`, and
+ /// propagates any error from reading or seeking `r`.  On these error paths
+ /// the position of `r` is not restored.
+ fn read<R: Read + Seek>(
+ r: &mut R,
+ endian: Endian,
+ ztrailer_ofs: u64,
+ ztrailer_len: u64,
+ ) -> Result<Option<ZTrailer>, Error> {
+ let start_offset = r.stream_position()?;
+ if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
+ return Ok(None);
+ }
+ let int_bias = endian.parse(read_bytes(r)?);
+ let zero = endian.parse(read_bytes(r)?);
+ let block_size = endian.parse(read_bytes(r)?);
+ let n_blocks: u32 = endian.parse(read_bytes(r)?);
+ // 24 bytes of fixed fields plus 24 bytes per block.  `ztrailer_len`
+ // comes from the file, so a value below 24 must not underflow the
+ // subtraction; it is treated as leaving room for zero blocks.
+ let expected_n_blocks = ztrailer_len.saturating_sub(24) / 24;
+ if n_blocks as u64 != expected_n_blocks {
+ return Err(Error::BadZlibTrailerNBlocks {
+ offset: ztrailer_ofs,
+ n_blocks,
+ expected_n_blocks,
+ ztrailer_len,
+ });
+ }
+ let mut blocks = Vec::with_capacity(n_blocks as usize);
+ for _ in 0..n_blocks {
+ let uncompressed_ofs = endian.parse(read_bytes(r)?);
+ let compressed_ofs = endian.parse(read_bytes(r)?);
+ let uncompressed_size = endian.parse(read_bytes(r)?);
+ let compressed_size = endian.parse(read_bytes(r)?);
+ blocks.push(ZBlock {
+ uncompressed_ofs,
+ compressed_ofs,
+ uncompressed_size,
+ compressed_size,
+ });
+ }
+ r.seek(SeekFrom::Start(start_offset))?;
+ Ok(Some(ZTrailer {
offset: ztrailer_ofs,
- n_blocks,
- expected_n_blocks,
- ztrailer_len,
- });
- }
- let mut blocks = Vec::with_capacity(n_blocks as usize);
- for _ in 0..n_blocks {
- let uncompressed_ofs = endian.parse(read_bytes(r)?);
- let compressed_ofs = endian.parse(read_bytes(r)?);
- let uncompressed_size = endian.parse(read_bytes(r)?);
- let compressed_size = endian.parse(read_bytes(r)?);
- blocks.push(ZBlock {
- uncompressed_ofs,
- compressed_ofs,
- uncompressed_size,
- compressed_size,
- });
- }
- r.seek(SeekFrom::Start(start_offset))?;
- Ok(Some(ZTrailer {
- offset: ztrailer_ofs,
- int_bias,
- zero,
- block_size,
- blocks,
- }))
+ int_bias,
+ zero,
+ block_size,
+ blocks,
+ }))
+ }
}
fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {