From 52b33c0f8d996fc4667b27d348fd9b3ea5f49ba2 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 31 Jul 2023 10:53:58 -0700 Subject: [PATCH] work --- rust/src/raw.rs | 836 ++++++++++++++++++++++++++---------------------- 1 file changed, 454 insertions(+), 382 deletions(-) diff --git a/rust/src/raw.rs b/rust/src/raw.rs index f1e362376b..7cb7238e9a 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -3,13 +3,14 @@ use crate::Error; use flate2::read::ZlibDecoder; use num::Integer; -use num_derive::FromPrimitive; use std::{ collections::VecDeque, io::{Error as IoError, Read, Seek, SeekFrom}, iter::FusedIterator, }; +use self::state::State; + #[derive(Copy, Clone, Debug)] pub enum Compression { Simple, @@ -23,12 +24,30 @@ pub enum Record { ValueLabel(ValueLabel), VarIndexes(VarIndexes), Extension(Extension), - EndOfHeaders, + EndOfHeaders(u32), ZHeader(ZHeader), ZTrailer(ZTrailer), Case(Vec), } +impl Record { + fn read(reader: &mut R, endian: Endian) -> Result { + let rec_type: u32 = endian.parse(read_bytes(reader)?); + match rec_type { + 2 => Ok(Record::Variable(Variable::read(reader, endian)?)), + 3 => Ok(Record::ValueLabel(ValueLabel::read(reader, endian)?)), + 4 => Ok(Record::VarIndexes(VarIndexes::read(reader, endian)?)), + 6 => Ok(Record::Document(Document::read(reader, endian)?)), + 7 => Ok(Record::Extension(Extension::read(reader, endian)?)), + 999 => Ok(Record::EndOfHeaders(endian.parse(read_bytes(reader)?))), + _ => Err(Error::BadRecordType { + offset: reader.stream_position()?, + rec_type, + }), + } + } +} + pub struct Header { /// Magic number. pub magic: Magic, @@ -70,6 +89,61 @@ pub struct Header { pub endian: Endian, } +impl Header { + fn read(r: &mut R) -> Result { + let magic: [u8; 4] = read_bytes(r)?; + let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?; + + let eye_catcher: [u8; 60] = read_bytes(r)?; + let layout_code: [u8; 4] = read_bytes(r)?; + let endian = Endian::identify_u32(2, layout_code) + .or_else(|| Endian::identify_u32(2, layout_code)) + .ok_or_else(|| Error::NotASystemFile)?; + let layout_code = endian.parse(layout_code); + + let nominal_case_size: u32 = endian.parse(read_bytes(r)?); + let nominal_case_size = + (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size); + + let compression_code: u32 = endian.parse(read_bytes(r)?); + let compression = match (magic, compression_code) { + (Magic::ZSAV, 2) => Some(Compression::ZLib), + (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)), + (_, 0) => None, + (_, 1) => Some(Compression::Simple), + (_, code) => return Err(Error::InvalidSavCompression(code)), + }; + + let weight_index: u32 = endian.parse(read_bytes(r)?); + let weight_index = (weight_index > 0).then_some(weight_index - 1); + + let n_cases: u32 = endian.parse(read_bytes(r)?); + let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases); + + let bias: f64 = endian.parse(read_bytes(r)?); + + let creation_date: [u8; 9] = read_bytes(r)?; + let creation_time: [u8; 8] = read_bytes(r)?; + let file_label: [u8; 64] = read_bytes(r)?; + let _: [u8; 3] = read_bytes(r)?; + + Ok(Header { + magic, + layout_code, + nominal_case_size, + compression, + weight_index, + n_cases, + bias, + creation_date, + creation_time, + eye_catcher, + file_label, + endian, + }) + } +} + #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct Magic([u8; 4]); @@ -112,191 +186,243 @@ impl VarType { } } -trait State { - #[allow(clippy::type_complexity)] - fn read(self: Box) -> Result)>, Error>; -} +mod state { + use super::{ + Compression, Error, Header, Record, Value, VarType, Variable, ZHeader, ZTrailer, + ZlibDecodeMultiple, + }; + use crate::endian::Endian; + use std::{ + collections::VecDeque, + io::{Read, Seek}, + }; -struct Start { - reader: R, -} + pub trait State { + #[allow(clippy::type_complexity)] + fn read(self: Box) -> Result)>, Error>; + } -struct CommonState { - reader: R, - endian: Endian, - bias: f64, - compression: Option, - var_types: Vec, -} + struct Start { + reader: R, + } -impl State for Start { - fn read(mut self: Box) -> Result)>, Error> { - let header = read_header(&mut self.reader)?; - let next_state = Headers(CommonState { - reader: self.reader, - endian: header.endian, - bias: header.bias, - compression: header.compression, - var_types: Vec::new(), - }); - Ok(Some((Record::Header(header), Box::new(next_state)))) + pub fn new(reader: R) -> Box { + Box::new(Start { reader }) + } + + struct CommonState { + reader: R, + endian: Endian, + bias: f64, + compression: Option, + var_types: Vec, } -} -struct Headers(CommonState); + impl State for Start { + fn read(mut self: Box) -> Result)>, Error> { + let header = Header::read(&mut self.reader)?; + let next_state = Headers(CommonState { + reader: self.reader, + endian: header.endian, + bias: header.bias, + compression: header.compression, + var_types: Vec::new(), + }); + Ok(Some((Record::Header(header), Box::new(next_state)))) + } + } -impl State for Headers { - fn read(mut self: Box) -> Result)>, Error> { - let endian = self.0.endian; - let rec_type: u32 = endian.parse(read_bytes(&mut self.0.reader)?); - let record = match rec_type { - 2 => { - let variable = read_variable_record(&mut self.0.reader, endian)?; - self.0.var_types.push(VarType::from_width(variable.width)); - Record::Variable(variable) - } - 3 => Record::ValueLabel(read_value_label_record(&mut self.0.reader, endian)?), - 4 => Record::VarIndexes(read_var_indexes_record(&mut self.0.reader, endian)?), - 6 => Record::Document(read_document_record(&mut self.0.reader, endian)?), - 7 => Record::Extension(read_extension_record(&mut self.0.reader, endian)?), - 999 => { - let _: [u8; 4] = read_bytes(&mut self.0.reader)?; - let next_state: Box = match self.0.compression { - None => Box::new(Data(self.0)), - Some(Compression::Simple) => Box::new(CompressedData::new(self.0)), - Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)), - }; - return Ok(Some((Record::EndOfHeaders, next_state))); - } - _ => { - return Err(Error::BadRecordType { - offset: self.0.reader.stream_position()?, - rec_type, - }) + struct Headers(CommonState); + + impl State for Headers { + fn read(mut self: Box) -> Result)>, Error> { + let record = Record::read(&mut self.0.reader, self.0.endian)?; + match record { + Record::Variable(Variable { width, .. }) => { + self.0.var_types.push(VarType::from_width(width)); + } + Record::EndOfHeaders(_) => { + let next_state: Box = match self.0.compression { + None => Box::new(Data(self.0)), + Some(Compression::Simple) => Box::new(CompressedData::new(self.0)), + Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)), + }; + return Ok(Some((record, next_state))); + } + _ => (), + }; + Ok(Some((record, self))) + } + } + + struct ZlibHeader(CommonState); + + impl State for ZlibHeader { + fn read(mut self: Box) -> Result)>, Error> { + let zheader = ZHeader::read(&mut self.0.reader, self.0.endian)?; + Ok(Some((Record::ZHeader(zheader), self))) + } + } + + struct ZlibTrailer(CommonState, ZHeader); + + impl State for ZlibTrailer { + fn read(mut self: Box) -> Result)>, Error> { + let retval = ZTrailer::read( + &mut self.0.reader, + self.0.endian, + self.1.ztrailer_offset, + self.1.ztrailer_len, + )?; + let next_state = Box::new(CompressedData::new(CommonState { + reader: ZlibDecodeMultiple::new(self.0.reader), + endian: self.0.endian, + bias: self.0.bias, + compression: self.0.compression, + var_types: self.0.var_types, + })); + match retval { + None => next_state.read(), + Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state))), } - }; - Ok(Some((record, self))) + } } -} -struct ZlibHeader(CommonState); + struct Data(CommonState); + + impl State for Data { + fn read(mut self: Box) -> Result)>, Error> { + match Value::read_case(&mut self.0.reader, &self.0.var_types, self.0.endian)? { + None => Ok(None), + Some(values) => Ok(Some((Record::Case(values), self))), + } + } + } -impl State for ZlibHeader { - fn read(mut self: Box) -> Result)>, Error> { - let zheader = read_zheader(&mut self.0.reader, self.0.endian)?; - Ok(Some((Record::ZHeader(zheader), self))) + struct CompressedData { + common: CommonState, + codes: VecDeque, } -} -struct ZlibTrailer(CommonState, ZHeader); + impl CompressedData { + fn new(common: CommonState) -> CompressedData { + CompressedData { + common, + codes: VecDeque::new(), + } + } + } -impl State for ZlibTrailer { - fn read(mut self: Box) -> Result)>, Error> { - let retval = read_ztrailer(&mut self.0.reader, self.0.endian, self.1.ztrailer_offset, self.1.ztrailer_len)?; - let next_state = Box::new(CompressedData::new(CommonState { - reader: ZlibDecodeMultiple::new(self.0.reader), - endian: self.0.endian, - bias: self.0.bias, - compression: self.0.compression, - var_types: self.0.var_types - })); - match retval { - None => next_state.read(), - Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state))) - } + impl State for CompressedData { + fn read(mut self: Box) -> Result)>, Error> { + match Value::read_compressed_case( + &mut self.common.reader, + &self.common.var_types, + &mut self.codes, + self.common.endian, + self.common.bias, + )? { + None => Ok(None), + Some(values) => Ok(Some((Record::Case(values), self))), + } + } } } -struct Data(CommonState); +#[derive(Copy, Clone)] +pub enum Value { + Number(Option), + String([u8; 8]), +} -impl State for Data { - fn read(mut self: Box) -> Result)>, Error> { - let case_start = self.0.reader.stream_position()?; - let mut values = Vec::with_capacity(self.0.var_types.len()); - for (i, &var_type) in self.0.var_types.iter().enumerate() { - let Some(raw) = try_read_bytes(&mut self.0.reader)? else { +impl Value { + pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value { + match var_type { + VarType::String => Value::String(raw), + VarType::Number => { + let number: f64 = endian.parse(raw); + Value::Number((number != -f64::MAX).then_some(number)) + } + } + } + + fn read_case( + reader: &mut R, + var_types: &[VarType], + endian: Endian, + ) -> Result>, Error> { + let case_start = reader.stream_position()?; + let mut values = Vec::with_capacity(var_types.len()); + for (i, &var_type) in var_types.iter().enumerate() { + let Some(raw) = try_read_bytes(reader)? else { if i == 0 { return Ok(None); } else { - let offset = self.0.reader.stream_position()?; + let offset = reader.stream_position()?; return Err(Error::EofInCase { offset, case_ofs: offset - case_start, - case_len: self.0.var_types.len() * 8, + case_len: var_types.len() * 8, }); } }; - values.push(Value::from_raw(var_type, raw, self.0.endian)); + values.push(Value::from_raw(var_type, raw, endian)); } - Ok(Some((Record::Case(values), self))) + Ok(Some(values)) } -} -struct CompressedData { - common: CommonState, - codes: VecDeque, -} - -impl CompressedData { - fn new(common: CommonState) -> CompressedData { - CompressedData { common, codes: VecDeque::new() } - } -} - -impl State for CompressedData { - fn read(mut self: Box) -> Result)>, Error> { - let case_start = self.common.reader.stream_position()?; - let mut values = Vec::with_capacity(self.common.var_types.len()); - for (i, &var_type) in self.common.var_types.iter().enumerate() { + fn read_compressed_case( + reader: &mut R, + var_types: &[VarType], + codes: &mut VecDeque, + endian: Endian, + bias: f64, + ) -> Result>, Error> { + let case_start = reader.stream_position()?; + let mut values = Vec::with_capacity(var_types.len()); + for (i, &var_type) in var_types.iter().enumerate() { let value = loop { - let Some(code) = self.codes.pop_front() else { - let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.common.reader)? - else { + let Some(code) = codes.pop_front() else { + let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else { if i == 0 { return Ok(None); } else { - let offset = self.common.reader.stream_position()?; + let offset = reader.stream_position()?; return Err(Error::EofInCompressedCase { offset, case_ofs: offset - case_start, }); } }; - self.codes.extend(new_codes.into_iter()); + codes.extend(new_codes.into_iter()); continue; }; match code { 0 => (), 1..=251 => match var_type { - VarType::Number => break Value::Number(Some(code as f64 - self.common.bias)), + VarType::Number => break Value::Number(Some(code as f64 - bias)), VarType::String => { - break Value::String(self.common.endian.to_bytes(code as f64 - self.common.bias)) + break Value::String(endian.to_bytes(code as f64 - bias)) } }, 252 => { if i == 0 { return Ok(None); } else { - let offset = self.common.reader.stream_position()?; + let offset = reader.stream_position()?; return Err(Error::PartialCompressedCase { offset, case_ofs: offset - case_start, }); } } - 253 => { - break Value::from_raw( - var_type, - read_bytes(&mut self.common.reader)?, - self.common.endian, - ) - } + 253 => break Value::from_raw(var_type, read_bytes(reader)?, endian), 254 => match var_type { VarType::String => break Value::String(*b" "), // XXX EBCDIC VarType::Number => { return Err(Error::CompressedStringExpected { offset: case_start, - case_ofs: self.common.reader.stream_position()? - case_start, + case_ofs: reader.stream_position()? - case_start, }) } }, @@ -305,7 +431,7 @@ impl State for CompressedData { VarType::String => { return Err(Error::CompressedNumberExpected { offset: case_start, - case_ofs: self.common.reader.stream_position()? - case_start, + case_ofs: reader.stream_position()? - case_start, }) } }, @@ -313,7 +439,7 @@ impl State for CompressedData { }; values.push(value); } - Ok(Some((Record::Case(values), self))) + Ok(Some(values)) } } @@ -361,24 +487,6 @@ where } } -#[derive(Copy, Clone)] -pub enum Value { - Number(Option), - String([u8; 8]), -} - -impl Value { - pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value { - match var_type { - VarType::String => Value::String(raw), - VarType::Number => { - let number: f64 = endian.parse(raw); - Value::Number((number != -f64::MAX).then_some(number)) - } - } - } -} - pub struct Reader { state: Option>, } @@ -386,7 +494,7 @@ pub struct Reader { impl Reader { pub fn new(reader: R) -> Result { Ok(Reader { - state: Some(Box::new(Start { reader })), + state: Some(state::new(reader)), }) } } @@ -408,59 +516,6 @@ impl Iterator for Reader { impl FusedIterator for Reader {} -fn read_header(r: &mut R) -> Result { - let magic: [u8; 4] = read_bytes(r)?; - let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?; - - let eye_catcher: [u8; 60] = read_bytes(r)?; - let layout_code: [u8; 4] = read_bytes(r)?; - let endian = Endian::identify_u32(2, layout_code) - .or_else(|| Endian::identify_u32(2, layout_code)) - .ok_or_else(|| Error::NotASystemFile)?; - let layout_code = endian.parse(layout_code); - - let nominal_case_size: u32 = endian.parse(read_bytes(r)?); - let nominal_case_size = - (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size); - - let compression_code: u32 = endian.parse(read_bytes(r)?); - let compression = match (magic, compression_code) { - (Magic::ZSAV, 2) => Some(Compression::ZLib), - (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)), - (_, 0) => None, - (_, 1) => Some(Compression::Simple), - (_, code) => return Err(Error::InvalidSavCompression(code)), - }; - - let weight_index: u32 = endian.parse(read_bytes(r)?); - let weight_index = (weight_index > 0).then_some(weight_index - 1); - - let n_cases: u32 = endian.parse(read_bytes(r)?); - let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases); - - let bias: f64 = endian.parse(read_bytes(r)?); - - let creation_date: [u8; 9] = read_bytes(r)?; - let creation_time: [u8; 8] = read_bytes(r)?; - let file_label: [u8; 64] = read_bytes(r)?; - let _: [u8; 3] = read_bytes(r)?; - - Ok(Header { - magic, - layout_code, - nominal_case_size, - compression, - weight_index, - n_cases, - bias, - creation_date, - creation_time, - eye_catcher, - file_label, - endian, - }) -} - pub struct Variable { /// Offset from the start of the file to the start of the record. pub offset: u64, @@ -487,69 +542,71 @@ pub struct Variable { pub label: Option>, } -fn read_variable_record(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; - let width: i32 = endian.parse(read_bytes(r)?); - let has_variable_label: u32 = endian.parse(read_bytes(r)?); - let missing_value_code: i32 = endian.parse(read_bytes(r)?); - let print_format: u32 = endian.parse(read_bytes(r)?); - let write_format: u32 = endian.parse(read_bytes(r)?); - let name: [u8; 8] = read_bytes(r)?; +impl Variable { + fn read(r: &mut R, endian: Endian) -> Result { + let offset = r.stream_position()?; + let width: i32 = endian.parse(read_bytes(r)?); + let has_variable_label: u32 = endian.parse(read_bytes(r)?); + let missing_value_code: i32 = endian.parse(read_bytes(r)?); + let print_format: u32 = endian.parse(read_bytes(r)?); + let write_format: u32 = endian.parse(read_bytes(r)?); + let name: [u8; 8] = read_bytes(r)?; - let label = match has_variable_label { - 0 => None, - 1 => { - let len: u32 = endian.parse(read_bytes(r)?); - let read_len = len.min(65535) as usize; - let label = Some(read_vec(r, read_len)?); + let label = match has_variable_label { + 0 => None, + 1 => { + let len: u32 = endian.parse(read_bytes(r)?); + let read_len = len.min(65535) as usize; + let label = Some(read_vec(r, read_len)?); - let padding_bytes = Integer::next_multiple_of(&len, &4) - len; - let _ = read_vec(r, padding_bytes as usize)?; - - label - } - _ => { - return Err(Error::BadVariableLabelCode { - offset, - code: has_variable_label, - }) - } - }; + let padding_bytes = Integer::next_multiple_of(&len, &4) - len; + let _ = read_vec(r, padding_bytes as usize)?; - let mut missing = Vec::new(); - if missing_value_code != 0 { - match (width, missing_value_code) { - (0, -3 | -2 | 1 | 2 | 3) => (), - (0, _) => { - return Err(Error::BadNumericMissingValueCode { - offset, - code: missing_value_code, - }) + label } - (_, 0..=3) => (), - (_, _) => { - return Err(Error::BadStringMissingValueCode { + _ => { + return Err(Error::BadVariableLabelCode { offset, - code: missing_value_code, + code: has_variable_label, }) } - } + }; + + let mut missing = Vec::new(); + if missing_value_code != 0 { + match (width, missing_value_code) { + (0, -3 | -2 | 1 | 2 | 3) => (), + (0, _) => { + return Err(Error::BadNumericMissingValueCode { + offset, + code: missing_value_code, + }) + } + (_, 0..=3) => (), + (_, _) => { + return Err(Error::BadStringMissingValueCode { + offset, + code: missing_value_code, + }) + } + } - for _ in 0..missing_value_code.abs() { - missing.push(read_bytes(r)?); + for _ in 0..missing_value_code.abs() { + missing.push(read_bytes(r)?); + } } - } - Ok(Variable { - offset, - width, - name, - print_format, - write_format, - missing_value_code, - missing, - label, - }) + Ok(Variable { + offset, + width, + name, + print_format, + write_format, + missing_value_code, + missing, + label, + }) + } } pub struct ValueLabel { @@ -563,31 +620,31 @@ pub struct ValueLabel { impl ValueLabel { /// Maximum number of value labels in a record. pub const MAX: u32 = u32::MAX / 8; -} -fn read_value_label_record(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; - let n: u32 = endian.parse(read_bytes(r)?); - if n > ValueLabel::MAX { - return Err(Error::BadNumberOfValueLabels { - offset, - n, - max: ValueLabel::MAX, - }); - } + fn read(r: &mut R, endian: Endian) -> Result { + let offset = r.stream_position()?; + let n: u32 = endian.parse(read_bytes(r)?); + if n > ValueLabel::MAX { + return Err(Error::BadNumberOfValueLabels { + offset, + n, + max: ValueLabel::MAX, + }); + } - let mut labels = Vec::new(); - for _ in 0..n { - let value: [u8; 8] = read_bytes(r)?; - let label_len: u8 = endian.parse(read_bytes(r)?); - let label_len = label_len as usize; - let padded_len = Integer::next_multiple_of(&(label_len + 1), &8); + let mut labels = Vec::new(); + for _ in 0..n { + let value: [u8; 8] = read_bytes(r)?; + let label_len: u8 = endian.parse(read_bytes(r)?); + let label_len = label_len as usize; + let padded_len = Integer::next_multiple_of(&(label_len + 1), &8); - let mut label = read_vec(r, padded_len)?; - label.truncate(label_len); - labels.push((value, label)); + let mut label = read_vec(r, padded_len)?; + label.truncate(label_len); + labels.push((value, label)); + } + Ok(ValueLabel { offset, labels }) } - Ok(ValueLabel { offset, labels }) } pub struct VarIndexes { @@ -601,32 +658,29 @@ pub struct VarIndexes { impl VarIndexes { /// Maximum number of variable indexes in a record. pub const MAX: u32 = u32::MAX / 8; -} -fn read_var_indexes_record(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; - let n: u32 = endian.parse(read_bytes(r)?); - if n > VarIndexes::MAX { - return Err(Error::BadNumberOfVarIndexes { + fn read(r: &mut R, endian: Endian) -> Result { + let offset = r.stream_position()?; + let n: u32 = endian.parse(read_bytes(r)?); + if n > VarIndexes::MAX { + return Err(Error::BadNumberOfVarIndexes { + offset, + n, + max: VarIndexes::MAX, + }); + } + let mut var_indexes = Vec::with_capacity(n as usize); + for _ in 0..n { + var_indexes.push(endian.parse(read_bytes(r)?)); + } + + Ok(VarIndexes { offset, - n, - max: VarIndexes::MAX, - }); - } - let mut var_indexes = Vec::with_capacity(n as usize); - for _ in 0..n { - var_indexes.push(endian.parse(read_bytes(r)?)); + var_indexes, + }) } - - Ok(VarIndexes { - offset, - var_indexes, - }) } -pub const DOC_LINE_LEN: u32 = 80; -pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN; - pub struct Document { /// Offset from the start of the file to the start of the record. pub pos: u64, @@ -635,27 +689,38 @@ pub struct Document { pub lines: Vec<[u8; DOC_LINE_LEN as usize]>, } -fn read_document_record(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; - let n: u32 = endian.parse(read_bytes(r)?); - match n { - 0..=DOC_MAX_LINES => { - let pos = r.stream_position()?; - let mut lines = Vec::with_capacity(n as usize); - for _ in 0..n { - let line: [u8; 80] = read_bytes(r)?; - lines.push(line); +impl Document { + /// Length of a line in a document. Document lines are fixed-length and + /// padded on the right with spaces. + pub const LINE_LEN: u32 = 80; + + /// Maximum number of lines we will accept in a document. This is simply + /// the maximum number that will fit in a 32-bit space. + pub const MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN; + + fn read(r: &mut R, endian: Endian) -> Result { + let offset = r.stream_position()?; + let n: u32 = endian.parse(read_bytes(r)?); + match n { + 0..=DOC_MAX_LINES => { + let pos = r.stream_position()?; + let mut lines = Vec::with_capacity(n as usize); + for _ in 0..n { + let line: [u8; 80] = read_bytes(r)?; + lines.push(line); + } + Ok(Document { pos, lines }) } - Ok(Document { pos, lines }) + _ => Err(Error::BadDocumentLength { + offset, + n, + max: DOC_MAX_LINES, + }), } - _ => Err(Error::BadDocumentLength { - offset, - n, - max: DOC_MAX_LINES, - }), } } +/* #[derive(FromPrimitive)] enum ExtensionType { /// Machine integer info. @@ -695,6 +760,7 @@ enum ExtensionType { /// "Format properties in dataview table". Dataview = 24, } + */ pub struct Extension { /// Offset from the start of the file to the start of the record. @@ -741,28 +807,30 @@ fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) { } */ -fn read_extension_record(r: &mut R, endian: Endian) -> Result { - let subtype = endian.parse(read_bytes(r)?); - let offset = r.stream_position()?; - let size: u32 = endian.parse(read_bytes(r)?); - let count = endian.parse(read_bytes(r)?); - let Some(product) = size.checked_mul(count) else { - return Err(Error::ExtensionRecordTooLarge { +impl Extension { + fn read(r: &mut R, endian: Endian) -> Result { + let subtype = endian.parse(read_bytes(r)?); + let offset = r.stream_position()?; + let size: u32 = endian.parse(read_bytes(r)?); + let count = endian.parse(read_bytes(r)?); + let Some(product) = size.checked_mul(count) else { + return Err(Error::ExtensionRecordTooLarge { + offset, + subtype, + size, + count, + }); + }; + let offset = r.stream_position()?; + let data = read_vec(r, product as usize)?; + Ok(Extension { offset, subtype, size, count, - }); - }; - let offset = r.stream_position()?; - let data = read_vec(r, product as usize)?; - Ok(Extension { - offset, - subtype, - size, - count, - data, - }) + data, + }) + } } pub struct ZHeader { @@ -779,18 +847,20 @@ pub struct ZHeader { pub ztrailer_len: u64, } -fn read_zheader(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; - let zheader_offset: u64 = endian.parse(read_bytes(r)?); - let ztrailer_offset: u64 = endian.parse(read_bytes(r)?); - let ztrailer_len: u64 = endian.parse(read_bytes(r)?); +impl ZHeader { + fn read(r: &mut R, endian: Endian) -> Result { + let offset = r.stream_position()?; + let zheader_offset: u64 = endian.parse(read_bytes(r)?); + let ztrailer_offset: u64 = endian.parse(read_bytes(r)?); + let ztrailer_len: u64 = endian.parse(read_bytes(r)?); - Ok(ZHeader { - offset, - zheader_offset, - ztrailer_offset, - ztrailer_len, - }) + Ok(ZHeader { + offset, + zheader_offset, + ztrailer_offset, + ztrailer_len, + }) + } } pub struct ZTrailer { @@ -827,50 +897,52 @@ pub struct ZBlock { pub compressed_size: u32, } -fn read_ztrailer( - r: &mut R, - endian: Endian, - ztrailer_ofs: u64, - ztrailer_len: u64, -) -> Result, Error> { - let start_offset = r.stream_position()?; - if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() { - return Ok(None); - } - let int_bias = endian.parse(read_bytes(r)?); - let zero = endian.parse(read_bytes(r)?); - let block_size = endian.parse(read_bytes(r)?); - let n_blocks: u32 = endian.parse(read_bytes(r)?); - let expected_n_blocks = (ztrailer_len - 24) / 24; - if n_blocks as u64 != expected_n_blocks { - return Err(Error::BadZlibTrailerNBlocks { +impl ZTrailer { + fn read( + r: &mut R, + endian: Endian, + ztrailer_ofs: u64, + ztrailer_len: u64, + ) -> Result, Error> { + let start_offset = r.stream_position()?; + if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() { + return Ok(None); + } + let int_bias = endian.parse(read_bytes(r)?); + let zero = endian.parse(read_bytes(r)?); + let block_size = endian.parse(read_bytes(r)?); + let n_blocks: u32 = endian.parse(read_bytes(r)?); + let expected_n_blocks = (ztrailer_len - 24) / 24; + if n_blocks as u64 != expected_n_blocks { + return Err(Error::BadZlibTrailerNBlocks { + offset: ztrailer_ofs, + n_blocks, + expected_n_blocks, + ztrailer_len, + }); + } + let mut blocks = Vec::with_capacity(n_blocks as usize); + for _ in 0..n_blocks { + let uncompressed_ofs = endian.parse(read_bytes(r)?); + let compressed_ofs = endian.parse(read_bytes(r)?); + let uncompressed_size = endian.parse(read_bytes(r)?); + let compressed_size = endian.parse(read_bytes(r)?); + blocks.push(ZBlock { + uncompressed_ofs, + compressed_ofs, + uncompressed_size, + compressed_size, + }); + } + r.seek(SeekFrom::Start(start_offset))?; + Ok(Some(ZTrailer { offset: ztrailer_ofs, - n_blocks, - expected_n_blocks, - ztrailer_len, - }); - } - let mut blocks = Vec::with_capacity(n_blocks as usize); - for _ in 0..n_blocks { - let uncompressed_ofs = endian.parse(read_bytes(r)?); - let compressed_ofs = endian.parse(read_bytes(r)?); - let uncompressed_size = endian.parse(read_bytes(r)?); - let compressed_size = endian.parse(read_bytes(r)?); - blocks.push(ZBlock { - uncompressed_ofs, - compressed_ofs, - uncompressed_size, - compressed_size, - }); - } - r.seek(SeekFrom::Start(start_offset))?; - Ok(Some(ZTrailer { - offset: ztrailer_ofs, - int_bias, - zero, - block_size, - blocks, - })) + int_bias, + zero, + block_size, + blocks, + })) + } } fn try_read_bytes(r: &mut R) -> Result, IoError> { -- 2.30.2