From: Ben Pfaff Date: Wed, 22 Nov 2023 17:31:20 +0000 (-0800) Subject: cleanup X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8a9acabf6c98fe4b2b227a974e856aca607d105e;p=pspp cleanup --- diff --git a/rust/src/main.rs b/rust/src/main.rs index 404e96d57d..213b381a6e 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -82,7 +82,7 @@ fn main() -> Result<()> { fn dissect(file_name: &Path, max_cases: u64, mode: Mode, encoding: Option<&'static Encoding>) -> Result<()> { let reader = File::open(file_name)?; let reader = BufReader::new(reader); - let mut reader = Reader::new(reader)?; + let mut reader = Reader::new(reader, |warning| println!("{warning}"))?; match mode { Mode::Identify => { diff --git a/rust/src/raw.rs b/rust/src/raw.rs index ed246717ce..222a39b01d 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -162,14 +162,22 @@ pub enum Record { } impl Record { - fn read(reader: &mut R, endian: Endian) -> Result { + fn read(reader: &mut R, endian: Endian, warn: &Box) -> Result { + loop { + if let Some(record) = Self::_read(reader, endian, warn)? { + return Ok(record); + } + } + } + + fn _read(reader: &mut R, endian: Endian, warn: &Box) -> Result, Error> { let rec_type: u32 = endian.parse(read_bytes(reader)?); match rec_type { - 2 => Ok(Record::Variable(VariableRecord::read(reader, endian)?)), - 3 => Ok(Record::ValueLabel(ValueLabelRecord::read(reader, endian)?)), - 6 => Ok(Record::Document(DocumentRecord::read(reader, endian)?)), - 7 => Ok(Extension::read(reader, endian)?), - 999 => Ok(Record::EndOfHeaders(endian.parse(read_bytes(reader)?))), + 2 => Ok(Some(VariableRecord::read(reader, endian)?)), + 3 => Ok(Some(ValueLabelRecord::read(reader, endian)?)), + 6 => Ok(Some(DocumentRecord::read(reader, endian)?)), + 7 => Extension::read(reader, endian, warn), + 999 => Ok(Some(Record::EndOfHeaders(endian.parse(read_bytes(reader)?)))), _ => Err(Error::BadRecordType { offset: reader.stream_position()?, rec_type, @@ -408,14 +416,16 @@ mod state { struct Start { reader: R, + warn: Box } - pub fn new(reader: R) -> Box { - Box::new(Start { reader }) + pub fn new(reader: R, warn: F) -> Box { + Box::new(Start { reader, warn: Box::new(warn) }) } struct CommonState { reader: R, + warn: Box, endian: Endian, bias: f64, compression: Option, @@ -427,6 +437,7 @@ mod state { let header = HeaderRecord::read(&mut self.reader)?; let next_state = Headers(CommonState { reader: self.reader, + warn: self.warn, endian: header.endian, bias: header.bias, compression: header.compression, @@ -440,7 +451,7 @@ mod state { impl State for Headers { fn read(mut self: Box) -> Result)>, Error> { - let record = Record::read(&mut self.0.reader, self.0.endian)?; + let record = Record::read(&mut self.0.reader, self.0.endian, &self.0.warn)?; match record { Record::Variable(VariableRecord { width, .. }) => { self.0.var_types.push(VarType::from_width(width)); @@ -481,6 +492,7 @@ mod state { )?; let next_state = Box::new(CompressedData::new(CommonState { reader: ZlibDecodeMultiple::new(self.0.reader), + warn: self.0.warn, endian: self.0.endian, bias: self.0.bias, compression: self.0.compression, @@ -717,9 +729,9 @@ pub struct Reader { } impl Reader { - pub fn new(reader: R) -> Result { + pub fn new(reader: R, warn: F) -> Result { Ok(Reader { - state: Some(state::new(reader)), + state: Some(state::new(reader, warn)), }) } pub fn collect_headers(&mut self) -> Result, Error> { @@ -922,7 +934,7 @@ impl Debug for VariableRecord { } impl VariableRecord { - fn read(r: &mut R, endian: Endian) -> Result { + fn read(r: &mut R, endian: Endian) -> Result { let start_offset = r.stream_position()?; let width: i32 = endian.parse(read_bytes(r)?); let code_offset = r.stream_position()?; @@ -958,7 +970,7 @@ impl VariableRecord { let end_offset = r.stream_position()?; - Ok(VariableRecord { + Ok(Record::Variable(VariableRecord { offsets: start_offset..end_offset, width, name, @@ -966,7 +978,7 @@ impl VariableRecord { write_format, missing_values, label, - }) + })) } } @@ -1071,7 +1083,7 @@ impl ValueLabelRecord { /// Maximum number of variable indexes in a record. pub const MAX_INDEXES: u32 = u32::MAX / 8; - fn read(r: &mut R, endian: Endian) -> Result { + fn read(r: &mut R, endian: Endian) -> Result { let label_offset = r.stream_position()?; let n: u32 = endian.parse(read_bytes(r)?); if n > Self::MAX_LABELS { @@ -1117,11 +1129,11 @@ impl ValueLabelRecord { } let end_offset = r.stream_position()?; - Ok(ValueLabelRecord { + Ok(Record::ValueLabel(ValueLabelRecord { offsets: label_offset..end_offset, labels, dict_indexes, - }) + })) } } @@ -1144,7 +1156,7 @@ impl DocumentRecord { /// the maximum number that will fit in a 32-bit space. pub const MAX_LINES: usize = i32::MAX as usize / Self::LINE_LEN; - fn read(r: &mut R, endian: Endian) -> Result { + fn read(r: &mut R, endian: Endian) -> Result { let start_offset = r.stream_position()?; let n: u32 = endian.parse(read_bytes(r)?); let n = n as usize; @@ -1160,10 +1172,10 @@ impl DocumentRecord { lines.push(UnencodedStr::<{ DocumentRecord::LINE_LEN }>(read_bytes(r)?)); } let end_offset = r.stream_position()?; - Ok(DocumentRecord { + Ok(Record::Document(DocumentRecord { offsets: start_offset..end_offset, lines, - }) + })) } } } @@ -1577,7 +1589,7 @@ impl Extension { Ok(()) } - fn read(r: &mut R, endian: Endian) -> Result { + fn read(r: &mut R, endian: Endian, warn: &Box) -> Result, Error> { let subtype = endian.parse(read_bytes(r)?); let header_offset = r.stream_position()?; let size: u32 = endian.parse(read_bytes(r)?); @@ -1600,18 +1612,18 @@ impl Extension { count, data, }; - match subtype { - IntegerInfoRecord::SUBTYPE => Ok(IntegerInfoRecord::parse(&extension, endian)?), - FloatInfoRecord::SUBTYPE => Ok(FloatInfoRecord::parse(&extension, endian)?), - VarDisplayRecord::SUBTYPE => Ok(VarDisplayRecord::parse(&extension, endian)?), + let result = match subtype { + IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian), + FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian), + VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, endian), MultipleResponseRecord::SUBTYPE | 19 => { - Ok(MultipleResponseRecord::parse(&extension, endian)?) + MultipleResponseRecord::parse(&extension, endian) } LongStringValueLabelRecord::SUBTYPE => { - Ok(LongStringValueLabelRecord::parse(&extension, endian)?) + LongStringValueLabelRecord::parse(&extension, endian) } - EncodingRecord::SUBTYPE => Ok(EncodingRecord::parse(&extension, endian)?), - NumberOfCasesRecord::SUBTYPE => Ok(NumberOfCasesRecord::parse(&extension, endian)?), + EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian), + NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian), 5 => Ok(Record::VariableSets(extension.into())), 10 => Ok(Record::ProductInfo(extension.into())), 13 => Ok(Record::LongNames(extension.into())), @@ -1619,6 +1631,13 @@ impl Extension { 17 => Ok(Record::FileAttributes(extension.into())), 18 => Ok(Record::VariableAttributes(extension.into())), _ => Ok(Record::OtherExtension(extension)), + }; + match result { + Ok(result) => Ok(Some(result)), + Err(error) => { + warn(error); + Ok(None) + }, } } }