cleanup
author Ben Pfaff <blp@cs.stanford.edu>
Wed, 22 Nov 2023 17:31:20 +0000 (09:31 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Wed, 22 Nov 2023 17:31:20 +0000 (09:31 -0800)
rust/src/main.rs
rust/src/raw.rs

index 404e96d57d07becaf09c4688319a0ac3739ceee4..213b381a6e670092cd966c586d2a8006528e504f 100644 (file)
@@ -82,7 +82,7 @@ fn main() -> Result<()> {
 fn dissect(file_name: &Path, max_cases: u64, mode: Mode, encoding: Option<&'static Encoding>) -> Result<()> {
     let reader = File::open(file_name)?;
     let reader = BufReader::new(reader);
-    let mut reader = Reader::new(reader)?;
+    let mut reader = Reader::new(reader, |warning| println!("{warning}"))?;
 
     match mode {
         Mode::Identify => {
index ed246717cef3f94b45c4330cf8ac8283cec4eba2..222a39b01dd7b9071e93d972b552222c85ae1d18 100644 (file)
@@ -162,14 +162,22 @@ pub enum Record {
 }
 
 impl Record {
-    fn read<R: Read + Seek>(reader: &mut R, endian: Endian) -> Result<Record, Error> {
+    fn read<R: Read + Seek>(reader: &mut R, endian: Endian, warn: &Box<dyn Fn(Error)>) -> Result<Record, Error> {
+        loop {
+            if let Some(record) = Self::_read(reader, endian, warn)? {
+                return Ok(record);
+            }
+        }
+    }
+
+    fn _read<R: Read + Seek>(reader: &mut R, endian: Endian, warn: &Box<dyn Fn(Error)>) -> Result<Option<Record>, Error> {
         let rec_type: u32 = endian.parse(read_bytes(reader)?);
         match rec_type {
-            2 => Ok(Record::Variable(VariableRecord::read(reader, endian)?)),
-            3 => Ok(Record::ValueLabel(ValueLabelRecord::read(reader, endian)?)),
-            6 => Ok(Record::Document(DocumentRecord::read(reader, endian)?)),
-            7 => Ok(Extension::read(reader, endian)?),
-            999 => Ok(Record::EndOfHeaders(endian.parse(read_bytes(reader)?))),
+            2 => Ok(Some(VariableRecord::read(reader, endian)?)),
+            3 => Ok(Some(ValueLabelRecord::read(reader, endian)?)),
+            6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
+            7 => Extension::read(reader, endian, warn),
+            999 => Ok(Some(Record::EndOfHeaders(endian.parse(read_bytes(reader)?)))),
             _ => Err(Error::BadRecordType {
                 offset: reader.stream_position()?,
                 rec_type,
@@ -408,14 +416,16 @@ mod state {
 
     struct Start<R: Read + Seek> {
         reader: R,
+        warn: Box<dyn Fn(Error)>
     }
 
-    pub fn new<R: Read + Seek + 'static>(reader: R) -> Box<dyn State> {
-        Box::new(Start { reader })
+    pub fn new<R: Read + Seek + 'static, F: Fn(Error) + 'static >(reader: R, warn: F) -> Box<dyn State> {
+        Box::new(Start { reader, warn: Box::new(warn) })
     }
 
     struct CommonState<R: Read + Seek> {
         reader: R,
+        warn: Box<dyn Fn(Error)>,
         endian: Endian,
         bias: f64,
         compression: Option<Compression>,
@@ -427,6 +437,7 @@ mod state {
             let header = HeaderRecord::read(&mut self.reader)?;
             let next_state = Headers(CommonState {
                 reader: self.reader,
+                warn: self.warn,
                 endian: header.endian,
                 bias: header.bias,
                 compression: header.compression,
@@ -440,7 +451,7 @@ mod state {
 
     impl<R: Read + Seek + 'static> State for Headers<R> {
         fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-            let record = Record::read(&mut self.0.reader, self.0.endian)?;
+            let record = Record::read(&mut self.0.reader, self.0.endian, &self.0.warn)?;
             match record {
                 Record::Variable(VariableRecord { width, .. }) => {
                     self.0.var_types.push(VarType::from_width(width));
@@ -481,6 +492,7 @@ mod state {
             )?;
             let next_state = Box::new(CompressedData::new(CommonState {
                 reader: ZlibDecodeMultiple::new(self.0.reader),
+                warn: self.0.warn,
                 endian: self.0.endian,
                 bias: self.0.bias,
                 compression: self.0.compression,
@@ -717,9 +729,9 @@ pub struct Reader {
 }
 
 impl Reader {
-    pub fn new<R: Read + Seek + 'static>(reader: R) -> Result<Reader, Error> {
+    pub fn new<R: Read + Seek + 'static, F: Fn(Error) + 'static>(reader: R, warn: F) -> Result<Self, Error> {
         Ok(Reader {
-            state: Some(state::new(reader)),
+            state: Some(state::new(reader, warn)),
         })
     }
     pub fn collect_headers(&mut self) -> Result<Vec<Record>, Error> {
@@ -922,7 +934,7 @@ impl Debug for VariableRecord {
 }
 
 impl VariableRecord {
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<VariableRecord, Error> {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
         let start_offset = r.stream_position()?;
         let width: i32 = endian.parse(read_bytes(r)?);
         let code_offset = r.stream_position()?;
@@ -958,7 +970,7 @@ impl VariableRecord {
 
         let end_offset = r.stream_position()?;
 
-        Ok(VariableRecord {
+        Ok(Record::Variable(VariableRecord {
             offsets: start_offset..end_offset,
             width,
             name,
@@ -966,7 +978,7 @@ impl VariableRecord {
             write_format,
             missing_values,
             label,
-        })
+        }))
     }
 }
 
@@ -1071,7 +1083,7 @@ impl ValueLabelRecord {
     /// Maximum number of variable indexes in a record.
     pub const MAX_INDEXES: u32 = u32::MAX / 8;
 
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ValueLabelRecord, Error> {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
         let label_offset = r.stream_position()?;
         let n: u32 = endian.parse(read_bytes(r)?);
         if n > Self::MAX_LABELS {
@@ -1117,11 +1129,11 @@ impl ValueLabelRecord {
         }
 
         let end_offset = r.stream_position()?;
-        Ok(ValueLabelRecord {
+        Ok(Record::ValueLabel(ValueLabelRecord {
             offsets: label_offset..end_offset,
             labels,
             dict_indexes,
-        })
+        }))
     }
 }
 
@@ -1144,7 +1156,7 @@ impl DocumentRecord {
     /// the maximum number that will fit in a 32-bit space.
     pub const MAX_LINES: usize = i32::MAX as usize / Self::LINE_LEN;
 
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<DocumentRecord, Error> {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
         let start_offset = r.stream_position()?;
         let n: u32 = endian.parse(read_bytes(r)?);
         let n = n as usize;
@@ -1160,10 +1172,10 @@ impl DocumentRecord {
                 lines.push(UnencodedStr::<{ DocumentRecord::LINE_LEN }>(read_bytes(r)?));
             }
             let end_offset = r.stream_position()?;
-            Ok(DocumentRecord {
+            Ok(Record::Document(DocumentRecord {
                 offsets: start_offset..end_offset,
                 lines,
-            })
+            }))
         }
     }
 }
@@ -1577,7 +1589,7 @@ impl Extension {
         Ok(())
     }
 
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian, warn: &Box<dyn Fn(Error)>) -> Result<Option<Record>, Error> {
         let subtype = endian.parse(read_bytes(r)?);
         let header_offset = r.stream_position()?;
         let size: u32 = endian.parse(read_bytes(r)?);
@@ -1600,18 +1612,18 @@ impl Extension {
             count,
             data,
         };
-        match subtype {
-            IntegerInfoRecord::SUBTYPE => Ok(IntegerInfoRecord::parse(&extension, endian)?),
-            FloatInfoRecord::SUBTYPE => Ok(FloatInfoRecord::parse(&extension, endian)?),
-            VarDisplayRecord::SUBTYPE => Ok(VarDisplayRecord::parse(&extension, endian)?),
+        let result = match subtype {
+            IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian),
+            FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian),
+            VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, endian),
             MultipleResponseRecord::SUBTYPE | 19 => {
-                Ok(MultipleResponseRecord::parse(&extension, endian)?)
+                MultipleResponseRecord::parse(&extension, endian)
             }
             LongStringValueLabelRecord::SUBTYPE => {
-                Ok(LongStringValueLabelRecord::parse(&extension, endian)?)
+                LongStringValueLabelRecord::parse(&extension, endian)
             }
-            EncodingRecord::SUBTYPE => Ok(EncodingRecord::parse(&extension, endian)?),
-            NumberOfCasesRecord::SUBTYPE => Ok(NumberOfCasesRecord::parse(&extension, endian)?),
+            EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian),
+            NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian),
             5 => Ok(Record::VariableSets(extension.into())),
             10 => Ok(Record::ProductInfo(extension.into())),
             13 => Ok(Record::LongNames(extension.into())),
@@ -1619,6 +1631,13 @@ impl Extension {
             17 => Ok(Record::FileAttributes(extension.into())),
             18 => Ok(Record::VariableAttributes(extension.into())),
             _ => Ok(Record::OtherExtension(extension)),
+        };
+        match result {
+            Ok(result) => Ok(Some(result)),
+            Err(error) => {
+                warn(error);
+                Ok(None)
+            },
         }
     }
 }