work
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 31 Jul 2023 17:53:58 +0000 (10:53 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 31 Jul 2023 17:53:58 +0000 (10:53 -0700)
rust/src/raw.rs

index f1e362376b0e9625811b363721d5e1163f31dcea..7cb7238e9a2d5b721bffcde104254e256ac47ec6 100644 (file)
@@ -3,13 +3,14 @@ use crate::Error;
 
 use flate2::read::ZlibDecoder;
 use num::Integer;
-use num_derive::FromPrimitive;
 use std::{
     collections::VecDeque,
     io::{Error as IoError, Read, Seek, SeekFrom},
     iter::FusedIterator,
 };
 
+use self::state::State;
+
 #[derive(Copy, Clone, Debug)]
 pub enum Compression {
     Simple,
@@ -23,12 +24,37 @@ pub enum Record {
     ValueLabel(ValueLabel),
     VarIndexes(VarIndexes),
     Extension(Extension),
-    EndOfHeaders,
+    EndOfHeaders(u32),
     ZHeader(ZHeader),
     ZTrailer(ZTrailer),
     Case(Vec<Value>),
 }
 
+impl Record {
+    /// Reads one record from `reader`, dispatching on its record type.
+    ///
+    /// The 32-bit record type is decoded with `endian`.  An unrecognized type
+    /// yields [`Error::BadRecordType`], whose `offset` is the stream position
+    /// taken after the type has been read.
+    fn read<R: Read + Seek>(reader: &mut R, endian: Endian) -> Result<Record, Error> {
+        let rec_type: u32 = endian.parse(read_bytes(reader)?);
+        let record = match rec_type {
+            2 => Record::Variable(Variable::read(reader, endian)?),
+            3 => Record::ValueLabel(ValueLabel::read(reader, endian)?),
+            4 => Record::VarIndexes(VarIndexes::read(reader, endian)?),
+            6 => Record::Document(Document::read(reader, endian)?),
+            7 => Record::Extension(Extension::read(reader, endian)?),
+            // Type 999 carries one 32-bit word, which is kept in the record.
+            999 => Record::EndOfHeaders(endian.parse(read_bytes(reader)?)),
+            _ => {
+                let offset = reader.stream_position()?;
+                return Err(Error::BadRecordType { offset, rec_type });
+            }
+        };
+        Ok(record)
+    }
+}
+
 pub struct Header {
     /// Magic number.
     pub magic: Magic,
@@ -70,6 +89,77 @@ pub struct Header {
     pub endian: Endian,
 }
 
+impl Header {
+    /// Reads and decodes the file header from `r`.
+    ///
+    /// The byte order is identified from the layout code and then used to
+    /// decode the remaining numeric fields.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::NotASystemFile`] if the magic number or layout code is
+    /// not recognized, [`Error::InvalidZsavCompression`] or
+    /// [`Error::InvalidSavCompression`] for an unexpected compression code, and
+    /// propagates any failure to read from `r`.
+    fn read<R: Read>(r: &mut R) -> Result<Header, Error> {
+        let magic: [u8; 4] = read_bytes(r)?;
+        let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
+
+        let eye_catcher: [u8; 60] = read_bytes(r)?;
+        let layout_code: [u8; 4] = read_bytes(r)?;
+        // The layout code is normally 2, but 3 is also seen in some files;
+        // either value identifies the byte order.
+        let endian = Endian::identify_u32(2, layout_code)
+            .or_else(|| Endian::identify_u32(3, layout_code))
+            .ok_or(Error::NotASystemFile)?;
+        let layout_code = endian.parse(layout_code);
+
+        let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
+        let nominal_case_size =
+            (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
+
+        let compression_code: u32 = endian.parse(read_bytes(r)?);
+        let compression = match (magic, compression_code) {
+            (Magic::ZSAV, 2) => Some(Compression::ZLib),
+            (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
+            (_, 0) => None,
+            (_, 1) => Some(Compression::Simple),
+            (_, code) => return Err(Error::InvalidSavCompression(code)),
+        };
+
+        let weight_index: u32 = endian.parse(read_bytes(r)?);
+        // A stored 0 becomes `None`; any other value is reduced by one.
+        // `checked_sub` is used because `then_some(weight_index - 1)` evaluates
+        // its argument eagerly and overflows `u32` when the stored value is 0.
+        let weight_index = weight_index.checked_sub(1);
+
+        let n_cases: u32 = endian.parse(read_bytes(r)?);
+        let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
+
+        let bias: f64 = endian.parse(read_bytes(r)?);
+
+        let creation_date: [u8; 9] = read_bytes(r)?;
+        let creation_time: [u8; 8] = read_bytes(r)?;
+        let file_label: [u8; 64] = read_bytes(r)?;
+        let _: [u8; 3] = read_bytes(r)?;
+
+        Ok(Header {
+            magic,
+            layout_code,
+            nominal_case_size,
+            compression,
+            weight_index,
+            n_cases,
+            bias,
+            creation_date,
+            creation_time,
+            eye_catcher,
+            file_label,
+            endian,
+        })
+    }
+}
+
 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
 pub struct Magic([u8; 4]);
 
@@ -112,191 +186,243 @@ impl VarType {
     }
 }
 
-trait State {
-    #[allow(clippy::type_complexity)]
-    fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
-}
+mod state {
+    use super::{
+        Compression, Error, Header, Record, Value, VarType, Variable, ZHeader, ZTrailer,
+        ZlibDecodeMultiple,
+    };
+    use crate::endian::Endian;
+    use std::{
+        collections::VecDeque,
+        io::{Read, Seek},
+    };
 
-struct Start<R: Read + Seek> {
-    reader: R,
-}
+    pub trait State {
+        #[allow(clippy::type_complexity)]
+        fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
+    }
 
-struct CommonState<R: Read + Seek> {
-    reader: R,
-    endian: Endian,
-    bias: f64,
-    compression: Option<Compression>,
-    var_types: Vec<VarType>,
-}
+    struct Start<R: Read + Seek> {
+        reader: R,
+    }
 
-impl<R: Read + Seek + 'static> State for Start<R> {
-    fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-        let header = read_header(&mut self.reader)?;
-        let next_state = Headers(CommonState {
-            reader: self.reader,
-            endian: header.endian,
-            bias: header.bias,
-            compression: header.compression,
-            var_types: Vec::new(),
-        });
-        Ok(Some((Record::Header(header), Box::new(next_state))))
+    pub fn new<R: Read + Seek + 'static>(reader: R) -> Box<dyn State> {
+        Box::new(Start { reader })
+    }
+
+    struct CommonState<R: Read + Seek> {
+        reader: R,
+        endian: Endian,
+        bias: f64,
+        compression: Option<Compression>,
+        var_types: Vec<VarType>,
     }
-}
 
-struct Headers<R: Read + Seek>(CommonState<R>);
+    impl<R: Read + Seek + 'static> State for Start<R> {
+        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+            let header = Header::read(&mut self.reader)?;
+            let next_state = Headers(CommonState {
+                reader: self.reader,
+                endian: header.endian,
+                bias: header.bias,
+                compression: header.compression,
+                var_types: Vec::new(),
+            });
+            Ok(Some((Record::Header(header), Box::new(next_state))))
+        }
+    }
 
-impl<R: Read + Seek + 'static> State for Headers<R> {
-    fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-        let endian = self.0.endian;
-        let rec_type: u32 = endian.parse(read_bytes(&mut self.0.reader)?);
-        let record = match rec_type {
-            2 => {
-                let variable = read_variable_record(&mut self.0.reader, endian)?;
-                self.0.var_types.push(VarType::from_width(variable.width));
-                Record::Variable(variable)
-            }
-            3 => Record::ValueLabel(read_value_label_record(&mut self.0.reader, endian)?),
-            4 => Record::VarIndexes(read_var_indexes_record(&mut self.0.reader, endian)?),
-            6 => Record::Document(read_document_record(&mut self.0.reader, endian)?),
-            7 => Record::Extension(read_extension_record(&mut self.0.reader, endian)?),
-            999 => {
-                let _: [u8; 4] = read_bytes(&mut self.0.reader)?;
-                let next_state: Box<dyn State> = match self.0.compression {
-                    None => Box::new(Data(self.0)),
-                    Some(Compression::Simple) => Box::new(CompressedData::new(self.0)),
-                    Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)),
-                };
-                return Ok(Some((Record::EndOfHeaders, next_state)));
-            }
-            _ => {
-                return Err(Error::BadRecordType {
-                    offset: self.0.reader.stream_position()?,
-                    rec_type,
-                })
+    struct Headers<R: Read + Seek>(CommonState<R>);
+
+    impl<R: Read + Seek + 'static> State for Headers<R> {
+        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+            let record = Record::read(&mut self.0.reader, self.0.endian)?;
+            match record {
+                Record::Variable(Variable { width, .. }) => {
+                    self.0.var_types.push(VarType::from_width(width));
+                }
+                Record::EndOfHeaders(_) => {
+                    let next_state: Box<dyn State> = match self.0.compression {
+                        None => Box::new(Data(self.0)),
+                        Some(Compression::Simple) => Box::new(CompressedData::new(self.0)),
+                        Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)),
+                    };
+                    return Ok(Some((record, next_state)));
+                }
+                _ => (),
+            };
+            Ok(Some((record, self)))
+        }
+    }
+
+    struct ZlibHeader<R: Read + Seek>(CommonState<R>);
+
+    impl<R: Read + Seek + 'static> State for ZlibHeader<R> {
+        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+            let zheader = ZHeader::read(&mut self.0.reader, self.0.endian)?;
+            Ok(Some((Record::ZHeader(zheader), self)))
+        }
+    }
+
+    struct ZlibTrailer<R: Read + Seek>(CommonState<R>, ZHeader);
+
+    impl<R: Read + Seek + 'static> State for ZlibTrailer<R> {
+        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+            let retval = ZTrailer::read(
+                &mut self.0.reader,
+                self.0.endian,
+                self.1.ztrailer_offset,
+                self.1.ztrailer_len,
+            )?;
+            let next_state = Box::new(CompressedData::new(CommonState {
+                reader: ZlibDecodeMultiple::new(self.0.reader),
+                endian: self.0.endian,
+                bias: self.0.bias,
+                compression: self.0.compression,
+                var_types: self.0.var_types,
+            }));
+            match retval {
+                None => next_state.read(),
+                Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state))),
             }
-        };
-        Ok(Some((record, self)))
+        }
     }
-}
 
-struct ZlibHeader<R: Read + Seek>(CommonState<R>);
+    struct Data<R: Read + Seek>(CommonState<R>);
+
+    impl<R: Read + Seek + 'static> State for Data<R> {
+        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+            match Value::read_case(&mut self.0.reader, &self.0.var_types, self.0.endian)? {
+                None => Ok(None),
+                Some(values) => Ok(Some((Record::Case(values), self))),
+            }
+        }
+    }
 
-impl<R: Read + Seek + 'static> State for ZlibHeader<R> {
-    fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-        let zheader = read_zheader(&mut self.0.reader, self.0.endian)?;
-        Ok(Some((Record::ZHeader(zheader), self)))
+    struct CompressedData<R: Read + Seek> {
+        common: CommonState<R>,
+        codes: VecDeque<u8>,
     }
-}
 
-struct ZlibTrailer<R: Read + Seek>(CommonState<R>, ZHeader);
+    impl<R: Read + Seek + 'static> CompressedData<R> {
+        fn new(common: CommonState<R>) -> CompressedData<R> {
+            CompressedData {
+                common,
+                codes: VecDeque::new(),
+            }
+        }
+    }
 
-impl<R: Read + Seek + 'static> State for ZlibTrailer<R> {
-    fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-        let retval = read_ztrailer(&mut self.0.reader, self.0.endian, self.1.ztrailer_offset, self.1.ztrailer_len)?;
-        let next_state = Box::new(CompressedData::new(CommonState {
-            reader: ZlibDecodeMultiple::new(self.0.reader),
-            endian: self.0.endian,
-            bias: self.0.bias,
-            compression: self.0.compression,
-            var_types: self.0.var_types
-        }));
-        match retval {
-            None => next_state.read(),
-            Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state)))
-        }        
+    impl<R: Read + Seek + 'static> State for CompressedData<R> {
+        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
+            match Value::read_compressed_case(
+                &mut self.common.reader,
+                &self.common.var_types,
+                &mut self.codes,
+                self.common.endian,
+                self.common.bias,
+            )? {
+                None => Ok(None),
+                Some(values) => Ok(Some((Record::Case(values), self))),
+            }
+        }
     }
 }
 
-struct Data<R: Read + Seek>(CommonState<R>);
+#[derive(Copy, Clone)]
+pub enum Value {
+    Number(Option<f64>),
+    String([u8; 8]),
+}
 
-impl<R: Read + Seek + 'static> State for Data<R> {
-    fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-        let case_start = self.0.reader.stream_position()?;
-        let mut values = Vec::with_capacity(self.0.var_types.len());
-        for (i, &var_type) in self.0.var_types.iter().enumerate() {
-            let Some(raw) = try_read_bytes(&mut self.0.reader)? else {
+impl Value {
+    pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
+        match var_type {
+            VarType::String => Value::String(raw),
+            VarType::Number => {
+                let number: f64 = endian.parse(raw);
+                Value::Number((number != -f64::MAX).then_some(number))
+            }
+        }
+    }
+
+    fn read_case<R: Read + Seek>(
+        reader: &mut R,
+        var_types: &[VarType],
+        endian: Endian,
+    ) -> Result<Option<Vec<Value>>, Error> {
+        let case_start = reader.stream_position()?;
+        let mut values = Vec::with_capacity(var_types.len());
+        for (i, &var_type) in var_types.iter().enumerate() {
+            let Some(raw) = try_read_bytes(reader)? else {
                 if i == 0 {
                     return Ok(None);
                 } else {
-                    let offset = self.0.reader.stream_position()?;
+                    let offset = reader.stream_position()?;
                     return Err(Error::EofInCase {
                         offset,
                         case_ofs: offset - case_start,
-                        case_len: self.0.var_types.len() * 8,
+                        case_len: var_types.len() * 8,
                     });
                 }
             };
-            values.push(Value::from_raw(var_type, raw, self.0.endian));
+            values.push(Value::from_raw(var_type, raw, endian));
         }
-        Ok(Some((Record::Case(values), self)))
+        Ok(Some(values))
     }
-}
 
-struct CompressedData<R: Read + Seek> {
-    common: CommonState<R>,
-    codes: VecDeque<u8>,
-}
-
-impl<R: Read + Seek + 'static> CompressedData<R> {
-    fn new(common: CommonState<R>) -> CompressedData<R> {
-        CompressedData { common, codes: VecDeque::new() }
-    }
-}
-
-impl<R: Read + Seek + 'static> State for CompressedData<R> {
-    fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-        let case_start = self.common.reader.stream_position()?;
-        let mut values = Vec::with_capacity(self.common.var_types.len());
-        for (i, &var_type) in self.common.var_types.iter().enumerate() {
+    fn read_compressed_case<R: Read + Seek>(
+        reader: &mut R,
+        var_types: &[VarType],
+        codes: &mut VecDeque<u8>,
+        endian: Endian,
+        bias: f64,
+    ) -> Result<Option<Vec<Value>>, Error> {
+        let case_start = reader.stream_position()?;
+        let mut values = Vec::with_capacity(var_types.len());
+        for (i, &var_type) in var_types.iter().enumerate() {
             let value = loop {
-                let Some(code) = self.codes.pop_front() else {
-                    let Some(new_codes): Option<[u8; 8]> = try_read_bytes(&mut self.common.reader)?
-                    else {
+                let Some(code) = codes.pop_front() else {
+                    let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
                         if i == 0 {
                             return Ok(None);
                         } else {
-                            let offset = self.common.reader.stream_position()?;
+                            let offset = reader.stream_position()?;
                             return Err(Error::EofInCompressedCase {
                                 offset,
                                 case_ofs: offset - case_start,
                             });
                         }
                     };
-                    self.codes.extend(new_codes.into_iter());
+                    codes.extend(new_codes.into_iter());
                     continue;
                 };
                 match code {
                     0 => (),
                     1..=251 => match var_type {
-                        VarType::Number => break Value::Number(Some(code as f64 - self.common.bias)),
+                        VarType::Number => break Value::Number(Some(code as f64 - bias)),
                         VarType::String => {
-                            break Value::String(self.common.endian.to_bytes(code as f64 - self.common.bias))
+                            break Value::String(endian.to_bytes(code as f64 - bias))
                         }
                     },
                     252 => {
                         if i == 0 {
                             return Ok(None);
                         } else {
-                            let offset = self.common.reader.stream_position()?;
+                            let offset = reader.stream_position()?;
                             return Err(Error::PartialCompressedCase {
                                 offset,
                                 case_ofs: offset - case_start,
                             });
                         }
                     }
-                    253 => {
-                        break Value::from_raw(
-                            var_type,
-                            read_bytes(&mut self.common.reader)?,
-                            self.common.endian,
-                        )
-                    }
+                    253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
                     254 => match var_type {
                         VarType::String => break Value::String(*b"        "), // XXX EBCDIC
                         VarType::Number => {
                             return Err(Error::CompressedStringExpected {
                                 offset: case_start,
-                                case_ofs: self.common.reader.stream_position()? - case_start,
+                                case_ofs: reader.stream_position()? - case_start,
                             })
                         }
                     },
@@ -305,7 +431,7 @@ impl<R: Read + Seek + 'static> State for CompressedData<R> {
                         VarType::String => {
                             return Err(Error::CompressedNumberExpected {
                                 offset: case_start,
-                                case_ofs: self.common.reader.stream_position()? - case_start,
+                                case_ofs: reader.stream_position()? - case_start,
                             })
                         }
                     },
@@ -313,7 +439,7 @@ impl<R: Read + Seek + 'static> State for CompressedData<R> {
             };
             values.push(value);
         }
-        Ok(Some((Record::Case(values), self)))
+        Ok(Some(values))
     }
 }
 
@@ -361,24 +487,6 @@ where
     }
 }
 
-#[derive(Copy, Clone)]
-pub enum Value {
-    Number(Option<f64>),
-    String([u8; 8]),
-}
-
-impl Value {
-    pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
-        match var_type {
-            VarType::String => Value::String(raw),
-            VarType::Number => {
-                let number: f64 = endian.parse(raw);
-                Value::Number((number != -f64::MAX).then_some(number))
-            }
-        }
-    }
-}
-
 pub struct Reader {
     state: Option<Box<dyn State>>,
 }
@@ -386,7 +494,7 @@ pub struct Reader {
 impl Reader {
     pub fn new<R: Read + Seek + 'static>(reader: R) -> Result<Reader, Error> {
         Ok(Reader {
-            state: Some(Box::new(Start { reader })),
+            state: Some(state::new(reader)),
         })
     }
 }
@@ -408,59 +516,6 @@ impl Iterator for Reader {
 
 impl FusedIterator for Reader {}
 
-fn read_header<R: Read>(r: &mut R) -> Result<Header, Error> {
-    let magic: [u8; 4] = read_bytes(r)?;
-    let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
-
-    let eye_catcher: [u8; 60] = read_bytes(r)?;
-    let layout_code: [u8; 4] = read_bytes(r)?;
-    let endian = Endian::identify_u32(2, layout_code)
-        .or_else(|| Endian::identify_u32(2, layout_code))
-        .ok_or_else(|| Error::NotASystemFile)?;
-    let layout_code = endian.parse(layout_code);
-
-    let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
-    let nominal_case_size =
-        (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
-
-    let compression_code: u32 = endian.parse(read_bytes(r)?);
-    let compression = match (magic, compression_code) {
-        (Magic::ZSAV, 2) => Some(Compression::ZLib),
-        (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
-        (_, 0) => None,
-        (_, 1) => Some(Compression::Simple),
-        (_, code) => return Err(Error::InvalidSavCompression(code)),
-    };
-
-    let weight_index: u32 = endian.parse(read_bytes(r)?);
-    let weight_index = (weight_index > 0).then_some(weight_index - 1);
-
-    let n_cases: u32 = endian.parse(read_bytes(r)?);
-    let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
-
-    let bias: f64 = endian.parse(read_bytes(r)?);
-
-    let creation_date: [u8; 9] = read_bytes(r)?;
-    let creation_time: [u8; 8] = read_bytes(r)?;
-    let file_label: [u8; 64] = read_bytes(r)?;
-    let _: [u8; 3] = read_bytes(r)?;
-
-    Ok(Header {
-        magic,
-        layout_code,
-        nominal_case_size,
-        compression,
-        weight_index,
-        n_cases,
-        bias,
-        creation_date,
-        creation_time,
-        eye_catcher,
-        file_label,
-        endian,
-    })
-}
-
 pub struct Variable {
     /// Offset from the start of the file to the start of the record.
     pub offset: u64,
@@ -487,69 +542,71 @@ pub struct Variable {
     pub label: Option<Vec<u8>>,
 }
 
-fn read_variable_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Variable, Error> {
-    let offset = r.stream_position()?;
-    let width: i32 = endian.parse(read_bytes(r)?);
-    let has_variable_label: u32 = endian.parse(read_bytes(r)?);
-    let missing_value_code: i32 = endian.parse(read_bytes(r)?);
-    let print_format: u32 = endian.parse(read_bytes(r)?);
-    let write_format: u32 = endian.parse(read_bytes(r)?);
-    let name: [u8; 8] = read_bytes(r)?;
+impl Variable {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Variable, Error> {
+        let offset = r.stream_position()?;
+        let width: i32 = endian.parse(read_bytes(r)?);
+        let has_variable_label: u32 = endian.parse(read_bytes(r)?);
+        let missing_value_code: i32 = endian.parse(read_bytes(r)?);
+        let print_format: u32 = endian.parse(read_bytes(r)?);
+        let write_format: u32 = endian.parse(read_bytes(r)?);
+        let name: [u8; 8] = read_bytes(r)?;
 
-    let label = match has_variable_label {
-        0 => None,
-        1 => {
-            let len: u32 = endian.parse(read_bytes(r)?);
-            let read_len = len.min(65535) as usize;
-            let label = Some(read_vec(r, read_len)?);
+        let label = match has_variable_label {
+            0 => None,
+            1 => {
+                let len: u32 = endian.parse(read_bytes(r)?);
+                let read_len = len.min(65535) as usize;
+                let label = Some(read_vec(r, read_len)?);
 
-            let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
-            let _ = read_vec(r, padding_bytes as usize)?;
-
-            label
-        }
-        _ => {
-            return Err(Error::BadVariableLabelCode {
-                offset,
-                code: has_variable_label,
-            })
-        }
-    };
+                let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
+                let _ = read_vec(r, padding_bytes as usize)?;
 
-    let mut missing = Vec::new();
-    if missing_value_code != 0 {
-        match (width, missing_value_code) {
-            (0, -3 | -2 | 1 | 2 | 3) => (),
-            (0, _) => {
-                return Err(Error::BadNumericMissingValueCode {
-                    offset,
-                    code: missing_value_code,
-                })
+                label
             }
-            (_, 0..=3) => (),
-            (_, _) => {
-                return Err(Error::BadStringMissingValueCode {
+            _ => {
+                return Err(Error::BadVariableLabelCode {
                     offset,
-                    code: missing_value_code,
+                    code: has_variable_label,
                 })
             }
-        }
+        };
+
+        let mut missing = Vec::new();
+        if missing_value_code != 0 {
+            match (width, missing_value_code) {
+                (0, -3 | -2 | 1 | 2 | 3) => (),
+                (0, _) => {
+                    return Err(Error::BadNumericMissingValueCode {
+                        offset,
+                        code: missing_value_code,
+                    })
+                }
+                (_, 0..=3) => (),
+                (_, _) => {
+                    return Err(Error::BadStringMissingValueCode {
+                        offset,
+                        code: missing_value_code,
+                    })
+                }
+            }
 
-        for _ in 0..missing_value_code.abs() {
-            missing.push(read_bytes(r)?);
+            for _ in 0..missing_value_code.abs() {
+                missing.push(read_bytes(r)?);
+            }
         }
-    }
 
-    Ok(Variable {
-        offset,
-        width,
-        name,
-        print_format,
-        write_format,
-        missing_value_code,
-        missing,
-        label,
-    })
+        Ok(Variable {
+            offset,
+            width,
+            name,
+            print_format,
+            write_format,
+            missing_value_code,
+            missing,
+            label,
+        })
+    }
 }
 
 pub struct ValueLabel {
@@ -563,31 +620,31 @@ pub struct ValueLabel {
 impl ValueLabel {
     /// Maximum number of value labels in a record.
     pub const MAX: u32 = u32::MAX / 8;
-}
 
-fn read_value_label_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ValueLabel, Error> {
-    let offset = r.stream_position()?;
-    let n: u32 = endian.parse(read_bytes(r)?);
-    if n > ValueLabel::MAX {
-        return Err(Error::BadNumberOfValueLabels {
-            offset,
-            n,
-            max: ValueLabel::MAX,
-        });
-    }
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ValueLabel, Error> {
+        let offset = r.stream_position()?;
+        let n: u32 = endian.parse(read_bytes(r)?);
+        if n > ValueLabel::MAX {
+            return Err(Error::BadNumberOfValueLabels {
+                offset,
+                n,
+                max: ValueLabel::MAX,
+            });
+        }
 
-    let mut labels = Vec::new();
-    for _ in 0..n {
-        let value: [u8; 8] = read_bytes(r)?;
-        let label_len: u8 = endian.parse(read_bytes(r)?);
-        let label_len = label_len as usize;
-        let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
+        let mut labels = Vec::new();
+        for _ in 0..n {
+            let value: [u8; 8] = read_bytes(r)?;
+            let label_len: u8 = endian.parse(read_bytes(r)?);
+            let label_len = label_len as usize;
+            let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
 
-        let mut label = read_vec(r, padded_len)?;
-        label.truncate(label_len);
-        labels.push((value, label));
+            let mut label = read_vec(r, padded_len)?;
+            label.truncate(label_len);
+            labels.push((value, label));
+        }
+        Ok(ValueLabel { offset, labels })
     }
-    Ok(ValueLabel { offset, labels })
 }
 
 pub struct VarIndexes {
@@ -601,32 +658,29 @@ pub struct VarIndexes {
 impl VarIndexes {
     /// Maximum number of variable indexes in a record.
     pub const MAX: u32 = u32::MAX / 8;
-}
 
-fn read_var_indexes_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<VarIndexes, Error> {
-    let offset = r.stream_position()?;
-    let n: u32 = endian.parse(read_bytes(r)?);
-    if n > VarIndexes::MAX {
-        return Err(Error::BadNumberOfVarIndexes {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<VarIndexes, Error> {
+        let offset = r.stream_position()?;
+        let n: u32 = endian.parse(read_bytes(r)?);
+        if n > VarIndexes::MAX {
+            return Err(Error::BadNumberOfVarIndexes {
+                offset,
+                n,
+                max: VarIndexes::MAX,
+            });
+        }
+        let mut var_indexes = Vec::with_capacity(n as usize);
+        for _ in 0..n {
+            var_indexes.push(endian.parse(read_bytes(r)?));
+        }
+
+        Ok(VarIndexes {
             offset,
-            n,
-            max: VarIndexes::MAX,
-        });
-    }
-    let mut var_indexes = Vec::with_capacity(n as usize);
-    for _ in 0..n {
-        var_indexes.push(endian.parse(read_bytes(r)?));
+            var_indexes,
+        })
     }
-
-    Ok(VarIndexes {
-        offset,
-        var_indexes,
-    })
 }
 
-pub const DOC_LINE_LEN: u32 = 80;
-pub const DOC_MAX_LINES: u32 = i32::MAX as u32 / DOC_LINE_LEN;
-
 pub struct Document {
     /// Offset from the start of the file to the start of the record.
     pub pos: u64,
@@ -635,27 +689,42 @@ pub struct Document {
-    pub lines: Vec<[u8; DOC_LINE_LEN as usize]>,
+    pub lines: Vec<[u8; Document::LINE_LEN as usize]>,
 }
 
-fn read_document_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Document, Error> {
-    let offset = r.stream_position()?;
-    let n: u32 = endian.parse(read_bytes(r)?);
-    match n {
-        0..=DOC_MAX_LINES => {
-            let pos = r.stream_position()?;
-            let mut lines = Vec::with_capacity(n as usize);
-            for _ in 0..n {
-                let line: [u8; 80] = read_bytes(r)?;
-                lines.push(line);
+impl Document {
+    /// Length of a line in a document.  Document lines are fixed-length and
+    /// padded on the right with spaces.
+    pub const LINE_LEN: u32 = 80;
+
+    /// Maximum number of lines we will accept in a document.  This is simply
+    /// the maximum number that will fit in a 32-bit space.
+    pub const MAX_LINES: u32 = i32::MAX as u32 / Self::LINE_LEN;
+
+    /// Reads a document record from `r`, decoding the line count with `endian`.
+    ///
+    /// Returns [`Error::BadDocumentLength`] if the record claims more than
+    /// [`Document::MAX_LINES`] lines.
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Document, Error> {
+        let offset = r.stream_position()?;
+        let n: u32 = endian.parse(read_bytes(r)?);
+        match n {
+            0..=Self::MAX_LINES => {
+                let pos = r.stream_position()?;
+                let mut lines = Vec::with_capacity(n as usize);
+                for _ in 0..n {
+                    let line: [u8; Self::LINE_LEN as usize] = read_bytes(r)?;
+                    lines.push(line);
+                }
+                Ok(Document { pos, lines })
             }
-            Ok(Document { pos, lines })
+            _ => Err(Error::BadDocumentLength {
+                offset,
+                n,
+                max: Self::MAX_LINES,
+            }),
         }
-        _ => Err(Error::BadDocumentLength {
-            offset,
-            n,
-            max: DOC_MAX_LINES,
-        }),
     }
 }
 
+/*
 #[derive(FromPrimitive)]
 enum ExtensionType {
     /// Machine integer info.
@@ -695,6 +760,7 @@ enum ExtensionType {
     /// "Format properties in dataview table".
     Dataview = 24,
 }
+ */
 
 pub struct Extension {
     /// Offset from the start of the file to the start of the record.
@@ -741,28 +807,30 @@ fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) {
 }
  */
 
-fn read_extension_record<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Extension, Error> {
-    let subtype = endian.parse(read_bytes(r)?);
-    let offset = r.stream_position()?;
-    let size: u32 = endian.parse(read_bytes(r)?);
-    let count = endian.parse(read_bytes(r)?);
-    let Some(product) = size.checked_mul(count) else {
-        return Err(Error::ExtensionRecordTooLarge {
+impl Extension {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Extension, Error> {
+        let subtype = endian.parse(read_bytes(r)?);
+        let offset = r.stream_position()?;
+        let size: u32 = endian.parse(read_bytes(r)?);
+        let count = endian.parse(read_bytes(r)?);
+        let Some(product) = size.checked_mul(count) else {
+            return Err(Error::ExtensionRecordTooLarge {
+                offset,
+                subtype,
+                size,
+                count,
+            });
+        };
+        let offset = r.stream_position()?;
+        let data = read_vec(r, product as usize)?;
+        Ok(Extension {
             offset,
             subtype,
             size,
             count,
-        });
-    };
-    let offset = r.stream_position()?;
-    let data = read_vec(r, product as usize)?;
-    Ok(Extension {
-        offset,
-        subtype,
-        size,
-        count,
-        data,
-    })
+            data,
+        })
+    }
 }
 
 pub struct ZHeader {
@@ -779,18 +847,20 @@ pub struct ZHeader {
     pub ztrailer_len: u64,
 }
 
-fn read_zheader<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
-    let offset = r.stream_position()?;
-    let zheader_offset: u64 = endian.parse(read_bytes(r)?);
-    let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
-    let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
+impl ZHeader {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
+        let offset = r.stream_position()?;
+        let zheader_offset: u64 = endian.parse(read_bytes(r)?);
+        let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
+        let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
 
-    Ok(ZHeader {
-        offset,
-        zheader_offset,
-        ztrailer_offset,
-        ztrailer_len,
-    })
+        Ok(ZHeader {
+            offset,
+            zheader_offset,
+            ztrailer_offset,
+            ztrailer_len,
+        })
+    }
 }
 
 pub struct ZTrailer {
@@ -827,50 +897,52 @@ pub struct ZBlock {
     pub compressed_size: u32,
 }
 
-fn read_ztrailer<R: Read + Seek>(
-    r: &mut R,
-    endian: Endian,
-    ztrailer_ofs: u64,
-    ztrailer_len: u64,
-) -> Result<Option<ZTrailer>, Error> {
-    let start_offset = r.stream_position()?;
-    if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
-        return Ok(None);
-    }
-    let int_bias = endian.parse(read_bytes(r)?);
-    let zero = endian.parse(read_bytes(r)?);
-    let block_size = endian.parse(read_bytes(r)?);
-    let n_blocks: u32 = endian.parse(read_bytes(r)?);
-    let expected_n_blocks = (ztrailer_len - 24) / 24;
-    if n_blocks as u64 != expected_n_blocks {
-        return Err(Error::BadZlibTrailerNBlocks {
+impl ZTrailer {
+    fn read<R: Read + Seek>(
+        r: &mut R,
+        endian: Endian,
+        ztrailer_ofs: u64,
+        ztrailer_len: u64,
+    ) -> Result<Option<ZTrailer>, Error> {
+        let start_offset = r.stream_position()?;
+        if r.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
+            return Ok(None);
+        }
+        let int_bias = endian.parse(read_bytes(r)?);
+        let zero = endian.parse(read_bytes(r)?);
+        let block_size = endian.parse(read_bytes(r)?);
+        let n_blocks: u32 = endian.parse(read_bytes(r)?);
+        let expected_n_blocks = (ztrailer_len - 24) / 24;
+        if n_blocks as u64 != expected_n_blocks {
+            return Err(Error::BadZlibTrailerNBlocks {
+                offset: ztrailer_ofs,
+                n_blocks,
+                expected_n_blocks,
+                ztrailer_len,
+            });
+        }
+        let mut blocks = Vec::with_capacity(n_blocks as usize);
+        for _ in 0..n_blocks {
+            let uncompressed_ofs = endian.parse(read_bytes(r)?);
+            let compressed_ofs = endian.parse(read_bytes(r)?);
+            let uncompressed_size = endian.parse(read_bytes(r)?);
+            let compressed_size = endian.parse(read_bytes(r)?);
+            blocks.push(ZBlock {
+                uncompressed_ofs,
+                compressed_ofs,
+                uncompressed_size,
+                compressed_size,
+            });
+        }
+        r.seek(SeekFrom::Start(start_offset))?;
+        Ok(Some(ZTrailer {
             offset: ztrailer_ofs,
-            n_blocks,
-            expected_n_blocks,
-            ztrailer_len,
-        });
-    }
-    let mut blocks = Vec::with_capacity(n_blocks as usize);
-    for _ in 0..n_blocks {
-        let uncompressed_ofs = endian.parse(read_bytes(r)?);
-        let compressed_ofs = endian.parse(read_bytes(r)?);
-        let uncompressed_size = endian.parse(read_bytes(r)?);
-        let compressed_size = endian.parse(read_bytes(r)?);
-        blocks.push(ZBlock {
-            uncompressed_ofs,
-            compressed_ofs,
-            uncompressed_size,
-            compressed_size,
-        });
-    }
-    r.seek(SeekFrom::Start(start_offset))?;
-    Ok(Some(ZTrailer {
-        offset: ztrailer_ofs,
-        int_bias,
-        zero,
-        block_size,
-        blocks,
-    }))
+            int_bias,
+            zero,
+            block_size,
+            blocks,
+        }))
+    }
 }
 
 fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {