more cleanup
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 13 Jul 2025 00:17:53 +0000 (17:17 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 13 Jul 2025 00:38:32 +0000 (17:38 -0700)
rust/pspp/src/dictionary.rs
rust/pspp/src/format/mod.rs
rust/pspp/src/output/pivot/mod.rs
rust/pspp/src/sys/cooked.rs
rust/pspp/src/sys/raw.rs
rust/pspp/src/sys/raw/records.rs [new file with mode: 0644]

index c555b30abff17cff4d61cfa51a4ebd0e2dc050fe..e5e8f1df07ee38eff9b7f7356bc83dcc1f533f13 100644 (file)
@@ -23,7 +23,7 @@ use std::{
     collections::{BTreeMap, BTreeSet, HashMap, HashSet},
     fmt::{Debug, Display, Formatter, Result as FmtResult},
     hash::Hash,
-    ops::{Bound, RangeBounds, RangeInclusive},
+    ops::{Bound, Not, RangeBounds, RangeInclusive},
     str::FromStr,
 };
 
@@ -40,12 +40,50 @@ use crate::{
     identifier::{ByIdentifier, HasIdentifier, Identifier},
     output::pivot::{Axis3, Dimension, Footnote, Footnotes, Group, PivotTable, Value},
     settings::Show,
-    sys::raw::{CategoryLabels, RawString, VarType},
+    sys::raw::RawString,
 };
 
 /// An index within [Dictionary::variables].
 pub type DictIndex = usize;
 
+/// Variable type.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum VarType {
+    /// A numeric variable.
+    Numeric,
+
+    /// A string variable.
+    String,
+}
+
+impl Not for VarType {
+    type Output = Self;
+
+    fn not(self) -> Self::Output {
+        match self {
+            Self::Numeric => Self::String,
+            Self::String => Self::Numeric,
+        }
+    }
+}
+
+impl Not for &VarType {
+    type Output = VarType;
+
+    fn not(self) -> Self::Output {
+        !*self
+    }
+}
+
+impl Display for VarType {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        match self {
+            VarType::Numeric => write!(f, "numeric"),
+            VarType::String => write!(f, "string"),
+        }
+    }
+}
+
 /// [VarType], plus a width for [VarType::String].
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum VarWidth {
@@ -198,15 +236,12 @@ impl Display for VarWidthAdjective {
 }
 
 #[derive(Clone)]
-pub enum Datum<S = RawString> {
+pub enum Datum {
     Number(Option<f64>),
-    String(S),
+    String(RawString),
 }
 
-impl<S> Debug for Datum<S>
-where
-    S: Debug,
-{
+impl Debug for Datum {
     fn fmt(&self, f: &mut Formatter) -> FmtResult {
         match self {
             Datum::Number(Some(number)) => write!(f, "{number:?}"),
@@ -1662,6 +1697,12 @@ impl Measure {
     }
 }
 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum CategoryLabels {
+    VarLabels,
+    CountedValues,
+}
+
 #[cfg(test)]
 mod test {
     use std::collections::HashSet;
index ff36586fe03f63d89cd0a32db4383d2ca7ffd691..f7abf5875730e0494c84b1247c77ff63c0c491e4 100644 (file)
@@ -28,8 +28,8 @@ use thiserror::Error as ThisError;
 use unicode_width::UnicodeWidthStr;
 
 use crate::{
-    dictionary::{Datum, VarWidth},
-    sys::raw::{self, RawString, VarType},
+    dictionary::{Datum, VarType, VarWidth},
+    sys::raw::{self, RawString},
 };
 
 mod display;
@@ -800,10 +800,10 @@ impl UncheckedFormat {
     }
 }
 
-impl TryFrom<raw::RawFormat> for UncheckedFormat {
+impl TryFrom<raw::records::RawFormat> for UncheckedFormat {
     type Error = Error;
 
-    fn try_from(raw: raw::RawFormat) -> Result<Self, Self::Error> {
+    fn try_from(raw: raw::records::RawFormat) -> Result<Self, Self::Error> {
         let raw = raw.0;
         let raw_format = (raw >> 16) as u16;
         let format = raw_format.try_into()?;
index 6c34441add64bfbbf938bd9dbc3c05e75fbe4ef4..a085ef49297d243dac6fb69d5bfcc56eefb6296e 100644 (file)
@@ -68,10 +68,9 @@ use thiserror::Error as ThisError;
 use tlo::parse_tlo;
 
 use crate::{
-    dictionary::{Datum, Variable},
+    dictionary::{Datum, VarType, Variable},
     format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat},
     settings::{Settings, Show},
-    sys::raw::VarType,
 };
 
 pub mod output;
index 3b50a63ce5aed6ed2ac104859c6fe1e927ed7627..914f03c10d50443e343290052415963fa78cf21d 100644 (file)
@@ -19,7 +19,8 @@ use std::{collections::BTreeMap, ops::Range};
 use crate::{
     calendar::date_time_to_pspp,
     dictionary::{
-        Datum, Dictionary, InvalidRole, MissingValues, MissingValuesError, MultipleResponseSet, MultipleResponseType, VarWidth, Variable, VariableSet
+        Datum, Dictionary, InvalidRole, MissingValues, MissingValuesError, MultipleResponseSet,
+        MultipleResponseType, VarWidth, Variable, VariableSet,
     },
     endian::Endian,
     format::{Error as FormatError, Format, UncheckedFormat},
@@ -29,12 +30,16 @@ use crate::{
     sys::{
         encoding::Error as EncodingError,
         raw::{
-            self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension,
-            FileAttributesRecord, FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName,
-            LongNamesRecord, LongStringMissingValueRecord, LongStringValueLabelRecord,
-            MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord, RawDatum, RawString,
-            RawWidth, ValueLabel, ValueLabelRecord, VarDisplayRecord, VariableAttributesRecord,
-            VariableRecord, VariableSetRecord, VeryLongStringsRecord, ZHeader, ZTrailer,
+            self,
+            records::{
+                Compression, DocumentRecord, EncodingRecord, Extension, FileAttributesRecord,
+                FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, LongNamesRecord,
+                LongStringMissingValueRecord, LongStringValueLabelRecord, MultipleResponseRecord,
+                NumberOfCasesRecord, ProductInfoRecord, RawFormat, ValueLabel, ValueLabelRecord,
+                VarDisplayRecord, VariableAttributesRecord, VariableRecord, VariableSetRecord,
+                VeryLongStringsRecord, ZHeader, ZTrailer,
+            },
+            Cases, DecodedRecord, RawDatum, RawString, RawWidth,
         },
     },
 };
@@ -44,8 +49,6 @@ use indexmap::set::MutableValues;
 use itertools::Itertools;
 use thiserror::Error as ThisError;
 
-pub use crate::sys::raw::{CategoryLabels, Compression};
-
 #[derive(ThisError, Clone, Debug)]
 pub enum Error {
     #[error("Missing header record")]
@@ -1118,7 +1121,7 @@ impl Decoder {
 impl MultipleResponseSet {
     fn decode(
         dictionary: &Dictionary,
-        input: &raw::MultipleResponseSet<Identifier, String>,
+        input: &raw::records::MultipleResponseSet<Identifier, String>,
         warn: &mut impl FnMut(Error),
     ) -> Result<Self, Error> {
         let mr_set_name = input.name.clone();
@@ -1201,7 +1204,7 @@ fn fix_line_ends(s: &str) -> String {
 }
 
 fn decode_format(
-    raw: raw::RawFormat,
+    raw: RawFormat,
     width: VarWidth,
     mut warn: impl FnMut(Format, FormatError),
 ) -> Format {
@@ -1218,11 +1221,11 @@ fn decode_format(
 impl MultipleResponseType {
     fn decode(
         mr_set: &Identifier,
-        input: &raw::MultipleResponseType,
+        input: &raw::records::MultipleResponseType,
         min_width: VarWidth,
     ) -> Result<Self, Error> {
         match input {
-            raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
+            raw::records::MultipleResponseType::MultipleDichotomy { value, labels } => {
                 let value = match min_width {
                     VarWidth::Numeric => {
                         let string = String::from_utf8_lossy(&value.0);
@@ -1256,7 +1259,7 @@ impl MultipleResponseType {
                     labels: *labels,
                 })
             }
-            raw::MultipleResponseType::MultipleCategory => {
+            raw::records::MultipleResponseType::MultipleCategory => {
                 Ok(MultipleResponseType::MultipleCategory)
             }
         }
index a1da804a6f70597249eaa3ea07f0e222d4873b1b..f4e7034ca06434074ede9e3b16ff8f3df84a58ff 100644 (file)
 //! raw details.  Most readers will want to use higher-level interfaces.
 
 use crate::{
-    dictionary::{
-        Alignment, Attributes, Datum, Measure, MissingValueRange, MissingValues, VarWidth,
-    },
+    dictionary::{Datum, VarType, VarWidth},
     endian::{Endian, Parse, ToBytes},
     format::DisplayPlainF64,
     identifier::{Error as IdError, Identifier},
-    sys::encoding::{default_encoding, get_encoding, Error as EncodingError},
+    sys::{
+        encoding::{default_encoding, get_encoding, Error as EncodingError},
+        raw::records::{
+            Compression, DocumentRecord, EncodingRecord, Extension, FileAttributesRecord,
+            FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongNamesRecord,
+            LongStringMissingValueRecord, LongStringValueLabelRecord, MultipleResponseRecord,
+            NumberOfCasesRecord, ProductInfoRecord, RawDocumentLine, RawFileAttributesRecord,
+            RawLongNamesRecord, RawProductInfoRecord, RawVariableAttributesRecord,
+            RawVariableSetRecord, RawVeryLongStringsRecord, ValueLabelRecord, VarDisplayRecord,
+            VariableAttributesRecord, VariableRecord, VariableSetRecord, VeryLongStringsRecord,
+            ZHeader, ZTrailer,
+        },
+    },
 };
 
 use encoding_rs::{mem::decode_latin1, Encoding};
 use flate2::read::ZlibDecoder;
 use itertools::Itertools;
-use num::Integer;
 use smallvec::SmallVec;
 use std::{
     borrow::{Borrow, Cow},
     cell::RefCell,
-    collections::{BTreeMap, VecDeque},
+    collections::VecDeque,
     fmt::{Debug, Display, Formatter, Result as FmtResult},
     io::{empty, Error as IoError, Read, Seek, SeekFrom},
     iter::repeat_n,
     mem::take,
     num::NonZeroU8,
-    ops::{Deref, Not, Range},
+    ops::Deref,
     str::from_utf8,
 };
 use thiserror::Error as ThisError;
 
+pub mod records;
+
 /// An error encountered reading raw system file records.
 ///
 /// Any error prevents reading further data from the system file.
@@ -708,177 +719,6 @@ fn default_decode(s: &[u8]) -> Cow<str> {
     from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
 }
 
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum Compression {
-    Simple,
-    ZLib,
-}
-
-#[derive(Clone)]
-pub struct HeaderRecord<S>
-where
-    S: Debug,
-{
-    /// Offset in file.
-    pub offsets: Range<u64>,
-
-    /// Magic number.
-    pub magic: Magic,
-
-    /// Eye-catcher string, product name, in the file's encoding.  Padded
-    /// on the right with spaces.
-    pub eye_catcher: S,
-
-    /// Layout code, normally either 2 or 3.
-    pub layout_code: u32,
-
-    /// Number of variable positions, or `None` if the value in the file is
-    /// questionably trustworthy.
-    pub nominal_case_size: Option<u32>,
-
-    /// Compression type, if any,
-    pub compression: Option<Compression>,
-
-    /// 1-based variable index of the weight variable, or `None` if the file is
-    /// unweighted.
-    pub weight_index: Option<u32>,
-
-    /// Claimed number of cases, if known.
-    pub n_cases: Option<u32>,
-
-    /// Compression bias, usually 100.0.
-    pub bias: f64,
-
-    /// `dd mmm yy` in the file's encoding.
-    pub creation_date: S,
-
-    /// `HH:MM:SS` in the file's encoding.
-    pub creation_time: S,
-
-    /// File label, in the file's encoding.  Padded on the right with spaces.
-    pub file_label: S,
-
-    /// Endianness of the data in the file header.
-    pub endian: Endian,
-}
-
-impl<S> HeaderRecord<S>
-where
-    S: Debug,
-{
-    fn debug_field<T>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult
-    where
-        T: Debug,
-    {
-        writeln!(f, "{name:>17}: {:?}", value)
-    }
-}
-
-impl<S> Debug for HeaderRecord<S>
-where
-    S: Debug,
-{
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        writeln!(f, "File header record:")?;
-        self.debug_field(f, "Magic", self.magic)?;
-        self.debug_field(f, "Product name", &self.eye_catcher)?;
-        self.debug_field(f, "Layout code", self.layout_code)?;
-        self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
-        self.debug_field(f, "Compression", self.compression)?;
-        self.debug_field(f, "Weight index", self.weight_index)?;
-        self.debug_field(f, "Number of cases", self.n_cases)?;
-        self.debug_field(f, "Compression bias", self.bias)?;
-        self.debug_field(f, "Creation date", &self.creation_date)?;
-        self.debug_field(f, "Creation time", &self.creation_time)?;
-        self.debug_field(f, "File label", &self.file_label)?;
-        self.debug_field(f, "Endianness", self.endian)
-    }
-}
-
-impl HeaderRecord<RawString> {
-    fn read<R: Read + Seek>(r: &mut R, warn: &mut dyn FnMut(Warning)) -> Result<Self, Error> {
-        let start = r.stream_position()?;
-
-        let magic: [u8; 4] = read_bytes(r)?;
-        let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
-
-        let eye_catcher = RawString(read_vec(r, 60)?);
-        let layout_code: [u8; 4] = read_bytes(r)?;
-        let endian = Endian::identify_u32(2, layout_code)
-            .or_else(|| Endian::identify_u32(2, layout_code))
-            .ok_or(Error::NotASystemFile)?;
-        let layout_code = endian.parse(layout_code);
-
-        let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
-        let nominal_case_size = (1..i32::MAX as u32 / 16)
-            .contains(&nominal_case_size)
-            .then_some(nominal_case_size);
-
-        let compression_code: u32 = endian.parse(read_bytes(r)?);
-        let compression = match (magic, compression_code) {
-            (Magic::Zsav, 2) => Some(Compression::ZLib),
-            (Magic::Zsav, code) => return Err(Error::InvalidZsavCompression(code)),
-            (_, 0) => None,
-            (_, 1) => Some(Compression::Simple),
-            (_, code) => return Err(Error::InvalidSavCompression(code)),
-        };
-
-        let weight_index: u32 = endian.parse(read_bytes(r)?);
-        let weight_index = (weight_index > 0).then_some(weight_index);
-
-        let n_cases: u32 = endian.parse(read_bytes(r)?);
-        let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
-
-        let bias: f64 = endian.parse(read_bytes(r)?);
-        if bias != 100.0 && bias != 0.0 {
-            warn(Warning::UnexpectedBias(bias));
-        }
-
-        let creation_date = RawString(read_vec(r, 9)?);
-        let creation_time = RawString(read_vec(r, 8)?);
-        let file_label = RawString(read_vec(r, 64)?);
-        let _: [u8; 3] = read_bytes(r)?;
-
-        Ok(HeaderRecord {
-            offsets: start..r.stream_position()?,
-            magic,
-            layout_code,
-            nominal_case_size,
-            compression,
-            weight_index,
-            n_cases,
-            bias,
-            creation_date,
-            creation_time,
-            eye_catcher,
-            file_label,
-            endian,
-        })
-    }
-
-    pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
-        let eye_catcher = decoder.decode(&self.eye_catcher).to_string();
-        let file_label = decoder.decode(&self.file_label).to_string();
-        let creation_date = decoder.decode(&self.creation_date).to_string();
-        let creation_time = decoder.decode(&self.creation_time).to_string();
-        DecodedRecord::Header(HeaderRecord {
-            eye_catcher,
-            weight_index: self.weight_index,
-            n_cases: self.n_cases,
-            file_label,
-            offsets: self.offsets.clone(),
-            magic: self.magic,
-            layout_code: self.layout_code,
-            nominal_case_size: self.nominal_case_size,
-            compression: self.compression,
-            bias: self.bias,
-            creation_date,
-            creation_time,
-            endian: self.endian,
-        })
-    }
-}
-
 /// An [Encoding] along with a function to report decoding errors.
 ///
 /// This is used by functions that decode raw records.
@@ -996,44 +836,6 @@ impl TryFrom<[u8; 4]> for Magic {
     }
 }
 
-/// Variable type.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum VarType {
-    /// A numeric variable.
-    Numeric,
-
-    /// A string variable.
-    String,
-}
-
-impl Not for VarType {
-    type Output = Self;
-
-    fn not(self) -> Self::Output {
-        match self {
-            Self::Numeric => Self::String,
-            Self::String => Self::Numeric,
-        }
-    }
-}
-
-impl Not for &VarType {
-    type Output = VarType;
-
-    fn not(self) -> Self::Output {
-        !*self
-    }
-}
-
-impl Display for VarType {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        match self {
-            VarType::Numeric => write!(f, "numeric"),
-            VarType::String => write!(f, "string"),
-        }
-    }
-}
-
 impl TryFrom<RawWidth> for VarType {
     type Error = ();
 
@@ -1662,150 +1464,6 @@ impl Iterator for Cases {
     }
 }
 
-/// [crate::format::Format] as represented in a system file.
-#[derive(Copy, Clone, PartialEq, Eq, Hash)]
-pub struct RawFormat(
-    /// The most-significant 16 bits are the type, the next 8 bytes are the
-    /// width, and the least-significant 8 bits are the number of decimals.
-    pub u32,
-);
-
-impl Debug for RawFormat {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        let type_ = format_name(self.0 >> 16);
-        let w = (self.0 >> 8) & 0xff;
-        let d = self.0 & 0xff;
-        write!(f, "{:06x} ({type_}{w}.{d})", self.0)
-    }
-}
-
-fn format_name(type_: u32) -> Cow<'static, str> {
-    match type_ {
-        1 => "A",
-        2 => "AHEX",
-        3 => "COMMA",
-        4 => "DOLLAR",
-        5 => "F",
-        6 => "IB",
-        7 => "PIBHEX",
-        8 => "P",
-        9 => "PIB",
-        10 => "PK",
-        11 => "RB",
-        12 => "RBHEX",
-        15 => "Z",
-        16 => "N",
-        17 => "E",
-        20 => "DATE",
-        21 => "TIME",
-        22 => "DATETIME",
-        23 => "ADATE",
-        24 => "JDATE",
-        25 => "DTIME",
-        26 => "WKDAY",
-        27 => "MONTH",
-        28 => "MOYR",
-        29 => "QYR",
-        30 => "WKYR",
-        31 => "PCT",
-        32 => "DOT",
-        33 => "CCA",
-        34 => "CCB",
-        35 => "CCC",
-        36 => "CCD",
-        37 => "CCE",
-        38 => "EDATE",
-        39 => "SDATE",
-        40 => "MTIME",
-        41 => "YMDHMS",
-        _ => return format!("<unknown format {type_}>").into(),
-    }
-    .into()
-}
-
-impl MissingValues {
-    fn read<R: Read + Seek>(
-        r: &mut R,
-        offset: u64,
-        raw_width: RawWidth,
-        code: i32,
-        endian: Endian,
-        warn: &mut dyn FnMut(Warning),
-    ) -> Result<Self, Error> {
-        let (individual_values, has_range) = match code {
-            0 => return Ok(Self::default()),
-            1..=3 => (code as usize, false),
-            -2 => (0, true),
-            -3 => (1, true),
-            _ => return Err(Error::BadMissingValueCode { offset, code }),
-        };
-
-        let mut values = Vec::with_capacity(individual_values);
-        let range = if has_range {
-            let low = read_bytes::<8, _>(r)?;
-            let high = read_bytes::<8, _>(r)?;
-            Some((low, high))
-        } else {
-            None
-        };
-        for _ in 0..individual_values {
-            values.push(read_bytes::<8, _>(r)?);
-        }
-
-        match VarWidth::try_from(raw_width) {
-            Ok(VarWidth::Numeric) => {
-                let values = values
-                    .into_iter()
-                    .map(|v| Datum::Number(endian.parse(v)))
-                    .collect();
-
-                let range = range.map(|(low, high)| {
-                    MissingValueRange::new(endian.parse(low), endian.parse(high))
-                });
-                return Ok(Self::new(values, range).unwrap());
-            }
-            Ok(VarWidth::String(_)) if range.is_some() => warn(Warning::MissingValueStringRange),
-            Ok(VarWidth::String(width)) => {
-                let width = width.min(8) as usize;
-                let values = values
-                    .into_iter()
-                    .map(|value| Datum::String(RawString::from(&value[..width])))
-                    .collect();
-                return Ok(Self::new(values, None).unwrap());
-            }
-            Err(()) => warn(Warning::MissingValueContinuation(offset)),
-        }
-        Ok(Self::default())
-    }
-}
-
-#[derive(Clone)]
-pub struct VariableRecord<S>
-where
-    S: Debug,
-{
-    /// Range of offsets in file.
-    pub offsets: Range<u64>,
-
-    /// Variable width, in the range -1..=255.
-    pub width: RawWidth,
-
-    /// Variable name, padded on the right with spaces.
-    pub name: S,
-
-    /// Print format.
-    pub print_format: RawFormat,
-
-    /// Write format.
-    pub write_format: RawFormat,
-
-    /// Missing values.
-    pub missing_values: MissingValues,
-
-    /// Optional variable label.
-    pub label: Option<S>,
-}
-
 /// Width of a variable record.
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum RawWidth {
@@ -1857,92 +1515,6 @@ impl Display for RawWidth {
     }
 }
 
-impl<S> Debug for VariableRecord<S>
-where
-    S: Debug,
-{
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        writeln!(f, "Width: {}", self.width,)?;
-        writeln!(f, "Print format: {:?}", self.print_format)?;
-        writeln!(f, "Write format: {:?}", self.write_format)?;
-        writeln!(f, "Name: {:?}", &self.name)?;
-        writeln!(f, "Variable label: {:?}", self.label)?;
-        writeln!(f, "Missing values: {:?}", self.missing_values)
-    }
-}
-
-impl VariableRecord<RawString> {
-    fn read<R: Read + Seek>(
-        r: &mut R,
-        endian: Endian,
-        warn: &mut dyn FnMut(Warning),
-    ) -> Result<Record, Error> {
-        let start_offset = r.stream_position()?;
-        let width: i32 = endian.parse(read_bytes(r)?);
-        let width: RawWidth = width.try_into().map_err(|_| Error::BadVariableWidth {
-            start_offset,
-            width,
-        })?;
-        let code_offset = r.stream_position()?;
-        let has_variable_label: u32 = endian.parse(read_bytes(r)?);
-        let missing_value_code: i32 = endian.parse(read_bytes(r)?);
-        let print_format = RawFormat(endian.parse(read_bytes(r)?));
-        let write_format = RawFormat(endian.parse(read_bytes(r)?));
-        let name = RawString(read_vec(r, 8)?);
-
-        let label = match has_variable_label {
-            0 => None,
-            1 => {
-                let len: u32 = endian.parse(read_bytes(r)?);
-                let read_len = len.min(65535) as usize;
-                let label = RawString(read_vec(r, read_len)?);
-
-                let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
-                let _ = read_vec(r, padding_bytes as usize)?;
-
-                Some(label)
-            }
-            _ => {
-                return Err(Error::BadVariableLabelCode {
-                    start_offset,
-                    code_offset,
-                    code: has_variable_label,
-                });
-            }
-        };
-
-        let missing_values =
-            MissingValues::read(r, start_offset, width, missing_value_code, endian, warn)?;
-
-        let end_offset = r.stream_position()?;
-
-        Ok(Record::Variable(VariableRecord {
-            offsets: start_offset..end_offset,
-            width,
-            name,
-            print_format,
-            write_format,
-            missing_values,
-            label,
-        }))
-    }
-
-    pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
-        DecodedRecord::Variable(VariableRecord {
-            offsets: self.offsets.clone(),
-            width: self.width,
-            name: decoder.decode(&self.name).to_string(),
-            print_format: self.print_format,
-            write_format: self.write_format,
-            missing_values: self.missing_values,
-            label: self
-                .label
-                .as_ref()
-                .map(|label| decoder.decode(label).to_string()),
-        })
-    }
-}
-
 /// 8 bytes that represent a number or a string (but that's all we know).
 ///
 /// Used when we don't know whether it's a number or a string, or the numerical
@@ -2225,1494 +1797,50 @@ impl Display for QuotedEncodedStr<'_> {
     }
 }
 
-#[derive(Clone, Debug)]
-pub struct ValueLabel<D, S>
-where
-    D: Debug,
-    S: Debug,
-{
-    pub datum: D,
-    pub label: S,
-}
-
-#[derive(Clone)]
-pub struct ValueLabelRecord<D, S>
-where
-    D: Debug,
-    S: Debug,
-{
-    /// Range of offsets in file.
-    pub offsets: Range<u64>,
-
-    /// The labels.
-    pub labels: Vec<ValueLabel<D, S>>,
-
-    /// The 1-based indexes of the variable indexes.
-    pub dict_indexes: Vec<u32>,
-
-    /// The types of the variables.
-    pub var_type: VarType,
-}
-
-impl<D, S> Debug for ValueLabelRecord<D, S>
-where
-    D: Debug,
-    S: Debug,
-{
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        writeln!(f, "labels: ")?;
-        for label in self.labels.iter() {
-            writeln!(f, "{label:?}")?;
-        }
-        write!(f, "apply to {} variables", self.var_type)?;
-        for dict_index in self.dict_indexes.iter() {
-            write!(f, " #{dict_index}")?;
-        }
-        Ok(())
+fn skip_bytes<R: Read>(r: &mut R, mut n: usize) -> Result<(), IoError> {
+    thread_local! {
+        static BUF: RefCell<[u8; 256]> = RefCell::new([0u8; 256]);
     }
-}
-
-impl<D, S> ValueLabelRecord<D, S>
-where
-    D: Debug,
-    S: Debug,
-{
-    /// Maximum number of value labels in a record.
-    pub const MAX_LABELS: u32 = u32::MAX / 8;
-
-    /// Maximum number of variable indexes in a record.
-    pub const MAX_INDEXES: u32 = u32::MAX / 8;
-}
-
-impl ValueLabelRecord<RawDatum, RawString> {
-    fn read<R: Read + Seek>(
-        r: &mut R,
-        endian: Endian,
-        var_types: &VarTypes,
-        warn: &mut dyn FnMut(Warning),
-    ) -> Result<Option<Record>, Error> {
-        let label_offset = r.stream_position()?;
-        let n: u32 = endian.parse(read_bytes(r)?);
-        if n > Self::MAX_LABELS {
-            return Err(Error::BadNumberOfValueLabels {
-                offset: label_offset,
-                n,
-                max: Self::MAX_LABELS,
-            });
-        }
-
-        let mut labels = Vec::new();
-        for _ in 0..n {
-            let value = UntypedDatum(read_bytes(r)?);
-            let label_len: u8 = endian.parse(read_bytes(r)?);
-            let label_len = label_len as usize;
-            let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
-
-            let mut label = read_vec(r, padded_len - 1)?;
-            label.truncate(label_len);
-            labels.push((value, RawString(label)));
-        }
-
-        let index_offset = r.stream_position()?;
-        let rec_type: u32 = endian.parse(read_bytes(r)?);
-        if rec_type != 4 {
-            return Err(Error::ExpectedVarIndexRecord {
-                offset: index_offset,
-                rec_type,
-            });
-        }
-
-        let n: u32 = endian.parse(read_bytes(r)?);
-        if n > Self::MAX_INDEXES {
-            return Err(Error::TooManyVarIndexes {
-                offset: index_offset,
-                n,
-                max: Self::MAX_INDEXES,
-            });
-        } else if n == 0 {
-            dbg!();
-            warn(Warning::NoVarIndexes {
-                offset: index_offset,
-            });
-            return Ok(None);
-        }
-
-        let index_offset = r.stream_position()?;
-        let mut dict_indexes = Vec::with_capacity(n as usize);
-        let mut invalid_indexes = Vec::new();
-        for _ in 0..n {
-            let index: u32 = endian.parse(read_bytes(r)?);
-            if var_types.is_valid_index(index as usize) {
-                dict_indexes.push(index);
-            } else {
-                invalid_indexes.push(index);
-            }
-        }
-        if !invalid_indexes.is_empty() {
-            warn(Warning::InvalidVarIndexes {
-                offset: index_offset,
-                max: var_types.n_values(),
-                invalid: invalid_indexes,
-            });
-        }
-
-        let Some(&first_index) = dict_indexes.first() else {
-            return Ok(None);
-        };
-        let var_type = VarType::from(var_types.types[first_index as usize - 1].unwrap());
-        let mut wrong_type_indexes = Vec::new();
-        dict_indexes.retain(|&index| {
-            if var_types.types[index as usize - 1].map(VarType::from) != Some(var_type) {
-                wrong_type_indexes.push(index);
-                false
-            } else {
-                true
-            }
-        });
-        if !wrong_type_indexes.is_empty() {
-            warn(Warning::MixedVarTypes {
-                offset: index_offset,
-                var_type,
-                wrong_types: wrong_type_indexes,
-            });
-        }
-
-        let labels = labels
-            .into_iter()
-            .map(|(value, label)| ValueLabel {
-                datum: RawDatum::from_raw(&value, var_type, endian),
-                label,
-            })
-            .collect();
-
-        let end_offset = r.stream_position()?;
-        Ok(Some(Record::ValueLabel(ValueLabelRecord {
-            offsets: label_offset..end_offset,
-            labels,
-            dict_indexes,
-            var_type,
-        })))
-    }
-
-    fn decode(self, decoder: &mut Decoder) -> ValueLabelRecord<RawDatum, String> {
-        let labels = self
-            .labels
-            .iter()
-            .map(
-                |ValueLabel {
-                     datum: value,
-                     label,
-                 }| ValueLabel {
-                    datum: value.clone(),
-                    label: decoder.decode(label).to_string(),
-                },
-            )
-            .collect();
-        ValueLabelRecord {
-            offsets: self.offsets.clone(),
-            labels,
-            dict_indexes: self.dict_indexes.clone(),
-            var_type: self.var_type,
+    BUF.with_borrow_mut(|buf| {
+        while n > 0 {
+            let chunk = n.min(buf.len()); // Discard at most one buffer's worth per read.
+            r.read_exact(&mut buf[..chunk])?; // `buf[..n]` would panic once n > buf.len().
+            n -= chunk;
         }
-    }
+        Ok(())
+    })
 }
 
-#[derive(Clone, Debug)]
-pub struct DocumentRecord<S>
-where
-    S: Debug,
-{
-    pub offsets: Range<u64>,
-
-    /// The document, as an array of lines.  Raw lines are exactly 80 bytes long
-    /// and are right-padded with spaces without any new-line termination.
-    pub lines: Vec<S>,
-}
-
-pub type RawDocumentLine = RawStrArray<DOC_LINE_LEN>;
-
-/// Length of a line in a document.  Document lines are fixed-length and
-/// padded on the right with spaces.
-pub const DOC_LINE_LEN: usize = 80;
-
-impl DocumentRecord<RawDocumentLine> {
-    /// Maximum number of lines we will accept in a document.  This is simply
-    /// the maximum number that will fit in a 32-bit space.
-    pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
-
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
-        let start_offset = r.stream_position()?;
-        let n: u32 = endian.parse(read_bytes(r)?);
-        let n = n as usize;
-        if n > Self::MAX_LINES {
-            Err(Error::BadDocumentLength {
-                offset: start_offset,
-                n,
-                max: Self::MAX_LINES,
-            })
-        } else {
-            let mut lines = Vec::with_capacity(n);
-            for _ in 0..n {
-                lines.push(RawStrArray(read_bytes(r)?));
-            }
-            let end_offset = r.stream_position()?;
-            Ok(Record::Document(DocumentRecord {
-                offsets: start_offset..end_offset,
-                lines,
-            }))
+/// Fills `buf` completely from `r`, tolerating a clean end of input.
+///
+/// Returns `Ok(false)` if the very first read yields zero bytes (end of
+/// input, or an empty `buf`), and `Ok(true)` once all of `buf` has been
+/// filled.
+///
+/// # Errors
+///
+/// Returns any I/O error reported by `r`, other than an interrupted first
+/// read, which is retried.  If the input ends after at least one byte but
+/// before `buf` is full, the error comes from [`Read::read_exact`].
+fn try_read_bytes_into<R: Read>(r: &mut R, buf: &mut [u8]) -> Result<bool, IoError> {
+    // `Read::read` may fail with `ErrorKind::Interrupted`, which is
+    // non-fatal and must be retried; `read_exact` below already does so.
+    let n = loop {
+        match r.read(buf) {
+            Ok(n) => break n,
+            Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
+            Err(error) => return Err(error),
+        }
+    };
+    if n > 0 {
+        // A short first read is not an error: fetch the remainder.
+        if n < buf.len() {
+            r.read_exact(&mut buf[n..])?;
         }
-    }
-
-    pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
-        DecodedRecord::Document(DocumentRecord {
-            offsets: self.offsets.clone(),
-            lines: self
-                .lines
-                .iter()
-                .map(|s| decoder.decode_slice(&s.0).to_string())
-                .collect(),
-        })
-    }
-}
-
-struct ExtensionRecord<'a> {
-    size: Option<u32>,
-    count: Option<u32>,
-    name: &'a str,
-}
-
-#[derive(Clone, Debug)]
-pub struct IntegerInfoRecord {
-    pub offsets: Range<u64>,
-    pub version: (i32, i32, i32),
-    pub machine_code: i32,
-    pub floating_point_rep: i32,
-    pub compression_code: i32,
-    pub endianness: i32,
-    pub character_code: i32,
-}
-
-static INTEGER_INFO_RECORD: ExtensionRecord = ExtensionRecord {
-    size: Some(4),
-    count: Some(8),
-    name: "integer record",
-};
-
-impl IntegerInfoRecord {
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
-        ext.check_size(&INTEGER_INFO_RECORD)?;
-
-        let mut input = &ext.data[..];
-        let data: Vec<i32> = (0..8)
-            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
-            .collect();
-        Ok(Record::IntegerInfo(IntegerInfoRecord {
-            offsets: ext.offsets.clone(),
-            version: (data[0], data[1], data[2]),
-            machine_code: data[3],
-            floating_point_rep: data[4],
-            compression_code: data[5],
-            endianness: data[6],
-            character_code: data[7],
-        }))
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct FloatInfoRecord {
-    pub sysmis: f64,
-    pub highest: f64,
-    pub lowest: f64,
-}
-
-static FLOAT_INFO_RECORD: ExtensionRecord = ExtensionRecord {
-    size: Some(8),
-    count: Some(3),
-    name: "floating point record",
-};
-
-impl FloatInfoRecord {
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
-        ext.check_size(&FLOAT_INFO_RECORD)?;
-
-        let mut input = &ext.data[..];
-        let data: Vec<f64> = (0..3)
-            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
-            .collect();
-        Ok(Record::FloatInfo(FloatInfoRecord {
-            sysmis: data[0],
-            highest: data[1],
-            lowest: data[2],
-        }))
+        Ok(true)
+    } else {
+        Ok(false)
     }
 }
 
-#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum CategoryLabels {
-    VarLabels,
-    CountedValues,
-}
-
-#[derive(Clone, Debug)]
-pub enum MultipleResponseType {
-    MultipleDichotomy {
-        value: RawString,
-        labels: CategoryLabels,
-    },
-    MultipleCategory,
-}
-
-impl MultipleResponseType {
-    fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Warning> {
-        let (mr_type, input) = match input.split_first() {
-            Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input),
-            Some((b'D', input)) => {
-                let (value, input) = parse_counted_string(input)?;
-                (
-                    MultipleResponseType::MultipleDichotomy {
-                        value,
-                        labels: CategoryLabels::VarLabels,
-                    },
-                    input,
-                )
-            }
-            Some((b'E', input)) => {
-                let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
-                    (CategoryLabels::CountedValues, rest)
-                } else if let Some(rest) = input.strip_prefix(b" 11 ") {
-                    (CategoryLabels::VarLabels, rest)
-                } else {
-                    return Err(Warning::InvalidMultipleDichotomyLabelType);
-                };
-                let (value, input) = parse_counted_string(input)?;
-                (
-                    MultipleResponseType::MultipleDichotomy { value, labels },
-                    input,
-                )
-            }
-            _ => return Err(Warning::InvalidMultipleResponseType),
-        };
-        Ok((mr_type, input))
+/// Reads an `N`-byte array from `r`, tolerating a clean end of input.
+///
+/// Delegates to `try_read_bytes_into`: yields `Ok(Some(bytes))` when that
+/// call reports the array was filled, `Ok(None)` when it reports that no
+/// bytes were available, and passes any I/O error through unchanged.
+fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
+    let mut bytes = [0; N];
+    if try_read_bytes_into(r, &mut bytes)? {
+        Ok(Some(bytes))
+    } else {
+        Ok(None)
     }
 }
 
-#[derive(Clone, Debug)]
-pub struct MultipleResponseSet<I, S>
-where
-    I: Debug,
-    S: Debug,
-{
-    pub name: I,
-    pub label: S,
-    pub mr_type: MultipleResponseType,
-    pub short_names: Vec<I>,
+/// Reads exactly `N` bytes from `r` and returns them as an array.
+///
+/// Any error from [`Read::read_exact`], including running out of input
+/// before `N` bytes have been read, is returned to the caller.
+fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
+    let mut bytes = [0; N];
+    r.read_exact(&mut bytes).map(|()| bytes)
 }
 
-impl MultipleResponseSet<RawString, RawString> {
-    fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> {
-        let Some(equals) = input.iter().position(|&b| b == b'=') else {
-            return Err(Warning::MultipleResponseSyntaxError("missing `=`"));
-        };
-        let (name, input) = input.split_at(equals);
-        let input = input.strip_prefix(b"=").unwrap();
-        let (mr_type, input) = MultipleResponseType::parse(input)?;
-        let Some(input) = input.strip_prefix(b" ") else {
-            return Err(Warning::MultipleResponseSyntaxError(
-                "missing space after multiple response type",
-            ));
-        };
-        let (label, mut input) = parse_counted_string(input)?;
-        let mut vars = Vec::new();
-        while input.first() != Some(&b'\n') {
-            match input.split_first() {
-                Some((b' ', rest)) => {
-                    let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else {
-                        return Err(Warning::MultipleResponseSyntaxError(
-                            "missing variable name delimiter",
-                        ));
-                    };
-                    let (var, rest) = rest.split_at(length);
-                    if !var.is_empty() {
-                        vars.push(var.into());
-                    }
-                    input = rest;
-                }
-                _ => {
-                    return Err(Warning::MultipleResponseSyntaxError(
-                        "missing space preceding variable name",
-                    ));
-                }
-            }
-        }
-        while input.first() == Some(&b'\n') {
-            input = &input[1..];
-        }
-        Ok((
-            MultipleResponseSet {
-                name: name.into(),
-                label,
-                mr_type,
-                short_names: vars,
-            },
-            input,
-        ))
-    }
-
-    fn decode(
-        &self,
-        decoder: &mut Decoder,
-    ) -> Result<MultipleResponseSet<Identifier, String>, Warning> {
-        let mut short_names = Vec::with_capacity(self.short_names.len());
-        for short_name in self.short_names.iter() {
-            if let Some(short_name) = decoder
-                .decode_identifier(short_name)
-                .map_err(Warning::InvalidMrSetName)
-                .issue_warning(&mut decoder.warn)
-            {
-                short_names.push(short_name);
-            }
-        }
-        Ok(MultipleResponseSet {
-            name: decoder
-                .decode_identifier(&self.name)
-                .map_err(Warning::InvalidMrSetVariableName)?,
-            label: decoder.decode(&self.label).to_string(),
-            mr_type: self.mr_type.clone(),
-            short_names,
-        })
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct MultipleResponseRecord<I, S>(pub Vec<MultipleResponseSet<I, S>>)
-where
-    I: Debug,
-    S: Debug;
-
-static MULTIPLE_RESPONSE_RECORD: ExtensionRecord = ExtensionRecord {
-    size: Some(1),
-    count: None,
-    name: "multiple response set record",
-};
-
-impl MultipleResponseRecord<RawString, RawString> {
-    fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
-        ext.check_size(&MULTIPLE_RESPONSE_RECORD)?;
-
-        let mut input = &ext.data[..];
-        let mut sets = Vec::new();
-        loop {
-            while let Some(suffix) = input.strip_prefix(b"\n") {
-                input = suffix;
-            }
-            if input.is_empty() {
-                break;
-            }
-            let (set, rest) = MultipleResponseSet::parse(input)?;
-            sets.push(set);
-            input = rest;
-        }
-        Ok(Record::MultipleResponse(MultipleResponseRecord(sets)))
-    }
-}
-
-impl MultipleResponseRecord<RawString, RawString> {
-    fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
-        let mut sets = Vec::new();
-        for set in self.0.iter() {
-            if let Some(set) = set.decode(decoder).issue_warning(&mut decoder.warn) {
-                sets.push(set);
-            }
-        }
-        DecodedRecord::MultipleResponse(MultipleResponseRecord(sets))
-    }
-}
-
-fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> {
-    let Some(space) = input.iter().position(|&b| b == b' ') else {
-        return Err(Warning::CountedStringMissingSpace);
-    };
-    let Ok(length) = from_utf8(&input[..space]) else {
-        return Err(Warning::CountedStringInvalidUTF8);
-    };
-    let Ok(length): Result<usize, _> = length.parse() else {
-        return Err(Warning::CountedStringInvalidLength(length.into()));
-    };
-
-    let Some((string, rest)) = input[space + 1..].split_at_checked(length) else {
-        return Err(Warning::CountedStringTooLong(length));
-    };
-    Ok((string.into(), rest))
-}
-
-impl Measure {
-    fn try_decode(source: u32) -> Result<Option<Measure>, Warning> {
-        match source {
-            0 => Ok(None),
-            1 => Ok(Some(Measure::Nominal)),
-            2 => Ok(Some(Measure::Ordinal)),
-            3 => Ok(Some(Measure::Scale)),
-            _ => Err(Warning::InvalidMeasurement(source)),
-        }
-    }
-}
-
-impl Alignment {
-    fn try_decode(source: u32) -> Result<Option<Alignment>, Warning> {
-        match source {
-            0 => Ok(Some(Alignment::Left)),
-            1 => Ok(Some(Alignment::Right)),
-            2 => Ok(Some(Alignment::Center)),
-            _ => Err(Warning::InvalidAlignment(source)),
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct VarDisplay {
-    pub measure: Option<Measure>,
-    pub width: Option<u32>,
-    pub alignment: Option<Alignment>,
-}
-
-#[derive(Clone, Debug)]
-pub struct VarDisplayRecord(pub Vec<VarDisplay>);
-
-impl VarDisplayRecord {
-    fn parse(
-        ext: &Extension,
-        var_types: &VarTypes,
-        endian: Endian,
-        warn: &mut dyn FnMut(Warning),
-    ) -> Result<Record, Warning> {
-        if ext.size != 4 {
-            return Err(Warning::BadRecordSize {
-                offset: ext.offsets.start,
-                record: String::from("variable display record"),
-                size: ext.size,
-                expected_size: 4,
-            });
-        }
-
-        let n_vars = var_types.n_vars();
-        let has_width = if ext.count as usize == 3 * n_vars {
-            true
-        } else if ext.count as usize == 2 * n_vars {
-            false
-        } else {
-            return Err(Warning::InvalidVariableDisplayCount {
-                count: ext.count as usize,
-                first: 2 * n_vars,
-                second: 3 * n_vars,
-            });
-        };
-
-        let mut var_displays = Vec::new();
-        let mut input = &ext.data[..];
-        for _ in 0..n_vars {
-            let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
-                .issue_warning(warn)
-                .flatten();
-            let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap()));
-            let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
-                .issue_warning(warn)
-                .flatten();
-            var_displays.push(VarDisplay {
-                measure,
-                width,
-                alignment,
-            });
-        }
-        Ok(Record::VarDisplay(VarDisplayRecord(var_displays)))
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct LongStringMissingValues<N>
-where
-    N: Debug,
-{
-    /// Variable name.
-    pub var_name: N,
-
-    /// Missing values.
-    pub missing_values: Vec<RawStrArray<8>>,
-}
-
-impl LongStringMissingValues<RawString> {
-    fn decode(
-        &self,
-        decoder: &mut Decoder,
-    ) -> Result<LongStringMissingValues<Identifier>, IdError> {
-        Ok(LongStringMissingValues {
-            var_name: decoder.decode_identifier(&self.var_name)?,
-            missing_values: self.missing_values.clone(),
-        })
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct LongStringMissingValueRecord<N>(pub Vec<LongStringMissingValues<N>>)
-where
-    N: Debug;
-
-static LONG_STRING_MISSING_VALUE_RECORD: ExtensionRecord = ExtensionRecord {
-    size: Some(1),
-    count: None,
-    name: "long string missing values record",
-};
-
-impl LongStringMissingValueRecord<RawString> {
-    fn parse(
-        ext: &Extension,
-        endian: Endian,
-        warn: &mut dyn FnMut(Warning),
-    ) -> Result<Record, Warning> {
-        ext.check_size(&LONG_STRING_MISSING_VALUE_RECORD)?;
-
-        let mut input = &ext.data[..];
-        let mut missing_value_set = Vec::new();
-        while !input.is_empty() {
-            let var_name = read_string(&mut input, endian)?;
-            dbg!(&var_name);
-            let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
-            let value_len: u32 = endian.parse(read_bytes(&mut input)?);
-            if value_len != 8 {
-                let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
-                warn(Warning::BadLongMissingValueLength {
-                    record_offset: ext.offsets.start,
-                    offset,
-                    value_len,
-                });
-                read_vec(
-                    &mut input,
-                    dbg!(value_len as usize * n_missing_values as usize),
-                )?;
-                continue;
-            }
-            let mut missing_values = Vec::new();
-            for i in 0..n_missing_values {
-                if i > 0 {
-                    // Tolerate files written by old, buggy versions of PSPP
-                    // where we believed that the value_length was repeated
-                    // before each missing value.
-                    let mut peek = input;
-                    let number: u32 = endian.parse(read_bytes(&mut peek)?);
-                    if number == 8 {
-                        input = peek;
-                    }
-                }
-
-                let value: [u8; 8] = read_bytes(&mut input)?;
-                missing_values.push(RawStrArray(value));
-            }
-            missing_value_set.push(LongStringMissingValues {
-                var_name,
-                missing_values,
-            });
-        }
-        Ok(Record::LongStringMissingValues(
-            LongStringMissingValueRecord(missing_value_set),
-        ))
-    }
-}
-
-impl LongStringMissingValueRecord<RawString> {
-    pub fn decode(self, decoder: &mut Decoder) -> LongStringMissingValueRecord<Identifier> {
-        let mut mvs = Vec::with_capacity(self.0.len());
-        for mv in self.0.iter() {
-            if let Some(mv) = mv
-                .decode(decoder)
-                .map_err(Warning::InvalidLongStringMissingValueVariableName)
-                .issue_warning(&mut decoder.warn)
-            {
-                mvs.push(mv);
-            }
-        }
-        LongStringMissingValueRecord(mvs)
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct EncodingRecord(pub String);
-
-static ENCODING_RECORD: ExtensionRecord = ExtensionRecord {
-    size: Some(1),
-    count: None,
-    name: "encoding record",
-};
-
-impl EncodingRecord {
-    fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
-        ext.check_size(&ENCODING_RECORD)?;
-
-        Ok(Record::Encoding(EncodingRecord(
-            String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName {
-                offset: ext.offsets.start,
-            })?,
-        )))
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct NumberOfCasesRecord {
-    /// Always observed as 1.
-    pub one: u64,
-
-    /// Number of cases.
-    pub n_cases: u64,
-}
-
-static NUMBER_OF_CASES_RECORD: ExtensionRecord = ExtensionRecord {
-    size: Some(8),
-    count: Some(2),
-    name: "extended number of cases record",
-};
-
-impl NumberOfCasesRecord {
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
-        ext.check_size(&NUMBER_OF_CASES_RECORD)?;
-
-        let mut input = &ext.data[..];
-        let one = endian.parse(read_bytes(&mut input)?);
-        let n_cases = endian.parse(read_bytes(&mut input)?);
-
-        Ok(Record::NumberOfCases(NumberOfCasesRecord { one, n_cases }))
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct RawVariableSetRecord(TextRecord);
-
-impl RawVariableSetRecord {
-    fn parse(extension: Extension) -> Result<Record, Warning> {
-        Ok(Record::VariableSets(Self(TextRecord::parse(
-            extension,
-            "variable sets record",
-        )?)))
-    }
-    fn decode(self, decoder: &mut Decoder) -> VariableSetRecord {
-        let mut sets = Vec::new();
-        let input = decoder.decode(&self.0.text);
-        for line in input.lines() {
-            if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&mut decoder.warn) {
-                sets.push(set)
-            }
-        }
-        VariableSetRecord {
-            offsets: self.0.offsets,
-            sets,
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct RawProductInfoRecord(TextRecord);
-
-impl RawProductInfoRecord {
-    fn parse(extension: Extension) -> Result<Record, Warning> {
-        Ok(Record::ProductInfo(Self(TextRecord::parse(
-            extension,
-            "product info record",
-        )?)))
-    }
-    fn decode(self, decoder: &mut Decoder) -> ProductInfoRecord {
-        ProductInfoRecord(decoder.decode(&self.0.text).into())
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct RawLongNamesRecord(TextRecord);
-
-impl RawLongNamesRecord {
-    fn parse(extension: Extension) -> Result<Record, Warning> {
-        Ok(Record::LongNames(Self(TextRecord::parse(
-            extension,
-            "long names record",
-        )?)))
-    }
-    fn decode(self, decoder: &mut Decoder) -> LongNamesRecord {
-        let input = decoder.decode(&self.0.text);
-        let mut names = Vec::new();
-        for pair in input.split('\t').filter(|s| !s.is_empty()) {
-            if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&mut decoder.warn)
-            {
-                names.push(long_name);
-            }
-        }
-        LongNamesRecord(names)
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct TextRecord {
-    pub offsets: Range<u64>,
-
-    /// The text content of the record.
-    pub text: RawString,
-}
-
-impl TextRecord {
-    fn parse(extension: Extension, name: &str) -> Result<TextRecord, Warning> {
-        extension.check_size(&ExtensionRecord {
-            size: Some(1),
-            count: None,
-            name,
-        })?;
-        Ok(Self {
-            offsets: extension.offsets,
-            text: extension.data.into(),
-        })
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct VeryLongString {
-    pub short_name: Identifier,
-    pub length: u16,
-}
-
-impl VeryLongString {
-    fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Warning> {
-        let Some((short_name, length)) = input.split_once('=') else {
-            return Err(Warning::VeryLongStringMissingDelimiter(input.into()));
-        };
-        let short_name = decoder
-            .new_identifier(short_name)
-            .and_then(Identifier::must_be_ordinary)
-            .map_err(Warning::InvalidLongStringName)?;
-        let length = length
-            .parse()
-            .map_err(|_| Warning::VeryLongStringInvalidLength(input.into()))?;
-        Ok(VeryLongString { short_name, length })
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct RawVeryLongStringsRecord(TextRecord);
-
-#[derive(Clone, Debug)]
-pub struct VeryLongStringsRecord(pub Vec<VeryLongString>);
-
-impl RawVeryLongStringsRecord {
-    fn parse(extension: Extension) -> Result<Record, Warning> {
-        Ok(Record::VeryLongStrings(Self(TextRecord::parse(
-            extension,
-            "very long strings record",
-        )?)))
-    }
-    fn decode(self, decoder: &mut Decoder) -> VeryLongStringsRecord {
-        let input = decoder.decode(&self.0.text);
-        let mut very_long_strings = Vec::new();
-        for tuple in input
-            .split('\0')
-            .map(|s| s.trim_start_matches('\t'))
-            .filter(|s| !s.is_empty())
-        {
-            if let Some(vls) =
-                VeryLongString::parse(decoder, tuple).issue_warning(&mut decoder.warn)
-            {
-                very_long_strings.push(vls)
-            }
-        }
-        VeryLongStringsRecord(very_long_strings)
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct Attribute {
-    pub name: Identifier,
-    pub values: Vec<String>,
-}
-
-impl Attribute {
-    fn parse<'a>(decoder: &mut Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> {
-        let Some((name, mut input)) = input.split_once('(') else {
-            return Err(Warning::AttributeMissingLParen(input.into()));
-        };
-        let name = decoder
-            .new_identifier(name)
-            .map_err(Warning::InvalidAttributeName)?;
-        let mut values = Vec::new();
-        loop {
-            let Some((value, rest)) = input.split_once('\n') else {
-                return Err(Warning::AttributeMissingValue {
-                    name: name.clone(),
-                    index: values.len(),
-                });
-            };
-            if let Some(stripped) = value
-                .strip_prefix('\'')
-                .and_then(|value| value.strip_suffix('\''))
-            {
-                values.push(stripped.into());
-            } else {
-                decoder.warn(Warning::AttributeMissingQuotes {
-                    name: name.clone(),
-                    index: values.len(),
-                });
-                values.push(value.into());
-            }
-            if let Some(rest) = rest.strip_prefix(')') {
-                let attribute = Attribute { name, values };
-                return Ok((attribute, rest));
-            };
-            input = rest;
-        }
-    }
-}
-
-impl Attributes {
-    fn parse<'a>(
-        decoder: &mut Decoder,
-        mut input: &'a str,
-        sentinel: Option<char>,
-    ) -> Result<(Attributes, &'a str, Vec<Identifier>), Warning> {
-        let mut attributes = BTreeMap::new();
-        let mut duplicates = Vec::new();
-        let rest = loop {
-            match input.chars().next() {
-                None => break input,
-                c if c == sentinel => break &input[1..],
-                _ => {
-                    let (attribute, rest) = Attribute::parse(decoder, input)?;
-                    if attributes.contains_key(&attribute.name) {
-                        duplicates.push(attribute.name.clone());
-                    }
-                    attributes.insert(attribute.name, attribute.values);
-                    input = rest;
-                }
-            }
-        };
-        Ok((Attributes(attributes), rest, duplicates))
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct RawFileAttributesRecord(TextRecord);
-
-#[derive(Clone, Debug, Default)]
-pub struct FileAttributesRecord(pub Attributes);
-
-impl RawFileAttributesRecord {
-    fn parse(extension: Extension) -> Result<Record, Warning> {
-        Ok(Record::FileAttributes(Self(TextRecord::parse(
-            extension,
-            "file attributes record",
-        )?)))
-    }
-    fn decode(self, decoder: &mut Decoder) -> FileAttributesRecord {
-        let input = decoder.decode(&self.0.text);
-        match Attributes::parse(decoder, &input, None).issue_warning(&mut decoder.warn) {
-            Some((set, rest, duplicates)) => {
-                if !duplicates.is_empty() {
-                    decoder.warn(Warning::DuplicateFileAttributes {
-                        attributes: duplicates,
-                    });
-                }
-                if !rest.is_empty() {
-                    decoder.warn(dbg!(Warning::TBD));
-                }
-                FileAttributesRecord(set)
-            }
-            None => FileAttributesRecord::default(),
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct VarAttributes {
-    pub long_var_name: Identifier,
-    pub attributes: Attributes,
-}
-
-impl VarAttributes {
-    fn parse<'a>(
-        decoder: &mut Decoder,
-        input: &'a str,
-    ) -> Result<(VarAttributes, &'a str), Warning> {
-        let Some((long_var_name, rest)) = input.split_once(':') else {
-            return Err(dbg!(Warning::TBD));
-        };
-        let long_var_name = decoder
-            .new_identifier(long_var_name)
-            .and_then(Identifier::must_be_ordinary)
-            .map_err(Warning::InvalidAttributeVariableName)?;
-        let (attributes, rest, duplicates) = Attributes::parse(decoder, rest, Some('/'))?;
-        if !duplicates.is_empty() {
-            decoder.warn(Warning::DuplicateVariableAttributes {
-                variable: long_var_name.clone(),
-                attributes: duplicates,
-            });
-        }
-        let var_attribute = VarAttributes {
-            long_var_name,
-            attributes,
-        };
-        Ok((var_attribute, rest))
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct RawVariableAttributesRecord(TextRecord);
-
-#[derive(Clone, Debug)]
-pub struct VariableAttributesRecord(pub Vec<VarAttributes>);
-
-impl RawVariableAttributesRecord {
-    fn parse(extension: Extension) -> Result<Record, Warning> {
-        Ok(Record::VariableAttributes(Self(TextRecord::parse(
-            extension,
-            "variable attributes record",
-        )?)))
-    }
-    fn decode(self, decoder: &mut Decoder) -> VariableAttributesRecord {
-        let decoded = decoder.decode(&self.0.text);
-        let mut input = decoded.as_ref();
-        let mut var_attribute_sets = Vec::new();
-        while !input.is_empty() {
-            let Some((var_attribute, rest)) =
-                VarAttributes::parse(decoder, input).issue_warning(&mut decoder.warn)
-            else {
-                break;
-            };
-            var_attribute_sets.push(var_attribute);
-            input = rest;
-        }
-        VariableAttributesRecord(var_attribute_sets)
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct LongName {
-    pub short_name: Identifier,
-    pub long_name: Identifier,
-}
-
-impl LongName {
-    fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
-        let Some((short_name, long_name)) = input.split_once('=') else {
-            return Err(dbg!(Warning::LongNameMissingEquals));
-        };
-        let short_name = decoder
-            .new_identifier(short_name)
-            .and_then(Identifier::must_be_ordinary)
-            .map_err(Warning::InvalidShortName)?;
-        let long_name = decoder
-            .new_identifier(long_name)
-            .and_then(Identifier::must_be_ordinary)
-            .map_err(Warning::InvalidLongName)?;
-        Ok(LongName {
-            short_name,
-            long_name,
-        })
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct LongNamesRecord(pub Vec<LongName>);
-
-#[derive(Clone, Debug)]
-pub struct ProductInfoRecord(pub String);
-
-#[derive(Clone, Debug)]
-pub struct VariableSet {
-    pub name: String,
-    pub variable_names: Vec<Identifier>,
-}
-
-impl VariableSet {
-    fn parse(input: &str, decoder: &mut Decoder) -> Result<Self, Warning> {
-        let (name, input) = input
-            .split_once('=')
-            .ok_or(Warning::VariableSetMissingEquals)?;
-        let mut vars = Vec::new();
-        for var in input.split_ascii_whitespace() {
-            if let Some(identifier) = decoder
-                .new_identifier(var)
-                .and_then(Identifier::must_be_ordinary)
-                .map_err(Warning::InvalidVariableSetName)
-                .issue_warning(&mut decoder.warn)
-            {
-                vars.push(identifier);
-            }
-        }
-        Ok(VariableSet {
-            name: name.to_string(),
-            variable_names: vars,
-        })
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct VariableSetRecord {
-    pub offsets: Range<u64>,
-    pub sets: Vec<VariableSet>,
-}
-
-trait IssueWarning<T> {
-    fn issue_warning(self, warn: &mut dyn FnMut(Warning)) -> Option<T>;
-}
-impl<T> IssueWarning<T> for Result<T, Warning> {
-    fn issue_warning(self, warn: &mut dyn FnMut(Warning)) -> Option<T> {
-        match self {
-            Ok(result) => Some(result),
-            Err(error) => {
-                warn(error);
-                None
-            }
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct Extension {
-    pub offsets: Range<u64>,
-
-    /// Record subtype.
-    pub subtype: u32,
-
-    /// Size of each data element.
-    pub size: u32,
-
-    /// Number of data elements.
-    pub count: u32,
-
-    /// `size * count` bytes of data.
-    pub data: Vec<u8>,
-}
-
-impl Extension {
-    fn check_size(&self, expected: &ExtensionRecord) -> Result<(), Warning> {
-        match expected.size {
-            Some(expected_size) if self.size != expected_size => {
-                return Err(Warning::BadRecordSize {
-                    offset: self.offsets.start,
-                    record: expected.name.into(),
-                    size: self.size,
-                    expected_size,
-                });
-            }
-            _ => (),
-        }
-        match expected.count {
-            Some(expected_count) if self.count != expected_count => {
-                return Err(Warning::BadRecordCount {
-                    offset: self.offsets.start,
-                    record: expected.name.into(),
-                    count: self.count,
-                    expected_count,
-                });
-            }
-            _ => (),
-        }
-        Ok(())
-    }
-
-    fn read<R: Read + Seek>(
-        r: &mut R,
-        endian: Endian,
-        var_types: &VarTypes,
-        warn: &mut dyn FnMut(Warning),
-    ) -> Result<Option<Record>, Error> {
-        let subtype = endian.parse(read_bytes(r)?);
-        let header_offset = r.stream_position()?;
-        let size: u32 = endian.parse(read_bytes(r)?);
-        let count = endian.parse(read_bytes(r)?);
-        let Some(product) = size.checked_mul(count) else {
-            return Err(Error::ExtensionRecordTooLarge {
-                offset: header_offset,
-                subtype,
-                size,
-                count,
-            });
-        };
-        let start_offset = r.stream_position()?;
-        let data = read_vec(r, product as usize)?;
-        let end_offset = start_offset + product as u64;
-        let extension = Extension {
-            offsets: start_offset..end_offset,
-            subtype,
-            size,
-            count,
-            data,
-        };
-        let result = match subtype {
-            3 => IntegerInfoRecord::parse(&extension, endian),
-            4 => FloatInfoRecord::parse(&extension, endian),
-            11 => VarDisplayRecord::parse(&extension, var_types, endian, warn),
-            7 | 19 => MultipleResponseRecord::parse(&extension, endian),
-            21 => LongStringValueLabelRecord::parse(&extension, endian),
-            22 => LongStringMissingValueRecord::parse(&extension, endian, warn),
-            20 => EncodingRecord::parse(&extension, endian),
-            16 => NumberOfCasesRecord::parse(&extension, endian),
-            5 => RawVariableSetRecord::parse(extension),
-            10 => RawProductInfoRecord::parse(extension),
-            13 => RawLongNamesRecord::parse(extension),
-            14 => RawVeryLongStringsRecord::parse(extension),
-            17 => RawFileAttributesRecord::parse(extension),
-            18 => RawVariableAttributesRecord::parse(extension),
-            _ => Ok(Record::OtherExtension(extension)),
-        };
-        match result {
-            Ok(result) => Ok(Some(result)),
-            Err(error) => {
-                warn(error);
-                Ok(None)
-            }
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct ZHeader {
-    /// File offset to the start of the record.
-    pub offset: u64,
-
-    /// File offset to the ZLIB data header.
-    pub zheader_offset: u64,
-
-    /// File offset to the ZLIB trailer.
-    pub ztrailer_offset: u64,
-
-    /// Length of the ZLIB trailer in bytes.
-    pub ztrailer_len: u64,
-}
-
-impl ZHeader {
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
-        let offset = r.stream_position()?;
-        let zheader_offset: u64 = endian.parse(read_bytes(r)?);
-        let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
-        let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
-
-        if zheader_offset != offset {
-            return Err(Error::UnexpectedZHeaderOffset {
-                actual: zheader_offset,
-                expected: offset,
-            });
-        }
-
-        if ztrailer_offset < offset {
-            return Err(Error::ImpossibleZTrailerOffset(ztrailer_offset));
-        }
-
-        if ztrailer_len < 24 || ztrailer_len % 24 != 0 {
-            return Err(Error::InvalidZTrailerLength(ztrailer_len));
-        }
-
-        Ok(ZHeader {
-            offset,
-            zheader_offset,
-            ztrailer_offset,
-            ztrailer_len,
-        })
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct ZTrailer {
-    /// File offset to the start of the record.
-    pub offset: u64,
-
-    /// Compression bias as a negative integer, e.g. -100.
-    pub int_bias: i64,
-
-    /// Always observed as zero.
-    pub zero: u64,
-
-    /// Uncompressed size of each block, except possibly the last.  Only
-    /// `0x3ff000` has been observed so far.
-    pub block_size: u32,
-
-    /// Block descriptors, always `(ztrailer_len - 24) / 24)` of them.
-    pub blocks: Vec<ZBlock>,
-}
-
-#[derive(Clone, Debug)]
-pub struct ZBlock {
-    /// Offset of block of data if simple compression were used.
-    pub uncompressed_ofs: u64,
-
-    /// Actual offset within the file of the compressed data block.
-    pub compressed_ofs: u64,
-
-    /// The number of bytes in this data block after decompression.  This is
-    /// `block_size` in every data block but the last, which may be smaller.
-    pub uncompressed_size: u32,
-
-    /// The number of bytes in this data block, as stored compressed in this
-    /// file.
-    pub compressed_size: u32,
-}
-
-impl ZBlock {
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZBlock, Error> {
-        Ok(ZBlock {
-            uncompressed_ofs: endian.parse(read_bytes(r)?),
-            compressed_ofs: endian.parse(read_bytes(r)?),
-            uncompressed_size: endian.parse(read_bytes(r)?),
-            compressed_size: endian.parse(read_bytes(r)?),
-        })
-    }
-}
-
-impl ZTrailer {
-    fn read<R: Read + Seek>(
-        reader: &mut R,
-        endian: Endian,
-        bias: f64,
-        zheader: &ZHeader,
-        warn: &mut dyn FnMut(Warning),
-    ) -> Result<Option<ZTrailer>, Error> {
-        let start_offset = reader.stream_position()?;
-        if reader
-            .seek(SeekFrom::Start(zheader.ztrailer_offset))
-            .is_err()
-        {
-            return Ok(None);
-        }
-        let int_bias = endian.parse(read_bytes(reader)?);
-        if int_bias as f64 != -bias {
-            return Err(Error::WrongZlibTrailerBias {
-                actual: int_bias,
-                expected: -bias,
-            });
-        }
-        let zero = endian.parse(read_bytes(reader)?);
-        if zero != 0 {
-            return Err(Error::WrongZlibTrailerZero(zero));
-        }
-        let block_size = endian.parse(read_bytes(reader)?);
-        if block_size != 0x3ff000 {
-            return Err(Error::WrongZlibTrailerBlockSize(block_size));
-        }
-        let n_blocks: u32 = endian.parse(read_bytes(reader)?);
-        let expected_n_blocks = (zheader.ztrailer_len - 24) / 24;
-        if n_blocks as u64 != expected_n_blocks {
-            return Err(Error::BadZlibTrailerNBlocks {
-                offset: zheader.ztrailer_offset,
-                n_blocks,
-                expected_n_blocks,
-                ztrailer_len: zheader.ztrailer_len,
-            });
-        }
-        let blocks = (0..n_blocks)
-            .map(|_| ZBlock::read(reader, endian))
-            .collect::<Result<Vec<_>, _>>()?;
-
-        let mut expected_uncmp_ofs = zheader.zheader_offset;
-        let mut expected_cmp_ofs = zheader.zheader_offset + 24;
-        for (index, block) in blocks.iter().enumerate() {
-            if block.uncompressed_ofs != expected_uncmp_ofs {
-                return Err(Error::ZlibTrailerBlockWrongUncmpOfs {
-                    index,
-                    actual: block.uncompressed_ofs,
-                    expected: expected_cmp_ofs,
-                });
-            }
-            if block.compressed_ofs != expected_cmp_ofs {
-                return Err(Error::ZlibTrailerBlockWrongCmpOfs {
-                    index,
-                    actual: block.compressed_ofs,
-                    expected: expected_cmp_ofs,
-                });
-            }
-            if index < blocks.len() - 1 {
-                if block.uncompressed_size != block_size {
-                    warn(Warning::ZlibTrailerBlockWrongSize {
-                        index,
-                        actual: block.uncompressed_size,
-                        expected: block_size,
-                    });
-                }
-            } else {
-                if block.uncompressed_size > block_size {
-                    warn(Warning::ZlibTrailerBlockTooBig {
-                        index,
-                        actual: block.uncompressed_size,
-                        max_expected: block_size,
-                    });
-                }
-            }
-            // http://www.zlib.net/zlib_tech.html says that the maximum
-            // expansion from compression, with worst-case parameters, is 13.5%
-            // plus 11 bytes.  This code checks for an expansion of more than
-            // 14.3% plus 11 bytes.
-            if block.compressed_size > block.uncompressed_size + block.uncompressed_size / 7 + 11 {
-                return Err(Error::ZlibExpansion {
-                    index,
-                    compressed_size: block.compressed_size,
-                    uncompressed_size: block.uncompressed_size,
-                });
-            }
-
-            expected_cmp_ofs += block.compressed_size as u64;
-            expected_uncmp_ofs += block.uncompressed_size as u64;
-        }
-
-        if expected_cmp_ofs != zheader.ztrailer_offset {
-            return Err(Error::ZlibTrailerOffsetInconsistency {
-                descriptors: expected_cmp_ofs,
-                zheader: zheader.ztrailer_offset,
-            });
-        }
-
-        reader.seek(SeekFrom::Start(start_offset))?;
-        Ok(Some(ZTrailer {
-            offset: zheader.ztrailer_offset,
-            int_bias,
-            zero,
-            block_size,
-            blocks,
-        }))
-    }
-}
-
-fn skip_bytes<R: Read>(r: &mut R, mut n: usize) -> Result<(), IoError> {
-    thread_local! {
-        static BUF: RefCell<[u8; 256]> = RefCell::new([0u8; 256]);
-    }
-    BUF.with_borrow_mut(|buf| {
-        while n > 0 {
-            let chunk = n.min(buf.len());
-            r.read_exact(&mut buf[..n])?;
-            n -= chunk;
-        }
-        Ok(())
-    })
-}
-
-fn try_read_bytes_into<R: Read>(r: &mut R, buf: &mut [u8]) -> Result<bool, IoError> {
-    let n = r.read(buf)?;
-    if n > 0 {
-        if n < buf.len() {
-            r.read_exact(&mut buf[n..])?;
-        }
-        Ok(true)
-    } else {
-        Ok(false)
-    }
-}
-
-fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
-    let mut buf = [0; N];
-    match try_read_bytes_into(r, &mut buf)? {
-        true => Ok(Some(buf)),
-        false => Ok(None),
-    }
-}
-
-fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
-    let mut buf = [0; N];
-    r.read_exact(&mut buf)?;
-    Ok(buf)
-}
-
-fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
-    let mut vec = vec![0; n];
-    r.read_exact(&mut vec)?;
-    Ok(vec)
+/// Reads exactly `n` bytes from `r` and returns them as a vector.
+///
+/// Returns the error from [Read::read_exact] if `n` bytes cannot be read.
+fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
+    let mut vec = vec![0; n];
+    r.read_exact(&mut vec)?;
+    Ok(vec)
 }
 
 fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<RawString, IoError> {
@@ -3720,94 +1848,6 @@ fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<RawString, IoError>
     Ok(read_vec(r, length as usize)?.into())
 }
 
-#[derive(Clone, Debug)]
-pub struct LongStringValueLabels<N, S>
-where
-    S: Debug,
-{
-    pub var_name: N,
-    pub width: u32,
-
-    /// `(value, label)` pairs, where each value is `width` bytes.
-    pub labels: Vec<(RawString, S)>,
-}
-
-impl LongStringValueLabels<RawString, RawString> {
-    fn decode(
-        &self,
-        decoder: &mut Decoder,
-    ) -> Result<LongStringValueLabels<Identifier, String>, Warning> {
-        let var_name = decoder.decode(&self.var_name);
-        let var_name = Identifier::from_encoding(var_name.trim_end(), decoder.encoding)
-            .map_err(Warning::InvalidLongStringValueLabelName)?;
-
-        let mut labels = Vec::with_capacity(self.labels.len());
-        for (value, label) in self.labels.iter() {
-            let label = decoder.decode(label).to_string();
-            labels.push((value.clone(), label));
-        }
-
-        Ok(LongStringValueLabels {
-            var_name,
-            width: self.width,
-            labels,
-        })
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct LongStringValueLabelRecord<N, S>(pub Vec<LongStringValueLabels<N, S>>)
-where
-    N: Debug,
-    S: Debug;
-
-static LONG_STRING_VALUE_LABEL_RECORD: ExtensionRecord = ExtensionRecord {
-    size: Some(1),
-    count: None,
-    name: "long string value labels record",
-};
-
-impl LongStringValueLabelRecord<RawString, RawString> {
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
-        ext.check_size(&LONG_STRING_VALUE_LABEL_RECORD)?;
-
-        let mut input = &ext.data[..];
-        let mut label_set = Vec::new();
-        while !input.is_empty() {
-            let var_name = read_string(&mut input, endian)?;
-            let width: u32 = endian.parse(read_bytes(&mut input)?);
-            let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
-            let mut labels = Vec::new();
-            for _ in 0..n_labels {
-                let value = read_string(&mut input, endian)?;
-                let label = read_string(&mut input, endian)?;
-                labels.push((value, label));
-            }
-            label_set.push(LongStringValueLabels {
-                var_name,
-                width,
-                labels,
-            })
-        }
-        Ok(Record::LongStringValueLabels(LongStringValueLabelRecord(
-            label_set,
-        )))
-    }
-}
-
-impl LongStringValueLabelRecord<RawString, RawString> {
-    fn decode(self, decoder: &mut Decoder) -> LongStringValueLabelRecord<Identifier, String> {
-        let mut labels = Vec::with_capacity(self.0.len());
-        for label in &self.0 {
-            match label.decode(decoder) {
-                Ok(set) => labels.push(set),
-                Err(error) => decoder.warn(error),
-            }
-        }
-        LongStringValueLabelRecord(labels)
-    }
-}
-
 #[derive(Default)]
 pub struct VarTypes {
     pub types: Vec<Option<VarWidth>>,
diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs
new file mode 100644 (file)
index 0000000..00cbc9c
--- /dev/null
@@ -0,0 +1,1952 @@
+//! Raw records.
+//!
+//! Separated into a submodule just to reduce clutter.
+
+use std::{
+    borrow::Cow,
+    collections::BTreeMap,
+    fmt::{Debug, Formatter},
+    io::{Read, Seek, SeekFrom},
+    ops::Range,
+    str::from_utf8,
+};
+
+use crate::{
+    dictionary::{
+        Alignment, Attributes, CategoryLabels, Datum, Measure, MissingValueRange, MissingValues,
+        VarType, VarWidth,
+    },
+    endian::{Endian, Parse},
+    identifier::{Error as IdError, Identifier},
+    sys::raw::{
+        read_bytes, read_string, read_vec, DecodedRecord, Decoder, Error, Magic, RawDatum,
+        RawStrArray, RawString, RawWidth, Record, VarTypes, Warning,
+    },
+};
+
+/// Compression scheme declared in a file header record.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Compression {
+    /// Selected by compression code 1 in a file without the ZSAV magic.
+    Simple,
+    /// Selected by compression code 2 in a file with the ZSAV magic.
+    ZLib,
+}
+
+/// The file header record, with its text fields represented as `S`.
+#[derive(Clone)]
+pub struct HeaderRecord<S>
+where
+    S: Debug,
+{
+    /// Range of offsets in file.
+    pub offsets: Range<u64>,
+
+    /// Magic number.
+    pub magic: Magic,
+
+    /// Eye-catcher string, product name, in the file's encoding.  Padded
+    /// on the right with spaces.
+    pub eye_catcher: S,
+
+    /// Layout code, normally either 2 or 3.
+    pub layout_code: u32,
+
+    /// Number of variable positions, or `None` if the value in the file is
+    /// questionably trustworthy.
+    pub nominal_case_size: Option<u32>,
+
+    /// Compression type, or `None` if the data is not compressed.
+    pub compression: Option<Compression>,
+
+    /// 1-based variable index of the weight variable, or `None` if the file is
+    /// unweighted.
+    pub weight_index: Option<u32>,
+
+    /// Claimed number of cases, if known.
+    pub n_cases: Option<u32>,
+
+    /// Compression bias, usually 100.0.
+    pub bias: f64,
+
+    /// `dd mmm yy` in the file's encoding.
+    pub creation_date: S,
+
+    /// `HH:MM:SS` in the file's encoding.
+    pub creation_time: S,
+
+    /// File label, in the file's encoding.  Padded on the right with spaces.
+    pub file_label: S,
+
+    /// Endianness of the data in the file header.
+    pub endian: Endian,
+}
+
+impl<S> HeaderRecord<S>
+where
+    S: Debug,
+{
+    /// Writes one line of the header's debug listing to `f`: `name`,
+    /// right-aligned in a 17-column field, followed by the [Debug]
+    /// representation of `value`.
+    fn debug_field<T: Debug>(&self, f: &mut Formatter, name: &str, value: T) -> std::fmt::Result {
+        writeln!(f, "{name:>17}: {:?}", value)
+    }
+}
+
+impl<S> Debug for HeaderRecord<S>
+where
+    S: Debug,
+{
+    /// Writes a title line followed by one aligned `name: value` line per
+    /// header field.  The `offsets` field is not included.
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        writeln!(f, "File header record:")?;
+        self.debug_field(f, "Magic", self.magic)?;
+        self.debug_field(f, "Product name", &self.eye_catcher)?;
+        self.debug_field(f, "Layout code", self.layout_code)?;
+        self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
+        self.debug_field(f, "Compression", self.compression)?;
+        self.debug_field(f, "Weight index", self.weight_index)?;
+        self.debug_field(f, "Number of cases", self.n_cases)?;
+        self.debug_field(f, "Compression bias", self.bias)?;
+        self.debug_field(f, "Creation date", &self.creation_date)?;
+        self.debug_field(f, "Creation time", &self.creation_time)?;
+        self.debug_field(f, "File label", &self.file_label)?;
+        self.debug_field(f, "Endianness", self.endian)
+    }
+}
+
+impl HeaderRecord<RawString> {
+    /// Reads a file header record from `r`, starting at its current position.
+    ///
+    /// The file's byte order is deduced from the layout code, which is
+    /// normally 2 or 3.  Out-of-range values for the nominal case size and
+    /// the number of cases are recorded as `None` rather than treated as
+    /// errors, and a compression bias other than 100.0 or 0.0 is reported
+    /// through `warn`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [Error::NotASystemFile] if the magic number is not recognized
+    /// or the layout code cannot be identified as 2 or 3,
+    /// [Error::InvalidZsavCompression] or [Error::InvalidSavCompression] if
+    /// the compression code does not suit the magic number, and an I/O error
+    /// if reading from `r` fails.
+    pub fn read<R: Read + Seek>(r: &mut R, warn: &mut dyn FnMut(Warning)) -> Result<Self, Error> {
+        let start = r.stream_position()?;
+
+        let magic: [u8; 4] = read_bytes(r)?;
+        let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
+
+        let eye_catcher = RawString(read_vec(r, 60)?);
+        let layout_code: [u8; 4] = read_bytes(r)?;
+        // The layout code is normally 2 or 3, so try both values when
+        // identifying the byte order.
+        let endian = Endian::identify_u32(2, layout_code)
+            .or_else(|| Endian::identify_u32(3, layout_code))
+            .ok_or(Error::NotASystemFile)?;
+        let layout_code = endian.parse(layout_code);
+
+        // Values outside this range are considered untrustworthy.
+        let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
+        let nominal_case_size = (1..i32::MAX as u32 / 16)
+            .contains(&nominal_case_size)
+            .then_some(nominal_case_size);
+
+        // The set of valid compression codes depends on the magic number.
+        let compression_code: u32 = endian.parse(read_bytes(r)?);
+        let compression = match (magic, compression_code) {
+            (Magic::Zsav, 2) => Some(Compression::ZLib),
+            (Magic::Zsav, code) => return Err(Error::InvalidZsavCompression(code)),
+            (_, 0) => None,
+            (_, 1) => Some(Compression::Simple),
+            (_, code) => return Err(Error::InvalidSavCompression(code)),
+        };
+
+        // A weight index of 0 means that the file is unweighted.
+        let weight_index: u32 = endian.parse(read_bytes(r)?);
+        let weight_index = (weight_index > 0).then_some(weight_index);
+
+        let n_cases: u32 = endian.parse(read_bytes(r)?);
+        let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
+
+        let bias: f64 = endian.parse(read_bytes(r)?);
+        if bias != 100.0 && bias != 0.0 {
+            warn(Warning::UnexpectedBias(bias));
+        }
+
+        let creation_date = RawString(read_vec(r, 9)?);
+        let creation_time = RawString(read_vec(r, 8)?);
+        let file_label = RawString(read_vec(r, 64)?);
+        // Three trailing bytes are read and discarded (presumably padding).
+        let _: [u8; 3] = read_bytes(r)?;
+
+        Ok(HeaderRecord {
+            offsets: start..r.stream_position()?,
+            magic,
+            layout_code,
+            nominal_case_size,
+            compression,
+            weight_index,
+            n_cases,
+            bias,
+            creation_date,
+            creation_time,
+            eye_catcher,
+            file_label,
+            endian,
+        })
+    }
+
+    /// Converts this raw header into a [DecodedRecord::Header], using
+    /// `decoder` to decode the text fields into `String`s.  All other fields
+    /// are carried over unchanged.
+    pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
+        let eye_catcher = decoder.decode(&self.eye_catcher).to_string();
+        let file_label = decoder.decode(&self.file_label).to_string();
+        let creation_date = decoder.decode(&self.creation_date).to_string();
+        let creation_time = decoder.decode(&self.creation_time).to_string();
+        DecodedRecord::Header(HeaderRecord {
+            eye_catcher,
+            weight_index: self.weight_index,
+            n_cases: self.n_cases,
+            file_label,
+            // `self` is owned, so the range can be moved instead of cloned.
+            offsets: self.offsets,
+            magic: self.magic,
+            layout_code: self.layout_code,
+            nominal_case_size: self.nominal_case_size,
+            compression: self.compression,
+            bias: self.bias,
+            creation_date,
+            creation_time,
+            endian: self.endian,
+        })
+    }
+}
+
+/// [crate::format::Format] as represented in a system file.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct RawFormat(
+    /// The most-significant 16 bits are the type, the next 8 bits are the
+    /// width, and the least-significant 8 bits are the number of decimals.
+    pub u32,
+);
+
+impl Debug for RawFormat {
+    /// Writes the raw value in hexadecimal, followed in parentheses by the
+    /// format's type name, width, and number of decimals.
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        let Self(raw) = *self;
+        let (w, d) = ((raw >> 8) & 0xff, raw & 0xff);
+        let type_ = format_name(raw >> 16);
+        write!(f, "{:06x} ({type_}{w}.{d})", raw)
+    }
+}
+
+/// Returns the name of the format with numeric code `type_`, or a
+/// placeholder string that includes the number if the code is not in the
+/// table below.
+fn format_name(type_: u32) -> Cow<'static, str> {
+    match type_ {
+        1 => "A",
+        2 => "AHEX",
+        3 => "COMMA",
+        4 => "DOLLAR",
+        5 => "F",
+        6 => "IB",
+        7 => "PIBHEX",
+        8 => "P",
+        9 => "PIB",
+        10 => "PK",
+        11 => "RB",
+        12 => "RBHEX",
+        15 => "Z",
+        16 => "N",
+        17 => "E",
+        20 => "DATE",
+        21 => "TIME",
+        22 => "DATETIME",
+        23 => "ADATE",
+        24 => "JDATE",
+        25 => "DTIME",
+        26 => "WKDAY",
+        27 => "MONTH",
+        28 => "MOYR",
+        29 => "QYR",
+        30 => "WKYR",
+        31 => "PCT",
+        32 => "DOT",
+        33 => "CCA",
+        34 => "CCB",
+        35 => "CCC",
+        36 => "CCD",
+        37 => "CCE",
+        38 => "EDATE",
+        39 => "SDATE",
+        40 => "MTIME",
+        41 => "YMDHMS",
+        // Unknown codes need an owned string, hence the `Cow` return type.
+        _ => return format!("<unknown format {type_}>").into(),
+    }
+    .into()
+}
+
+impl MissingValues {
+    /// Reads the missing values that follow a variable record.
+    ///
+    /// `code` is the missing value code from the variable record: 0 means no
+    /// missing values, 1 through 3 give a count of individual values, -2
+    /// means a range, and -3 means a range plus one individual value.  Any
+    /// other code yields [Error::BadMissingValueCode], reported at `offset`.
+    ///
+    /// `raw_width` determines how the 8-byte values are interpreted.  When
+    /// they cannot be used, a warning is passed to `warn` and the default
+    /// (empty) value is returned.
+    pub fn read<R: Read + Seek>(
+        r: &mut R,
+        offset: u64,
+        raw_width: RawWidth,
+        code: i32,
+        endian: Endian,
+        warn: &mut dyn FnMut(Warning),
+    ) -> Result<Self, Error> {
+        let (individual_values, has_range) = match code {
+            0 => return Ok(Self::default()),
+            1..=3 => (code as usize, false),
+            -2 => (0, true),
+            -3 => (1, true),
+            _ => return Err(Error::BadMissingValueCode { offset, code }),
+        };
+
+        // The range, if any, is read before the individual values.  Every
+        // value is read as 8 raw bytes and interpreted later, so the stream
+        // is fully consumed even if the values turn out to be unusable.
+        let mut values = Vec::with_capacity(individual_values);
+        let range = if has_range {
+            let low = read_bytes::<8, _>(r)?;
+            let high = read_bytes::<8, _>(r)?;
+            Some((low, high))
+        } else {
+            None
+        };
+        for _ in 0..individual_values {
+            values.push(read_bytes::<8, _>(r)?);
+        }
+
+        match VarWidth::try_from(raw_width) {
+            Ok(VarWidth::Numeric) => {
+                let values = values
+                    .into_iter()
+                    .map(|v| Datum::Number(endian.parse(v)))
+                    .collect();
+
+                let range = range.map(|(low, high)| {
+                    MissingValueRange::new(endian.parse(low), endian.parse(high))
+                });
+                // NOTE(review): panics if `MissingValues::new` rejects these
+                // values -- confirm that it cannot fail for file-supplied data.
+                return Ok(Self::new(values, range).unwrap());
+            }
+            // A range is not accepted for a string variable.
+            Ok(VarWidth::String(_)) if range.is_some() => warn(Warning::MissingValueStringRange),
+            Ok(VarWidth::String(width)) => {
+                // Each value occupies 8 bytes, so at most 8 bytes are kept.
+                let width = width.min(8) as usize;
+                let values = values
+                    .into_iter()
+                    .map(|value| Datum::String(RawString::from(&value[..width])))
+                    .collect();
+                // NOTE(review): same `unwrap` caveat as for numeric values.
+                return Ok(Self::new(values, None).unwrap());
+            }
+            // `raw_width` is not a valid variable width (presumably a long
+            // string continuation record -- TODO confirm).
+            Err(()) => warn(Warning::MissingValueContinuation(offset)),
+        }
+        Ok(Self::default())
+    }
+}
+
+/// A variable record, with its name and label represented as `S`.
+#[derive(Clone)]
+pub struct VariableRecord<S>
+where
+    S: Debug,
+{
+    /// Range of offsets in file.
+    pub offsets: Range<u64>,
+
+    /// Variable width, in the range -1..=255.
+    pub width: RawWidth,
+
+    /// Variable name, padded on the right with spaces.
+    pub name: S,
+
+    /// Print format.
+    pub print_format: RawFormat,
+
+    /// Write format.
+    pub write_format: RawFormat,
+
+    /// Missing values.
+    pub missing_values: MissingValues,
+
+    /// Optional variable label.
+    pub label: Option<S>,
+}
+
+impl<S> Debug for VariableRecord<S>
+where
+    S: Debug,
+{
+    /// Writes each field of the record, other than its offsets, on a line of
+    /// its own.
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        let Self {
+            offsets: _,
+            width,
+            name,
+            print_format,
+            write_format,
+            missing_values,
+            label,
+        } = self;
+        writeln!(f, "Width: {}", width)?;
+        writeln!(f, "Print format: {:?}", print_format)?;
+        writeln!(f, "Write format: {:?}", write_format)?;
+        writeln!(f, "Name: {:?}", name)?;
+        writeln!(f, "Variable label: {:?}", label)?;
+        writeln!(f, "Missing values: {:?}", missing_values)
+    }
+}
+
+impl VariableRecord<RawString> {
+    /// Reads a variable record from `r`, starting at its current position,
+    /// and returns it as a [Record::Variable].
+    ///
+    /// At most 65535 bytes of a variable label are kept; the rest of the
+    /// label and its padding are skipped.  Problems with missing values that
+    /// are not fatal are reported through `warn`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [Error::BadVariableWidth] if the width is invalid,
+    /// [Error::BadVariableLabelCode] if the label indicator is neither 0 nor
+    /// 1, any error from [MissingValues::read], and an I/O error if reading
+    /// from `r` fails.
+    pub fn read<R: Read + Seek>(
+        r: &mut R,
+        endian: Endian,
+        warn: &mut dyn FnMut(Warning),
+    ) -> Result<Record, Error> {
+        let start_offset = r.stream_position()?;
+        let width: i32 = endian.parse(read_bytes(r)?);
+        let width: RawWidth = width.try_into().map_err(|_| Error::BadVariableWidth {
+            start_offset,
+            width,
+        })?;
+        let code_offset = r.stream_position()?;
+        let has_variable_label: u32 = endian.parse(read_bytes(r)?);
+        let missing_value_code: i32 = endian.parse(read_bytes(r)?);
+        let print_format = RawFormat(endian.parse(read_bytes(r)?));
+        let write_format = RawFormat(endian.parse(read_bytes(r)?));
+        let name = RawString(read_vec(r, 8)?);
+
+        let label = match has_variable_label {
+            0 => None,
+            1 => {
+                let len: u32 = endian.parse(read_bytes(r)?);
+                let read_len = len.min(65535);
+                let label = RawString(read_vec(r, read_len as usize)?);
+
+                // Skip the part of the label that was not read above, if any,
+                // plus the padding that rounds the label up to a multiple of
+                // 4 bytes.  Skipping only the padding would leave the reader
+                // in the middle of an overlong label.  The arithmetic is in
+                // `u64` so that rounding up cannot overflow.
+                let skip = u64::from(len).next_multiple_of(4) - u64::from(read_len);
+                let _ = read_vec(r, skip as usize)?;
+
+                Some(label)
+            }
+            _ => {
+                return Err(Error::BadVariableLabelCode {
+                    start_offset,
+                    code_offset,
+                    code: has_variable_label,
+                });
+            }
+        };
+
+        let missing_values =
+            MissingValues::read(r, start_offset, width, missing_value_code, endian, warn)?;
+
+        let end_offset = r.stream_position()?;
+
+        Ok(Record::Variable(VariableRecord {
+            offsets: start_offset..end_offset,
+            width,
+            name,
+            print_format,
+            write_format,
+            missing_values,
+            label,
+        }))
+    }
+
+    /// Converts this raw record into a [DecodedRecord::Variable], using
+    /// `decoder` to decode the name and label into `String`s.
+    pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
+        DecodedRecord::Variable(VariableRecord {
+            // `self` is owned, so the range can be moved instead of cloned.
+            offsets: self.offsets,
+            width: self.width,
+            name: decoder.decode(&self.name).to_string(),
+            print_format: self.print_format,
+            write_format: self.write_format,
+            missing_values: self.missing_values,
+            label: self
+                .label
+                .as_ref()
+                .map(|label| decoder.decode(label).to_string()),
+        })
+    }
+}
+
+/// A value paired with its label.
+#[derive(Clone, Debug)]
+pub struct ValueLabel<D, S>
+where
+    D: Debug,
+    S: Debug,
+{
+    /// The value being labeled.
+    pub datum: D,
+    /// The label for the value.
+    pub label: S,
+}
+
+/// A value label record: a set of value labels together with the variables
+/// that they apply to.
+#[derive(Clone)]
+pub struct ValueLabelRecord<D, S>
+where
+    D: Debug,
+    S: Debug,
+{
+    /// Range of offsets in file.
+    pub offsets: Range<u64>,
+
+    /// The labels.
+    pub labels: Vec<ValueLabel<D, S>>,
+
+    /// The 1-based indexes of the variables that the labels apply to.
+    pub dict_indexes: Vec<u32>,
+
+    /// The type shared by all of the variables.
+    pub var_type: VarType,
+}
+
+impl<D, S> Debug for ValueLabelRecord<D, S>
+where
+    D: Debug,
+    S: Debug,
+{
+    /// Writes one line per label, then a final unterminated line that names
+    /// the variable type and lists the variable indexes.
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        writeln!(f, "labels: ")?;
+        self.labels
+            .iter()
+            .try_for_each(|label| writeln!(f, "{label:?}"))?;
+        write!(f, "apply to {} variables", self.var_type)?;
+        self.dict_indexes
+            .iter()
+            .try_for_each(|dict_index| write!(f, " #{dict_index}"))
+    }
+}
+
+// Limits on the counts read from the file for a value label record.
+impl<D, S> ValueLabelRecord<D, S>
+where
+    D: Debug,
+    S: Debug,
+{
+    /// Maximum number of value labels in a record.
+    pub const MAX_LABELS: u32 = u32::MAX / 8;
+
+    /// Maximum number of variable indexes in a record.
+    pub const MAX_INDEXES: u32 = u32::MAX / 8;
+}
+
+impl ValueLabelRecord<RawDatum, RawString> {
+    /// Reads a value label record, and the variable index record that must
+    /// follow it, from `r`.
+    ///
+    /// `var_types` is used to validate the variable indexes and to determine
+    /// the type of the labeled values.  Invalid indexes, and indexes of
+    /// variables whose type differs from that of the first valid index, are
+    /// dropped and reported through `warn`.
+    ///
+    /// Returns `Ok(None)` if no usable variable indexes remain.
+    ///
+    /// # Errors
+    ///
+    /// Returns [Error::BadNumberOfValueLabels] or [Error::TooManyVarIndexes]
+    /// if a count exceeds its limit, [Error::ExpectedVarIndexRecord] if the
+    /// following record does not have type 4, and an I/O error if reading
+    /// from `r` fails.
+    pub fn read<R: Read + Seek>(
+        r: &mut R,
+        endian: Endian,
+        var_types: &VarTypes,
+        warn: &mut dyn FnMut(Warning),
+    ) -> Result<Option<Record>, Error> {
+        let label_offset = r.stream_position()?;
+        let n: u32 = endian.parse(read_bytes(r)?);
+        if n > Self::MAX_LABELS {
+            return Err(Error::BadNumberOfValueLabels {
+                offset: label_offset,
+                n,
+                max: Self::MAX_LABELS,
+            });
+        }
+
+        let mut labels = Vec::new();
+        for _ in 0..n {
+            let value = super::UntypedDatum(read_bytes(r)?);
+            let label_len: u8 = endian.parse(read_bytes(r)?);
+            let label_len = label_len as usize;
+            // The length byte plus the label are padded to a multiple of 8
+            // bytes; read the padding too, then discard it.
+            let padded_len = (label_len + 1).next_multiple_of(8);
+
+            let mut label = read_vec(r, padded_len - 1)?;
+            label.truncate(label_len);
+            labels.push((value, RawString(label)));
+        }
+
+        let index_offset = r.stream_position()?;
+        let rec_type: u32 = endian.parse(read_bytes(r)?);
+        if rec_type != 4 {
+            return Err(Error::ExpectedVarIndexRecord {
+                offset: index_offset,
+                rec_type,
+            });
+        }
+
+        let n: u32 = endian.parse(read_bytes(r)?);
+        if n > Self::MAX_INDEXES {
+            return Err(Error::TooManyVarIndexes {
+                offset: index_offset,
+                n,
+                max: Self::MAX_INDEXES,
+            });
+        } else if n == 0 {
+            warn(Warning::NoVarIndexes {
+                offset: index_offset,
+            });
+            return Ok(None);
+        }
+
+        // From here on, warnings refer to the offset of the indexes
+        // themselves rather than to the start of the index record.
+        let index_offset = r.stream_position()?;
+        let mut dict_indexes = Vec::with_capacity(n as usize);
+        let mut invalid_indexes = Vec::new();
+        for _ in 0..n {
+            let index: u32 = endian.parse(read_bytes(r)?);
+            if var_types.is_valid_index(index as usize) {
+                dict_indexes.push(index);
+            } else {
+                invalid_indexes.push(index);
+            }
+        }
+        if !invalid_indexes.is_empty() {
+            warn(Warning::InvalidVarIndexes {
+                offset: index_offset,
+                max: var_types.n_values(),
+                invalid: invalid_indexes,
+            });
+        }
+
+        let Some(&first_index) = dict_indexes.first() else {
+            return Ok(None);
+        };
+        // NOTE(review): the indexing and `unwrap` rely on `is_valid_index`
+        // having accepted only 1-based indexes of populated entries --
+        // confirm against `VarTypes`.
+        let var_type = VarType::from(var_types.types[first_index as usize - 1].unwrap());
+        let mut wrong_type_indexes = Vec::new();
+        dict_indexes.retain(|&index| {
+            if var_types.types[index as usize - 1].map(VarType::from) != Some(var_type) {
+                wrong_type_indexes.push(index);
+                false
+            } else {
+                true
+            }
+        });
+        if !wrong_type_indexes.is_empty() {
+            warn(Warning::MixedVarTypes {
+                offset: index_offset,
+                var_type,
+                wrong_types: wrong_type_indexes,
+            });
+        }
+
+        // The values can only be interpreted now that the type is known.
+        let labels = labels
+            .into_iter()
+            .map(|(value, label)| ValueLabel {
+                datum: RawDatum::from_raw(&value, var_type, endian),
+                label,
+            })
+            .collect();
+
+        let end_offset = r.stream_position()?;
+        Ok(Some(Record::ValueLabel(ValueLabelRecord {
+            offsets: label_offset..end_offset,
+            labels,
+            dict_indexes,
+            var_type,
+        })))
+    }
+
+    /// Converts the labels into `String`s using `decoder`.  The values and
+    /// all other fields are carried over unchanged.
+    pub fn decode(self, decoder: &mut Decoder) -> ValueLabelRecord<RawDatum, String> {
+        // `self` is owned, so its parts are moved rather than cloned.
+        let labels = self
+            .labels
+            .into_iter()
+            .map(|ValueLabel { datum, label }| ValueLabel {
+                datum,
+                label: decoder.decode(&label).to_string(),
+            })
+            .collect();
+        ValueLabelRecord {
+            offsets: self.offsets,
+            labels,
+            dict_indexes: self.dict_indexes,
+            var_type: self.var_type,
+        }
+    }
+}
+
+/// A document record, with its lines represented as `S`.
+#[derive(Clone, Debug)]
+pub struct DocumentRecord<S>
+where
+    S: Debug,
+{
+    /// Range of offsets in file.
+    pub offsets: Range<u64>,
+
+    /// The document, as an array of lines.  Raw lines are exactly 80 bytes long
+    /// and are right-padded with spaces without any new-line termination.
+    pub lines: Vec<S>,
+}
+
+pub type RawDocumentLine = RawStrArray<DOC_LINE_LEN>;
+
+/// Length of a line in a document.  Document lines are fixed-length and
+/// padded on the right with spaces.
+pub const DOC_LINE_LEN: usize = 80;
+
+impl DocumentRecord<RawDocumentLine> {
+    /// Maximum number of lines we will accept in a document.  This is simply
+    /// the maximum number that will fit in a 32-bit space.
+    pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
+
+    pub fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
+        let start_offset = r.stream_position()?;
+        let n: u32 = endian.parse(read_bytes(r)?);
+        let n = n as usize;
+        if n > Self::MAX_LINES {
+            Err(Error::BadDocumentLength {
+                offset: start_offset,
+                n,
+                max: Self::MAX_LINES,
+            })
+        } else {
+            let mut lines = Vec::with_capacity(n);
+            for _ in 0..n {
+                lines.push(RawStrArray(read_bytes(r)?));
+            }
+            let end_offset = r.stream_position()?;
+            Ok(Record::Document(DocumentRecord {
+                offsets: start_offset..end_offset,
+                lines,
+            }))
+        }
+    }
+
+    pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
+        DecodedRecord::Document(DocumentRecord {
+            offsets: self.offsets.clone(),
+            lines: self
+                .lines
+                .iter()
+                .map(|s| decoder.decode_slice(&s.0).to_string())
+                .collect(),
+        })
+    }
+}
+
+/// The shape an extension record is expected to have, as checked by
+/// `Extension::check_size`.
+pub struct ExtensionRecord<'a> {
+    /// Expected size of each data element, or `None` to accept any size.
+    pub size: Option<u32>,
+    /// Expected number of data elements, or `None` to accept any count.
+    pub count: Option<u32>,
+    /// Name of the record, used to identify it in warnings.
+    pub name: &'a str,
+}
+
+/// Contents of the integer info extension record.
+///
+/// `IntegerInfoRecord::parse` fills the fields, in declaration order, from
+/// the eight consecutive 32-bit integers in the record's data (the first
+/// three form `version`).
+#[derive(Clone, Debug)]
+pub struct IntegerInfoRecord {
+    /// Offsets of the extension record's data in the file.
+    pub offsets: Range<u64>,
+    /// First three integers in the record.
+    pub version: (i32, i32, i32),
+    /// Fourth integer in the record.
+    pub machine_code: i32,
+    /// Fifth integer in the record.
+    pub floating_point_rep: i32,
+    /// Sixth integer in the record.
+    pub compression_code: i32,
+    /// Seventh integer in the record.
+    pub endianness: i32,
+    /// Eighth integer in the record.
+    pub character_code: i32,
+}
+
+static INTEGER_INFO_RECORD: ExtensionRecord = ExtensionRecord {
+    size: Some(4),
+    count: Some(8),
+    name: "integer record",
+};
+
+impl IntegerInfoRecord {
+    pub fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+        ext.check_size(&INTEGER_INFO_RECORD)?;
+
+        let mut input = &ext.data[..];
+        let data: Vec<i32> = (0..8)
+            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
+            .collect();
+        Ok(Record::IntegerInfo(IntegerInfoRecord {
+            offsets: ext.offsets.clone(),
+            version: (data[0], data[1], data[2]),
+            machine_code: data[3],
+            floating_point_rep: data[4],
+            compression_code: data[5],
+            endianness: data[6],
+            character_code: data[7],
+        }))
+    }
+}
+
+static FLOAT_INFO_RECORD: ExtensionRecord = ExtensionRecord {
+    size: Some(8),
+    count: Some(3),
+    name: "floating point record",
+};
+
+impl FloatInfoRecord {
+    pub fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+        ext.check_size(&FLOAT_INFO_RECORD)?;
+
+        let mut input = &ext.data[..];
+        let data: Vec<f64> = (0..3)
+            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
+            .collect();
+        Ok(Record::FloatInfo(FloatInfoRecord {
+            sysmis: data[0],
+            highest: data[1],
+            lowest: data[2],
+        }))
+    }
+}
+
+/// Contents of the floating point info extension record: the three 64-bit
+/// floats that `FloatInfoRecord::parse` reads from the record, in order.
+#[derive(Clone, Debug)]
+pub struct FloatInfoRecord {
+    /// First value in the record.
+    pub sysmis: f64,
+    /// Second value in the record.
+    pub highest: f64,
+    /// Third value in the record.
+    pub lowest: f64,
+}
+
+#[derive(Clone, Debug)]
+pub struct RawLongNamesRecord(TextRecord);
+
+impl RawLongNamesRecord {
+    pub fn parse(extension: Extension) -> Result<Record, Warning> {
+        Ok(Record::LongNames(Self(TextRecord::parse(
+            extension,
+            "long names record",
+        )?)))
+    }
+    pub fn decode(self, decoder: &mut Decoder) -> LongNamesRecord {
+        let input = decoder.decode(&self.0.text);
+        let mut names = Vec::new();
+        for pair in input.split('\t').filter(|s| !s.is_empty()) {
+            if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&mut decoder.warn)
+            {
+                names.push(long_name);
+            }
+        }
+        LongNamesRecord(names)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct TextRecord {
+    pub offsets: Range<u64>,
+
+    /// The text content of the record.
+    pub text: RawString,
+}
+
+impl TextRecord {
+    pub fn parse(extension: Extension, name: &str) -> Result<TextRecord, Warning> {
+        extension.check_size(&ExtensionRecord {
+            size: Some(1),
+            count: None,
+            name,
+        })?;
+        Ok(Self {
+            offsets: extension.offsets,
+            text: extension.data.into(),
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VeryLongString {
+    pub short_name: Identifier,
+    pub length: u16,
+}
+
+impl VeryLongString {
+    fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Warning> {
+        let Some((short_name, length)) = input.split_once('=') else {
+            return Err(Warning::VeryLongStringMissingDelimiter(input.into()));
+        };
+        let short_name = decoder
+            .new_identifier(short_name)
+            .and_then(Identifier::must_be_ordinary)
+            .map_err(Warning::InvalidLongStringName)?;
+        let length = length
+            .parse()
+            .map_err(|_| Warning::VeryLongStringInvalidLength(input.into()))?;
+        Ok(VeryLongString { short_name, length })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct RawVeryLongStringsRecord(TextRecord);
+
+#[derive(Clone, Debug)]
+pub struct VeryLongStringsRecord(pub Vec<VeryLongString>);
+
+impl RawVeryLongStringsRecord {
+    pub fn parse(extension: Extension) -> Result<Record, Warning> {
+        Ok(Record::VeryLongStrings(Self(TextRecord::parse(
+            extension,
+            "very long strings record",
+        )?)))
+    }
+    pub fn decode(self, decoder: &mut Decoder) -> VeryLongStringsRecord {
+        let input = decoder.decode(&self.0.text);
+        let mut very_long_strings = Vec::new();
+        for tuple in input
+            .split('\0')
+            .map(|s| s.trim_start_matches('\t'))
+            .filter(|s| !s.is_empty())
+        {
+            if let Some(vls) =
+                VeryLongString::parse(decoder, tuple).issue_warning(&mut decoder.warn)
+            {
+                very_long_strings.push(vls)
+            }
+        }
+        VeryLongStringsRecord(very_long_strings)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub enum MultipleResponseType {
+    MultipleDichotomy {
+        value: RawString,
+        labels: CategoryLabels,
+    },
+    MultipleCategory,
+}
+
+impl MultipleResponseType {
+    fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Warning> {
+        let (mr_type, input) = match input.split_first() {
+            Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input),
+            Some((b'D', input)) => {
+                let (value, input) = parse_counted_string(input)?;
+                (
+                    MultipleResponseType::MultipleDichotomy {
+                        value,
+                        labels: CategoryLabels::VarLabels,
+                    },
+                    input,
+                )
+            }
+            Some((b'E', input)) => {
+                let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
+                    (CategoryLabels::CountedValues, rest)
+                } else if let Some(rest) = input.strip_prefix(b" 11 ") {
+                    (CategoryLabels::VarLabels, rest)
+                } else {
+                    return Err(Warning::InvalidMultipleDichotomyLabelType);
+                };
+                let (value, input) = parse_counted_string(input)?;
+                (
+                    MultipleResponseType::MultipleDichotomy { value, labels },
+                    input,
+                )
+            }
+            _ => return Err(Warning::InvalidMultipleResponseType),
+        };
+        Ok((mr_type, input))
+    }
+}
+
+/// One multiple response set, generic over the representation of
+/// identifiers (`I`) and of the label (`S`).
+///
+/// The raw form uses `RawString` for both; decoding yields `Identifier`s and
+/// a `String` label.
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSet<I, S>
+where
+    I: Debug,
+    S: Debug,
+{
+    /// Name of the set (the text before `=` in the record).
+    pub name: I,
+    /// Label of the set.
+    pub label: S,
+    /// Kind of set.
+    pub mr_type: MultipleResponseType,
+    /// Short names of the variables listed for the set.
+    pub short_names: Vec<I>,
+}
+
+impl MultipleResponseSet<RawString, RawString> {
+    fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> {
+        let Some(equals) = input.iter().position(|&b| b == b'=') else {
+            return Err(Warning::MultipleResponseSyntaxError("missing `=`"));
+        };
+        let (name, input) = input.split_at(equals);
+        let input = input.strip_prefix(b"=").unwrap();
+        let (mr_type, input) = MultipleResponseType::parse(input)?;
+        let Some(input) = input.strip_prefix(b" ") else {
+            return Err(Warning::MultipleResponseSyntaxError(
+                "missing space after multiple response type",
+            ));
+        };
+        let (label, mut input) = parse_counted_string(input)?;
+        let mut vars = Vec::new();
+        while input.first() != Some(&b'\n') {
+            match input.split_first() {
+                Some((b' ', rest)) => {
+                    let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else {
+                        return Err(Warning::MultipleResponseSyntaxError(
+                            "missing variable name delimiter",
+                        ));
+                    };
+                    let (var, rest) = rest.split_at(length);
+                    if !var.is_empty() {
+                        vars.push(var.into());
+                    }
+                    input = rest;
+                }
+                _ => {
+                    return Err(Warning::MultipleResponseSyntaxError(
+                        "missing space preceding variable name",
+                    ));
+                }
+            }
+        }
+        while input.first() == Some(&b'\n') {
+            input = &input[1..];
+        }
+        Ok((
+            MultipleResponseSet {
+                name: name.into(),
+                label,
+                mr_type,
+                short_names: vars,
+            },
+            input,
+        ))
+    }
+
+    fn decode(
+        &self,
+        decoder: &mut Decoder,
+    ) -> Result<MultipleResponseSet<Identifier, String>, Warning> {
+        let mut short_names = Vec::with_capacity(self.short_names.len());
+        for short_name in self.short_names.iter() {
+            if let Some(short_name) = decoder
+                .decode_identifier(short_name)
+                .map_err(Warning::InvalidMrSetName)
+                .issue_warning(&mut decoder.warn)
+            {
+                short_names.push(short_name);
+            }
+        }
+        Ok(MultipleResponseSet {
+            name: decoder
+                .decode_identifier(&self.name)
+                .map_err(Warning::InvalidMrSetVariableName)?,
+            label: decoder.decode(&self.label).to_string(),
+            mr_type: self.mr_type.clone(),
+            short_names,
+        })
+    }
+}
+
+/// A multiple response record: a list of multiple response sets, generic
+/// over the representation of identifiers (`I`) and labels (`S`).
+#[derive(Clone, Debug)]
+pub struct MultipleResponseRecord<I, S>(pub Vec<MultipleResponseSet<I, S>>)
+where
+    I: Debug,
+    S: Debug;
+
+/// Expected shape of the multiple response record: 1-byte elements, any
+/// count.
+static MULTIPLE_RESPONSE_RECORD: ExtensionRecord = ExtensionRecord {
+    size: Some(1),
+    count: None,
+    name: "multiple response set record",
+};
+
+impl MultipleResponseRecord<RawString, RawString> {
+    fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
+        ext.check_size(&MULTIPLE_RESPONSE_RECORD)?;
+
+        let mut input = &ext.data[..];
+        let mut sets = Vec::new();
+        loop {
+            while let Some(suffix) = input.strip_prefix(b"\n") {
+                input = suffix;
+            }
+            if input.is_empty() {
+                break;
+            }
+            let (set, rest) = MultipleResponseSet::parse(input)?;
+            sets.push(set);
+            input = rest;
+        }
+        Ok(Record::MultipleResponse(MultipleResponseRecord(sets)))
+    }
+}
+
+impl MultipleResponseRecord<RawString, RawString> {
+    pub fn decode(self, decoder: &mut Decoder) -> DecodedRecord {
+        let mut sets = Vec::new();
+        for set in self.0.iter() {
+            if let Some(set) = set.decode(decoder).issue_warning(&mut decoder.warn) {
+                sets.push(set);
+            }
+        }
+        DecodedRecord::MultipleResponse(MultipleResponseRecord(sets))
+    }
+}
+
+fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> {
+    let Some(space) = input.iter().position(|&b| b == b' ') else {
+        return Err(Warning::CountedStringMissingSpace);
+    };
+    let Ok(length) = from_utf8(&input[..space]) else {
+        return Err(Warning::CountedStringInvalidUTF8);
+    };
+    let Ok(length): Result<usize, _> = length.parse() else {
+        return Err(Warning::CountedStringInvalidLength(length.into()));
+    };
+
+    let Some((string, rest)) = input[space + 1..].split_at_checked(length) else {
+        return Err(Warning::CountedStringTooLong(length));
+    };
+    Ok((string.into(), rest))
+}
+
+impl Measure {
+    fn try_decode(source: u32) -> Result<Option<Measure>, Warning> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Measure::Nominal)),
+            2 => Ok(Some(Measure::Ordinal)),
+            3 => Ok(Some(Measure::Scale)),
+            _ => Err(Warning::InvalidMeasurement(source)),
+        }
+    }
+}
+
+impl Alignment {
+    fn try_decode(source: u32) -> Result<Option<Alignment>, Warning> {
+        match source {
+            0 => Ok(Some(Alignment::Left)),
+            1 => Ok(Some(Alignment::Right)),
+            2 => Ok(Some(Alignment::Center)),
+            _ => Err(Warning::InvalidAlignment(source)),
+        }
+    }
+}
+
+/// Display parameters for one variable, as read from the variable display
+/// record.
+#[derive(Clone, Debug)]
+pub struct VarDisplay {
+    /// Measurement level, or `None` if the record gave none or an invalid
+    /// one.
+    pub measure: Option<Measure>,
+    /// Display width, present only if the record includes widths.
+    pub width: Option<u32>,
+    /// Alignment, or `None` if the record gave an invalid one.
+    pub alignment: Option<Alignment>,
+}
+
+/// The variable display record: one [`VarDisplay`] per variable.
+#[derive(Clone, Debug)]
+pub struct VarDisplayRecord(pub Vec<VarDisplay>);
+
+impl VarDisplayRecord {
+    fn parse(
+        ext: &Extension,
+        var_types: &VarTypes,
+        endian: Endian,
+        warn: &mut dyn FnMut(Warning),
+    ) -> Result<Record, Warning> {
+        if ext.size != 4 {
+            return Err(Warning::BadRecordSize {
+                offset: ext.offsets.start,
+                record: String::from("variable display record"),
+                size: ext.size,
+                expected_size: 4,
+            });
+        }
+
+        let n_vars = var_types.n_vars();
+        let has_width = if ext.count as usize == 3 * n_vars {
+            true
+        } else if ext.count as usize == 2 * n_vars {
+            false
+        } else {
+            return Err(Warning::InvalidVariableDisplayCount {
+                count: ext.count as usize,
+                first: 2 * n_vars,
+                second: 3 * n_vars,
+            });
+        };
+
+        let mut var_displays = Vec::new();
+        let mut input = &ext.data[..];
+        for _ in 0..n_vars {
+            let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
+                .issue_warning(warn)
+                .flatten();
+            let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap()));
+            let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
+                .issue_warning(warn)
+                .flatten();
+            var_displays.push(VarDisplay {
+                measure,
+                width,
+                alignment,
+            });
+        }
+        Ok(Record::VarDisplay(VarDisplayRecord(var_displays)))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValues<N>
+where
+    N: Debug,
+{
+    /// Variable name.
+    pub var_name: N,
+
+    /// Missing values.
+    pub missing_values: Vec<RawStrArray<8>>,
+}
+
+impl LongStringMissingValues<RawString> {
+    fn decode(
+        &self,
+        decoder: &mut Decoder,
+    ) -> Result<LongStringMissingValues<Identifier>, IdError> {
+        Ok(LongStringMissingValues {
+            var_name: decoder.decode_identifier(&self.var_name)?,
+            missing_values: self.missing_values.clone(),
+        })
+    }
+}
+
+/// The long string missing values record: one entry per variable, generic
+/// over the representation `N` of variable names.
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValueRecord<N>(pub Vec<LongStringMissingValues<N>>)
+where
+    N: Debug;
+
+/// Expected shape of the long string missing values record: 1-byte
+/// elements, any count.
+static LONG_STRING_MISSING_VALUE_RECORD: ExtensionRecord = ExtensionRecord {
+    size: Some(1),
+    count: None,
+    name: "long string missing values record",
+};
+
+impl LongStringMissingValueRecord<RawString> {
+    fn parse(
+        ext: &Extension,
+        endian: Endian,
+        warn: &mut dyn FnMut(Warning),
+    ) -> Result<Record, Warning> {
+        ext.check_size(&LONG_STRING_MISSING_VALUE_RECORD)?;
+
+        let mut input = &ext.data[..];
+        let mut missing_value_set = Vec::new();
+        while !input.is_empty() {
+            let var_name = read_string(&mut input, endian)?;
+            dbg!(&var_name);
+            let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
+            let value_len: u32 = endian.parse(read_bytes(&mut input)?);
+            if value_len != 8 {
+                let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
+                warn(Warning::BadLongMissingValueLength {
+                    record_offset: ext.offsets.start,
+                    offset,
+                    value_len,
+                });
+                read_vec(
+                    &mut input,
+                    dbg!(value_len as usize * n_missing_values as usize),
+                )?;
+                continue;
+            }
+            let mut missing_values = Vec::new();
+            for i in 0..n_missing_values {
+                if i > 0 {
+                    // Tolerate files written by old, buggy versions of PSPP
+                    // where we believed that the value_length was repeated
+                    // before each missing value.
+                    let mut peek = input;
+                    let number: u32 = endian.parse(read_bytes(&mut peek)?);
+                    if number == 8 {
+                        input = peek;
+                    }
+                }
+
+                let value: [u8; 8] = read_bytes(&mut input)?;
+                missing_values.push(RawStrArray(value));
+            }
+            missing_value_set.push(LongStringMissingValues {
+                var_name,
+                missing_values,
+            });
+        }
+        Ok(Record::LongStringMissingValues(
+            LongStringMissingValueRecord(missing_value_set),
+        ))
+    }
+}
+
+impl LongStringMissingValueRecord<RawString> {
+    pub fn decode(self, decoder: &mut Decoder) -> LongStringMissingValueRecord<Identifier> {
+        let mut mvs = Vec::with_capacity(self.0.len());
+        for mv in self.0.iter() {
+            if let Some(mv) = mv
+                .decode(decoder)
+                .map_err(Warning::InvalidLongStringMissingValueVariableName)
+                .issue_warning(&mut decoder.warn)
+            {
+                mvs.push(mv);
+            }
+        }
+        LongStringMissingValueRecord(mvs)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct EncodingRecord(pub String);
+
+static ENCODING_RECORD: ExtensionRecord = ExtensionRecord {
+    size: Some(1),
+    count: None,
+    name: "encoding record",
+};
+
+impl EncodingRecord {
+    fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
+        ext.check_size(&ENCODING_RECORD)?;
+
+        Ok(Record::Encoding(EncodingRecord(
+            String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName {
+                offset: ext.offsets.start,
+            })?,
+        )))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct NumberOfCasesRecord {
+    /// Always observed as 1.
+    pub one: u64,
+
+    /// Number of cases.
+    pub n_cases: u64,
+}
+
+static NUMBER_OF_CASES_RECORD: ExtensionRecord = ExtensionRecord {
+    size: Some(8),
+    count: Some(2),
+    name: "extended number of cases record",
+};
+
+impl NumberOfCasesRecord {
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+        ext.check_size(&NUMBER_OF_CASES_RECORD)?;
+
+        let mut input = &ext.data[..];
+        let one = endian.parse(read_bytes(&mut input)?);
+        let n_cases = endian.parse(read_bytes(&mut input)?);
+
+        Ok(Record::NumberOfCases(NumberOfCasesRecord { one, n_cases }))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct RawVariableSetRecord(TextRecord);
+
+impl RawVariableSetRecord {
+    fn parse(extension: Extension) -> Result<Record, Warning> {
+        Ok(Record::VariableSets(Self(TextRecord::parse(
+            extension,
+            "variable sets record",
+        )?)))
+    }
+    pub fn decode(self, decoder: &mut Decoder) -> VariableSetRecord {
+        let mut sets = Vec::new();
+        let input = decoder.decode(&self.0.text);
+        for line in input.lines() {
+            if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&mut decoder.warn) {
+                sets.push(set)
+            }
+        }
+        VariableSetRecord {
+            offsets: self.0.offsets,
+            sets,
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct RawProductInfoRecord(TextRecord);
+
+impl RawProductInfoRecord {
+    fn parse(extension: Extension) -> Result<Record, Warning> {
+        Ok(Record::ProductInfo(Self(TextRecord::parse(
+            extension,
+            "product info record",
+        )?)))
+    }
+    pub fn decode(self, decoder: &mut Decoder) -> ProductInfoRecord {
+        ProductInfoRecord(decoder.decode(&self.0.text).into())
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct Attribute {
+    pub name: Identifier,
+    pub values: Vec<String>,
+}
+
+impl Attribute {
+    fn parse<'a>(decoder: &mut Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> {
+        let Some((name, mut input)) = input.split_once('(') else {
+            return Err(Warning::AttributeMissingLParen(input.into()));
+        };
+        let name = decoder
+            .new_identifier(name)
+            .map_err(Warning::InvalidAttributeName)?;
+        let mut values = Vec::new();
+        loop {
+            let Some((value, rest)) = input.split_once('\n') else {
+                return Err(Warning::AttributeMissingValue {
+                    name: name.clone(),
+                    index: values.len(),
+                });
+            };
+            if let Some(stripped) = value
+                .strip_prefix('\'')
+                .and_then(|value| value.strip_suffix('\''))
+            {
+                values.push(stripped.into());
+            } else {
+                decoder.warn(Warning::AttributeMissingQuotes {
+                    name: name.clone(),
+                    index: values.len(),
+                });
+                values.push(value.into());
+            }
+            if let Some(rest) = rest.strip_prefix(')') {
+                let attribute = Attribute { name, values };
+                return Ok((attribute, rest));
+            };
+            input = rest;
+        }
+    }
+}
+
+impl Attributes {
+    fn parse<'a>(
+        decoder: &mut Decoder,
+        mut input: &'a str,
+        sentinel: Option<char>,
+    ) -> Result<(Attributes, &'a str, Vec<Identifier>), Warning> {
+        let mut attributes = BTreeMap::new();
+        let mut duplicates = Vec::new();
+        let rest = loop {
+            match input.chars().next() {
+                None => break input,
+                c if c == sentinel => break &input[1..],
+                _ => {
+                    let (attribute, rest) = Attribute::parse(decoder, input)?;
+                    if attributes.contains_key(&attribute.name) {
+                        duplicates.push(attribute.name.clone());
+                    }
+                    attributes.insert(attribute.name, attribute.values);
+                    input = rest;
+                }
+            }
+        };
+        Ok((Attributes(attributes), rest, duplicates))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct RawFileAttributesRecord(TextRecord);
+
+#[derive(Clone, Debug, Default)]
+pub struct FileAttributesRecord(pub Attributes);
+
+impl RawFileAttributesRecord {
+    fn parse(extension: Extension) -> Result<Record, Warning> {
+        Ok(Record::FileAttributes(Self(TextRecord::parse(
+            extension,
+            "file attributes record",
+        )?)))
+    }
+    pub fn decode(self, decoder: &mut Decoder) -> FileAttributesRecord {
+        let input = decoder.decode(&self.0.text);
+        match Attributes::parse(decoder, &input, None).issue_warning(&mut decoder.warn) {
+            Some((set, rest, duplicates)) => {
+                if !duplicates.is_empty() {
+                    decoder.warn(Warning::DuplicateFileAttributes {
+                        attributes: duplicates,
+                    });
+                }
+                if !rest.is_empty() {
+                    decoder.warn(dbg!(Warning::TBD));
+                }
+                FileAttributesRecord(set)
+            }
+            None => FileAttributesRecord::default(),
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VarAttributes {
+    pub long_var_name: Identifier,
+    pub attributes: Attributes,
+}
+
+impl VarAttributes {
+    fn parse<'a>(
+        decoder: &mut Decoder,
+        input: &'a str,
+    ) -> Result<(VarAttributes, &'a str), Warning> {
+        let Some((long_var_name, rest)) = input.split_once(':') else {
+            return Err(dbg!(Warning::TBD));
+        };
+        let long_var_name = decoder
+            .new_identifier(long_var_name)
+            .and_then(Identifier::must_be_ordinary)
+            .map_err(Warning::InvalidAttributeVariableName)?;
+        let (attributes, rest, duplicates) = Attributes::parse(decoder, rest, Some('/'))?;
+        if !duplicates.is_empty() {
+            decoder.warn(Warning::DuplicateVariableAttributes {
+                variable: long_var_name.clone(),
+                attributes: duplicates,
+            });
+        }
+        let var_attribute = VarAttributes {
+            long_var_name,
+            attributes,
+        };
+        Ok((var_attribute, rest))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct RawVariableAttributesRecord(TextRecord);
+
+#[derive(Clone, Debug)]
+pub struct VariableAttributesRecord(pub Vec<VarAttributes>);
+
+impl RawVariableAttributesRecord {
+    fn parse(extension: Extension) -> Result<Record, Warning> {
+        Ok(Record::VariableAttributes(Self(TextRecord::parse(
+            extension,
+            "variable attributes record",
+        )?)))
+    }
+    pub fn decode(self, decoder: &mut Decoder) -> VariableAttributesRecord {
+        let decoded = decoder.decode(&self.0.text);
+        let mut input = decoded.as_ref();
+        let mut var_attribute_sets = Vec::new();
+        while !input.is_empty() {
+            let Some((var_attribute, rest)) =
+                VarAttributes::parse(decoder, input).issue_warning(&mut decoder.warn)
+            else {
+                break;
+            };
+            var_attribute_sets.push(var_attribute);
+            input = rest;
+        }
+        VariableAttributesRecord(var_attribute_sets)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongName {
+    pub short_name: Identifier,
+    pub long_name: Identifier,
+}
+
+impl LongName {
+    fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
+        let Some((short_name, long_name)) = input.split_once('=') else {
+            return Err(dbg!(Warning::LongNameMissingEquals));
+        };
+        let short_name = decoder
+            .new_identifier(short_name)
+            .and_then(Identifier::must_be_ordinary)
+            .map_err(Warning::InvalidShortName)?;
+        let long_name = decoder
+            .new_identifier(long_name)
+            .and_then(Identifier::must_be_ordinary)
+            .map_err(Warning::InvalidLongName)?;
+        Ok(LongName {
+            short_name,
+            long_name,
+        })
+    }
+}
+
+/// The decoded long names record: one short-to-long name mapping per
+/// successfully parsed pair.
+#[derive(Clone, Debug)]
+pub struct LongNamesRecord(pub Vec<LongName>);
+
+/// The decoded product info record: the record's text.
+#[derive(Clone, Debug)]
+pub struct ProductInfoRecord(pub String);
+
+#[derive(Clone, Debug)]
+pub struct VariableSet {
+    pub name: String,
+    pub variable_names: Vec<Identifier>,
+}
+
+impl VariableSet {
+    fn parse(input: &str, decoder: &mut Decoder) -> Result<Self, Warning> {
+        let (name, input) = input
+            .split_once('=')
+            .ok_or(Warning::VariableSetMissingEquals)?;
+        let mut vars = Vec::new();
+        for var in input.split_ascii_whitespace() {
+            if let Some(identifier) = decoder
+                .new_identifier(var)
+                .and_then(Identifier::must_be_ordinary)
+                .map_err(Warning::InvalidVariableSetName)
+                .issue_warning(&mut decoder.warn)
+            {
+                vars.push(identifier);
+            }
+        }
+        Ok(VariableSet {
+            name: name.to_string(),
+            variable_names: vars,
+        })
+    }
+}
+
+/// The decoded variable sets record.
+#[derive(Clone, Debug)]
+pub struct VariableSetRecord {
+    /// Offsets of the extension record's data in the file.
+    pub offsets: Range<u64>,
+    /// The variable sets that parsed successfully, in record order.
+    pub sets: Vec<VariableSet>,
+}
+
+trait IssueWarning<T> {
+    fn issue_warning(self, warn: &mut dyn FnMut(Warning)) -> Option<T>;
+}
+impl<T> IssueWarning<T> for Result<T, Warning> {
+    fn issue_warning(self, warn: &mut dyn FnMut(Warning)) -> Option<T> {
+        match self {
+            Ok(result) => Some(result),
+            Err(error) => {
+                warn(error);
+                None
+            }
+        }
+    }
+}
+
+/// An extension record as read from the file, before its data has been
+/// interpreted according to its subtype.
+#[derive(Clone, Debug)]
+pub struct Extension {
+    /// Offsets of the record's data in the file (the header fields are not
+    /// included).
+    pub offsets: Range<u64>,
+
+    /// Record subtype.
+    pub subtype: u32,
+
+    /// Size of each data element.
+    pub size: u32,
+
+    /// Number of data elements.
+    pub count: u32,
+
+    /// `size * count` bytes of data.
+    pub data: Vec<u8>,
+}
+
+impl Extension {
+    pub fn check_size(&self, expected: &ExtensionRecord) -> Result<(), Warning> {
+        match expected.size {
+            Some(expected_size) if self.size != expected_size => {
+                return Err(Warning::BadRecordSize {
+                    offset: self.offsets.start,
+                    record: expected.name.into(),
+                    size: self.size,
+                    expected_size,
+                });
+            }
+            _ => (),
+        }
+        match expected.count {
+            Some(expected_count) if self.count != expected_count => {
+                return Err(Warning::BadRecordCount {
+                    offset: self.offsets.start,
+                    record: expected.name.into(),
+                    count: self.count,
+                    expected_count,
+                });
+            }
+            _ => (),
+        }
+        Ok(())
+    }
+
+    pub fn read<R: Read + Seek>(
+        r: &mut R,
+        endian: Endian,
+        var_types: &VarTypes,
+        warn: &mut dyn FnMut(Warning),
+    ) -> Result<Option<Record>, Error> {
+        let subtype = endian.parse(read_bytes(r)?);
+        let header_offset = r.stream_position()?;
+        let size: u32 = endian.parse(read_bytes(r)?);
+        let count = endian.parse(read_bytes(r)?);
+        let Some(product) = size.checked_mul(count) else {
+            return Err(Error::ExtensionRecordTooLarge {
+                offset: header_offset,
+                subtype,
+                size,
+                count,
+            });
+        };
+        let start_offset = r.stream_position()?;
+        let data = read_vec(r, product as usize)?;
+        let end_offset = start_offset + product as u64;
+        let extension = Extension {
+            offsets: start_offset..end_offset,
+            subtype,
+            size,
+            count,
+            data,
+        };
+        let result = match subtype {
+            3 => IntegerInfoRecord::parse(&extension, endian),
+            4 => FloatInfoRecord::parse(&extension, endian),
+            11 => VarDisplayRecord::parse(&extension, var_types, endian, warn),
+            7 | 19 => MultipleResponseRecord::parse(&extension, endian),
+            21 => LongStringValueLabelRecord::parse(&extension, endian),
+            22 => LongStringMissingValueRecord::parse(&extension, endian, warn),
+            20 => EncodingRecord::parse(&extension, endian),
+            16 => NumberOfCasesRecord::parse(&extension, endian),
+            5 => RawVariableSetRecord::parse(extension),
+            10 => RawProductInfoRecord::parse(extension),
+            13 => RawLongNamesRecord::parse(extension),
+            14 => RawVeryLongStringsRecord::parse(extension),
+            17 => RawFileAttributesRecord::parse(extension),
+            18 => RawVariableAttributesRecord::parse(extension),
+            _ => Ok(Record::OtherExtension(extension)),
+        };
+        match result {
+            Ok(result) => Ok(Some(result)),
+            Err(error) => {
+                warn(error);
+                Ok(None)
+            }
+        }
+    }
+}
+
+/// One variable's set of value labels within a
+/// [LongStringValueLabelRecord].
+///
+/// `N` is the type of the variable name and `S` is the type of each label.
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabels<N, S>
+where
+    S: Debug,
+{
+    /// Name of the variable that the labels belong to.
+    pub var_name: N,
+
+    /// Width of the values in `labels`, in bytes.
+    pub width: u32,
+
+    /// `(value, label)` pairs, where each value is `width` bytes.
+    pub labels: Vec<(RawString, S)>,
+}
+
+impl LongStringValueLabels<RawString, RawString> {
+    fn decode(
+        &self,
+        decoder: &mut Decoder,
+    ) -> Result<LongStringValueLabels<Identifier, String>, Warning> {
+        let var_name = decoder.decode(&self.var_name);
+        let var_name = Identifier::from_encoding(var_name.trim_end(), decoder.encoding)
+            .map_err(Warning::InvalidLongStringValueLabelName)?;
+
+        let mut labels = Vec::with_capacity(self.labels.len());
+        for (value, label) in self.labels.iter() {
+            let label = decoder.decode(label).to_string();
+            labels.push((value.clone(), label));
+        }
+
+        Ok(LongStringValueLabels {
+            var_name,
+            width: self.width,
+            labels,
+        })
+    }
+}
+
+/// A long string value labels record: a list of per-variable label sets.
+///
+/// `N` is the type of the variable names and `S` is the type of the labels;
+/// see [LongStringValueLabels].
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabelRecord<N, S>(pub Vec<LongStringValueLabels<N, S>>)
+where
+    N: Debug,
+    S: Debug;
+
+/// Constraints that [Extension::check_size] applies to a long string value
+/// labels record: the element size must be 1, and any element count is
+/// accepted.
+static LONG_STRING_VALUE_LABEL_RECORD: ExtensionRecord = ExtensionRecord {
+    size: Some(1),
+    count: None,
+    name: "long string value labels record",
+};
+
+impl LongStringValueLabelRecord<RawString, RawString> {
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+        ext.check_size(&LONG_STRING_VALUE_LABEL_RECORD)?;
+
+        let mut input = &ext.data[..];
+        let mut label_set = Vec::new();
+        while !input.is_empty() {
+            let var_name = read_string(&mut input, endian)?;
+            let width: u32 = endian.parse(read_bytes(&mut input)?);
+            let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
+            let mut labels = Vec::new();
+            for _ in 0..n_labels {
+                let value = read_string(&mut input, endian)?;
+                let label = read_string(&mut input, endian)?;
+                labels.push((value, label));
+            }
+            label_set.push(LongStringValueLabels {
+                var_name,
+                width,
+                labels,
+            })
+        }
+        Ok(Record::LongStringValueLabels(LongStringValueLabelRecord(
+            label_set,
+        )))
+    }
+}
+
+impl LongStringValueLabelRecord<RawString, RawString> {
+    pub fn decode(self, decoder: &mut Decoder) -> LongStringValueLabelRecord<Identifier, String> {
+        let mut labels = Vec::with_capacity(self.0.len());
+        for label in &self.0 {
+            match label.decode(decoder) {
+                Ok(set) => labels.push(set),
+                Err(error) => decoder.warn(error),
+            }
+        }
+        LongStringValueLabelRecord(labels)
+    }
+}
+
+/// ZLIB data header record, as read and validated by [ZHeader::read].
+#[derive(Clone, Debug)]
+pub struct ZHeader {
+    /// File offset to the start of the record.
+    pub offset: u64,
+
+    /// File offset to the ZLIB data header.  [ZHeader::read] requires this to
+    /// equal `offset`.
+    pub zheader_offset: u64,
+
+    /// File offset to the ZLIB trailer.  [ZHeader::read] rejects a value less
+    /// than `offset`.
+    pub ztrailer_offset: u64,
+
+    /// Length of the ZLIB trailer in bytes.  [ZHeader::read] requires a
+    /// positive multiple of 24.
+    pub ztrailer_len: u64,
+}
+
+impl ZHeader {
+    pub fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
+        let offset = r.stream_position()?;
+        let zheader_offset: u64 = endian.parse(read_bytes(r)?);
+        let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
+        let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
+
+        if zheader_offset != offset {
+            return Err(Error::UnexpectedZHeaderOffset {
+                actual: zheader_offset,
+                expected: offset,
+            });
+        }
+
+        if ztrailer_offset < offset {
+            return Err(Error::ImpossibleZTrailerOffset(ztrailer_offset));
+        }
+
+        if ztrailer_len < 24 || ztrailer_len % 24 != 0 {
+            return Err(Error::InvalidZTrailerLength(ztrailer_len));
+        }
+
+        Ok(ZHeader {
+            offset,
+            zheader_offset,
+            ztrailer_offset,
+            ztrailer_len,
+        })
+    }
+}
+
+/// ZLIB trailer record, as read and validated by [ZTrailer::read].
+#[derive(Clone, Debug)]
+pub struct ZTrailer {
+    /// File offset to the start of the record.
+    pub offset: u64,
+
+    /// Compression bias as a negative integer, e.g. -100.
+    pub int_bias: i64,
+
+    /// Always observed as zero.
+    pub zero: u64,
+
+    /// Uncompressed size of each block, except possibly the last.  Only
+    /// `0x3ff000` has been observed so far.
+    pub block_size: u32,
+
+    /// Block descriptors, always `(ztrailer_len - 24) / 24` of them.
+    pub blocks: Vec<ZBlock>,
+}
+
+/// Descriptor for one block of compressed data, as listed in
+/// [ZTrailer::blocks].
+#[derive(Clone, Debug)]
+pub struct ZBlock {
+    /// Offset of block of data if simple compression were used.
+    pub uncompressed_ofs: u64,
+
+    /// Actual offset within the file of the compressed data block.
+    pub compressed_ofs: u64,
+
+    /// The number of bytes in this data block after decompression.  This is
+    /// `block_size` in every data block but the last, which may be smaller.
+    pub uncompressed_size: u32,
+
+    /// The number of bytes in this data block, as stored compressed in this
+    /// file.
+    pub compressed_size: u32,
+}
+
+impl ZBlock {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZBlock, Error> {
+        Ok(ZBlock {
+            uncompressed_ofs: endian.parse(read_bytes(r)?),
+            compressed_ofs: endian.parse(read_bytes(r)?),
+            uncompressed_size: endian.parse(read_bytes(r)?),
+            compressed_size: endian.parse(read_bytes(r)?),
+        })
+    }
+}
+
+impl ZTrailer {
+    pub fn read<R: Read + Seek>(
+        reader: &mut R,
+        endian: Endian,
+        bias: f64,
+        zheader: &ZHeader,
+        warn: &mut dyn FnMut(Warning),
+    ) -> Result<Option<ZTrailer>, Error> {
+        let start_offset = reader.stream_position()?;
+        if reader
+            .seek(SeekFrom::Start(zheader.ztrailer_offset))
+            .is_err()
+        {
+            return Ok(None);
+        }
+        let int_bias = endian.parse(read_bytes(reader)?);
+        if int_bias as f64 != -bias {
+            return Err(Error::WrongZlibTrailerBias {
+                actual: int_bias,
+                expected: -bias,
+            });
+        }
+        let zero = endian.parse(read_bytes(reader)?);
+        if zero != 0 {
+            return Err(Error::WrongZlibTrailerZero(zero));
+        }
+        let block_size = endian.parse(read_bytes(reader)?);
+        if block_size != 0x3ff000 {
+            return Err(Error::WrongZlibTrailerBlockSize(block_size));
+        }
+        let n_blocks: u32 = endian.parse(read_bytes(reader)?);
+        let expected_n_blocks = (zheader.ztrailer_len - 24) / 24;
+        if n_blocks as u64 != expected_n_blocks {
+            return Err(Error::BadZlibTrailerNBlocks {
+                offset: zheader.ztrailer_offset,
+                n_blocks,
+                expected_n_blocks,
+                ztrailer_len: zheader.ztrailer_len,
+            });
+        }
+        let blocks = (0..n_blocks)
+            .map(|_| ZBlock::read(reader, endian))
+            .collect::<Result<Vec<_>, _>>()?;
+
+        let mut expected_uncmp_ofs = zheader.zheader_offset;
+        let mut expected_cmp_ofs = zheader.zheader_offset + 24;
+        for (index, block) in blocks.iter().enumerate() {
+            if block.uncompressed_ofs != expected_uncmp_ofs {
+                return Err(Error::ZlibTrailerBlockWrongUncmpOfs {
+                    index,
+                    actual: block.uncompressed_ofs,
+                    expected: expected_cmp_ofs,
+                });
+            }
+            if block.compressed_ofs != expected_cmp_ofs {
+                return Err(Error::ZlibTrailerBlockWrongCmpOfs {
+                    index,
+                    actual: block.compressed_ofs,
+                    expected: expected_cmp_ofs,
+                });
+            }
+            if index < blocks.len() - 1 {
+                if block.uncompressed_size != block_size {
+                    warn(Warning::ZlibTrailerBlockWrongSize {
+                        index,
+                        actual: block.uncompressed_size,
+                        expected: block_size,
+                    });
+                }
+            } else {
+                if block.uncompressed_size > block_size {
+                    warn(Warning::ZlibTrailerBlockTooBig {
+                        index,
+                        actual: block.uncompressed_size,
+                        max_expected: block_size,
+                    });
+                }
+            }
+            // http://www.zlib.net/zlib_tech.html says that the maximum
+            // expansion from compression, with worst-case parameters, is 13.5%
+            // plus 11 bytes.  This code checks for an expansion of more than
+            // 14.3% plus 11 bytes.
+            if block.compressed_size > block.uncompressed_size + block.uncompressed_size / 7 + 11 {
+                return Err(Error::ZlibExpansion {
+                    index,
+                    compressed_size: block.compressed_size,
+                    uncompressed_size: block.uncompressed_size,
+                });
+            }
+
+            expected_cmp_ofs += block.compressed_size as u64;
+            expected_uncmp_ofs += block.uncompressed_size as u64;
+        }
+
+        if expected_cmp_ofs != zheader.ztrailer_offset {
+            return Err(Error::ZlibTrailerOffsetInconsistency {
+                descriptors: expected_cmp_ofs,
+                zheader: zheader.ztrailer_offset,
+            });
+        }
+
+        reader.seek(SeekFrom::Start(start_offset))?;
+        Ok(Some(ZTrailer {
+            offset: zheader.ztrailer_offset,
+            int_bias,
+            zero,
+            block_size,
+            blocks,
+        }))
+    }
+}