variables parsed
[pspp] / rust / src / raw.rs
index f0e8c540c2ac170f6fe18682bd8358ba817bc85d..e8a279f5e848418e0fdb846333cc80f5e0a60ce4 100644 (file)
-use crate::endian::{Endian, Parse, ToBytes};
-use crate::Error;
+use crate::{
+    dictionary::VarWidth,
+    encoding::{default_encoding, get_encoding, Error as EncodingError},
+    endian::{Endian, Parse, ToBytes},
+    identifier::{Error as IdError, Identifier},
+};
 
+use encoding_rs::{mem::decode_latin1, DecoderResult, Encoding};
 use flate2::read::ZlibDecoder;
 use num::Integer;
-use std::str::from_utf8;
 use std::{
-    collections::VecDeque,
+    borrow::Cow,
+    cell::RefCell,
+    cmp::Ordering,
+    collections::{HashMap, VecDeque},
+    fmt::{Debug, Display, Formatter, Result as FmtResult},
     io::{Error as IoError, Read, Seek, SeekFrom},
-    iter::FusedIterator,
+    iter::repeat,
+    mem::take,
+    ops::Range,
+    rc::Rc,
+    str::from_utf8,
 };
+use thiserror::Error as ThisError;
 
-use self::state::State;
+/// Errors reported while reading a system file.
+///
+/// Each variant's `#[error]` attribute supplies its display message.  Most
+/// variants carry the file offset at which the problem was detected.
+#[derive(ThisError, Debug)]
+pub enum Error {
+    #[error("Not an SPSS system file")]
+    NotASystemFile,
 
-#[derive(Copy, Clone, Debug)]
-pub enum Compression {
-    Simple,
-    ZLib,
+    #[error("Invalid magic number {0:?}")]
+    BadMagic([u8; 4]),
+
+    // `#[from]` lets `?` convert an `IoError` into this variant.
+    #[error("I/O error ({0})")]
+    Io(#[from] IoError),
+
+    #[error("Invalid SAV compression code {0}")]
+    InvalidSavCompression(u32),
+
+    #[error("Invalid ZSAV compression code {0}")]
+    InvalidZsavCompression(u32),
+
+    #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
+    BadDocumentLength { offset: u64, n: usize, max: usize },
+
+    #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
+    BadRecordType { offset: u64, rec_type: u32 },
+
+    // NOTE(review): `width` is carried but never interpolated into the
+    // message, unlike the offending values in the other variants -- confirm
+    // whether it should be shown.
+    #[error("In variable record starting at offset {start_offset:#x}, variable width is not in the valid range -1 to 255.")]
+    BadVariableWidth {
+        start_offset: u64,
+        width: i32,
+    },
+
+    #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")]
+    BadVariableLabelCode {
+        start_offset: u64,
+        code_offset: u64,
+        code: u32,
+    },
+
+    #[error(
+        "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
+    )]
+    BadNumericMissingValueCode { offset: u64, code: i32 },
+
+    #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
+    BadStringMissingValueCode { offset: u64, code: i32 },
+
+    #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
+    BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
+
+    #[error("At offset {offset:#x}, following value label record, found record type {rec_type} instead of expected type 4 for variable index record")]
+    ExpectedVarIndexRecord { offset: u64, rec_type: u32 },
+
+    #[error("At offset {offset:#x}, number of variables indexes for value labels ({n}) is greater than the maximum number ({max}).")]
+    TooManyVarIndexes { offset: u64, n: u32, max: u32 },
+
+    #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
+    ExtensionRecordTooLarge {
+        offset: u64,
+        subtype: u32,
+        size: u32,
+        count: u32,
+    },
+
+    #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
+    EofInCase {
+        offset: u64,
+        case_ofs: u64,
+        case_len: usize,
+    },
+
+    #[error(
+        "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
+    )]
+    EofInCompressedCase { offset: u64, case_ofs: u64 },
+
+    #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
+    PartialCompressedCase { offset: u64, case_ofs: u64 },
+
+    #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
+    CompressedNumberExpected { offset: u64, case_ofs: u64 },
+
+    #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
+    CompressedStringExpected { offset: u64, case_ofs: u64 },
+
+    #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
+    BadZlibTrailerNBlocks {
+        offset: u64,
+        n_blocks: u32,
+        expected_n_blocks: u64,
+        ztrailer_len: u64,
+    },
+
+    #[error("{0}")]
+    EncodingError(EncodingError),
+}
+
+/// Problems that are reported through a `warn` callback rather than returned
+/// as an [`Error`].
+///
+/// Each variant's `#[error]` attribute supplies its display message.
+#[derive(ThisError, Debug)]
+pub enum Warning {
+    #[error("Unexpected end of data inside extension record.")]
+    UnexpectedEndOfData,
+
+    #[error("At offset {offset:#x}, at least one valid variable index for value labels is required but none were specified.")]
+    NoVarIndexes { offset: u64 },
+
+    #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", var_type.opposite())]
+    MixedVarTypes {
+        offset: u64,
+        var_type: VarType,
+        wrong_types: Vec<u32>,
+    },
+
+    #[error("At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}]: {invalid:?}")]
+    InvalidVarIndexes {
+        offset: u64,
+        max: usize,
+        invalid: Vec<u32>,
+    },
+
+    #[error("At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}.")]
+    BadRecordSize {
+        offset: u64,
+        record: String,
+        size: u32,
+        expected_size: u32,
+    },
+
+    #[error("At offset {offset:#x}, {record} has bad count {count} instead of the expected {expected_count}.")]
+    BadRecordCount {
+        offset: u64,
+        record: String,
+        count: u32,
+        expected_count: u32,
+    },
+
+    #[error("In long string missing values record starting at offset {record_offset:#x}, value length at offset {offset:#x} is {value_len} instead of the expected 8.")]
+    BadLongMissingValueLength {
+        record_offset: u64,
+        offset: u64,
+        value_len: u32,
+    },
+
+    #[error("The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8.")]
+    BadEncodingName { offset: u64 },
+
+    // XXX This is risky because `text` might be arbitrarily long.
+    #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
+    MalformedString { encoding: String, text: String },
+
+    #[error("Invalid variable measurement level value {0}")]
+    InvalidMeasurement(u32),
+
+    #[error("Invalid variable display alignment value {0}")]
+    InvalidAlignment(u32),
+
+    #[error("Invalid attribute name.  {0}")]
+    InvalidAttributeName(IdError),
+
+    #[error("Invalid variable name in attribute record.  {0}")]
+    InvalidAttributeVariableName(IdError),
+
+    #[error("Invalid short name in long variable name record.  {0}")]
+    InvalidShortName(IdError),
+
+    #[error("Invalid name in long variable name record.  {0}")]
+    InvalidLongName(IdError),
+
+    #[error("Invalid variable name in very long string record.  {0}")]
+    InvalidLongStringName(IdError),
+
+    #[error("Invalid variable name in variable set record.  {0}")]
+    InvalidVariableSetName(IdError),
+
+    #[error("Invalid multiple response set name.  {0}")]
+    InvalidMrSetName(IdError),
+
+    #[error("Invalid multiple response set variable name.  {0}")]
+    InvalidMrSetVariableName(IdError),
+
+    #[error("Invalid variable name in long string missing values record.  {0}")]
+    InvalidLongStringMissingValueVariableName(IdError),
+
+    #[error("Invalid variable name in long string value label record.  {0}")]
+    InvalidLongStringValueLabelName(IdError),
+
+    #[error("{0}")]
+    EncodingError(EncodingError),
+
+    // Placeholder variant; its message carries no detail yet.
+    #[error("Details TBD")]
+    TBD,
+}
+
+/// Converts any I/O error into [`Warning::UnexpectedEndOfData`].
+///
+/// The details of the I/O error are discarded.
+impl From<IoError> for Warning {
+    fn from(_source: IoError) -> Self {
+        Self::UnexpectedEndOfData
+    }
 }
 
+/// A record from a system file whose strings are still in raw form
+/// (`RawString` / `RawStr<8>`), i.e. not yet decoded into `String`s.
+///
+/// [`Record::decode`] converts a value of this type into a
+/// [`DecodedRecord`].
+#[derive(Clone, Debug)]
 pub enum Record {
-    Header(Header),
-    Document(Document),
-    Variable(Variable),
-    ValueLabel(ValueLabel),
-    VarIndexes(VarIndexes),
-    Extension(Extension),
+    Header(HeaderRecord<RawString>),
+    Variable(VariableRecord<RawString, RawStr<8>>),
+    ValueLabel(ValueLabelRecord<RawStr<8>, RawString>),
+    Document(DocumentRecord<RawDocumentLine>),
+    IntegerInfo(IntegerInfoRecord),
+    FloatInfo(FloatInfoRecord),
+    VarDisplay(VarDisplayRecord),
+    MultipleResponse(MultipleResponseRecord<RawString, RawString>),
+    LongStringValueLabels(LongStringValueLabelRecord<RawString, RawString>),
+    LongStringMissingValues(LongStringMissingValueRecord<RawString, RawStr<8>>),
+    Encoding(EncodingRecord),
+    NumberOfCases(NumberOfCasesRecord),
+    Text(TextRecord),
+    OtherExtension(Extension),
+    EndOfHeaders(u32),
+    ZHeader(ZHeader),
+    ZTrailer(ZTrailer),
+    Cases(Rc<RefCell<Cases>>),
+}
+
+/// The result of [`Record::decode`]: a record whose text fields have been
+/// decoded into `String`s or `Identifier`s.
+///
+/// There is no `Text` variant here.  The more specific text-based variants
+/// (`VariableSets`, `ProductInfo`, `LongNames`, ...) are presumably what
+/// decoding a `Record::Text` yields -- confirm against `TextRecord::decode`.
+#[derive(Clone, Debug)]
+pub enum DecodedRecord {
+    Header(HeaderRecord<String>),
+    Variable(VariableRecord<String, String>),
+    ValueLabel(ValueLabelRecord<RawStr<8>, String>),
+    Document(DocumentRecord<String>),
+    IntegerInfo(IntegerInfoRecord),
+    FloatInfo(FloatInfoRecord),
+    VarDisplay(VarDisplayRecord),
+    MultipleResponse(MultipleResponseRecord<Identifier, String>),
+    LongStringValueLabels(LongStringValueLabelRecord<Identifier, String>),
+    LongStringMissingValues(LongStringMissingValueRecord<Identifier, String>),
+    Encoding(EncodingRecord),
+    NumberOfCases(NumberOfCasesRecord),
+    VariableSets(VariableSetRecord),
+    ProductInfo(ProductInfoRecord),
+    LongNames(LongNamesRecord),
+    VeryLongStrings(VeryLongStringsRecord),
+    FileAttributes(FileAttributeRecord),
+    VariableAttributes(VariableAttributeRecord),
+    OtherExtension(Extension),
     EndOfHeaders(u32),
     ZHeader(ZHeader),
     ZTrailer(ZTrailer),
-    Case(Vec<Value>),
+    Cases(Rc<RefCell<Cases>>),
 }
 
 impl Record {
-    fn read<R: Read + Seek>(reader: &mut R, endian: Endian) -> Result<Record, Error> {
+    /// Reads one header-section record from `reader`.
+    ///
+    /// The record type is read as a `u32` in byte order `endian` and then
+    /// dispatched: 2 is a variable record, 3 a value label record, 6 a
+    /// document record, 7 an extension record, and 999 the end-of-headers
+    /// marker.  Any other type yields [`Error::BadRecordType`].
+    ///
+    /// `var_types` and `warn` are forwarded to the value label reader; the
+    /// extension reader receives `var_types.len()` and `warn`.
+    ///
+    /// The value label and extension readers may return `Ok(None)`;
+    /// presumably that means the record was dropped after a warning --
+    /// confirm against those readers.
+    fn read<R>(
+        reader: &mut R,
+        endian: Endian,
+        var_types: &[VarType],
+        warn: &dyn Fn(Warning),
+    ) -> Result<Option<Record>, Error>
+    where
+        R: Read + Seek,
+    {
         let rec_type: u32 = endian.parse(read_bytes(reader)?);
         match rec_type {
-            2 => Ok(Record::Variable(Variable::read(reader, endian)?)),
-            3 => Ok(Record::ValueLabel(ValueLabel::read(reader, endian)?)),
-            4 => Ok(Record::VarIndexes(VarIndexes::read(reader, endian)?)),
-            6 => Ok(Record::Document(Document::read(reader, endian)?)),
-            7 => Ok(Record::Extension(Extension::read(reader, endian)?)),
-            999 => Ok(Record::EndOfHeaders(endian.parse(read_bytes(reader)?))),
+            2 => Ok(Some(VariableRecord::read(reader, endian)?)),
+            3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
+            6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
+            7 => Extension::read(reader, endian, var_types.len(), warn),
+            999 => Ok(Some(Record::EndOfHeaders(
+                endian.parse(read_bytes(reader)?),
+            ))),
+            // The reported offset is the stream position after the record
+            // type has been read, not the position where the type starts.
             _ => Err(Error::BadRecordType {
                 offset: reader.stream_position()?,
                 rec_type,
             }),
         }
     }
+
+    /// Converts this raw record into a [`DecodedRecord`] using `decoder`.
+    ///
+    /// Records that contain text are handed to their own `decode` methods;
+    /// the remaining variants are carried over unchanged.  Every arm visible
+    /// here produces a value, so this function currently always returns `Ok`.
+    pub fn decode(self, decoder: &Decoder) -> Result<DecodedRecord, Error> {
+        Ok(match self {
+            Record::Header(record) => record.decode(decoder),
+            Record::Variable(record) => record.decode(decoder),
+            Record::ValueLabel(record) => DecodedRecord::ValueLabel(record.decode(decoder)),
+            Record::Document(record) => record.decode(decoder),
+            Record::IntegerInfo(record) => DecodedRecord::IntegerInfo(record.clone()),
+            Record::FloatInfo(record) => DecodedRecord::FloatInfo(record.clone()),
+            Record::VarDisplay(record) => DecodedRecord::VarDisplay(record.clone()),
+            Record::MultipleResponse(record) => record.decode(decoder),
+            Record::LongStringValueLabels(record) => {
+                DecodedRecord::LongStringValueLabels(record.decode(decoder))
+            }
+            Record::LongStringMissingValues(record) => {
+                DecodedRecord::LongStringMissingValues(record.decode(decoder))
+            }
+            Record::Encoding(record) => DecodedRecord::Encoding(record.clone()),
+            Record::NumberOfCases(record) => DecodedRecord::NumberOfCases(record.clone()),
+            Record::Text(record) => record.decode(decoder),
+            Record::OtherExtension(record) => DecodedRecord::OtherExtension(record.clone()),
+            Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(record),
+            Record::ZHeader(record) => DecodedRecord::ZHeader(record.clone()),
+            Record::ZTrailer(record) => DecodedRecord::ZTrailer(record.clone()),
+            Record::Cases(record) => DecodedRecord::Cases(record.clone()),
+        })
+    }
+}
+
+/// Chooses the character encoding to use for a file, based on its header
+/// records.
+///
+/// `headers` is scanned for an encoding record and an integer info record;
+/// if either kind occurs more than once, the last occurrence wins.  The
+/// encoding name and character code found (either may be absent) are passed
+/// to `get_encoding`.
+///
+/// `headers` is taken as a slice, so a `&Vec<Record>` argument is still
+/// accepted through deref coercion.
+///
+/// # Errors
+///
+/// Returns [`Error::EncodingError`] if `get_encoding` reports
+/// `EncodingError::Ebcdic`.  Any other encoding error is passed to `warn`
+/// and `default_encoding()` is returned instead.
+pub fn encoding_from_headers(
+    headers: &[Record],
+    warn: &impl Fn(Warning),
+) -> Result<&'static Encoding, Error> {
+    let mut encoding_record = None;
+    let mut integer_info_record = None;
+    for record in headers {
+        match record {
+            Record::Encoding(record) => encoding_record = Some(record),
+            Record::IntegerInfo(record) => integer_info_record = Some(record),
+            _ => (),
+        }
+    }
+    let encoding = encoding_record.map(|record| record.0.as_str());
+    let character_code = integer_info_record.map(|record| record.character_code);
+    match get_encoding(encoding, character_code) {
+        Ok(encoding) => Ok(encoding),
+        Err(err @ EncodingError::Ebcdic) => Err(Error::EncodingError(err)),
+        Err(err) => {
+            warn(Warning::EncodingError(err));
+            // Warn that we're using the default encoding.
+            Ok(default_encoding())
+        }
+    }
+}
+
+/// If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it
+/// decoded as Latin-1 (actually bytes interpreted as Unicode code points).
+///
+/// The valid UTF-8 case borrows from `s` without copying.
+fn default_decode(s: &[u8]) -> Cow<str> {
+    from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
+}
+
+/// How the data in a system file is compressed.
+///
+/// The header reader maps compression code 1 to `Simple` and, for files with
+/// the ZSAV magic number, compression code 2 to `ZLib`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Compression {
+    Simple,
+    ZLib,
+}
+
+/// A record that can report the range of file offsets it occupies.
+trait Header {
+    /// Returns the range of file offsets covered by this record.
+    fn offsets(&self) -> Range<u64>;
 }
 
-pub struct Header {
+#[derive(Clone)]
+pub struct HeaderRecord<S>
+where
+    S: Debug,
+{
+    /// Offset in file.
+    pub offsets: Range<u64>,
+
     /// Magic number.
     pub magic: Magic,
 
     /// Eye-catcher string, product name, in the file's encoding.  Padded
     /// on the right with spaces.
-    pub eye_catcher: [u8; 60],
+    pub eye_catcher: S,
 
     /// Layout code, normally either 2 or 3.
     pub layout_code: u32,
@@ -67,7 +388,7 @@ pub struct Header {
     /// Compression type, if any,
     pub compression: Option<Compression>,
 
-    /// 0-based variable index of the weight variable, or `None` if the file is
+    /// 1-based variable index of the weight variable, or `None` if the file is
     /// unweighted.
     pub weight_index: Option<u32>,
 
@@ -78,24 +399,59 @@ pub struct Header {
     pub bias: f64,
 
     /// `dd mmm yy` in the file's encoding.
-    pub creation_date: [u8; 9],
+    pub creation_date: S,
 
     /// `HH:MM:SS` in the file's encoding.
-    pub creation_time: [u8; 8],
+    pub creation_time: S,
 
     /// File label, in the file's encoding.  Padded on the right with spaces.
-    pub file_label: [u8; 64],
+    pub file_label: S,
 
     /// Endianness of the data in the file header.
     pub endian: Endian,
 }
 
-impl Header {
-    fn read<R: Read>(r: &mut R) -> Result<Header, Error> {
+impl<S> HeaderRecord<S>
+where
+    S: Debug,
+{
+    /// Writes one `name: value` line to `f` for the `Debug` output.
+    ///
+    /// `name` is right-aligned in a 17-column field and `value` is formatted
+    /// with its `Debug` implementation.
+    fn debug_field<T>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult
+    where
+        T: Debug,
+    {
+        writeln!(f, "{name:>17}: {:?}", value)
+    }
+}
+
+/// Formats the header as a title line followed by one line per field.
+///
+/// The `offsets` field is not included in the output.
+impl<S> Debug for HeaderRecord<S>
+where
+    S: Debug,
+{
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        writeln!(f, "File header record:")?;
+        self.debug_field(f, "Magic", self.magic)?;
+        self.debug_field(f, "Product name", &self.eye_catcher)?;
+        self.debug_field(f, "Layout code", self.layout_code)?;
+        self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
+        self.debug_field(f, "Compression", self.compression)?;
+        self.debug_field(f, "Weight index", self.weight_index)?;
+        self.debug_field(f, "Number of cases", self.n_cases)?;
+        self.debug_field(f, "Compression bias", self.bias)?;
+        self.debug_field(f, "Creation date", &self.creation_date)?;
+        self.debug_field(f, "Creation time", &self.creation_time)?;
+        self.debug_field(f, "File label", &self.file_label)?;
+        self.debug_field(f, "Endianness", self.endian)
+    }
+}
+
+impl HeaderRecord<RawString> {
+    /// Reads the file header record from `r`, keeping its strings as raw
+    /// bytes.
+    ///
+    /// The stream position before and after reading is stored in `offsets`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::NotASystemFile`] if the magic number is not
+    /// recognized, [`Error::InvalidZsavCompression`] or
+    /// [`Error::InvalidSavCompression`] if the compression code is invalid
+    /// for the file's magic number, and I/O errors from `r`.
+    fn read<R: Read + Seek>(r: &mut R) -> Result<Self, Error> {
+        let start = r.stream_position()?;
+
         let magic: [u8; 4] = read_bytes(r)?;
         let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
 
-        let eye_catcher: [u8; 60] = read_bytes(r)?;
+        let eye_catcher = RawString(read_vec(r, 60)?);
         let layout_code: [u8; 4] = read_bytes(r)?;
+        // The layout code is normally 2 or 3, so byte-order detection must
+        // probe for both values.  The fallback used to probe for 2 a second
+        // time, which could never succeed where the first probe had failed.
         let endian = Endian::identify_u32(2, layout_code)
-            .or_else(|| Endian::identify_u32(2, layout_code))
+            .or_else(|| Endian::identify_u32(3, layout_code))
@@ -108,27 +464,28 @@ impl Header {
 
         let compression_code: u32 = endian.parse(read_bytes(r)?);
         let compression = match (magic, compression_code) {
-            (Magic::ZSAV, 2) => Some(Compression::ZLib),
-            (Magic::ZSAV, code) => return Err(Error::InvalidZsavCompression(code)),
+            (Magic::Zsav, 2) => Some(Compression::ZLib),
+            (Magic::Zsav, code) => return Err(Error::InvalidZsavCompression(code)),
             (_, 0) => None,
             (_, 1) => Some(Compression::Simple),
             (_, code) => return Err(Error::InvalidSavCompression(code)),
         };
 
         let weight_index: u32 = endian.parse(read_bytes(r)?);
-        let weight_index = (weight_index > 0).then_some(weight_index - 1);
+        // Zero means the file is unweighted; otherwise the 1-based index is
+        // kept as read.
+        let weight_index = (weight_index > 0).then_some(weight_index);
 
         let n_cases: u32 = endian.parse(read_bytes(r)?);
+        // Counts of `i32::MAX / 2` or more are treated as unknown.
         let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
 
         let bias: f64 = endian.parse(read_bytes(r)?);
 
-        let creation_date: [u8; 9] = read_bytes(r)?;
-        let creation_time: [u8; 8] = read_bytes(r)?;
-        let file_label: [u8; 64] = read_bytes(r)?;
+        let creation_date = RawString(read_vec(r, 9)?);
+        let creation_time = RawString(read_vec(r, 8)?);
+        let file_label = RawString(read_vec(r, 64)?);
+        // Three more bytes are read and discarded.
         let _: [u8; 3] = read_bytes(r)?;
 
-        Ok(Header {
+        Ok(HeaderRecord {
+            offsets: start..r.stream_position()?,
             magic,
             layout_code,
             nominal_case_size,
@@ -143,205 +500,232 @@ impl Header {
             endian,
         })
     }
+
+    /// Decodes the header's four text fields with `decoder` and returns the
+    /// result as a [`DecodedRecord::Header`].
+    ///
+    /// All other fields are carried over unchanged.
+    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
+        let eye_catcher = decoder.decode(&self.eye_catcher).to_string();
+        let file_label = decoder.decode(&self.file_label).to_string();
+        let creation_date = decoder.decode(&self.creation_date).to_string();
+        let creation_time = decoder.decode(&self.creation_time).to_string();
+        DecodedRecord::Header(HeaderRecord {
+            eye_catcher,
+            weight_index: self.weight_index,
+            n_cases: self.n_cases,
+            file_label,
+            offsets: self.offsets.clone(),
+            magic: self.magic,
+            layout_code: self.layout_code,
+            nominal_case_size: self.nominal_case_size,
+            compression: self.compression,
+            bias: self.bias,
+            creation_date,
+            creation_time,
+            endian: self.endian,
+        })
+    }
+}
+
+/// Pairs a character encoding with a callback for reporting warnings.
+pub struct Decoder {
+    /// Encoding used to decode raw strings.
+    pub encoding: &'static Encoding,
+    /// Callback invoked with each warning.
+    pub warn: Box<dyn Fn(Warning)>,
+}
+
+impl Decoder {
+    /// Creates a decoder for `encoding` that reports warnings by calling
+    /// `warn`.
+    pub fn new<F>(encoding: &'static Encoding, warn: F) -> Self
+    where
+        F: Fn(Warning) + 'static,
+    {
+        Self {
+            encoding,
+            warn: Box::new(warn),
+        }
+    }
+    /// Passes `warning` to the stored callback.
+    fn warn(&self, warning: Warning) {
+        (self.warn)(warning)
+    }
+    /// Decodes `input` from `self.encoding`.
+    ///
+    /// If the input contains malformed sequences, a
+    /// [`Warning::MalformedString`] carrying the encoding name and the
+    /// decoded text is reported, and the decoded text is still returned.
+    fn decode_slice<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
+        let (output, malformed) = self.encoding.decode_without_bom_handling(input);
+        if malformed {
+            self.warn(Warning::MalformedString {
+                encoding: self.encoding.name().into(),
+                text: output.clone().into(),
+            });
+        }
+        output
+    }
+
+    /// Decodes the bytes of `input`; see `decode_slice`.
+    fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> {
+        self.decode_slice(input.0.as_slice())
+    }
+
+    /// Returns `input` decoded from `self.encoding` into UTF-8 such that
+    /// re-encoding the result back into `self.encoding` will have exactly the
+    /// same length in bytes.
+    ///
+    /// XXX warn about errors?
+    ///
+    /// # Panics
+    ///
+    /// Panics if re-encoding the result does not give back `input.len()`
+    /// bytes, or if the maximum output buffer length cannot be computed.
+    pub fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
+        if let (s, false) = self.encoding.decode_without_bom_handling(input) {
+            // This is the common case.  Usually there will be no errors.
+            s
+        } else {
+            // Unusual case.  Don't bother to optimize it much.
+            let mut decoder = self.encoding.new_decoder_without_bom_handling();
+            let mut output = String::with_capacity(
+                decoder
+                    .max_utf8_buffer_length_without_replacement(input.len())
+                    .unwrap(),
+            );
+            let mut rest = input;
+            while !rest.is_empty() {
+                match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
+                    (DecoderResult::InputEmpty, _) => break,
+                    (DecoderResult::OutputFull, _) => unreachable!(),
+                    (DecoderResult::Malformed(a, b), consumed) => {
+                        // Per the encoding_rs documentation, `a` is the
+                        // length of the malformed sequence and `b` is the
+                        // number of bytes consumed after it.  One `?` is
+                        // substituted for each of those bytes, and decoding
+                        // resumes after the consumed input.
+                        let skipped = a as usize + b as usize;
+                        output.extend(repeat('?').take(skipped));
+                        rest = &rest[consumed..];
+                    }
+                }
+            }
+            assert_eq!(self.encoding.encode(&output).0.len(), input.len());
+            output.into()
+        }
+    }
+
+    /// Decodes `input` and converts the result into an [`Identifier`].
+    pub fn decode_identifier(&self, input: &RawString) -> Result<Identifier, IdError> {
+        self.new_identifier(&self.decode(input))
+    }
+
+    /// Creates an [`Identifier`] from `name` for `self.encoding`.
+    pub fn new_identifier(&self, name: &str) -> Result<Identifier, IdError> {
+        Identifier::new(name, self.encoding)
+    }
+}
+
+impl<S> Header for HeaderRecord<S>
+where
+    S: Debug,
+{
+    /// Returns a copy of the stored `offsets` range.
+    fn offsets(&self) -> Range<u64> {
+        self.offsets.clone()
+    }
 }
 
+/// The kind of system file, as identified by its 4-byte magic number.
 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
-pub struct Magic([u8; 4]);
+pub enum Magic {
+    /// Regular system file.
+    Sav,
+
+    /// System file with Zlib-compressed data.
+    Zsav,
+
+    /// EBCDIC-encoded system file.
+    Ebcdic,
+}
 
+// Raw 4-byte magic numbers.  `TryFrom<[u8; 4]>` matches against these to
+// produce a `Magic` variant.
 impl Magic {
     /// Magic number for a regular system file.
-    pub const SAV: Magic = Magic(*b"$FL2");
+    pub const SAV: [u8; 4] = *b"$FL2";
 
     /// Magic number for a system file that contains zlib-compressed data.
-    pub const ZSAV: Magic = Magic(*b"$FL3");
+    pub const ZSAV: [u8; 4] = *b"$FL3";
 
-    /// Magic number for an EBDIC-encoded system file.  This is `$FL2` encoded
+    /// Magic number for an EBCDIC-encoded system file.  This is `$FL2` encoded
     /// in EBCDIC.
-    pub const EBCDIC: Magic = Magic([0x5b, 0xc6, 0xd3, 0xf2]);
+    pub const EBCDIC: [u8; 4] = [0x5b, 0xc6, 0xd3, 0xf2];
+}
+
+/// Formats the magic number as its text form: `$FL2`, `$FL3`, or a note
+/// that it is `$FL2` in EBCDIC.
+impl Debug for Magic {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let s = match *self {
+            Magic::Sav => "$FL2",
+            Magic::Zsav => "$FL3",
+            Magic::Ebcdic => "($FL2 in EBCDIC)",
+        };
+        write!(f, "{s}")
+    }
 }
 
+/// Recognizes a 4-byte magic number.
+///
+/// Bytes that match none of `Magic::SAV`, `Magic::ZSAV` and `Magic::EBCDIC`
+/// yield [`Error::BadMagic`] carrying the bytes.
 impl TryFrom<[u8; 4]> for Magic {
     type Error = Error;
 
     fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
-        let magic = Magic(value);
-        match magic {
-            Magic::SAV | Magic::ZSAV | Magic::EBCDIC => Ok(magic),
+        match value {
+            Magic::SAV => Ok(Magic::Sav),
+            Magic::ZSAV => Ok(Magic::Zsav),
+            Magic::EBCDIC => Ok(Magic::Ebcdic),
             _ => Err(Error::BadMagic(value)),
         }
     }
 }
 
-#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+/// Whether a variable is numeric or string.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum VarType {
-    Number,
+    Numeric,
     String,
 }
 
 impl VarType {
-    fn from_width(width: i32) -> VarType {
+    /// Returns the variable type that corresponds to `width`:
+    /// `VarWidth::Numeric` is numeric and any `VarWidth::String` is string.
+    pub fn from_width(width: VarWidth) -> VarType {
         match width {
-            0 => VarType::Number,
-            _ => VarType::String,
-        }
-    }
-}
-
-mod state {
-    use super::{
-        Compression, Error, Header, Record, Value, VarType, Variable, ZHeader, ZTrailer,
-        ZlibDecodeMultiple,
-    };
-    use crate::endian::Endian;
-    use std::{
-        collections::VecDeque,
-        io::{Read, Seek},
-    };
-
-    pub trait State {
-        #[allow(clippy::type_complexity)]
-        fn read(self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error>;
-    }
-
-    struct Start<R: Read + Seek> {
-        reader: R,
-    }
-
-    pub fn new<R: Read + Seek + 'static>(reader: R) -> Box<dyn State> {
-        Box::new(Start { reader })
-    }
-
-    struct CommonState<R: Read + Seek> {
-        reader: R,
-        endian: Endian,
-        bias: f64,
-        compression: Option<Compression>,
-        var_types: Vec<VarType>,
-    }
-
-    impl<R: Read + Seek + 'static> State for Start<R> {
-        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-            let header = Header::read(&mut self.reader)?;
-            let next_state = Headers(CommonState {
-                reader: self.reader,
-                endian: header.endian,
-                bias: header.bias,
-                compression: header.compression,
-                var_types: Vec::new(),
-            });
-            Ok(Some((Record::Header(header), Box::new(next_state))))
+            VarWidth::Numeric => Self::Numeric,
+            VarWidth::String(_) => Self::String,
         }
     }
 
-    struct Headers<R: Read + Seek>(CommonState<R>);
-
-    impl<R: Read + Seek + 'static> State for Headers<R> {
-        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-            let record = Record::read(&mut self.0.reader, self.0.endian)?;
-            match record {
-                Record::Variable(Variable { width, .. }) => {
-                    self.0.var_types.push(VarType::from_width(width));
-                }
-                Record::EndOfHeaders(_) => {
-                    let next_state: Box<dyn State> = match self.0.compression {
-                        None => Box::new(Data(self.0)),
-                        Some(Compression::Simple) => Box::new(CompressedData::new(self.0)),
-                        Some(Compression::ZLib) => Box::new(ZlibHeader(self.0)),
-                    };
-                    return Ok(Some((record, next_state)));
-                }
-                _ => (),
-            };
-            Ok(Some((record, self)))
-        }
-    }
-
-    struct ZlibHeader<R: Read + Seek>(CommonState<R>);
-
-    impl<R: Read + Seek + 'static> State for ZlibHeader<R> {
-        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-            let zheader = ZHeader::read(&mut self.0.reader, self.0.endian)?;
-            Ok(Some((Record::ZHeader(zheader), self)))
-        }
-    }
-
-    struct ZlibTrailer<R: Read + Seek>(CommonState<R>, ZHeader);
-
-    impl<R: Read + Seek + 'static> State for ZlibTrailer<R> {
-        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-            let retval = ZTrailer::read(
-                &mut self.0.reader,
-                self.0.endian,
-                self.1.ztrailer_offset,
-                self.1.ztrailer_len,
-            )?;
-            let next_state = Box::new(CompressedData::new(CommonState {
-                reader: ZlibDecodeMultiple::new(self.0.reader),
-                endian: self.0.endian,
-                bias: self.0.bias,
-                compression: self.0.compression,
-                var_types: self.0.var_types,
-            }));
-            match retval {
-                None => next_state.read(),
-                Some(ztrailer) => Ok(Some((Record::ZTrailer(ztrailer), next_state))),
-            }
+    /// Returns the other variable type: string for numeric and numeric for
+    /// string.
+    pub fn opposite(self) -> VarType {
+        match self {
+            Self::Numeric => Self::String,
+            Self::String => Self::Numeric,
         }
     }
+}
 
-    struct Data<R: Read + Seek>(CommonState<R>);
-
-    impl<R: Read + Seek + 'static> State for Data<R> {
-        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-            match Value::read_case(&mut self.0.reader, &self.0.var_types, self.0.endian)? {
-                None => Ok(None),
-                Some(values) => Ok(Some((Record::Case(values), self))),
-            }
+/// Displays the type as the lowercase word `numeric` or `string`.
+impl Display for VarType {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        match self {
+            VarType::Numeric => write!(f, "numeric"),
+            VarType::String => write!(f, "string"),
         }
     }
+}
 
-    struct CompressedData<R: Read + Seek> {
-        common: CommonState<R>,
-        codes: VecDeque<u8>,
-    }
+/// A single data value: either a number or a string of type `S`.
+#[derive(Copy, Clone)]
+pub enum Value<S>
+where
+    S: Debug,
+{
+    /// A numeric value.  `None` is shown as `SYSMIS` in `Debug` output.
+    Number(Option<f64>),
+    /// A string value.
+    String(S),
+}
 
-    impl<R: Read + Seek + 'static> CompressedData<R> {
-        fn new(common: CommonState<R>) -> CompressedData<R> {
-            CompressedData {
-                common,
-                codes: VecDeque::new(),
-            }
-        }
-    }
+/// A value whose string form is an undecoded 8-byte `RawStr`.
+type RawValue = Value<RawStr<8>>;
 
-    impl<R: Read + Seek + 'static> State for CompressedData<R> {
-        fn read(mut self: Box<Self>) -> Result<Option<(Record, Box<dyn State>)>, Error> {
-            match Value::read_compressed_case(
-                &mut self.common.reader,
-                &self.common.var_types,
-                &mut self.codes,
-                self.common.endian,
-                self.common.bias,
-            )? {
-                None => Ok(None),
-                Some(values) => Ok(Some((Record::Case(values), self))),
-            }
+/// Formats a present number or a string with its own `Debug` output, and a
+/// missing number as `SYSMIS`.
+impl<S> Debug for Value<S>
+where
+    S: Debug,
+{
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        match self {
+            Value::Number(Some(number)) => write!(f, "{number:?}"),
+            Value::Number(None) => write!(f, "SYSMIS"),
+            Value::String(s) => write!(f, "{:?}", s),
         }
     }
 }
 
-#[derive(Copy, Clone)]
-pub enum Value {
-    Number(Option<f64>),
-    String([u8; 8]),
-}
+impl RawValue {
+    fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Self, IoError> {
+        Ok(Self::from_raw(
+            &UntypedValue(read_bytes(r)?),
+            var_type,
+            endian,
+        ))
+    }
 
-impl Value {
-    pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
+    pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Self {
         match var_type {
-            VarType::String => Value::String(raw),
-            VarType::Number => {
-                let number: f64 = endian.parse(raw);
+            VarType::String => Value::String(RawStr(raw.0)),
+            VarType::Numeric => {
+                let number: f64 = endian.parse(raw.0);
                 Value::Number((number != -f64::MAX).then_some(number))
             }
         }
@@ -351,7 +735,7 @@ impl Value {
         reader: &mut R,
         var_types: &[VarType],
         endian: Endian,
-    ) -> Result<Option<Vec<Value>>, Error> {
+    ) -> Result<Option<Vec<Self>>, Error> {
         let case_start = reader.stream_position()?;
         let mut values = Vec::with_capacity(var_types.len());
         for (i, &var_type) in var_types.iter().enumerate() {
@@ -367,7 +751,7 @@ impl Value {
                     });
                 }
             };
-            values.push(Value::from_raw(var_type, raw, endian));
+            values.push(Value::from_raw(&UntypedValue(raw), var_type, endian));
         }
         Ok(Some(values))
     }
@@ -378,7 +762,7 @@ impl Value {
         codes: &mut VecDeque<u8>,
         endian: Endian,
         bias: f64,
-    ) -> Result<Option<Vec<Value>>, Error> {
+    ) -> Result<Option<Vec<Self>>, Error> {
         let case_start = reader.stream_position()?;
         let mut values = Vec::with_capacity(var_types.len());
         for (i, &var_type) in var_types.iter().enumerate() {
@@ -401,9 +785,9 @@ impl Value {
                 match code {
                     0 => (),
                     1..=251 => match var_type {
-                        VarType::Number => break Value::Number(Some(code as f64 - bias)),
+                        VarType::Numeric => break Self::Number(Some(code as f64 - bias)),
                         VarType::String => {
-                            break Value::String(endian.to_bytes(code as f64 - bias))
+                            break Self::String(RawStr(endian.to_bytes(code as f64 - bias)))
                         }
                     },
                     252 => {
@@ -417,10 +801,12 @@ impl Value {
                             });
                         }
                     }
-                    253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
+                    253 => {
+                        break Self::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian)
+                    }
                     254 => match var_type {
-                        VarType::String => break Value::String(*b"        "), // XXX EBCDIC
-                        VarType::Number => {
+                        VarType::String => break Self::String(RawStr(*b"        ")), // XXX EBCDIC
+                        VarType::Numeric => {
                             return Err(Error::CompressedStringExpected {
                                 offset: case_start,
                                 case_ofs: reader.stream_position()? - case_start,
@@ -428,7 +814,7 @@ impl Value {
                         }
                     },
                     255 => match var_type {
-                        VarType::Number => break Value::Number(None),
+                        VarType::Numeric => break Self::Number(None),
                         VarType::String => {
                             return Err(Error::CompressedNumberExpected {
                                 offset: case_start,
@@ -442,6 +828,13 @@ impl Value {
         }
         Ok(Some(values))
     }
+
+    fn decode(self, decoder: &Decoder) -> Value<String> {
+        match self {
+            Self::Number(x) => Value::Number(x),
+            Self::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
+        }
+    }
 }
 
 struct ZlibDecodeMultiple<R>
@@ -488,297 +881,826 @@ where
     }
 }
 
-pub struct Reader {
-    state: Option<Box<dyn State>>,
+/// Position of a `Reader` within the system file; it selects what
+/// `Reader::_next` produces on the next call.
+enum ReaderState {
+    /// Nothing has been yielded yet; the already-parsed file header is next.
+    Start,
+    /// Reading header records, up to and including the end-of-headers record.
+    Headers,
+    /// The ZLIB data header is next (entered only for ZLIB-compressed files).
+    ZlibHeader,
+    /// The ZLIB trailer is next; the fields are copied from the ZLIB header.
+    ZlibTrailer {
+        ztrailer_offset: u64,
+        ztrailer_len: u64,
+    },
+    /// The case data is next.
+    Cases,
+    /// Nothing more to yield, either because the cases were handed out or
+    /// because an error was reported.
+    End,
+}
+
+/// Iterator over the raw records of a system file read from `R`.
+pub struct Reader<R>
+where
+    R: Read + Seek + 'static,
+{
+    /// Underlying input.  It is taken out, leaving `None`, when the case
+    /// reader is created.
+    reader: Option<R>,
+    /// Callback passed to `Record::read` for reporting warnings.
+    warn: Box<dyn Fn(Warning)>,
+
+    /// File header record, read when the reader is constructed.
+    header: HeaderRecord<RawString>,
+    /// One entry per variable record read so far, in file order.
+    var_types: Vec<VarType>,
+
+    /// What to read next.
+    state: ReaderState,
+}
 
-impl Reader {
-    pub fn new<R: Read + Seek + 'static>(reader: R) -> Result<Reader, Error> {
-        Ok(Reader {
-            state: Some(state::new(reader)),
+impl<R> Reader<R>
+where
+    R: Read + Seek + 'static,
+{
+    /// Creates a reader over `reader`.
+    ///
+    /// The file header record is read immediately, so a malformed header is
+    /// reported here rather than by the iterator.  `warn` is stored and later
+    /// handed to `Record::read` while the remaining header records are read.
+    pub fn new<F>(mut reader: R, warn: F) -> Result<Self, Error>
+    where
+        F: Fn(Warning) + 'static,
+    {
+        let header = HeaderRecord::read(&mut reader)?;
+        Ok(Self {
+            reader: Some(reader),
+            warn: Box::new(warn),
+            header,
+            var_types: Vec::new(),
+            state: ReaderState::Start,
         })
     }
+    fn cases(&mut self) -> Cases {
+        self.state = ReaderState::End;
+        Cases::new(
+            self.reader.take().unwrap(),
+            take(&mut self.var_types),
+            &self.header,
+        )
+    }
+    /// Produces the next record according to `self.state`, advancing the state
+    /// where appropriate.  Errors are returned as `Some(Err(_))`; the caller
+    /// (`Iterator::next`) is responsible for moving to `End` afterwards.
+    fn _next(&mut self) -> Option<<Self as Iterator>::Item> {
+        match self.state {
+            ReaderState::Start => {
+                // The header was parsed in `new`; just hand out a copy.
+                self.state = ReaderState::Headers;
+                Some(Ok(Record::Header(self.header.clone())))
+            }
+            ReaderState::Headers => {
+                // `Record::read` may return `Ok(None)`; in that case keep
+                // reading until an actual record or an error turns up.
+                let record = loop {
+                    match Record::read(
+                        self.reader.as_mut().unwrap(),
+                        self.header.endian,
+                        self.var_types.as_slice(),
+                        &self.warn,
+                    ) {
+                        Ok(Some(record)) => break record,
+                        Ok(None) => (),
+                        Err(error) => return Some(Err(error)),
+                    }
+                };
+                match record {
+                    // Track each variable record's type: width 0 is numeric,
+                    // anything else is treated as a string.
+                    Record::Variable(VariableRecord { width, .. }) => {
+                        self.var_types.push(if width == 0 {
+                            VarType::Numeric
+                        } else {
+                            VarType::String
+                        });
+                    }
+                    // After the headers, ZLIB-compressed files have a ZLIB
+                    // header; otherwise the cases follow directly.
+                    Record::EndOfHeaders(_) => {
+                        self.state = if let Some(Compression::ZLib) = self.header.compression {
+                            ReaderState::ZlibHeader
+                        } else {
+                            ReaderState::Cases
+                        };
+                    }
+                    _ => (),
+                };
+                Some(Ok(record))
+            }
+            ReaderState::ZlibHeader => {
+                let zheader = match ZHeader::read(self.reader.as_mut().unwrap(), self.header.endian)
+                {
+                    Ok(zheader) => zheader,
+                    Err(error) => return Some(Err(error)),
+                };
+                // Remember where the trailer is so the next call can read it.
+                self.state = ReaderState::ZlibTrailer {
+                    ztrailer_offset: zheader.ztrailer_offset,
+                    ztrailer_len: zheader.ztrailer_len,
+                };
+                Some(Ok(Record::ZHeader(zheader)))
+            }
+            ReaderState::ZlibTrailer {
+                ztrailer_offset,
+                ztrailer_len,
+            } => {
+                // NOTE(review): the state is left as `ZlibTrailer` after a
+                // trailer is returned, so the next call invokes
+                // `ZTrailer::read` again; presumably it then returns
+                // `Ok(None)`, which starts the cases -- confirm.
+                match ZTrailer::read(
+                    self.reader.as_mut().unwrap(),
+                    self.header.endian,
+                    ztrailer_offset,
+                    ztrailer_len,
+                ) {
+                    Ok(None) => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
+                    Ok(Some(ztrailer)) => Some(Ok(Record::ZTrailer(ztrailer))),
+                    Err(error) => Some(Err(error)),
+                }
+            }
+            // `cases()` moves the state to `End`, so this is yielded only once.
+            ReaderState::Cases => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
+            ReaderState::End => None,
+        }
+    }
 }
 
-impl Iterator for Reader {
+/// Yields the records of the file in order, ending after the first error or
+/// after the `Record::Cases` record has been produced.
+impl<R> Iterator for Reader<R>
+where
+    R: Read + Seek + 'static,
+{
     type Item = Result<Record, Error>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        match self.state.take()?.read() {
-            Ok(Some((record, next_state))) => {
-                self.state = Some(next_state);
-                Some(Ok(record))
-            }
-            Ok(None) => None,
-            Err(error) => Some(Err(error)),
+        let retval = self._next();
+        // An error is terminal: once one has been reported, every later call
+        // returns `None`.
+        if matches!(retval, Some(Err(_))) {
+            self.state = ReaderState::End;
         }
+        retval
     }
 }
 
-impl FusedIterator for Reader {}
-
-pub struct Variable {
-    /// Offset from the start of the file to the start of the record.
-    pub offset: u64,
-
-    /// Variable width, in the range -1..=255.
-    pub width: i32,
+trait ReadSeek: Read + Seek {}
+impl<T> ReadSeek for T where T: Read + Seek {}
 
-    /// Variable name, padded on the right with spaces.
-    pub name: [u8; 8],
+/// Iterator over the cases (rows of raw values) that follow the header
+/// records.
+pub struct Cases {
+    /// Input positioned at the case data; wrapped in a ZLIB decoder when the
+    /// header says the file is ZLIB-compressed.
+    reader: Box<dyn ReadSeek>,
+    /// Type of each value in a case, in order.
+    var_types: Vec<VarType>,
+    /// Compression recorded in the file header, if any.
+    compression: Option<Compression>,
+    /// Compression bias from the file header.
+    bias: f64,
+    /// Byte order from the file header.
+    endian: Endian,
+    /// Compression codes carried over between calls to
+    /// `Value::read_compressed_case`.
+    codes: VecDeque<u8>,
+    /// Set once the end of the data or an error has been reached.
+    eof: bool,
+}
 
-    /// Print format.
-    pub print_format: u32,
+impl Debug for Cases {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "Cases")
+    }
+}
 
-    /// Write format.
-    pub write_format: u32,
+impl Cases {
+    fn new<R>(reader: R, var_types: Vec<VarType>, header: &HeaderRecord<RawString>) -> Self
+    where
+        R: Read + Seek + 'static,
+    {
+        Self {
+            reader: if header.compression == Some(Compression::ZLib) {
+                Box::new(ZlibDecodeMultiple::new(reader))
+            } else {
+                Box::new(reader)
+            },
+            var_types,
+            compression: header.compression,
+            bias: header.bias,
+            endian: header.endian,
+            codes: VecDeque::with_capacity(8),
+            eof: false,
+        }
+    }
+}
 
-    /// Missing value code, one of -3, -2, 0, 1, 2, or 3.
-    pub missing_value_code: i32,
+/// Yields one case at a time; stops for good after the end of the data or
+/// after the first error.
+impl Iterator for Cases {
+    type Item = Result<Vec<RawValue>, Error>;
 
-    /// Raw missing values, up to 3 of them.
-    pub missing: Vec<[u8; 8]>,
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.eof {
+            return None;
+        }
 
-    /// Optional variable label.
-    pub label: Option<Vec<u8>>,
+        // Any compression setting uses the compressed-case reader; for ZLIB
+        // files `self.reader` is the `ZlibDecodeMultiple` wrapper set up in
+        // `Cases::new`.
+        let retval = if self.compression.is_some() {
+            Value::read_compressed_case(
+                &mut self.reader,
+                &self.var_types,
+                &mut self.codes,
+                self.endian,
+                self.bias,
+            )
+            .transpose()
+        } else {
+            Value::read_case(&mut self.reader, &self.var_types, self.endian).transpose()
+        };
+        // Remember end-of-data and errors so that later calls return `None`.
+        self.eof = matches!(retval, None | Some(Err(_)));
+        retval
+    }
 }
 
-impl Variable {
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Variable, Error> {
-        let offset = r.stream_position()?;
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct Spec(pub u32);
+
+impl Debug for Spec {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let type_ = format_name(self.0 >> 16);
+        let w = (self.0 >> 8) & 0xff;
+        let d = self.0 & 0xff;
+        write!(f, "{:06x} ({type_}{w}.{d})", self.0)
+    }
+}
+
+fn format_name(type_: u32) -> Cow<'static, str> {
+    match type_ {
+        1 => "A",
+        2 => "AHEX",
+        3 => "COMMA",
+        4 => "DOLLAR",
+        5 => "F",
+        6 => "IB",
+        7 => "PIBHEX",
+        8 => "P",
+        9 => "PIB",
+        10 => "PK",
+        11 => "RB",
+        12 => "RBHEX",
+        15 => "Z",
+        16 => "N",
+        17 => "E",
+        20 => "DATE",
+        21 => "TIME",
+        22 => "DATETIME",
+        23 => "ADATE",
+        24 => "JDATE",
+        25 => "DTIME",
+        26 => "WKDAY",
+        27 => "MONTH",
+        28 => "MOYR",
+        29 => "QYR",
+        30 => "WKYR",
+        31 => "PCT",
+        32 => "DOT",
+        33 => "CCA",
+        34 => "CCB",
+        35 => "CCC",
+        36 => "CCD",
+        37 => "CCE",
+        38 => "EDATE",
+        39 => "SDATE",
+        40 => "MTIME",
+        41 => "YMDHMS",
+        _ => return format!("<unknown format {type_}>").into(),
+    }
+    .into()
+}
+
+#[derive(Clone)]
+pub struct MissingValues<S = String>
+where
+    S: Debug,
+{
+    /// Individual missing values, up to 3 of them.
+    pub values: Vec<Value<S>>,
+
+    /// Optional range of missing values.
+    pub range: Option<(Value<S>, Value<S>)>,
+}
+
+impl<S> Debug for MissingValues<S>
+where
+    S: Debug,
+{
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        for (i, value) in self.values.iter().enumerate() {
+            if i > 0 {
+                write!(f, ", ")?;
+            }
+            write!(f, "{value:?}")?;
+        }
+
+        if let Some((low, high)) = &self.range {
+            if !self.values.is_empty() {
+                write!(f, ", ")?;
+            }
+            write!(f, "{low:?} THRU {high:?}")?;
+        }
+
+        if self.is_empty() {
+            write!(f, "none")?;
+        }
+
+        Ok(())
+    }
+}
+
+impl<S> MissingValues<S>
+where
+    S: Debug,
+{
+    fn is_empty(&self) -> bool {
+        self.values.is_empty() && self.range.is_none()
+    }
+}
+
+impl<S> Default for MissingValues<S>
+where
+    S: Debug,
+{
+    fn default() -> Self {
+        Self {
+            values: Vec::new(),
+            range: None,
+        }
+    }
+}
+
+impl MissingValues<RawStr<8>> {
+    fn read<R: Read + Seek>(
+        r: &mut R,
+        offset: u64,
+        width: i32,
+        code: i32,
+        endian: Endian,
+    ) -> Result<Self, Error> {
+        let (n_values, has_range) = match (width, code) {
+            (_, 0..=3) => (code, false),
+            (0, -2) => (0, true),
+            (0, -3) => (1, true),
+            (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }),
+            (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
+        };
+
+        let var_type = if width == 0 {
+            VarType::Numeric
+        } else {
+            VarType::String
+        };
+
+        let mut values = Vec::new();
+        for _ in 0..n_values {
+            values.push(RawValue::read(r, var_type, endian)?);
+        }
+        let range = if has_range {
+            let low = RawValue::read(r, var_type, endian)?;
+            let high = RawValue::read(r, var_type, endian)?;
+            Some((low, high))
+        } else {
+            None
+        };
+        Ok(Self { values, range })
+    }
+    fn decode(&self, decoder: &Decoder) -> MissingValues<String> {
+        MissingValues {
+            values: self
+                .values
+                .iter()
+                .map(|value| value.decode(decoder))
+                .collect(),
+            range: self
+                .range
+                .as_ref()
+                .map(|(low, high)| (low.decode(decoder), high.decode(decoder))),
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct VariableRecord<S, V>
+where
+    S: Debug,
+    V: Debug,
+{
+    /// Range of offsets in file.
+    pub offsets: Range<u64>,
+
+    /// Variable width, in the range -1..=255.
+    pub width: i32,
+
+    /// Variable name, padded on the right with spaces.
+    pub name: S,
+
+    /// Print format.
+    pub print_format: Spec,
+
+    /// Write format.
+    pub write_format: Spec,
+
+    /// Missing values.
+    pub missing_values: MissingValues<V>,
+
+    /// Optional variable label.
+    pub label: Option<S>,
+}
+
+impl<S, V> Debug for VariableRecord<S, V>
+where
+    S: Debug,
+    V: Debug,
+{
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        writeln!(
+            f,
+            "Width: {} ({})",
+            self.width,
+            match self.width.cmp(&0) {
+                Ordering::Greater => "string",
+                Ordering::Equal => "numeric",
+                Ordering::Less => "long string continuation record",
+            }
+        )?;
+        writeln!(f, "Print format: {:?}", self.print_format)?;
+        writeln!(f, "Write format: {:?}", self.write_format)?;
+        writeln!(f, "Name: {:?}", &self.name)?;
+        writeln!(f, "Variable label: {:?}", self.label)?;
+        writeln!(f, "Missing values: {:?}", self.missing_values)
+    }
+}
+
+impl VariableRecord<RawString, RawStr<8>> {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
+        let start_offset = r.stream_position()?;
         let width: i32 = endian.parse(read_bytes(r)?);
+        if !(-1..=255).contains(&width) {
+            return Err(Error::BadVariableWidth { start_offset, width });
+        }
+        let code_offset = r.stream_position()?;
         let has_variable_label: u32 = endian.parse(read_bytes(r)?);
         let missing_value_code: i32 = endian.parse(read_bytes(r)?);
-        let print_format: u32 = endian.parse(read_bytes(r)?);
-        let write_format: u32 = endian.parse(read_bytes(r)?);
-        let name: [u8; 8] = read_bytes(r)?;
+        let print_format = Spec(endian.parse(read_bytes(r)?));
+        let write_format = Spec(endian.parse(read_bytes(r)?));
+        let name = RawString(read_vec(r, 8)?);
 
         let label = match has_variable_label {
             0 => None,
             1 => {
                 let len: u32 = endian.parse(read_bytes(r)?);
                 let read_len = len.min(65535) as usize;
-                let label = Some(read_vec(r, read_len)?);
+                let label = RawString(read_vec(r, read_len)?);
 
                 let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
                 let _ = read_vec(r, padding_bytes as usize)?;
 
-                label
+                Some(label)
             }
             _ => {
                 return Err(Error::BadVariableLabelCode {
-                    offset,
+                    start_offset,
+                    code_offset,
                     code: has_variable_label,
                 })
             }
         };
 
-        let mut missing = Vec::new();
-        if missing_value_code != 0 {
-            match (width, missing_value_code) {
-                (0, -3 | -2 | 1 | 2 | 3) => (),
-                (0, _) => {
-                    return Err(Error::BadNumericMissingValueCode {
-                        offset,
-                        code: missing_value_code,
-                    })
-                }
-                (_, 0..=3) => (),
-                (_, _) => {
-                    return Err(Error::BadStringMissingValueCode {
-                        offset,
-                        code: missing_value_code,
-                    })
-                }
-            }
+        let missing_values =
+            MissingValues::read(r, start_offset, width, missing_value_code, endian)?;
 
-            for _ in 0..missing_value_code.abs() {
-                missing.push(read_bytes(r)?);
-            }
-        }
+        let end_offset = r.stream_position()?;
 
-        Ok(Variable {
-            offset,
+        Ok(Record::Variable(VariableRecord {
+            offsets: start_offset..end_offset,
             width,
             name,
             print_format,
             write_format,
-            missing_value_code,
-            missing,
+            missing_values,
             label,
+        }))
+    }
+
+    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
+        DecodedRecord::Variable(VariableRecord {
+            offsets: self.offsets.clone(),
+            width: self.width,
+            name: decoder.decode(&self.name).to_string(),
+            print_format: self.print_format,
+            write_format: self.write_format,
+            missing_values: self.missing_values.decode(decoder),
+            label: self
+                .label
+                .as_ref()
+                .map(|label| decoder.decode(label).to_string()),
         })
     }
 }
 
-pub struct ValueLabel {
-    /// Offset from the start of the file to the start of the record.
-    pub offset: u64,
+#[derive(Copy, Clone)]
+pub struct UntypedValue(pub [u8; 8]);
+
+impl Debug for UntypedValue {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let little: f64 = Endian::Little.parse(self.0);
+        let little = format!("{:?}", little);
+        let big: f64 = Endian::Big.parse(self.0);
+        let big = format!("{:?}", big);
+        let number = if little.len() <= big.len() {
+            little
+        } else {
+            big
+        };
+        write!(f, "{number}")?;
+
+        let string = default_decode(&self.0);
+        let string = string
+            .split(|c: char| c == '\0' || c.is_control())
+            .next()
+            .unwrap();
+        write!(f, "{string:?}")?;
+        Ok(())
+    }
+}
+
+#[derive(Clone)]
+pub struct RawString(pub Vec<u8>);
+
+impl From<Vec<u8>> for RawString {
+    fn from(source: Vec<u8>) -> Self {
+        Self(source)
+    }
+}
+
+impl From<&[u8]> for RawString {
+    fn from(source: &[u8]) -> Self {
+        Self(source.into())
+    }
+}
+
+impl Debug for RawString {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "{:?}", default_decode(self.0.as_slice()))
+    }
+}
+
+#[derive(Copy, Clone)]
+pub struct RawStr<const N: usize>(pub [u8; N]);
+
+impl<const N: usize> From<[u8; N]> for RawStr<N> {
+    fn from(source: [u8; N]) -> Self {
+        Self(source)
+    }
+}
+
+impl<const N: usize> Debug for RawStr<N> {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "{:?}", default_decode(&self.0))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct ValueLabel<V, S>
+where
+    V: Debug,
+    S: Debug,
+{
+    pub value: Value<V>,
+    pub label: S,
+}
+
+/// A set of value labels together with the variables they apply to.
+#[derive(Clone)]
+pub struct ValueLabelRecord<V, S>
+where
+    V: Debug,
+    S: Debug,
+{
+    /// Range of offsets in file.
+    pub offsets: Range<u64>,
 
     /// The labels.
-    pub labels: Vec<([u8; 8], Vec<u8>)>,
+    pub labels: Vec<ValueLabel<V, S>>,
+
+    /// The 1-based dictionary indexes of the variables that the labels apply
+    /// to.
+    pub dict_indexes: Vec<u32>,
+
+    /// The type shared by all of the variables in `dict_indexes`.
+    pub var_type: VarType,
 }
 
-impl ValueLabel {
+impl<V, S> Debug for ValueLabelRecord<V, S>
+where
+    V: Debug,
+    S: Debug,
+{
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        writeln!(f, "labels: ")?;
+        for label in self.labels.iter() {
+            writeln!(f, "{label:?}")?;
+        }
+        write!(f, "apply to {} variables", self.var_type)?;
+        for dict_index in self.dict_indexes.iter() {
+            write!(f, " #{dict_index}")?;
+        }
+        Ok(())
+    }
+}
+
+impl<V, S> Header for ValueLabelRecord<V, S>
+where
+    V: Debug,
+    S: Debug,
+{
+    fn offsets(&self) -> Range<u64> {
+        self.offsets.clone()
+    }
+}
+
+impl<V, S> ValueLabelRecord<V, S>
+where
+    V: Debug,
+    S: Debug,
+{
     /// Maximum number of value labels in a record.
-    pub const MAX: u32 = u32::MAX / 8;
+    pub const MAX_LABELS: u32 = u32::MAX / 8;
 
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ValueLabel, Error> {
-        let offset = r.stream_position()?;
+    /// Maximum number of variable indexes in a record.
+    pub const MAX_INDEXES: u32 = u32::MAX / 8;
+}
+
+impl ValueLabelRecord<RawStr<8>, RawString> {
+    fn read<R: Read + Seek>(
+        r: &mut R,
+        endian: Endian,
+        var_types: &[VarType],
+        warn: &dyn Fn(Warning),
+    ) -> Result<Option<Record>, Error> {
+        let label_offset = r.stream_position()?;
         let n: u32 = endian.parse(read_bytes(r)?);
-        if n > ValueLabel::MAX {
+        if n > Self::MAX_LABELS {
             return Err(Error::BadNumberOfValueLabels {
-                offset,
+                offset: label_offset,
                 n,
-                max: ValueLabel::MAX,
+                max: Self::MAX_LABELS,
             });
         }
 
         let mut labels = Vec::new();
         for _ in 0..n {
-            let value: [u8; 8] = read_bytes(r)?;
+            let value = UntypedValue(read_bytes(r)?);
             let label_len: u8 = endian.parse(read_bytes(r)?);
             let label_len = label_len as usize;
             let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
 
-            let mut label = read_vec(r, padded_len)?;
+            let mut label = read_vec(r, padded_len - 1)?;
             label.truncate(label_len);
-            labels.push((value, label));
+            labels.push((value, RawString(label)));
         }
-        Ok(ValueLabel { offset, labels })
-    }
-}
 
-pub struct VarIndexes {
-    /// Offset from the start of the file to the start of the record.
-    pub offset: u64,
-
-    /// The 0-based indexes of the variable indexes.
-    pub var_indexes: Vec<u32>,
-}
-
-impl VarIndexes {
-    /// Maximum number of variable indexes in a record.
-    pub const MAX: u32 = u32::MAX / 8;
+        let index_offset = r.stream_position()?;
+        let rec_type: u32 = endian.parse(read_bytes(r)?);
+        if rec_type != 4 {
+            return Err(Error::ExpectedVarIndexRecord {
+                offset: index_offset,
+                rec_type,
+            });
+        }
 
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<VarIndexes, Error> {
-        let offset = r.stream_position()?;
         let n: u32 = endian.parse(read_bytes(r)?);
-        if n > VarIndexes::MAX {
-            return Err(Error::BadNumberOfVarIndexes {
-                offset,
+        if n > Self::MAX_INDEXES {
+            return Err(Error::TooManyVarIndexes {
+                offset: index_offset,
                 n,
-                max: VarIndexes::MAX,
+                max: Self::MAX_INDEXES,
             });
         }
-        let mut var_indexes = Vec::with_capacity(n as usize);
+
+        let index_offset = r.stream_position()?;
+        let mut dict_indexes = Vec::with_capacity(n as usize);
+        let mut invalid_indexes = Vec::new();
         for _ in 0..n {
-            var_indexes.push(endian.parse(read_bytes(r)?));
+            let index: u32 = endian.parse(read_bytes(r)?);
+            if index == 0 || index as usize > var_types.len() {
+                dict_indexes.push(index);
+            } else {
+                invalid_indexes.push(index);
+            }
+        }
+        if !invalid_indexes.is_empty() {
+            warn(Warning::InvalidVarIndexes {
+                offset: index_offset,
+                max: var_types.len(),
+                invalid: invalid_indexes,
+            });
         }
 
-        Ok(VarIndexes {
-            offset,
-            var_indexes,
-        })
+        let Some(&first_index) = dict_indexes.first() else {
+            warn(Warning::NoVarIndexes {
+                offset: index_offset,
+            });
+            return Ok(None);
+        };
+        let var_type = var_types[first_index as usize - 1];
+        let mut wrong_type_indexes = Vec::new();
+        dict_indexes.retain(|&index| {
+            if var_types[index as usize - 1] != var_type {
+                wrong_type_indexes.push(index);
+                false
+            } else {
+                true
+            }
+        });
+        if !wrong_type_indexes.is_empty() {
+            warn(Warning::MixedVarTypes {
+                offset: index_offset,
+                var_type,
+                wrong_types: wrong_type_indexes,
+            });
+        }
+
+        let labels = labels
+            .into_iter()
+            .map(|(value, label)| ValueLabel {
+                value: Value::from_raw(&value, var_type, endian),
+                label,
+            })
+            .collect();
+
+        let end_offset = r.stream_position()?;
+        Ok(Some(Record::ValueLabel(ValueLabelRecord {
+            offsets: label_offset..end_offset,
+            labels,
+            dict_indexes,
+            var_type,
+        })))
+    }
+
+    fn decode(self, decoder: &Decoder) -> ValueLabelRecord<RawStr<8>, String> {
+        let labels = self
+            .labels
+            .iter()
+            .map(|ValueLabel { value, label }| ValueLabel {
+                value: *value,
+                label: decoder.decode(label).to_string(),
+            })
+            .collect();
+        ValueLabelRecord {
+            offsets: self.offsets.clone(),
+            labels,
+            dict_indexes: self.dict_indexes.clone(),
+            var_type: self.var_type,
+        }
     }
 }
 
-pub struct Document {
-    /// Offset from the start of the file to the start of the record.
-    pub pos: u64,
+#[derive(Clone, Debug)]
+pub struct DocumentRecord<S>
+where
+    S: Debug,
+{
+    pub offsets: Range<u64>,
 
-    /// The document, as an array of 80-byte lines.
-    pub lines: Vec<[u8; Document::LINE_LEN as usize]>,
+    /// The document, as an array of lines.  Raw lines are exactly 80 bytes long
+    /// and are right-padded with spaces without any new-line termination.
+    pub lines: Vec<S>,
 }
 
-impl Document {
-    /// Length of a line in a document.  Document lines are fixed-length and
-    /// padded on the right with spaces.
-    pub const LINE_LEN: u32 = 80;
+pub type RawDocumentLine = RawStr<DOC_LINE_LEN>;
+
+/// Length of a line in a document.  Document lines are fixed-length and
+/// padded on the right with spaces.
+pub const DOC_LINE_LEN: usize = 80;
 
+impl DocumentRecord<RawDocumentLine> {
     /// Maximum number of lines we will accept in a document.  This is simply
     /// the maximum number that will fit in a 32-bit space.
-    pub const MAX_LINES: u32 = i32::MAX as u32 / Self::LINE_LEN;
+    pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
 
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Document, Error> {
-        let offset = r.stream_position()?;
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
+        let start_offset = r.stream_position()?;
         let n: u32 = endian.parse(read_bytes(r)?);
-        match n {
-            0..=Self::MAX_LINES => Ok(Document {
-                pos: r.stream_position()?,
-                lines: (0..n)
-                    .map(|_| read_bytes(r))
-                    .collect::<Result<Vec<_>, _>>()?,
-            }),
-            _ => Err(Error::BadDocumentLength {
-                offset,
+        let n = n as usize;
+        if n > Self::MAX_LINES {
+            Err(Error::BadDocumentLength {
+                offset: start_offset,
                 n,
                 max: Self::MAX_LINES,
-            }),
+            })
+        } else {
+            let mut lines = Vec::with_capacity(n);
+            for _ in 0..n {
+                lines.push(RawStr(read_bytes(r)?));
+            }
+            let end_offset = r.stream_position()?;
+            Ok(Record::Document(DocumentRecord {
+                offsets: start_offset..end_offset,
+                lines,
+            }))
         }
     }
+
+    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
+        DecodedRecord::Document(DocumentRecord {
+            offsets: self.offsets.clone(),
+            lines: self
+                .lines
+                .iter()
+                .map(|s| decoder.decode_slice(&s.0).to_string())
+                .collect(),
+        })
+    }
 }
 
-/*
-#[derive(FromPrimitive)]
-enum ExtensionType {
-    /// Machine integer info.
-    Integer = 3,
-    /// Machine floating-point info.
-    Float = 4,
-    /// Variable sets.
-    VarSets = 5,
-    /// DATE.
-    Date = 6,
-    /// Multiple response sets.
-    Mrsets = 7,
-    /// SPSS Data Entry.
-    DataEntry = 8,
-    /// Extra product info text.
-    ProductInfo = 10,
-    /// Variable display parameters.
-    Display = 11,
-    /// Long variable names.
-    LongNames = 13,
-    /// Long strings.
-    LongStrings = 14,
-    /// Extended number of cases.
-    Ncases = 16,
-    /// Data file attributes.
-    FileAttrs = 17,
-    /// Variable attributes.
-    VarAttrs = 18,
-    /// Multiple response sets (extended).
-    Mrsets2 = 19,
-    /// Character encoding.
-    Encoding = 20,
-    /// Value labels for long strings.
-    LongLabels = 21,
-    /// Missing values for long strings.
-    LongMissing = 22,
-    /// "Format properties in dataview table".
-    Dataview = 24,
-}
- */
-
-trait TextRecord
+impl<S> Header for DocumentRecord<S>
 where
-    Self: Sized,
+    S: Debug,
 {
-    const NAME: &'static str;
-    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
+    fn offsets(&self) -> Range<u64> {
+        self.offsets.clone()
+    }
 }
 
-trait ExtensionRecord
-where
-    Self: Sized,
-{
+trait ExtensionRecord {
+    const SUBTYPE: u32;
     const SIZE: Option<u32>;
     const COUNT: Option<u32>;
     const NAME: &'static str;
-    fn parse(ext: &Extension, endian: Endian, warn: impl Fn(Error)) -> Result<Self, Error>;
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning>;
 }
 
-pub struct IntegerInfo {
+#[derive(Clone, Debug)]
+pub struct IntegerInfoRecord {
+    pub offsets: Range<u64>,
     pub version: (i32, i32, i32),
     pub machine_code: i32,
     pub floating_point_rep: i32,
@@ -787,155 +1709,199 @@ pub struct IntegerInfo {
     pub character_code: i32,
 }
 
-impl ExtensionRecord for IntegerInfo {
+impl ExtensionRecord for IntegerInfoRecord {
+    const SUBTYPE: u32 = 3;
     const SIZE: Option<u32> = Some(4);
     const COUNT: Option<u32> = Some(8);
     const NAME: &'static str = "integer record";
 
-    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+    /// Reads eight `i32` values from `ext.data` and stores them, in order, as
+    /// the three version components, machine code, floating-point
+    /// representation, compression code, endianness, and character code.
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
         ext.check_size::<Self>()?;
 
+        // NOTE(review): the `unwrap` below assumes `check_size` has verified
+        // that `ext.data` holds 8 elements of 4 bytes each -- confirm.
         let mut input = &ext.data[..];
         let data: Vec<i32> = (0..8)
             .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
             .collect();
-        Ok(IntegerInfo {
+        Ok(Record::IntegerInfo(IntegerInfoRecord {
+            offsets: ext.offsets.clone(),
             version: (data[0], data[1], data[2]),
             machine_code: data[3],
             floating_point_rep: data[4],
             compression_code: data[5],
             endianness: data[6],
             character_code: data[7],
-        })
+        }))
     }
 }
 
-pub struct FloatInfo {
+#[derive(Clone, Debug)]
+pub struct FloatInfoRecord {
     pub sysmis: f64,
     pub highest: f64,
     pub lowest: f64,
 }
 
-impl ExtensionRecord for FloatInfo {
+impl ExtensionRecord for FloatInfoRecord {
+    const SUBTYPE: u32 = 4;
     const SIZE: Option<u32> = Some(8);
     const COUNT: Option<u32> = Some(3);
     const NAME: &'static str = "floating point record";
 
-    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+    /// Reads three `f64` values from `ext.data` and stores them, in order, as
+    /// `sysmis`, `highest`, and `lowest`.
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
         ext.check_size::<Self>()?;
 
+        // NOTE(review): the `unwrap` below assumes `check_size` has verified
+        // that `ext.data` holds 3 elements of 8 bytes each -- confirm.
         let mut input = &ext.data[..];
         let data: Vec<f64> = (0..3)
             .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
             .collect();
-        Ok(FloatInfo {
+        Ok(Record::FloatInfo(FloatInfoRecord {
             sysmis: data[0],
             highest: data[1],
             lowest: data[2],
-        })
+        }))
     }
 }
 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum CategoryLabels {
     VarLabels,
     CountedValues,
 }
+
+/// Type of a multiple response set, as produced by
+/// `MultipleResponseType::parse`.
+#[derive(Clone, Debug)]
 pub enum MultipleResponseType {
+    // Produced for the `D` and `E` type codes; `value` is the counted string
+    // that follows the code.
     MultipleDichotomy {
-        value: Vec<u8>,
+        value: RawString,
         labels: CategoryLabels,
     },
+    // Produced for the `C` type code.
     MultipleCategory,
 }
-pub struct MultipleResponseSet {
-    pub name: Vec<u8>,
-    pub label: Vec<u8>,
-    pub mr_type: MultipleResponseType,
-    pub vars: Vec<Vec<u8>>,
-}
 
-impl MultipleResponseSet {
-    fn parse(input: &[u8]) -> Result<(MultipleResponseSet, &[u8]), Error> {
-        let Some(equals) = input.iter().position(|&b| b == b'=') else {
-            return Err(Error::TBD);
-        };
-        let (name, input) = input.split_at(equals);
-        let (mr_type, input) = match input.get(0) {
-            Some(b'C') => (MultipleResponseType::MultipleCategory, &input[1..]),
-            Some(b'D') => {
-                let (value, input) = parse_counted_string(&input[1..])?;
+impl MultipleResponseType {
+    /// Parses a multiple response set type from the front of `input`,
+    /// returning the type and the unconsumed bytes.
+    ///
+    /// The first byte selects the type:
+    ///
+    /// - `C`: `MultipleCategory`.
+    /// - `D`: a counted string follows; yields `MultipleDichotomy` with
+    ///   `CategoryLabels::VarLabels`.
+    /// - `E`: must be followed by ` 1 ` (`CategoryLabels::CountedValues`) or
+    ///   ` 11 ` (`CategoryLabels::VarLabels`) and then a counted string;
+    ///   yields `MultipleDichotomy`.
+    ///
+    /// Any other input, including empty input, yields `Warning::TBD`.
+    fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Warning> {
+        let (mr_type, input) = match input.split_first() {
+            Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input),
+            Some((b'D', input)) => {
+                let (value, input) = parse_counted_string(input)?;
                 (
                     MultipleResponseType::MultipleDichotomy {
-                        value: value.into(),
+                        value,
                         labels: CategoryLabels::VarLabels,
                     },
                     input,
                 )
             }
-            Some(b'E') => {
-                let Some(b' ') = input.get(1) else {
-                    return Err(Error::TBD);
-                };
-                let input = &input[2..];
+            Some((b'E', input)) => {
                 let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
                     (CategoryLabels::CountedValues, rest)
                 } else if let Some(rest) = input.strip_prefix(b" 11 ") {
                     (CategoryLabels::VarLabels, rest)
                 } else {
-                    return Err(Error::TBD);
+                    return Err(Warning::TBD);
                 };
                 let (value, input) = parse_counted_string(input)?;
                 (
-                    MultipleResponseType::MultipleDichotomy {
-                        value: value.into(),
-                        labels,
-                    },
+                    MultipleResponseType::MultipleDichotomy { value, labels },
                     input,
                 )
             }
-            _ => return Err(Error::TBD),
+            _ => return Err(Warning::TBD),
         };
-        let Some(b' ') = input.get(0) else {
-            return Err(Error::TBD);
+        Ok((mr_type, input))
+    }
+}
+
+/// One multiple response set.
+///
+/// `I` is the type used for names and `S` the type used for the label; the
+/// parser in this file produces `RawString` for both, and `decode` converts
+/// them to `Identifier` and `String`.
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSet<I, S>
+where
+    I: Debug,
+    S: Debug,
+{
+    // Name of the set (the text before `=` in the raw record).
+    pub name: I,
+    // Label of the set.
+    pub label: S,
+    // Type of the set.
+    pub mr_type: MultipleResponseType,
+    // Names of the member variables.
+    pub short_names: Vec<I>,
+}
+
+impl MultipleResponseSet<RawString, RawString> {
+    /// Parses one multiple response set from the front of `input`.
+    ///
+    /// The expected layout is the set name, `=`, a type specification (see
+    /// `MultipleResponseType::parse`), a space, a counted label, zero or more
+    /// space-separated variable names, and one or more newlines.  Returns the
+    /// parsed set plus the bytes following the trailing newlines, or
+    /// `Warning::TBD` if the input does not match this layout.
+    fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> {
+        let Some(equals) = input.iter().position(|&b| b == b'=') else {
+            return Err(Warning::TBD);
+        };
+        let (name, input) = input.split_at(equals);
+        // `split_at` leaves the `=` separator at the head of the remainder.
+        // Skip it: otherwise the type parser always sees `=` and fails.  The
+        // slice cannot panic because `input[0]` is the `=` found above.
+        let (mr_type, input) = MultipleResponseType::parse(&input[1..])?;
+        let Some(input) = input.strip_prefix(b" ") else {
+            return Err(Warning::TBD);
         };
-        let (label, mut input) = parse_counted_string(&input[1..])?;
+        let (label, mut input) = parse_counted_string(input)?;
         let mut vars = Vec::new();
-        while input.get(0) == Some(&b' ') {
-            input = &input[1..];
-            let Some(length) = input.iter().position(|b| b" \n".contains(b)) else {
-                return Err(Error::TBD);
-            };
-            if length > 0 {
-                vars.push(input[..length].into());
+        // Each variable name is introduced by a space and runs up to the next
+        // space or newline; the list ends at the first newline.
+        while input.first() != Some(&b'\n') {
+            match input.split_first() {
+                Some((b' ', rest)) => {
+                    let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else {
+                        return Err(Warning::TBD);
+                    };
+                    let (var, rest) = rest.split_at(length);
+                    if !var.is_empty() {
+                        vars.push(var.into());
+                    }
+                    input = rest;
+                }
+                _ => return Err(Warning::TBD),
             }
-            input = &input[length..];
-        }
-        if input.get(0) != Some(&b'\n') {
-            return Err(Error::TBD);
         }
-        while input.get(0) == Some(&b'\n') {
+        while input.first() == Some(&b'\n') {
             input = &input[1..];
         }
         Ok((
             MultipleResponseSet {
                 name: name.into(),
-                label: label.into(),
+                label,
                 mr_type,
-                vars,
+                short_names: vars,
             },
             input,
         ))
     }
+
+    /// Decodes the raw names and label of this set using `decoder`.
+    ///
+    /// A member variable name that is not a valid identifier is reported
+    /// through `decoder.warn` and dropped.  An invalid set name makes the
+    /// whole set fail with `Warning::InvalidMrSetName`.
+    fn decode(
+        &self,
+        decoder: &Decoder,
+    ) -> Result<MultipleResponseSet<Identifier, String>, Warning> {
+        let mut short_names = Vec::with_capacity(self.short_names.len());
+        for short_name in self.short_names.iter() {
+            if let Some(short_name) = decoder
+                .decode_identifier(short_name)
+                .map_err(Warning::InvalidMrSetVariableName)
+                .issue_warning(&decoder.warn)
+            {
+                short_names.push(short_name);
+            }
+        }
+        Ok(MultipleResponseSet {
+            name: decoder
+                .decode_identifier(&self.name)
+                .map_err(Warning::InvalidMrSetName)?,
+            label: decoder.decode(&self.label).to_string(),
+            mr_type: self.mr_type.clone(),
+            short_names,
+        })
+    }
 }
 
-pub struct MultipleResponseSets(Vec<MultipleResponseSet>);
+#[derive(Clone, Debug)]
+pub struct MultipleResponseRecord<I, S>(pub Vec<MultipleResponseSet<I, S>>)
+where
+    I: Debug,
+    S: Debug;
 
-impl ExtensionRecord for MultipleResponseSets {
+impl ExtensionRecord for MultipleResponseRecord<RawString, RawString> {
+    const SUBTYPE: u32 = 7;
     const SIZE: Option<u32> = Some(1);
     const COUNT: Option<u32> = None;
     const NAME: &'static str = "multiple response set record";
 
-    fn parse(ext: &Extension, _endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+    fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
         ext.check_size::<Self>()?;
 
         let mut input = &ext.data[..];
@@ -945,209 +1911,188 @@ impl ExtensionRecord for MultipleResponseSets {
             sets.push(set);
             input = rest;
         }
-        Ok(MultipleResponseSets(sets))
+        Ok(Record::MultipleResponse(MultipleResponseRecord(sets)))
     }
 }
 
-fn parse_counted_string(input: &[u8]) -> Result<(&[u8], &[u8]), Error> {
+impl MultipleResponseRecord<RawString, RawString> {
+    /// Decodes every set in this record with `decoder`.  A set that fails to
+    /// decode is reported through `decoder.warn` and left out of the result.
+    fn decode(self, decoder: &Decoder) -> DecodedRecord {
+        let decoded = self
+            .0
+            .iter()
+            .filter_map(|raw| raw.decode(decoder).issue_warning(&decoder.warn))
+            .collect();
+        DecodedRecord::MultipleResponse(MultipleResponseRecord(decoded))
+    }
+}
+
+/// Parses a counted string from the front of `input`.
+///
+/// A counted string is a decimal length, a single space, and then exactly
+/// that many bytes.  Returns the string and the bytes that follow it.
+///
+/// Returns `Warning::TBD` if `input` contains no space, if the text before
+/// the first space is not a valid `usize`, or if fewer than `length` bytes
+/// follow the space.
+fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> {
     let Some(space) = input.iter().position(|&b| b == b' ') else {
-        return Err(Error::TBD);
+        return Err(Warning::TBD);
     };
     let Ok(length) = from_utf8(&input[..space]) else {
-        return Err(Error::TBD);
+        return Err(Warning::TBD);
     };
     let Ok(length): Result<usize, _> = length.parse() else {
-        return Err(Error::TBD);
+        return Err(Warning::TBD);
     };
 
     let input = &input[space + 1..];
     if input.len() < length {
-        return Err(Error::TBD);
+        return Err(Warning::TBD);
     };
 
     let (string, rest) = input.split_at(length);
-    Ok((string, rest))
-}
-
-pub struct ExtraProductInfo(String);
-
-impl TextRecord for ExtraProductInfo {
-    const NAME: &'static str = "extra product info";
-    fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
-        Ok(ExtraProductInfo(input.into()))
-    }
-}
-
-pub struct VarDisplayRecord(Vec<u32>);
-
-impl ExtensionRecord for VarDisplayRecord {
-    const SIZE: Option<u32> = Some(4);
-    const COUNT: Option<u32> = None;
-    const NAME: &'static str = "variable display record";
-
-    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
-        ext.check_size::<Self>()?;
-
-        let mut input = &ext.data[..];
-        let display = (0..ext.count)
-            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
-            .collect();
-        Ok(VarDisplayRecord(display))
-    }
-}
-
-pub struct VariableSet {
-    pub name: String,
-    pub vars: Vec<String>,
-}
-
-impl VariableSet {
-    fn parse(input: &str) -> Result<Self, Error> {
-        let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
-        let vars = input.split_ascii_whitespace().map(String::from).collect();
-        Ok(VariableSet {
-            name: name.into(),
-            vars,
-        })
-    }
-}
-
-pub struct VariableSetRecord(Vec<VariableSet>);
-
-impl TextRecord for VariableSetRecord {
-    const NAME: &'static str = "variable set";
-    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
-        let mut sets = Vec::new();
-        for line in input.lines() {
-            match VariableSet::parse(line) {
-                Ok(set) => sets.push(set),
-                Err(error) => warn(error),
-            }
-        }
-        Ok(VariableSetRecord(sets))
-    }
+    Ok((string.into(), rest))
 }
 
-pub struct LongVariableName {
-    pub short_name: String,
-    pub long_name: String,
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Measure {
+    Nominal,
+    Ordinal,
+    Scale,
 }
 
-pub struct LongVariableNameRecord(Vec<LongVariableName>);
+impl Measure {
+    /// Returns the default measure for a variable of type `var_type`:
+    /// `Nominal` for string variables and `None` for numeric ones.
+    pub fn default_for_type(var_type: VarType) -> Option<Measure> {
+        match var_type {
+            VarType::Numeric => None,
+            VarType::String => Some(Self::Nominal),
+        }
+    }
 
-impl TextRecord for LongVariableNameRecord {
-    const NAME: &'static str = "long variable names";
-    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
-        let mut names = Vec::new();
-        for pair in input.split('\t').filter(|s| !s.is_empty()) {
-            if let Some((short_name, long_name)) = pair.split_once('=') {
-                let name = LongVariableName {
-                    short_name: short_name.into(),
-                    long_name: long_name.into(),
-                };
-                names.push(name);
-            } else {
-                warn(Error::TBD)
-            }
+    /// Converts the numeric code `source` into a measure: 0 maps to `None`,
+    /// 1 to `Nominal`, 2 to `Ordinal`, and 3 to `Scale`.  Any other code
+    /// yields `Warning::InvalidMeasurement`.
+    fn try_decode(source: u32) -> Result<Option<Measure>, Warning> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Measure::Nominal)),
+            2 => Ok(Some(Measure::Ordinal)),
+            3 => Ok(Some(Measure::Scale)),
+            _ => Err(Warning::InvalidMeasurement(source)),
         }
-        Ok(LongVariableNameRecord(names))
     }
 }
 
-pub struct VeryLongString {
-    pub short_name: String,
-    pub length: usize,
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Alignment {
+    Left,
+    Right,
+    Center,
 }
 
-impl VeryLongString {
-    fn parse(input: &str) -> Result<VeryLongString, Error> {
-        let Some((short_name, length)) = input.split_once('=') else {
-            return Err(Error::TBD);
-        };
-        let length: usize = length.parse().map_err(|_| Error::TBD)?;
-        Ok(VeryLongString {
-            short_name: short_name.into(),
-            length,
-        })
+impl Alignment {
+    /// Converts the numeric code `source` into an alignment: 0 maps to
+    /// `None`, 1 to `Left`, 2 to `Right`, and 3 to `Center`.  Any other code
+    /// yields `Warning::InvalidAlignment`.
+    fn try_decode(source: u32) -> Result<Option<Alignment>, Warning> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Alignment::Left)),
+            2 => Ok(Some(Alignment::Right)),
+            3 => Ok(Some(Alignment::Center)),
+            _ => Err(Warning::InvalidAlignment(source)),
+        }
     }
-}
-
-pub struct VeryLongStringRecord(Vec<VeryLongString>);
 
-impl TextRecord for VeryLongStringRecord {
-    const NAME: &'static str = "very long strings";
-    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
-        let mut very_long_strings = Vec::new();
-        for tuple in input
-            .split('\0')
-            .map(|s| s.trim_end_matches('\t'))
-            .filter(|s| !s.is_empty())
-        {
-            match VeryLongString::parse(tuple) {
-                Ok(vls) => very_long_strings.push(vls),
-                Err(error) => warn(error),
-            }
+    /// Returns the default alignment for a variable of type `var_type`:
+    /// `Right` for numeric variables and `Left` for string ones.
+    pub fn default_for_type(var_type: VarType) -> Self {
+        match var_type {
+            VarType::Numeric => Self::Right,
+            VarType::String => Self::Left,
         }
-        Ok(VeryLongStringRecord(very_long_strings))
     }
 }
 
-pub struct LongStringValueLabels {
-    pub var_name: Vec<u8>,
-    pub width: u32,
-
-    /// `(value, label)` pairs, where each value is `width` bytes.
-    pub labels: Vec<(Vec<u8>, Vec<u8>)>,
+#[derive(Clone, Debug)]
+pub struct VarDisplay {
+    pub measure: Option<Measure>,
+    pub width: Option<u32>,
+    pub alignment: Option<Alignment>,
 }
 
-pub struct LongStringValueLabelSet(Vec<LongStringValueLabels>);
+#[derive(Clone, Debug)]
+pub struct VarDisplayRecord(pub Vec<VarDisplay>);
 
-impl ExtensionRecord for LongStringValueLabelSet {
-    const SIZE: Option<u32> = Some(1);
-    const COUNT: Option<u32> = None;
-    const NAME: &'static str = "long string value labels record";
+impl VarDisplayRecord {
+    const SUBTYPE: u32 = 11;
 
-    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
-        ext.check_size::<Self>()?;
+    /// Parses a variable display record for `n_vars` variables.
+    ///
+    /// The element size must be 4, otherwise `Warning::BadRecordSize` is
+    /// returned.  The element count must be `3 * n_vars` (measure, width,
+    /// alignment per variable) or `2 * n_vars` (measure and alignment only),
+    /// otherwise `Warning::TBD` is returned.
+    ///
+    /// An invalid measure or alignment code is passed to `warn` and recorded
+    /// as `None`; it does not fail the whole record.
+    fn parse(
+        ext: &Extension,
+        n_vars: usize,
+        endian: Endian,
+        warn: &dyn Fn(Warning),
+    ) -> Result<Record, Warning> {
+        if ext.size != 4 {
+            return Err(Warning::BadRecordSize {
+                offset: ext.offsets.start,
+                record: String::from("variable display record"),
+                size: ext.size,
+                expected_size: 4,
+            });
+        }
+
+        // The per-variable width field is optional; its presence is inferred
+        // from the element count.
+        let has_width = if ext.count as usize == 3 * n_vars {
+            true
+        } else if ext.count as usize == 2 * n_vars {
+            false
+        } else {
+            return Err(Warning::TBD);
+        };
 
+        // NOTE(review): the `unwrap`s below assume `ext.data` holds exactly
+        // `ext.size * ext.count` bytes -- confirm where `Extension` is read.
+        let mut var_displays = Vec::new();
         let mut input = &ext.data[..];
-        let mut label_set = Vec::new();
-        while !input.is_empty() {
-            let var_name = read_string(&mut input, endian)?;
-            let width: u32 = endian.parse(read_bytes(&mut input)?);
-            let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
-            let mut labels = Vec::new();
-            for _ in 0..n_labels {
-                let value = read_string(&mut input, endian)?;
-                let label = read_string(&mut input, endian)?;
-                labels.push((value, label));
-            }
-            label_set.push(LongStringValueLabels {
-                var_name,
+        for _ in 0..n_vars {
+            let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
+                .issue_warning(&warn)
+                .flatten();
+            let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap()));
+            let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
+                .issue_warning(&warn)
+                .flatten();
+            var_displays.push(VarDisplay {
+                measure,
                 width,
-                labels,
-            })
+                alignment,
+            });
         }
-        Ok(LongStringValueLabelSet(label_set))
+        Ok(Record::VarDisplay(VarDisplayRecord(var_displays)))
     }
 }
 
-pub struct LongStringMissingValues {
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValues<N, V>
+where
+    N: Debug,
+    V: Debug,
+{
     /// Variable name.
-    pub var_name: Vec<u8>,
+    pub var_name: N,
+
+    /// Missing values.
+    pub missing_values: MissingValues<V>,
+}
 
-    /// Up to three missing values.
-    pub missing_values: Vec<[u8; 8]>,
+impl LongStringMissingValues<RawString, RawStr<8>> {
+    /// Decodes the variable name and missing values using `decoder`.
+    ///
+    /// Returns the identifier error if the decoded variable name is not a
+    /// valid identifier.
+    fn decode(
+        &self,
+        decoder: &Decoder,
+    ) -> Result<LongStringMissingValues<Identifier, String>, IdError> {
+        Ok(LongStringMissingValues {
+            var_name: decoder.decode_identifier(&self.var_name)?,
+            missing_values: self.missing_values.decode(decoder),
+        })
+    }
 }
 
-pub struct LongStringMissingValueSet(Vec<LongStringMissingValues>);
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValueRecord<N, V>(pub Vec<LongStringMissingValues<N, V>>)
+where
+    N: Debug,
+    V: Debug;
 
-impl ExtensionRecord for LongStringMissingValueSet {
+impl ExtensionRecord for LongStringMissingValueRecord<RawString, RawStr<8>> {
+    const SUBTYPE: u32 = 22;
     const SIZE: Option<u32> = Some(1);
     const COUNT: Option<u32> = None;
     const NAME: &'static str = "long string missing values record";
 
-    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
         ext.check_size::<Self>()?;
 
         let mut input = &ext.data[..];
@@ -1157,14 +2102,14 @@ impl ExtensionRecord for LongStringMissingValueSet {
             let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
             let value_len: u32 = endian.parse(read_bytes(&mut input)?);
             if value_len != 8 {
-                let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offset;
-                return Err(Error::BadLongMissingValueLength {
-                    record_offset: ext.offset,
+                let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
+                return Err(Warning::BadLongMissingValueLength {
+                    record_offset: ext.offsets.start,
                     offset,
                     value_len,
                 });
             }
-            let mut missing_values = Vec::new();
+            let mut values = Vec::new();
             for i in 0..n_missing_values {
                 let value: [u8; 8] = read_bytes(&mut input)?;
                 let numeric_value: u64 = endian.parse(value);
@@ -1176,47 +2121,195 @@ impl ExtensionRecord for LongStringMissingValueSet {
                 } else {
                     value
                 };
-                missing_values.push(value);
+                values.push(Value::String(RawStr(value)));
             }
+            let missing_values = MissingValues {
+                values,
+                range: None,
+            };
             missing_value_set.push(LongStringMissingValues {
                 var_name,
                 missing_values,
             });
         }
-        Ok(LongStringMissingValueSet(missing_value_set))
+        Ok(Record::LongStringMissingValues(
+            LongStringMissingValueRecord(missing_value_set),
+        ))
+    }
+}
+
+impl LongStringMissingValueRecord<RawString, RawStr<8>> {
+    /// Decodes every entry in this record with `decoder`.  An entry whose
+    /// variable name is invalid is reported through `decoder.warn` as
+    /// `Warning::InvalidLongStringMissingValueVariableName` and left out.
+    pub fn decode(self, decoder: &Decoder) -> LongStringMissingValueRecord<Identifier, String> {
+        let decoded = self
+            .0
+            .iter()
+            .filter_map(|raw| {
+                raw.decode(decoder)
+                    .map_err(Warning::InvalidLongStringMissingValueVariableName)
+                    .issue_warning(&decoder.warn)
+            })
+            .collect();
+        LongStringMissingValueRecord(decoded)
     }
 }
 
-pub struct Encoding(pub String);
+#[derive(Clone, Debug)]
+pub struct EncodingRecord(pub String);
 
-impl ExtensionRecord for Encoding {
+impl ExtensionRecord for EncodingRecord {
+    const SUBTYPE: u32 = 20;
     const SIZE: Option<u32> = Some(1);
     const COUNT: Option<u32> = None;
     const NAME: &'static str = "encoding record";
 
-    fn parse(ext: &Extension, _endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
+    /// Interprets the record's data as a UTF-8 string.  Returns
+    /// `Warning::BadEncodingName` carrying the record's starting offset if
+    /// the data is not valid UTF-8.
+    fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
+        ext.check_size::<Self>()?;
+
+        Ok(Record::Encoding(EncodingRecord(
+            String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName {
+                offset: ext.offsets.start,
+            })?,
+        )))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct NumberOfCasesRecord {
+    /// Always observed as 1.
+    pub one: u64,
+
+    /// Number of cases.
+    pub n_cases: u64,
+}
+
+impl ExtensionRecord for NumberOfCasesRecord {
+    const SUBTYPE: u32 = 16;
+    const SIZE: Option<u32> = Some(8);
+    const COUNT: Option<u32> = Some(2);
+    const NAME: &'static str = "extended number of cases record";
+
+    /// Reads two values from `ext.data`: first `one`, then `n_cases`.
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
         ext.check_size::<Self>()?;
 
-        Ok(Encoding(String::from_utf8(ext.data.clone()).map_err(
-            |_| Error::BadEncodingName { offset: ext.offset },
-        )?))
+        let mut input = &ext.data[..];
+        let one = endian.parse(read_bytes(&mut input)?);
+        let n_cases = endian.parse(read_bytes(&mut input)?);
+
+        Ok(Record::NumberOfCases(NumberOfCasesRecord { one, n_cases }))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct TextRecord {
+    pub offsets: Range<u64>,
+
+    /// Type of record.
+    pub rec_type: TextRecordType,
+
+    /// The text content of the record.
+    pub text: RawString,
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum TextRecordType {
+    VariableSets,
+    ProductInfo,
+    LongNames,
+    VeryLongStrings,
+    FileAttributes,
+    VariableAttributes,
+}
+
+impl TextRecord {
+    /// Builds a text record of type `rec_type` from `extension`, taking over
+    /// its offsets and using its data as the raw text.
+    fn new(extension: Extension, rec_type: TextRecordType) -> Self {
+        Self {
+            offsets: extension.offsets,
+            rec_type,
+            text: extension.data.into(),
+        }
+    }
+    /// Decodes this record with the decoder that matches its `rec_type` and
+    /// wraps the result in the corresponding `DecodedRecord` variant.
+    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
+        match self.rec_type {
+            TextRecordType::VariableSets => {
+                DecodedRecord::VariableSets(VariableSetRecord::decode(&self, decoder))
+            }
+            TextRecordType::ProductInfo => {
+                DecodedRecord::ProductInfo(ProductInfoRecord::decode(&self, decoder))
+            }
+            TextRecordType::LongNames => {
+                DecodedRecord::LongNames(LongNamesRecord::decode(&self, decoder))
+            }
+            TextRecordType::VeryLongStrings => {
+                DecodedRecord::VeryLongStrings(VeryLongStringsRecord::decode(&self, decoder))
+            }
+            TextRecordType::FileAttributes => {
+                DecodedRecord::FileAttributes(FileAttributeRecord::decode(&self, decoder))
+            }
+            TextRecordType::VariableAttributes => {
+                DecodedRecord::VariableAttributes(VariableAttributeRecord::decode(&self, decoder))
+            }
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VeryLongString {
+    pub short_name: Identifier,
+    pub length: u16,
+}
+
+impl VeryLongString {
+    /// Parses one `NAME=LENGTH` entry.
+    ///
+    /// Returns `Warning::TBD` if `input` has no `=` or the length does not
+    /// parse, and `Warning::InvalidLongStringName` if the name is not a valid
+    /// identifier.
+    fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Warning> {
+        let Some((short_name, length)) = input.split_once('=') else {
+            return Err(Warning::TBD);
+        };
+        let short_name = decoder
+            .new_identifier(short_name)
+            .map_err(Warning::InvalidLongStringName)?;
+        let length = length.parse().map_err(|_| Warning::TBD)?;
+        Ok(VeryLongString { short_name, length })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VeryLongStringsRecord(Vec<VeryLongString>);
+
+impl VeryLongStringsRecord {
+    /// Decodes the text of `source` and parses it as a list of very long
+    /// string entries.
+    ///
+    /// Entries are separated by NUL bytes; trailing tabs are trimmed from
+    /// each entry and empty entries are skipped.  An entry that fails to
+    /// parse is reported through `decoder.warn` and left out.
+    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
+        let input = decoder.decode(&source.text);
+        let mut very_long_strings = Vec::new();
+        for tuple in input
+            .split('\0')
+            .map(|s| s.trim_end_matches('\t'))
+            .filter(|s| !s.is_empty())
+        {
+            if let Some(vls) = VeryLongString::parse(decoder, tuple).issue_warning(&decoder.warn) {
+                very_long_strings.push(vls)
+            }
+        }
+        VeryLongStringsRecord(very_long_strings)
     }
 }
 
+#[derive(Clone, Debug)]
 pub struct Attribute {
-    pub name: String,
+    pub name: Identifier,
     pub values: Vec<String>,
 }
 
 impl Attribute {
-    fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
+    fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> {
         let Some((name, mut input)) = input.split_once('(') else {
-            return Err(Error::TBD);
+            return Err(Warning::TBD);
         };
+        let name = decoder
+            .new_identifier(name)
+            .map_err(Warning::InvalidAttributeName)?;
         let mut values = Vec::new();
         loop {
             let Some((value, rest)) = input.split_once('\n') else {
-                return Err(Error::TBD);
+                return Err(Warning::TBD);
             };
             if let Some(stripped) = value
                 .strip_prefix('\'')
@@ -1224,39 +2317,36 @@ impl Attribute {
             {
                 values.push(stripped.into());
             } else {
-                warn(Error::TBD);
+                decoder.warn(Warning::TBD);
                 values.push(value.into());
             }
             if let Some(rest) = rest.strip_prefix(')') {
-                return Ok((
-                    Attribute {
-                        name: name.into(),
-                        values,
-                    },
-                    rest,
-                ));
-            }
+                let attribute = Attribute { name, values };
+                return Ok((attribute, rest));
+            };
             input = rest;
         }
     }
 }
 
-pub struct AttributeSet(pub Vec<Attribute>);
+#[derive(Clone, Debug, Default)]
+pub struct AttributeSet(pub HashMap<Identifier, Vec<String>>);
 
 impl AttributeSet {
     fn parse<'a>(
+        decoder: &Decoder,
         mut input: &'a str,
         sentinel: Option<char>,
-        warn: &impl Fn(Error),
-    ) -> Result<(AttributeSet, &'a str), Error> {
-        let mut attributes = Vec::new();
+    ) -> Result<(AttributeSet, &'a str), Warning> {
+        let mut attributes = HashMap::new();
         let rest = loop {
             match input.chars().next() {
                 None => break input,
                 c if c == sentinel => break &input[1..],
                 _ => {
-                    let (attribute, rest) = Attribute::parse(input, &warn)?;
-                    attributes.push(attribute);
+                    let (attribute, rest) = Attribute::parse(decoder, input)?;
+                    // XXX report duplicate name
+                    attributes.insert(attribute.name, attribute.values);
                     input = rest;
                 }
             }
@@ -1265,92 +2355,187 @@ impl AttributeSet {
     }
 }
 
-pub struct FileAttributeRecord(AttributeSet);
+#[derive(Clone, Debug, Default)]
+pub struct FileAttributeRecord(pub AttributeSet);
 
-impl TextRecord for FileAttributeRecord {
-    const NAME: &'static str = "data file attributes";
-    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
-        let (set, rest) = AttributeSet::parse(input, None, &warn)?;
-        if !rest.is_empty() {
-            warn(Error::TBD);
+impl FileAttributeRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
+        let input = decoder.decode(&source.text);
+        match AttributeSet::parse(decoder, &input, None).issue_warning(&decoder.warn) {
+            Some((set, rest)) => {
+                if !rest.is_empty() {
+                    decoder.warn(Warning::TBD);
+                }
+                FileAttributeRecord(set)
+            }
+            None => FileAttributeRecord::default(),
         }
-        Ok(FileAttributeRecord(set))
     }
 }
 
+#[derive(Clone, Debug)]
 pub struct VarAttributeSet {
-    pub long_var_name: String,
+    pub long_var_name: Identifier,
     pub attributes: AttributeSet,
 }
 
 impl VarAttributeSet {
-    fn parse<'a>(
-        input: &'a str,
-        warn: &impl Fn(Error),
-    ) -> Result<(VarAttributeSet, &'a str), Error> {
+    fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(VarAttributeSet, &'a str), Warning> {
         let Some((long_var_name, rest)) = input.split_once(':') else {
-            return Err(Error::TBD);
+            return Err(Warning::TBD);
         };
-        let (attributes, rest) = AttributeSet::parse(rest, Some('/'), warn)?;
-        Ok((
-            VarAttributeSet {
-                long_var_name: long_var_name.into(),
-                attributes,
-            },
-            rest,
-        ))
+        let long_var_name = decoder
+            .new_identifier(long_var_name)
+            .map_err(Warning::InvalidAttributeVariableName)?;
+        let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'))?;
+        let var_attribute = VarAttributeSet {
+            long_var_name,
+            attributes,
+        };
+        Ok((var_attribute, rest))
     }
 }
 
+#[derive(Clone, Debug)]
 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
 
-impl TextRecord for VariableAttributeRecord {
-    const NAME: &'static str = "variable attributes";
-    fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+impl VariableAttributeRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
+        let decoded = decoder.decode(&source.text);
+        let mut input = decoded.as_ref();
         let mut var_attribute_sets = Vec::new();
         while !input.is_empty() {
-            match VarAttributeSet::parse(input, &warn) {
-                Ok((var_attribute, rest)) => {
-                    var_attribute_sets.push(var_attribute);
-                    input = rest;
-                }
-                Err(error) => {
-                    warn(error);
-                    break;
-                }
+            let Some((var_attribute, rest)) =
+                VarAttributeSet::parse(decoder, input).issue_warning(&decoder.warn)
+            else {
+                break;
+            };
+            var_attribute_sets.push(var_attribute);
+            input = rest;
+        }
+        VariableAttributeRecord(var_attribute_sets)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongName {
+    pub short_name: Identifier,
+    pub long_name: Identifier,
+}
+
+impl LongName {
+    fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
+        let Some((short_name, long_name)) = input.split_once('=') else {
+            return Err(Warning::TBD);
+        };
+        let short_name = decoder
+            .new_identifier(short_name)
+            .map_err(Warning::InvalidShortName)?;
+        let long_name = decoder
+            .new_identifier(long_name)
+            .map_err(Warning::InvalidLongName)?;
+        Ok(LongName {
+            short_name,
+            long_name,
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongNamesRecord(Vec<LongName>);
+
+impl LongNamesRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
+        let input = decoder.decode(&source.text);
+        let mut names = Vec::new();
+        for pair in input.split('\t').filter(|s| !s.is_empty()) {
+            if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&decoder.warn) {
+                names.push(long_name);
             }
         }
-        Ok(VariableAttributeRecord(var_attribute_sets))
+        LongNamesRecord(names)
     }
 }
 
-pub struct NumberOfCasesRecord {
-    /// Always observed as 1.
-    pub one: u64,
+#[derive(Clone, Debug)]
+pub struct ProductInfoRecord(pub String);
 
-    /// Number of cases.
-    pub n_cases: u64,
+impl ProductInfoRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
+        Self(decoder.decode(&source.text).into())
+    }
+}
+#[derive(Clone, Debug)]
+pub struct VariableSet {
+    pub name: String,
+    pub vars: Vec<Identifier>,
 }
 
-impl ExtensionRecord for NumberOfCasesRecord {
-    const SIZE: Option<u32> = Some(8);
-    const COUNT: Option<u32> = Some(2);
-    const NAME: &'static str = "extended number of cases record";
+impl VariableSet {
+    fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
+        let (name, input) = input.split_once('=').ok_or(Warning::TBD)?;
+        let mut vars = Vec::new();
+        for var in input.split_ascii_whitespace() {
+            if let Some(identifier) = decoder
+                .new_identifier(var)
+                .map_err(Warning::InvalidVariableSetName)
+                .issue_warning(&decoder.warn)
+            {
+                vars.push(identifier);
+            }
+        }
+        Ok(VariableSet {
+            name: name.into(),
+            vars,
+        })
+    }
+}
 
-    fn parse(ext: &Extension, endian: Endian, _warn: impl Fn(Error)) -> Result<Self, Error> {
-        ext.check_size::<Self>()?;
+#[derive(Clone, Debug)]
+pub struct VariableSetRecord {
+    pub offsets: Range<u64>,
+    pub sets: Vec<VariableSet>,
+}
 
-        let mut input = &ext.data[..];
-        let one = endian.parse(read_bytes(&mut input)?);
-        let n_cases = endian.parse(read_bytes(&mut input)?);
+impl VariableSetRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> VariableSetRecord {
+        let mut sets = Vec::new();
+        let input = decoder.decode(&source.text);
+        for line in input.lines() {
+            if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&decoder.warn) {
+                sets.push(set)
+            }
+        }
+        VariableSetRecord {
+            offsets: source.offsets.clone(),
+            sets,
+        }
+    }
+}
 
-        Ok(NumberOfCasesRecord { one, n_cases })
+trait IssueWarning<T> {
+    fn issue_warning<F>(self, warn: &F) -> Option<T>
+    where
+        F: Fn(Warning);
+}
+impl<T> IssueWarning<T> for Result<T, Warning> {
+    fn issue_warning<F>(self, warn: &F) -> Option<T>
+    where
+        F: Fn(Warning),
+    {
+        match self {
+            Ok(result) => Some(result),
+            Err(error) => {
+                warn(error);
+                None
+            }
+        }
     }
 }
 
+#[derive(Clone, Debug)]
 pub struct Extension {
-    /// Offset from the start of the file to the start of the record.
-    pub offset: u64,
+    pub offsets: Range<u64>,
 
     /// Record subtype.
     pub subtype: u32,
@@ -1365,40 +2550,12 @@ pub struct Extension {
     pub data: Vec<u8>,
 }
 
-/*
-fn extension_record_size_requirements(extension: ExtensionType) -> (u32, u32) {
-    match extension {
-        /* Implemented record types. */
-        ExtensionType::Integer => (4, 8),
-        ExtensionType::Float => (8, 3),
-        ExtensionType::VarSets => (1, 0),
-        ExtensionType::Mrsets => (1, 0),
-        ExtensionType::ProductInfo => (1, 0),
-        ExtensionType::Display => (4, 0),
-        ExtensionType::LongNames => (1, 0),
-        ExtensionType::LongStrings => (1, 0),
-        ExtensionType::Ncases => (8, 2),
-        ExtensionType::FileAttrs => (1, 0),
-        ExtensionType::VarAttrs => (1, 0),
-        ExtensionType::Mrsets2 => (1, 0),
-        ExtensionType::Encoding => (1, 0),
-        ExtensionType::LongLabels => (1, 0),
-        ExtensionType::LongMissing => (1, 0),
-
-        /* Ignored record types. */
-        ExtensionType::Date => (0, 0),
-        ExtensionType::DataEntry => (0, 0),
-        ExtensionType::Dataview => (0, 0),
-    }
-}
- */
-
 impl Extension {
-    fn check_size<E: ExtensionRecord>(&self) -> Result<(), Error> {
+    fn check_size<E: ExtensionRecord>(&self) -> Result<(), Warning> {
         if let Some(expected_size) = E::SIZE {
             if self.size != expected_size {
-                return Err(Error::BadRecordSize {
-                    offset: self.offset,
+                return Err(Warning::BadRecordSize {
+                    offset: self.offsets.start,
                     record: E::NAME.into(),
                     size: self.size,
                     expected_size,
@@ -1407,8 +2564,8 @@ impl Extension {
         }
         if let Some(expected_count) = E::COUNT {
             if self.count != expected_count {
-                return Err(Error::BadRecordCount {
-                    offset: self.offset,
+                return Err(Warning::BadRecordCount {
+                    offset: self.offsets.start,
                     record: E::NAME.into(),
                     count: self.count,
                     expected_count,
@@ -1418,31 +2575,83 @@ impl Extension {
         Ok(())
     }
 
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Extension, Error> {
+    fn read<R: Read + Seek>(
+        r: &mut R,
+        endian: Endian,
+        n_vars: usize,
+        warn: &dyn Fn(Warning),
+    ) -> Result<Option<Record>, Error> {
         let subtype = endian.parse(read_bytes(r)?);
-        let offset = r.stream_position()?;
+        let header_offset = r.stream_position()?;
         let size: u32 = endian.parse(read_bytes(r)?);
         let count = endian.parse(read_bytes(r)?);
         let Some(product) = size.checked_mul(count) else {
             return Err(Error::ExtensionRecordTooLarge {
-                offset,
+                offset: header_offset,
                 subtype,
                 size,
                 count,
             });
         };
-        let offset = r.stream_position()?;
+        let start_offset = r.stream_position()?;
         let data = read_vec(r, product as usize)?;
-        Ok(Extension {
-            offset,
+        let end_offset = start_offset + product as u64;
+        let extension = Extension {
+            offsets: start_offset..end_offset,
             subtype,
             size,
             count,
             data,
-        })
+        };
+        let result = match subtype {
+            IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian),
+            FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian),
+            VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, n_vars, endian, warn),
+            MultipleResponseRecord::SUBTYPE | 19 => {
+                MultipleResponseRecord::parse(&extension, endian)
+            }
+            LongStringValueLabelRecord::SUBTYPE => {
+                LongStringValueLabelRecord::parse(&extension, endian)
+            }
+            EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian),
+            NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian),
+            5 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::VariableSets,
+            ))),
+            10 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::ProductInfo,
+            ))),
+            13 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::LongNames,
+            ))),
+            14 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::VeryLongStrings,
+            ))),
+            17 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::FileAttributes,
+            ))),
+            18 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::VariableAttributes,
+            ))),
+            _ => Ok(Record::OtherExtension(extension)),
+        };
+        match result {
+            Ok(result) => Ok(Some(result)),
+            Err(error) => {
+                warn(error);
+                Ok(None)
+            }
+        }
     }
 }
 
+#[derive(Clone, Debug)]
 pub struct ZHeader {
     /// File offset to the start of the record.
     pub offset: u64,
@@ -1473,6 +2682,7 @@ impl ZHeader {
     }
 }
 
+#[derive(Clone, Debug)]
 pub struct ZTrailer {
     /// File offset to the start of the record.
     pub offset: u64,
@@ -1491,6 +2701,7 @@ pub struct ZTrailer {
     pub blocks: Vec<ZBlock>,
 }
 
+#[derive(Clone, Debug)]
 pub struct ZBlock {
     /// Offset of block of data if simple compression were used.
     pub uncompressed_ofs: u64,
@@ -1581,7 +2792,95 @@ fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
     Ok(vec)
 }
 
-fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<Vec<u8>, IoError> {
+fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<RawString, IoError> {
     let length: u32 = endian.parse(read_bytes(r)?);
-    read_vec(r, length as usize)
+    Ok(read_vec(r, length as usize)?.into())
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabels<N, S>
+where
+    S: Debug,
+{
+    pub var_name: N,
+    pub width: u32,
+
+    /// `(value, label)` pairs, where each value is `width` bytes.
+    pub labels: Vec<(S, S)>,
+}
+
+impl LongStringValueLabels<RawString, RawString> {
+    fn decode(
+        &self,
+        decoder: &Decoder,
+    ) -> Result<LongStringValueLabels<Identifier, String>, Warning> {
+        let var_name = decoder.decode(&self.var_name);
+        let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
+            .map_err(Warning::InvalidLongStringValueLabelName)?;
+
+        let mut labels = Vec::with_capacity(self.labels.len());
+        for (value, label) in self.labels.iter() {
+            let value = decoder.decode_exact_length(&value.0).to_string();
+            let label = decoder.decode(label).to_string();
+            labels.push((value, label));
+        }
+
+        Ok(LongStringValueLabels {
+            var_name,
+            width: self.width,
+            labels,
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabelRecord<N, S>(pub Vec<LongStringValueLabels<N, S>>)
+where
+    N: Debug,
+    S: Debug;
+
+impl ExtensionRecord for LongStringValueLabelRecord<RawString, RawString> {
+    const SUBTYPE: u32 = 21;
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "long string value labels record";
+
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+        ext.check_size::<Self>()?;
+
+        let mut input = &ext.data[..];
+        let mut label_set = Vec::new();
+        while !input.is_empty() {
+            let var_name = read_string(&mut input, endian)?;
+            let width: u32 = endian.parse(read_bytes(&mut input)?);
+            let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
+            let mut labels = Vec::new();
+            for _ in 0..n_labels {
+                let value = read_string(&mut input, endian)?;
+                let label = read_string(&mut input, endian)?;
+                labels.push((value, label));
+            }
+            label_set.push(LongStringValueLabels {
+                var_name,
+                width,
+                labels,
+            })
+        }
+        Ok(Record::LongStringValueLabels(LongStringValueLabelRecord(
+            label_set,
+        )))
+    }
+}
+
+impl LongStringValueLabelRecord<RawString, RawString> {
+    fn decode(self, decoder: &Decoder) -> LongStringValueLabelRecord<Identifier, String> {
+        let mut labels = Vec::with_capacity(self.0.len());
+        for label in &self.0 {
+            match label.decode(decoder) {
+                Ok(set) => labels.push(set),
+                Err(error) => decoder.warn(error),
+            }
+        }
+        LongStringValueLabelRecord(labels)
+    }
 }