Move sources related to sys files into submodule.
authorBen Pfaff <blp@cs.stanford.edu>
Thu, 1 May 2025 22:49:31 +0000 (15:49 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Thu, 1 May 2025 22:49:31 +0000 (15:49 -0700)
21 files changed:
rust/pspp/src/cooked.rs [deleted file]
rust/pspp/src/dictionary.rs
rust/pspp/src/encoding.rs [deleted file]
rust/pspp/src/format/mod.rs
rust/pspp/src/format/parse.rs
rust/pspp/src/lib.rs
rust/pspp/src/main.rs
rust/pspp/src/output/pivot/mod.rs
rust/pspp/src/raw.rs [deleted file]
rust/pspp/src/sack.rs [deleted file]
rust/pspp/src/settings.rs
rust/pspp/src/sys/cooked.rs [new file with mode: 0644]
rust/pspp/src/sys/encoding.rs [new file with mode: 0644]
rust/pspp/src/sys/mod.rs [new file with mode: 0644]
rust/pspp/src/sys/raw.rs [new file with mode: 0644]
rust/pspp/src/sys/sack.rs [new file with mode: 0644]
rust/pspp/tests/sack.rs
src/language/lexer/macro.c
src/language/lexer/scan.c
src/output/cairo-pager.c
src/output/table-provider.h

diff --git a/rust/pspp/src/cooked.rs b/rust/pspp/src/cooked.rs
deleted file mode 100644 (file)
index 8de34d5..0000000
+++ /dev/null
@@ -1,904 +0,0 @@
-use core::str;
-use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc};
-
-use crate::{
-    dictionary::{
-        Dictionary, InvalidRole, MultipleResponseSet, MultipleResponseType, Value, VarWidth,
-        Variable, VariableSet,
-    },
-    encoding::Error as EncodingError,
-    endian::Endian,
-    format::{Error as FormatError, Format, UncheckedFormat},
-    identifier::{ByIdentifier, Error as IdError, Identifier},
-    raw::{
-        self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension, FileAttributeRecord,
-        FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, LongNamesRecord,
-        LongStringMissingValueRecord, LongStringValueLabelRecord, MissingValues,
-        MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord, RawStrArray, RawWidth,
-        ValueLabel, ValueLabelRecord, VarDisplayRecord, VariableAttributeRecord, VariableRecord,
-        VariableSetRecord, VeryLongStringsRecord, ZHeader, ZTrailer,
-    },
-};
-use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
-use encoding_rs::Encoding;
-use indexmap::set::MutableValues;
-use thiserror::Error as ThisError;
-
-pub use crate::raw::{CategoryLabels, Compression};
-
-#[derive(ThisError, Debug)]
-pub enum Error {
-    #[error("Missing header record")]
-    MissingHeaderRecord,
-
-    // XXX this is an internal error
-    #[error("More than one file header record")]
-    DuplicateHeaderRecord,
-
-    #[error("{0}")]
-    EncodingError(EncodingError),
-
-    #[error("Using default encoding {0}.")]
-    UsingDefaultEncoding(String),
-
-    #[error("Variable record from offset {:x} to {:x} specifies width {width} not in valid range [-1,255).", offsets.start, offsets.end)]
-    InvalidVariableWidth { offsets: Range<u64>, width: i32 },
-
-    #[error("This file has corrupted metadata written by a buggy version of PSPP.  To ensure that other software can read it correctly, save a new copy of the file.")]
-    InvalidLongMissingValueFormat,
-
-    #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format.  Using 01 Jan 1970.")]
-    InvalidCreationDate { creation_date: String },
-
-    #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format.  Using midnight.")]
-    InvalidCreationTime { creation_time: String },
-
-    #[error("{id_error}  Renaming variable to {new_name}.")]
-    InvalidVariableName {
-        id_error: IdError,
-        new_name: Identifier,
-    },
-
-    #[error(
-        "Substituting {new_spec} for invalid print format on variable {variable}.  {format_error}"
-    )]
-    InvalidPrintFormat {
-        new_spec: Format,
-        variable: Identifier,
-        format_error: FormatError,
-    },
-
-    #[error(
-        "Substituting {new_spec} for invalid write format on variable {variable}.  {format_error}"
-    )]
-    InvalidWriteFormat {
-        new_spec: Format,
-        variable: Identifier,
-        format_error: FormatError,
-    },
-
-    #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
-    DuplicateVariableName {
-        duplicate_name: Identifier,
-        new_name: Identifier,
-    },
-
-    #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
-    InvalidDictIndex { dict_index: usize, max_index: usize },
-
-    #[error("Dictionary index {0} refers to a long string continuation.")]
-    DictIndexIsContinuation(usize),
-
-    #[error("At offset {offset:#x}, one or more variable indexes for value labels referred to long string continuation records: {indexes:?}")]
-    LongStringContinuationIndexes { offset: u64, indexes: Vec<u32> },
-
-    #[error(
-        "At offsets {:#x}...{:#x}, record types 3 and 4 may not add value labels to one or more long string variables: {variables:?}", .offsets.start, .offsets.end
-    )]
-    InvalidLongStringValueLabels {
-        offsets: Range<u64>,
-        variables: Vec<Identifier>,
-    },
-
-    #[error("Variables associated with value label are not all of identical type.  Variable {numeric_var} is numeric, but variable {string_var} is string.")]
-    ValueLabelsDifferentTypes {
-        numeric_var: Identifier,
-        string_var: Identifier,
-    },
-
-    #[error("Invalid multiple response set name.  {0}")]
-    InvalidMrSetName(IdError),
-
-    #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
-    UnknownMrSetVariable {
-        mr_set: Identifier,
-        short_name: Identifier,
-    },
-
-    #[error("Multiple response set {0} has no variables.")]
-    EmptyMrSet(Identifier),
-
-    #[error("Multiple response set {0} has only one variable.")]
-    OneVarMrSet(Identifier),
-
-    #[error("Multiple response set {0} contains both string and numeric variables.")]
-    MixedMrSet(Identifier),
-
-    #[error(
-        "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
-    )]
-    InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
-
-    #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
-    TooWideMDGroupCountedValue {
-        mr_set: Identifier,
-        value: String,
-        width: usize,
-        max_width: u16,
-    },
-
-    #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
-    InvalidLongValueLabelWidth {
-        name: Identifier,
-        width: u32,
-        min_width: u16,
-        max_width: u16,
-    },
-
-    #[error("Invalid attribute name.  {0}")]
-    InvalidAttributeName(IdError),
-
-    #[error("Invalid short name in long variable name record.  {0}")]
-    InvalidShortName(IdError),
-
-    #[error("Invalid name in long variable name record.  {0}")]
-    InvalidLongName(IdError),
-
-    #[error("Invalid variable name in very long string record.  {0}")]
-    InvalidLongStringName(IdError),
-
-    #[error("Invalid variable name in long string value label record.  {0}")]
-    InvalidLongStringValueLabelName(IdError),
-
-    #[error("Invalid variable name in attribute record.  {0}")]
-    InvalidAttributeVariableName(IdError),
-
-    // XXX This is risky because `text` might be arbitarily long.
-    #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
-    MalformedString { encoding: String, text: String },
-
-    #[error("Details TBD")]
-    TBD,
-}
-
-#[derive(Clone, Debug)]
-pub struct Headers {
-    pub header: HeaderRecord<String>,
-    pub variable: Vec<VariableRecord<String>>,
-    pub value_label: Vec<ValueLabelRecord<RawStrArray<8>, String>>,
-    pub document: Vec<DocumentRecord<String>>,
-    pub integer_info: Option<IntegerInfoRecord>,
-    pub float_info: Option<FloatInfoRecord>,
-    pub var_display: Option<VarDisplayRecord>,
-    pub multiple_response: Vec<MultipleResponseRecord<Identifier, String>>,
-    pub long_string_value_labels: Vec<LongStringValueLabelRecord<Identifier, String>>,
-    pub long_string_missing_values: Vec<LongStringMissingValueRecord<Identifier>>,
-    pub encoding: Option<EncodingRecord>,
-    pub number_of_cases: Option<NumberOfCasesRecord>,
-    pub variable_sets: Vec<VariableSetRecord>,
-    pub product_info: Option<ProductInfoRecord>,
-    pub long_names: Vec<LongNamesRecord>,
-    pub very_long_strings: Vec<VeryLongStringsRecord>,
-    pub file_attributes: Vec<FileAttributeRecord>,
-    pub variable_attributes: Vec<VariableAttributeRecord>,
-    pub other_extension: Vec<Extension>,
-    pub end_of_headers: Option<u32>,
-    pub z_header: Option<ZHeader>,
-    pub z_trailer: Option<ZTrailer>,
-    pub cases: Option<Rc<RefCell<Cases>>>,
-}
-
-fn take_first<T, F>(mut vec: Vec<T>, more_than_one: F) -> Option<T>
-where
-    F: FnOnce(),
-{
-    if vec.len() > 1 {
-        more_than_one();
-    }
-    vec.drain(..).next()
-}
-
-impl Headers {
-    pub fn new(headers: Vec<raw::DecodedRecord>, warn: &impl Fn(Error)) -> Result<Headers, Error> {
-        let mut file_header = Vec::new();
-        let mut variable = Vec::new();
-        let mut value_label = Vec::new();
-        let mut document = Vec::new();
-        let mut integer_info = Vec::new();
-        let mut float_info = Vec::new();
-        let mut var_display = Vec::new();
-        let mut multiple_response = Vec::new();
-        let mut long_string_value_labels = Vec::new();
-        let mut long_string_missing_values = Vec::new();
-        let mut encoding = Vec::new();
-        let mut number_of_cases = Vec::new();
-        let mut variable_sets = Vec::new();
-        let mut product_info = Vec::new();
-        let mut long_names = Vec::new();
-        let mut very_long_strings = Vec::new();
-        let mut file_attributes = Vec::new();
-        let mut variable_attributes = Vec::new();
-        let mut other_extension = Vec::new();
-        let mut end_of_headers = Vec::new();
-        let mut z_header = Vec::new();
-        let mut z_trailer = Vec::new();
-        let mut cases = Vec::new();
-
-        for header in headers {
-            match header {
-                DecodedRecord::Header(record) => {
-                    file_header.push(record);
-                }
-                DecodedRecord::Variable(record) => {
-                    variable.push(record);
-                }
-                DecodedRecord::ValueLabel(record) => {
-                    value_label.push(record);
-                }
-                DecodedRecord::Document(record) => {
-                    document.push(record);
-                }
-                DecodedRecord::IntegerInfo(record) => {
-                    integer_info.push(record);
-                }
-                DecodedRecord::FloatInfo(record) => {
-                    float_info.push(record);
-                }
-                DecodedRecord::VariableSets(record) => {
-                    variable_sets.push(record);
-                }
-                DecodedRecord::VarDisplay(record) => {
-                    var_display.push(record);
-                }
-                DecodedRecord::MultipleResponse(record) => {
-                    multiple_response.push(record);
-                }
-                DecodedRecord::LongStringValueLabels(record) => {
-                    long_string_value_labels.push(record)
-                }
-                DecodedRecord::LongStringMissingValues(record) => {
-                    long_string_missing_values.push(record);
-                }
-                DecodedRecord::Encoding(record) => {
-                    encoding.push(record);
-                }
-                DecodedRecord::NumberOfCases(record) => {
-                    number_of_cases.push(record);
-                }
-                DecodedRecord::ProductInfo(record) => {
-                    product_info.push(record);
-                }
-                DecodedRecord::LongNames(record) => {
-                    long_names.push(record);
-                }
-                DecodedRecord::VeryLongStrings(record) => {
-                    very_long_strings.push(record);
-                }
-                DecodedRecord::FileAttributes(record) => {
-                    file_attributes.push(record);
-                }
-                DecodedRecord::VariableAttributes(record) => {
-                    variable_attributes.push(record);
-                }
-                DecodedRecord::OtherExtension(record) => {
-                    other_extension.push(record);
-                }
-                DecodedRecord::EndOfHeaders(record) => {
-                    end_of_headers.push(record);
-                }
-                DecodedRecord::ZHeader(record) => {
-                    z_header.push(record);
-                }
-                DecodedRecord::ZTrailer(record) => {
-                    z_trailer.push(record);
-                }
-                DecodedRecord::Cases(record) => {
-                    cases.push(record);
-                }
-            }
-        }
-
-        let Some(file_header) = take_first(file_header, || warn(Error::DuplicateHeaderRecord))
-        else {
-            return Err(Error::MissingHeaderRecord);
-        };
-
-        Ok(Headers {
-            header: file_header,
-            variable,
-            value_label,
-            document,
-            integer_info: take_first(integer_info, || warn(Error::TBD)),
-            float_info: take_first(float_info, || warn(Error::TBD)),
-            var_display: take_first(var_display, || warn(Error::TBD)),
-            multiple_response,
-            long_string_value_labels,
-            long_string_missing_values,
-            encoding: take_first(encoding, || warn(Error::TBD)),
-            number_of_cases: take_first(number_of_cases, || warn(Error::TBD)),
-            variable_sets,
-            product_info: take_first(product_info, || warn(Error::TBD)),
-            long_names,
-            very_long_strings,
-            file_attributes,
-            variable_attributes,
-            other_extension,
-            end_of_headers: take_first(end_of_headers, || warn(Error::TBD)),
-            z_header: take_first(z_header, || warn(Error::TBD)),
-            z_trailer: take_first(z_trailer, || warn(Error::TBD)),
-            cases: take_first(cases, || warn(Error::TBD)),
-        })
-    }
-}
-
-#[derive(Debug)]
-pub struct Metadata {
-    pub creation: NaiveDateTime,
-    pub endian: Endian,
-    pub compression: Option<Compression>,
-    pub n_cases: Option<u64>,
-    pub product: String,
-    pub product_ext: Option<String>,
-    pub version: Option<(i32, i32, i32)>,
-}
-
-impl Metadata {
-    fn decode(headers: &Headers, warn: impl Fn(Error)) -> Self {
-        let header = &headers.header;
-        let creation_date = NaiveDate::parse_from_str(&header.creation_date, "%e %b %Y")
-            .unwrap_or_else(|_| {
-                warn(Error::InvalidCreationDate {
-                    creation_date: header.creation_date.to_string(),
-                });
-                Default::default()
-            });
-        let creation_time = NaiveTime::parse_from_str(&header.creation_time, "%H:%M:%S")
-            .unwrap_or_else(|_| {
-                warn(Error::InvalidCreationTime {
-                    creation_time: header.creation_time.to_string(),
-                });
-                Default::default()
-            });
-        let creation = NaiveDateTime::new(creation_date, creation_time);
-
-        let product = header
-            .eye_catcher
-            .trim_start_matches("@(#) SPSS DATA FILE")
-            .trim_end()
-            .to_string();
-
-        Self {
-            creation,
-            endian: header.endian,
-            compression: header.compression,
-            n_cases: header.n_cases.map(|n| n as u64),
-            product,
-            product_ext: headers.product_info.as_ref().map(|pe| fix_line_ends(&pe.0)),
-            version: headers.integer_info.as_ref().map(|ii| ii.version),
-        }
-    }
-}
-
-struct Decoder {
-    pub encoding: &'static Encoding,
-    n_generated_names: usize,
-}
-
-impl Decoder {
-    fn generate_name(&mut self, dictionary: &Dictionary) -> Identifier {
-        loop {
-            self.n_generated_names += 1;
-            let name = Identifier::from_encoding(
-                format!("VAR{:03}", self.n_generated_names),
-                self.encoding,
-            )
-            .unwrap();
-            if !dictionary.variables.contains(&name.0) {
-                return name;
-            }
-            assert!(self.n_generated_names < usize::MAX);
-        }
-    }
-}
-
-pub fn decode(
-    mut headers: Headers,
-    encoding: &'static Encoding,
-    warn: impl Fn(Error),
-) -> Result<(Dictionary, Metadata), Error> {
-    let mut dictionary = Dictionary::new(encoding);
-
-    let file_label = fix_line_ends(headers.header.file_label.trim_end_matches(' '));
-    if !file_label.is_empty() {
-        dictionary.file_label = Some(file_label);
-    }
-
-    for mut attributes in headers.file_attributes.drain(..) {
-        dictionary.attributes.append(&mut attributes.0)
-    }
-
-    // Concatenate all the document records (really there should only be one)
-    // and trim off the trailing spaces that pad them to 80 bytes.
-    dictionary.documents = headers
-        .document
-        .drain(..)
-        .flat_map(|record| record.lines)
-        .map(trim_end_spaces)
-        .collect();
-
-    // XXX warn for weird integer format
-    // XXX warn for weird floating-point format, etc.
-
-    let mut decoder = Decoder {
-        encoding,
-        n_generated_names: 0,
-    };
-
-    let mut var_index_map = HashMap::new();
-    let mut value_index = 0;
-    for (index, input) in headers
-        .variable
-        .iter()
-        .enumerate()
-        .filter(|(_index, record)| record.width != RawWidth::Continuation)
-    {
-        let name = trim_end_spaces(input.name.to_string());
-        let name = match Identifier::from_encoding(name, encoding) {
-            Ok(name) => {
-                if !dictionary.variables.contains(&name.0) {
-                    name
-                } else {
-                    let new_name = decoder.generate_name(&dictionary);
-                    warn(Error::DuplicateVariableName {
-                        duplicate_name: name.clone(),
-                        new_name: new_name.clone(),
-                    });
-                    new_name
-                }
-            }
-            Err(id_error) => {
-                let new_name = decoder.generate_name(&dictionary);
-                warn(Error::InvalidVariableName {
-                    id_error,
-                    new_name: new_name.clone(),
-                });
-                new_name
-            }
-        };
-        let mut variable = Variable::new(name.clone(), VarWidth::try_from(input.width).unwrap());
-
-        // Set the short name the same as the long name (even if we renamed it).
-        variable.short_names = vec![name];
-
-        variable.label = input.label.clone();
-
-        variable.missing_values = input.missing_values.clone();
-
-        variable.print_format = decode_format(
-            input.print_format,
-            variable.width,
-            |new_spec, format_error| {
-                warn(Error::InvalidPrintFormat {
-                    new_spec,
-                    variable: variable.name.clone(),
-                    format_error,
-                })
-            },
-        );
-        variable.write_format = decode_format(
-            input.write_format,
-            variable.width,
-            |new_spec, format_error| {
-                warn(Error::InvalidWriteFormat {
-                    new_spec,
-                    variable: variable.name.clone(),
-                    format_error,
-                })
-            },
-        );
-
-        // Check for long string continuation records.
-        let n_values = input.width.n_values().unwrap();
-        for offset in 1..n_values {
-            if headers
-                .variable
-                .get(index + offset)
-                .is_none_or(|record| record.width != RawWidth::Continuation)
-            {
-                warn(Error::TBD);
-                break;
-            }
-        }
-
-        let dict_index = dictionary.add_var(variable).unwrap();
-        assert_eq!(var_index_map.insert(value_index, dict_index), None);
-        value_index += n_values;
-    }
-
-    if let Some(weight_index) = headers.header.weight_index {
-        if let Some(dict_index) = var_index_map.get(&(weight_index as usize - 1)) {
-            let variable = &dictionary.variables[*dict_index];
-            if variable.is_numeric() {
-                dictionary.weight = Some(*dict_index);
-            } else {
-                warn(Error::TBD);
-            }
-        } else {
-            warn(Error::TBD);
-        }
-    }
-
-    for record in headers.value_label.drain(..) {
-        let mut dict_indexes = Vec::with_capacity(record.dict_indexes.len());
-        let mut long_string_variables = Vec::new();
-        for value_index in record.dict_indexes.iter() {
-            let Some(dict_index) = var_index_map.get(&(*value_index as usize - 1)) else {
-                unreachable!()
-            };
-            let variable = &dictionary.variables[*dict_index];
-            if variable.width.is_long_string() {
-                long_string_variables.push(variable.name.clone());
-            } else {
-                dict_indexes.push(*dict_index);
-            }
-        }
-        if !long_string_variables.is_empty() {
-            warn(Error::InvalidLongStringValueLabels {
-                offsets: record.offsets.clone(),
-                variables: long_string_variables,
-            });
-        }
-
-        for dict_index in dict_indexes {
-            let variable = dictionary.variables.get_index_mut2(dict_index).unwrap();
-            for ValueLabel { value, label } in record.labels.iter().cloned() {
-                let value = value.decode(variable.width);
-                variable.value_labels.insert(value, label);
-            }
-        }
-    }
-
-    if let Some(display) = &headers.var_display {
-        for (index, display) in display.0.iter().enumerate() {
-            if let Some(variable) = dictionary.variables.get_index_mut2(index) {
-                if let Some(width) = display.width {
-                    variable.display_width = width;
-                }
-                if let Some(alignment) = display.alignment {
-                    variable.alignment = alignment;
-                }
-                if let Some(measure) = display.measure {
-                    variable.measure = Some(measure);
-                }
-            } else {
-                warn(Error::TBD);
-            }
-        }
-    }
-
-    for record in headers
-        .multiple_response
-        .iter()
-        .flat_map(|record| record.0.iter())
-    {
-        match MultipleResponseSet::decode(&dictionary, record, &warn) {
-            Ok(mrset) => {
-                dictionary.mrsets.insert(ByIdentifier::new(mrset));
-            }
-            Err(error) => warn(error),
-        }
-    }
-
-    'outer: for record in headers
-        .very_long_strings
-        .drain(..)
-        .flat_map(|record| record.0.into_iter())
-    {
-        let Some(index) = dictionary.variables.get_index_of(&record.short_name.0) else {
-            warn(Error::TBD);
-            continue;
-        };
-        let width = VarWidth::String(record.length);
-        let n_segments = width.n_segments();
-        if n_segments == 1 {
-            warn(Error::TBD);
-            continue;
-        }
-        if index + n_segments > dictionary.variables.len() {
-            warn(Error::TBD);
-            continue;
-        }
-        let mut short_names = Vec::with_capacity(n_segments);
-        for i in 0..n_segments {
-            let alloc_width = width.segment_alloc_width(i);
-            let segment = &dictionary.variables[index + i];
-            short_names.push(segment.short_names[0].clone());
-            let segment_width = segment.width.as_string_width().unwrap_or(0);
-            if segment_width.next_multiple_of(8) != alloc_width.next_multiple_of(8) {
-                warn(Error::TBD);
-                continue 'outer;
-            }
-        }
-        dictionary.delete_vars(index + 1..index + n_segments);
-        let variable = dictionary.variables.get_index_mut2(index).unwrap();
-        variable.short_names = short_names;
-        variable.width = width;
-    }
-
-    if headers.long_names.is_empty() {
-        // There are no long variable names.  Use the short variable names,
-        // converted to lowercase, as the long variable names.
-        for index in 0..dictionary.variables.len() {
-            let lower = dictionary.variables[index].name.0.as_ref().to_lowercase();
-            if let Ok(new_name) = Identifier::from_encoding(lower, dictionary.encoding) {
-                dictionary.try_rename_var(index, new_name);
-            }
-        }
-    } else {
-        // Rename each of the variables, one by one.  (In a correctly
-        // constructed system file, this cannot create any intermediate
-        // duplicate variable names, because all of the new variable names are
-        // longer than any of the old variable names and thus there cannot be
-        // any overlaps.)
-        for renaming in headers
-            .long_names
-            .iter()
-            .flat_map(|record| record.0.iter().cloned())
-        {
-            let LongName {
-                short_name,
-                long_name,
-            } = renaming;
-            if let Some(index) = dictionary.variables.get_index_of(&short_name.0) {
-                dictionary.try_rename_var(index, long_name);
-                dictionary
-                    .variables
-                    .get_index_mut2(index)
-                    .unwrap()
-                    .short_names = vec![short_name];
-            } else {
-                warn(Error::TBD);
-            }
-        }
-    }
-
-    for mut attr_set in headers
-        .variable_attributes
-        .drain(..)
-        .flat_map(|record| record.0.into_iter())
-    {
-        if let Some((_, variable)) = dictionary
-            .variables
-            .get_full_mut2(&attr_set.long_var_name.0)
-        {
-            variable.attributes.append(&mut attr_set.attributes);
-        } else {
-            warn(Error::TBD);
-        }
-    }
-
-    // Assign variable roles.
-    for index in 0..dictionary.variables.len() {
-        let variable = dictionary.variables.get_index_mut2(index).unwrap();
-        match variable.attributes.role() {
-            Ok(role) => variable.role = role,
-            Err(InvalidRole) => warn(Error::TBD),
-        }
-    }
-
-    // Long string value labels.
-    for record in headers
-        .long_string_value_labels
-        .drain(..)
-        .flat_map(|record| record.0.into_iter())
-    {
-        let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
-            warn(Error::TBD);
-            continue;
-        };
-        let Some(width) = variable.width.as_string_width() else {
-            warn(Error::TBD);
-            continue;
-        };
-        for (mut value, label) in record.labels.into_iter() {
-            // XXX warn about too-long value?
-            value.0.resize(width, b' ');
-            // XXX warn abouat duplicate value labels?
-            variable.value_labels.insert(Value::String(value), label);
-        }
-    }
-
-    let mut value = Vec::new();
-    for record in headers
-        .long_string_missing_values
-        .drain(..)
-        .flat_map(|record| record.0.into_iter())
-    {
-        let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
-            warn(Error::TBD);
-            continue;
-        };
-        let values = record
-            .missing_values
-            .into_iter()
-            .map(|v| {
-                value.clear();
-                value.extend_from_slice(v.0.as_slice());
-                value.resize(variable.width.as_string_width().unwrap(), b' ');
-                Value::String(Box::from(value.as_slice()))
-            })
-            .collect::<Vec<_>>();
-        variable.missing_values = MissingValues {
-            values,
-            range: None,
-        };
-    }
-
-    for record in headers
-        .variable_sets
-        .drain(..)
-        .flat_map(|record| record.sets.into_iter())
-    {
-        let mut variables = Vec::with_capacity(record.variable_names.len());
-        for variable_name in record.variable_names {
-            let Some((dict_index, _)) = dictionary.variables.get_full_mut2(&variable_name.0) else {
-                warn(Error::TBD);
-                continue;
-            };
-            variables.push(dict_index);
-        }
-        if !variables.is_empty() {
-            let variable_set = VariableSet {
-                name: record.name,
-                variables,
-            };
-            dictionary
-                .variable_sets
-                .insert(ByIdentifier::new(variable_set));
-        }
-    }
-
-    let metadata = Metadata::decode(&headers, warn);
-    Ok((dictionary, metadata))
-}
-
-impl MultipleResponseSet {
-    fn decode(
-        dictionary: &Dictionary,
-        input: &raw::MultipleResponseSet<Identifier, String>,
-        warn: &impl Fn(Error),
-    ) -> Result<Self, Error> {
-        let mr_set_name = input.name.clone();
-        let mut variables = Vec::with_capacity(input.short_names.len());
-        for short_name in input.short_names.iter() {
-            let Some(dict_index) = dictionary.variables.get_index_of(&short_name.0) else {
-                warn(Error::UnknownMrSetVariable {
-                    mr_set: mr_set_name.clone(),
-                    short_name: short_name.clone(),
-                });
-                continue;
-            };
-            variables.push(dict_index);
-        }
-
-        match variables.len() {
-            0 => return Err(Error::EmptyMrSet(mr_set_name)),
-            1 => return Err(Error::OneVarMrSet(mr_set_name)),
-            _ => (),
-        }
-
-        let Some((Some(min_width), Some(max_width))) = variables
-            .iter()
-            .copied()
-            .map(|dict_index| dictionary.variables[dict_index].width)
-            .map(|w| (Some(w), Some(w)))
-            .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
-        else {
-            return Err(Error::MixedMrSet(mr_set_name));
-        };
-
-        let mr_type = MultipleResponseType::decode(&mr_set_name, &input.mr_type, min_width)?;
-
-        Ok(MultipleResponseSet {
-            name: mr_set_name,
-            width: min_width..=max_width,
-            label: input.label.to_string(),
-            mr_type,
-            variables,
-        })
-    }
-}
-
-fn trim_end_spaces(mut s: String) -> String {
-    s.truncate(s.trim_end_matches(' ').len());
-    s
-}
-
-/// Returns a copy of `s` in which all lone CR and CR LF pairs have been
-/// replaced by LF.
-///
-/// (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system
-/// files that use CR-only line ends in the file label and extra product info.)
-fn fix_line_ends(s: &str) -> String {
-    let mut out = String::with_capacity(s.len());
-    let mut s = s.chars().peekable();
-    while let Some(c) = s.next() {
-        match c {
-            '\r' => {
-                s.next_if_eq(&'\n');
-                out.push('\n')
-            }
-            c => out.push(c),
-        }
-    }
-    out
-}
-
-fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Format, FormatError)) -> Format {
-    UncheckedFormat::try_from(raw)
-        .and_then(Format::try_from)
-        .and_then(|x| x.check_width_compatibility(width))
-        .unwrap_or_else(|error| {
-            let new_format = Format::default_for_width(width);
-            warn(new_format, error);
-            new_format
-        })
-}
-
-impl MultipleResponseType {
-    fn decode(
-        mr_set: &Identifier,
-        input: &raw::MultipleResponseType,
-        min_width: VarWidth,
-    ) -> Result<Self, Error> {
-        match input {
-            raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
-                let value = match min_width {
-                    VarWidth::Numeric => {
-                        let string = String::from_utf8_lossy(&value.0);
-                        let number: f64 = string.trim().parse().map_err(|_| {
-                            Error::InvalidMDGroupCountedValue {
-                                mr_set: mr_set.clone(),
-                                number: string.into(),
-                            }
-                        })?;
-                        Value::Number(Some(number))
-                    }
-                    VarWidth::String(max_width) => {
-                        let mut value = value.0.as_slice();
-                        while value.ends_with(b" ") {
-                            value = &value[..value.len() - 1];
-                        }
-                        let width = value.len();
-                        if width > max_width as usize {
-                            return Err(Error::TooWideMDGroupCountedValue {
-                                mr_set: mr_set.clone(),
-                                value: String::from_utf8_lossy(value).into(),
-                                width,
-                                max_width,
-                            });
-                        };
-                        Value::String(value.into())
-                    }
-                };
-                Ok(MultipleResponseType::MultipleDichotomy {
-                    value,
-                    labels: *labels,
-                })
-            }
-            raw::MultipleResponseType::MultipleCategory => {
-                Ok(MultipleResponseType::MultipleCategory)
-            }
-        }
-    }
-}
index d0941be0612066e9c75a8c70b0235fdec452c84e..6a0a1843baad19e669c3c002bcbfea8bc068cf8b 100644 (file)
@@ -18,7 +18,7 @@ use unicase::UniCase;
 use crate::{
     format::Format,
     identifier::{ByIdentifier, HasIdentifier, Identifier},
-    raw::{Alignment, CategoryLabels, Measure, MissingValues, RawString, VarType},
+    sys::raw::{Alignment, CategoryLabels, Measure, MissingValues, RawString, VarType},
 };
 
 /// An index within [Dictionary::variables].
diff --git a/rust/pspp/src/encoding.rs b/rust/pspp/src/encoding.rs
deleted file mode 100644 (file)
index c408bf5..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-use crate::locale_charset::locale_charset;
-use encoding_rs::{Encoding, UTF_8};
-
-include!(concat!(env!("OUT_DIR"), "/encodings.rs"));
-
-pub fn codepage_from_encoding(encoding: &str) -> Option<u32> {
-    CODEPAGE_NAME_TO_NUMBER
-        .get(encoding.to_ascii_lowercase().as_str())
-        .copied()
-}
-
-use thiserror::Error as ThisError;
-
-/// Reasons why [get_encoding] could not choose a character encoding.
-#[derive(ThisError, Debug)]
-pub enum Error {
-    /// No encoding name was given and the character code was absent, 2, or 3.
-    #[error("This system file does not indicate its own character encoding.  For best results, specify an encoding explicitly.  Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")]
-    NoEncoding,
-
-    /// The character code has no entry in `CODEPAGE_NUMBER_TO_NAME`.
-    #[error("This system file encodes text strings with unknown code page {0}.")]
-    UnknownCodepage(i32),
-
-    /// `Encoding::for_label` did not recognize the encoding label.
-    #[error("This system file encodes text strings with unknown encoding {0}.")]
-    UnknownEncoding(String),
-
-    /// The character code was 1.
-    #[error("This system file is encoded in EBCDIC, which is not supported.")]
-    Ebcdic,
-}
-
-pub fn default_encoding() -> &'static Encoding {
-    lazy_static! {
-        static ref DEFAULT_ENCODING: &'static Encoding =
-            Encoding::for_label(locale_charset().as_bytes()).unwrap_or(UTF_8);
-    }
-    &DEFAULT_ENCODING
-}
-
-pub fn get_encoding(
-    encoding: Option<&str>,
-    character_code: Option<i32>,
-) -> Result<&'static Encoding, Error> {
-    let label = if let Some(encoding) = encoding {
-        encoding
-    } else if let Some(codepage) = character_code {
-        match codepage {
-            1 => return Err(Error::Ebcdic),
-            2 | 3 => {
-                // These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
-                // respectively.  However, many files have character code 2 but
-                // data which are clearly not ASCII.  Therefore, ignore these
-                // values.
-                return Err(Error::NoEncoding);
-            }
-            4 => "MS_KANJI",
-            _ => CODEPAGE_NUMBER_TO_NAME
-                .get(&codepage)
-                .copied()
-                .ok_or(Error::UnknownCodepage(codepage))?,
-        }
-    } else {
-        return Err(Error::NoEncoding);
-    };
-
-    Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into()))
-}
-
-/*
-#[cfg(test)]
-mod tests {
-    use std::thread::spawn;
-
-    use encoding_rs::{EUC_JP, UTF_8, WINDOWS_1252};
-
-    #[test]
-    fn round_trip() {
-        let mut threads = Vec::new();
-        for thread in 0..128 {
-            let start: u32 = thread << 25;
-            let end = start + ((1 << 25) - 1);
-            threads.push(spawn(move || {
-                for i in start..=end {
-                    let s = i.to_le_bytes();
-                    let (utf8, replacement) = EUC_JP.decode_without_bom_handling(&s);
-                    if !replacement {
-                        let s2 = UTF_8.encode(&utf8).0;
-                        assert_eq!(s.as_slice(), &*s2);
-                    }
-                }
-            }));
-        }
-        for thread in threads {
-            thread.join().unwrap();
-        }
-    }
-}
-*/
index 3078fffb438479a8f29df4d08648c7c5065ebed3..2d17270b8400eb6a44374329e8e26dbdd2c3b93c 100644 (file)
@@ -13,7 +13,7 @@ use unicode_width::UnicodeWidthStr;
 
 use crate::{
     dictionary::{Value, VarWidth},
-    raw::{self, RawString, VarType},
+    sys::raw::{self, RawString, VarType},
 };
 
 mod display;
@@ -377,7 +377,7 @@ impl Type {
     pub fn default_value(&self) -> Value {
         match self.var_type() {
             VarType::Numeric => Value::sysmis(),
-            VarType::String => Value::String(RawString::default())
+            VarType::String => Value::String(RawString::default()),
         }
     }
 }
index 222d61de50f9103c7c1f2416c12873c7600f2bf4..2f5887370f355916bea6b6b2ecf5020a67dd85a8 100644 (file)
@@ -3,8 +3,8 @@ use crate::{
     dictionary::Value,
     endian::{Endian, Parse},
     format::{DateTemplate, Decimals, Settings, TemplateItem, Type},
-    raw::{EncodedStr, EncodedString},
     settings::{EndianSettings, Settings as PsppSettings},
+    sys::raw::{EncodedStr, EncodedString},
 };
 use encoding_rs::Encoding;
 use smallstr::SmallString;
@@ -911,8 +911,8 @@ mod test {
             parse::{ParseError, ParseErrorKind, Sign},
             Epoch, Format, Settings as FormatSettings, Type,
         },
-        raw::EncodedStr,
         settings::EndianSettings,
+        sys::raw::EncodedStr,
     };
 
     fn test(name: &str, type_: Type) {
index 3540125c81d379af17fbe91ed74816a6b8cf86b2..b78b711bb44d33e614bbf6cbaa4eb9fce72914f8 100644 (file)
@@ -1,8 +1,6 @@
 pub mod calendar;
 pub mod command;
-pub mod cooked;
 pub mod dictionary;
-pub mod encoding;
 pub mod endian;
 pub mod engine;
 pub mod format;
@@ -14,6 +12,5 @@ pub mod macros;
 pub mod message;
 pub mod output;
 pub mod prompt;
-pub mod raw;
-pub mod sack;
 pub mod settings;
+pub mod sys;
index 62ab24337b076a090c4e21016c91ced4f2c7b4b2..35c057a341bc96233d04df5c9167b482a955f5ff 100644 (file)
@@ -17,8 +17,8 @@
 use anyhow::Result;
 use clap::{Parser, ValueEnum};
 use encoding_rs::Encoding;
-use pspp::cooked::{decode, Headers};
-use pspp::raw::{encoding_from_headers, Decoder, Magic, Reader, Record};
+use pspp::sys::cooked::{decode, Headers};
+use pspp::sys::raw::{encoding_from_headers, Decoder, Magic, Reader, Record};
 use std::fs::File;
 use std::io::BufReader;
 use std::path::{Path, PathBuf};
index d79e01a3bd0fdee48cc03eb5010ccf942510a78a..14a6e510557d2b12237728c1a61b76e0982d7cbf 100644 (file)
@@ -54,8 +54,8 @@ use tlo::parse_tlo;
 use crate::{
     dictionary::Value as DataValue,
     format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat},
-    raw::VarType,
     settings::{Settings, Show},
+    sys::raw::VarType,
 };
 
 pub mod output;
diff --git a/rust/pspp/src/raw.rs b/rust/pspp/src/raw.rs
deleted file mode 100644 (file)
index 5c3eb85..0000000
+++ /dev/null
@@ -1,3008 +0,0 @@
-use crate::{
-    dictionary::{Attributes, Value, VarWidth},
-    encoding::{default_encoding, get_encoding, Error as EncodingError},
-    endian::{Endian, Parse, ToBytes},
-    identifier::{Error as IdError, Identifier},
-};
-
-use encoding_rs::{mem::decode_latin1, Encoding};
-use flate2::read::ZlibDecoder;
-use num::Integer;
-use std::{
-    borrow::Cow,
-    cell::RefCell,
-    collections::{HashMap, VecDeque},
-    fmt::{Debug, Display, Formatter, Result as FmtResult},
-    io::{Error as IoError, Read, Seek, SeekFrom},
-    mem::take,
-    num::NonZeroU8,
-    ops::Range,
-    rc::Rc,
-    str::from_utf8,
-};
-use thiserror::Error as ThisError;
-
-/// Fatal errors reported while reading a system file.
-///
-/// Most variants carry the file `offset` at which the problem was detected;
-/// the messages print offsets in hexadecimal.
-#[derive(ThisError, Debug)]
-pub enum Error {
-    /// The header's magic number or layout code did not identify a system
-    /// file.
-    #[error("Not an SPSS system file")]
-    NotASystemFile,
-
-    /// Returned by `Magic::try_from` for four bytes that are not a known
-    /// magic number.
-    #[error("Invalid magic number {0:?}")]
-    BadMagic([u8; 4]),
-
-    /// An I/O error; `#[from]` lets `?` convert an `IoError` into this
-    /// variant.
-    #[error("I/O error ({0})")]
-    Io(#[from] IoError),
-
-    /// Compression code other than 0 or 1 in a file without the ZSAV magic.
-    #[error("Invalid SAV compression code {0}")]
-    InvalidSavCompression(u32),
-
-    /// Compression code other than 2 in a file with the ZSAV magic.
-    #[error("Invalid ZSAV compression code {0}")]
-    InvalidZsavCompression(u32),
-
-    #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
-    BadDocumentLength { offset: u64, n: usize, max: usize },
-
-    /// Returned by `Record::read` for a record type other than 2, 3, 6, 7,
-    /// or 999.
-    #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
-    BadRecordType { offset: u64, rec_type: u32 },
-
-    /// Note that `width` is carried in the error but is not part of the
-    /// message.
-    #[error("In variable record starting at offset {start_offset:#x}, variable width is not in the valid range -1 to 255.")]
-    BadVariableWidth { start_offset: u64, width: i32 },
-
-    #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")]
-    BadVariableLabelCode {
-        start_offset: u64,
-        code_offset: u64,
-        code: u32,
-    },
-
-    #[error("At offset {offset:#x}, missing value code ({code}) is not -3, -2, 0, 1, 2, or 3.")]
-    BadMissingValueCode { offset: u64, code: i32 },
-
-    #[error(
-        "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
-    )]
-    BadNumericMissingValueCode { offset: u64, code: i32 },
-
-    #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
-    BadStringMissingValueCode { offset: u64, code: i32 },
-
-    #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
-    BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
-
-    #[error("At offset {offset:#x}, following value label record, found record type {rec_type} instead of expected type 4 for variable index record")]
-    ExpectedVarIndexRecord { offset: u64, rec_type: u32 },
-
-    #[error("At offset {offset:#x}, number of variables indexes for value labels ({n}) is greater than the maximum number ({max}).")]
-    TooManyVarIndexes { offset: u64, n: u32, max: u32 },
-
-    #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
-    ExtensionRecordTooLarge {
-        offset: u64,
-        subtype: u32,
-        size: u32,
-        count: u32,
-    },
-
-    /// End of file after the first value of an uncompressed case.
-    #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
-    EofInCase {
-        offset: u64,
-        case_ofs: u64,
-        case_len: usize,
-    },
-
-    /// End of file while fetching compression codes after the first value of
-    /// a compressed case.
-    #[error(
-        "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
-    )]
-    EofInCompressedCase { offset: u64, case_ofs: u64 },
-
-    /// Compression code 252 was found after the first value of a compressed
-    /// case.
-    #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
-    PartialCompressedCase { offset: u64, case_ofs: u64 },
-
-    /// Compression code 255 was found for a string variable.
-    #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
-    CompressedNumberExpected { offset: u64, case_ofs: u64 },
-
-    /// Compression code 254 was found for a numeric variable.
-    #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
-    CompressedStringExpected { offset: u64, case_ofs: u64 },
-
-    #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
-    BadZlibTrailerNBlocks {
-        offset: u64,
-        n_blocks: u32,
-        expected_n_blocks: u64,
-        ztrailer_len: u64,
-    },
-
-    /// An encoding error that must be treated as fatal;
-    /// `encoding_from_headers` uses this for EBCDIC files.
-    #[error("{0}")]
-    EncodingError(EncodingError),
-}
-
-/// Non-fatal problems found while reading a system file.
-///
-/// Warnings are handed to a caller-supplied callback rather than returned as
-/// errors.
-#[derive(ThisError, Debug)]
-pub enum Warning {
-    /// Also the result of converting any `IoError` into a `Warning`.
-    #[error("Unexpected end of data inside extension record.")]
-    UnexpectedEndOfData,
-
-    #[error("At offset {offset:#x}, at least one valid variable index for value labels is required but none were specified.")]
-    NoVarIndexes { offset: u64 },
-
-    /// `wrong_types` lists the variable indexes whose type differs from
-    /// `var_type`.
-    #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", var_type.opposite())]
-    MixedVarTypes {
-        offset: u64,
-        var_type: VarType,
-        wrong_types: Vec<u32>,
-    },
-
-    #[error("At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}] or referred to string continuations: {invalid:?}")]
-    InvalidVarIndexes {
-        offset: u64,
-        max: usize,
-        invalid: Vec<u32>,
-    },
-
-    #[error("At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}.")]
-    BadRecordSize {
-        offset: u64,
-        record: String,
-        size: u32,
-        expected_size: u32,
-    },
-
-    #[error("At offset {offset:#x}, {record} has bad count {count} instead of the expected {expected_count}.")]
-    BadRecordCount {
-        offset: u64,
-        record: String,
-        count: u32,
-        expected_count: u32,
-    },
-
-    #[error("In long string missing values record starting at offset {record_offset:#x}, value length at offset {offset:#x} is {value_len} instead of the expected 8.")]
-    BadLongMissingValueLength {
-        record_offset: u64,
-        offset: u64,
-        value_len: u32,
-    },
-
-    #[error("The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8.")]
-    BadEncodingName { offset: u64 },
-
-    /// Reported by `Decoder` when decoding replaced malformed input.
-    // XXX This is risky because `text` might be arbitrarily long.
-    #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
-    MalformedString { encoding: String, text: String },
-
-    #[error("Invalid variable measurement level value {0}")]
-    InvalidMeasurement(u32),
-
-    #[error("Invalid variable display alignment value {0}")]
-    InvalidAlignment(u32),
-
-    #[error("Invalid attribute name.  {0}")]
-    InvalidAttributeName(IdError),
-
-    #[error("Invalid variable name in attribute record.  {0}")]
-    InvalidAttributeVariableName(IdError),
-
-    #[error("Invalid short name in long variable name record.  {0}")]
-    InvalidShortName(IdError),
-
-    #[error("Invalid name in long variable name record.  {0}")]
-    InvalidLongName(IdError),
-
-    #[error("Invalid variable name in very long string record.  {0}")]
-    InvalidLongStringName(IdError),
-
-    #[error("Invalid variable name in variable set record.  {0}")]
-    InvalidVariableSetName(IdError),
-
-    #[error("Invalid multiple response set name.  {0}")]
-    InvalidMrSetName(IdError),
-
-    #[error("Invalid multiple response set variable name.  {0}")]
-    InvalidMrSetVariableName(IdError),
-
-    #[error("Invalid variable name in long string missing values record.  {0}")]
-    InvalidLongStringMissingValueVariableName(IdError),
-
-    #[error("Invalid variable name in long string value label record.  {0}")]
-    InvalidLongStringValueLabelName(IdError),
-
-    /// A non-fatal encoding problem; `encoding_from_headers` reports this
-    /// before falling back to the default encoding.
-    #[error("{0}")]
-    EncodingError(EncodingError),
-
-    /// Placeholder for warnings whose details have not been written yet.
-    #[error("Details TBD")]
-    TBD,
-}
-
-impl From<IoError> for Warning {
-    fn from(_source: IoError) -> Self {
-        Self::UnexpectedEndOfData
-    }
-}
-
-/// A record as read from a system file, before text decoding.
-///
-/// Records that contain text hold it as raw bytes (`RawString` and similar);
-/// [Record::decode] converts a record into a [DecodedRecord].
-#[derive(Clone, Debug)]
-pub enum Record {
-    Header(HeaderRecord<RawString>),
-    Variable(VariableRecord<RawString>),
-    ValueLabel(ValueLabelRecord<RawStrArray<8>, RawString>),
-    Document(DocumentRecord<RawDocumentLine>),
-    IntegerInfo(IntegerInfoRecord),
-    FloatInfo(FloatInfoRecord),
-    VarDisplay(VarDisplayRecord),
-    MultipleResponse(MultipleResponseRecord<RawString, RawString>),
-    LongStringValueLabels(LongStringValueLabelRecord<RawString, RawString>),
-    LongStringMissingValues(LongStringMissingValueRecord<RawString>),
-    Encoding(EncodingRecord),
-    NumberOfCases(NumberOfCasesRecord),
-    /// A text-based record, interpreted only when it is decoded.
-    Text(TextRecord),
-    OtherExtension(Extension),
-    /// The record of type 999, with the 32-bit value that follows the type.
-    EndOfHeaders(u32),
-    ZHeader(ZHeader),
-    ZTrailer(ZTrailer),
-    Cases(Rc<RefCell<Cases>>),
-}
-
-/// A record after text decoding, as produced by [Record::decode].
-///
-/// Text fields are `String`s or `Identifier`s instead of raw bytes.
-#[derive(Clone, Debug)]
-pub enum DecodedRecord {
-    Header(HeaderRecord<String>),
-    Variable(VariableRecord<String>),
-    ValueLabel(ValueLabelRecord<RawStrArray<8>, String>),
-    Document(DocumentRecord<String>),
-    IntegerInfo(IntegerInfoRecord),
-    FloatInfo(FloatInfoRecord),
-    VarDisplay(VarDisplayRecord),
-    MultipleResponse(MultipleResponseRecord<Identifier, String>),
-    LongStringValueLabels(LongStringValueLabelRecord<Identifier, String>),
-    LongStringMissingValues(LongStringMissingValueRecord<Identifier>),
-    Encoding(EncodingRecord),
-    NumberOfCases(NumberOfCasesRecord),
-    // NOTE(review): the next six variants have no counterpart in `Record`;
-    // presumably they are what decoding a `Record::Text` yields -- confirm
-    // against `TextRecord::decode`.
-    VariableSets(VariableSetRecord),
-    ProductInfo(ProductInfoRecord),
-    LongNames(LongNamesRecord),
-    VeryLongStrings(VeryLongStringsRecord),
-    FileAttributes(FileAttributeRecord),
-    VariableAttributes(VariableAttributeRecord),
-    OtherExtension(Extension),
-    EndOfHeaders(u32),
-    ZHeader(ZHeader),
-    ZTrailer(ZTrailer),
-    Cases(Rc<RefCell<Cases>>),
-}
-
-impl Record {
-    fn read<R>(
-        reader: &mut R,
-        endian: Endian,
-        var_types: &VarTypes,
-        warn: &dyn Fn(Warning),
-    ) -> Result<Option<Record>, Error>
-    where
-        R: Read + Seek,
-    {
-        let rec_type: u32 = endian.parse(read_bytes(reader)?);
-        match rec_type {
-            2 => Ok(Some(VariableRecord::read(reader, endian, warn)?)),
-            3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
-            6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
-            7 => Extension::read(reader, endian, var_types.n_values(), warn),
-            999 => Ok(Some(Record::EndOfHeaders(
-                endian.parse(read_bytes(reader)?),
-            ))),
-            _ => Err(Error::BadRecordType {
-                offset: reader.stream_position()?,
-                rec_type,
-            }),
-        }
-    }
-
-    pub fn decode(self, decoder: &Decoder) -> Result<DecodedRecord, Error> {
-        Ok(match self {
-            Record::Header(record) => record.decode(decoder),
-            Record::Variable(record) => record.decode(decoder),
-            Record::ValueLabel(record) => DecodedRecord::ValueLabel(record.decode(decoder)),
-            Record::Document(record) => record.decode(decoder),
-            Record::IntegerInfo(record) => DecodedRecord::IntegerInfo(record.clone()),
-            Record::FloatInfo(record) => DecodedRecord::FloatInfo(record.clone()),
-            Record::VarDisplay(record) => DecodedRecord::VarDisplay(record.clone()),
-            Record::MultipleResponse(record) => record.decode(decoder),
-            Record::LongStringValueLabels(record) => {
-                DecodedRecord::LongStringValueLabels(record.decode(decoder))
-            }
-            Record::LongStringMissingValues(record) => {
-                DecodedRecord::LongStringMissingValues(record.decode(decoder))
-            }
-            Record::Encoding(record) => DecodedRecord::Encoding(record.clone()),
-            Record::NumberOfCases(record) => DecodedRecord::NumberOfCases(record.clone()),
-            Record::Text(record) => record.decode(decoder),
-            Record::OtherExtension(record) => DecodedRecord::OtherExtension(record.clone()),
-            Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(record),
-            Record::ZHeader(record) => DecodedRecord::ZHeader(record.clone()),
-            Record::ZTrailer(record) => DecodedRecord::ZTrailer(record.clone()),
-            Record::Cases(record) => DecodedRecord::Cases(record.clone()),
-        })
-    }
-}
-
-pub fn encoding_from_headers(
-    headers: &Vec<Record>,
-    warn: &impl Fn(Warning),
-) -> Result<&'static Encoding, Error> {
-    let mut encoding_record = None;
-    let mut integer_info_record = None;
-    for record in headers {
-        match record {
-            Record::Encoding(record) => encoding_record = Some(record),
-            Record::IntegerInfo(record) => integer_info_record = Some(record),
-            _ => (),
-        }
-    }
-    let encoding = encoding_record.map(|record| record.0.as_str());
-    let character_code = integer_info_record.map(|record| record.character_code);
-    match get_encoding(encoding, character_code) {
-        Ok(encoding) => Ok(encoding),
-        Err(err @ EncodingError::Ebcdic) => Err(Error::EncodingError(err)),
-        Err(err) => {
-            warn(Warning::EncodingError(err));
-            // Warn that we're using the default encoding.
-            Ok(default_encoding())
-        }
-    }
-}
-
-// If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it
-// decoded as Latin-1 (actually bytes interpreted as Unicode code points).
-fn default_decode(s: &[u8]) -> Cow<str> {
-    from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
-}
-
-/// How the cases in a system file are compressed.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum Compression {
-    /// Compression code 1 in a file without the ZSAV magic number.
-    Simple,
-    /// Compression code 2 in a file with the ZSAV magic number.
-    ZLib,
-}
-
-/// The file header record of a system file.
-///
-/// `S` is the type used for text fields: `RawString` for a record as read
-/// from the file, `String` after decoding.
-#[derive(Clone)]
-pub struct HeaderRecord<S>
-where
-    S: Debug,
-{
-    /// Range of file offsets occupied by the record.
-    pub offsets: Range<u64>,
-
-    /// Magic number.
-    pub magic: Magic,
-
-    /// Eye-catcher string, product name, in the file's encoding.  Padded
-    /// on the right with spaces.
-    pub eye_catcher: S,
-
-    /// Layout code, normally either 2 or 3.
-    pub layout_code: u32,
-
-    /// Number of variable positions, or `None` if the value in the file is
-    /// questionably trustworthy (greater than `i32::MAX / 16`).
-    pub nominal_case_size: Option<u32>,
-
-    /// Compression type, if any.
-    pub compression: Option<Compression>,
-
-    /// 1-based variable index of the weight variable, or `None` if the file is
-    /// unweighted.
-    pub weight_index: Option<u32>,
-
-    /// Claimed number of cases, if known.  Values of `i32::MAX / 2` or more
-    /// are treated as unknown.
-    pub n_cases: Option<u32>,
-
-    /// Compression bias, usually 100.0.
-    pub bias: f64,
-
-    /// `dd mmm yy` in the file's encoding.
-    pub creation_date: S,
-
-    /// `HH:MM:SS` in the file's encoding.
-    pub creation_time: S,
-
-    /// File label, in the file's encoding.  Padded on the right with spaces.
-    pub file_label: S,
-
-    /// Endianness of the data in the file header.
-    pub endian: Endian,
-}
-
-impl<S> HeaderRecord<S>
-where
-    S: Debug,
-{
-    fn debug_field<T>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult
-    where
-        T: Debug,
-    {
-        writeln!(f, "{name:>17}: {:?}", value)
-    }
-}
-
-impl<S> Debug for HeaderRecord<S>
-where
-    S: Debug,
-{
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        writeln!(f, "File header record:")?;
-        self.debug_field(f, "Magic", self.magic)?;
-        self.debug_field(f, "Product name", &self.eye_catcher)?;
-        self.debug_field(f, "Layout code", self.layout_code)?;
-        self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
-        self.debug_field(f, "Compression", self.compression)?;
-        self.debug_field(f, "Weight index", self.weight_index)?;
-        self.debug_field(f, "Number of cases", self.n_cases)?;
-        self.debug_field(f, "Compression bias", self.bias)?;
-        self.debug_field(f, "Creation date", &self.creation_date)?;
-        self.debug_field(f, "Creation time", &self.creation_time)?;
-        self.debug_field(f, "File label", &self.file_label)?;
-        self.debug_field(f, "Endianness", self.endian)
-    }
-}
-
-impl HeaderRecord<RawString> {
-    fn read<R: Read + Seek>(r: &mut R) -> Result<Self, Error> {
-        let start = r.stream_position()?;
-
-        let magic: [u8; 4] = read_bytes(r)?;
-        let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
-
-        let eye_catcher = RawString(read_vec(r, 60)?);
-        let layout_code: [u8; 4] = read_bytes(r)?;
-        let endian = Endian::identify_u32(2, layout_code)
-            .or_else(|| Endian::identify_u32(2, layout_code))
-            .ok_or(Error::NotASystemFile)?;
-        let layout_code = endian.parse(layout_code);
-
-        let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
-        let nominal_case_size =
-            (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
-
-        let compression_code: u32 = endian.parse(read_bytes(r)?);
-        let compression = match (magic, compression_code) {
-            (Magic::Zsav, 2) => Some(Compression::ZLib),
-            (Magic::Zsav, code) => return Err(Error::InvalidZsavCompression(code)),
-            (_, 0) => None,
-            (_, 1) => Some(Compression::Simple),
-            (_, code) => return Err(Error::InvalidSavCompression(code)),
-        };
-
-        let weight_index: u32 = endian.parse(read_bytes(r)?);
-        let weight_index = (weight_index > 0).then_some(weight_index);
-
-        let n_cases: u32 = endian.parse(read_bytes(r)?);
-        let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
-
-        let bias: f64 = endian.parse(read_bytes(r)?);
-
-        let creation_date = RawString(read_vec(r, 9)?);
-        let creation_time = RawString(read_vec(r, 8)?);
-        let file_label = RawString(read_vec(r, 64)?);
-        let _: [u8; 3] = read_bytes(r)?;
-
-        Ok(HeaderRecord {
-            offsets: start..r.stream_position()?,
-            magic,
-            layout_code,
-            nominal_case_size,
-            compression,
-            weight_index,
-            n_cases,
-            bias,
-            creation_date,
-            creation_time,
-            eye_catcher,
-            file_label,
-            endian,
-        })
-    }
-
-    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
-        let eye_catcher = decoder.decode(&self.eye_catcher).to_string();
-        let file_label = decoder.decode(&self.file_label).to_string();
-        let creation_date = decoder.decode(&self.creation_date).to_string();
-        let creation_time = decoder.decode(&self.creation_time).to_string();
-        DecodedRecord::Header(HeaderRecord {
-            eye_catcher,
-            weight_index: self.weight_index,
-            n_cases: self.n_cases,
-            file_label,
-            offsets: self.offsets.clone(),
-            magic: self.magic,
-            layout_code: self.layout_code,
-            nominal_case_size: self.nominal_case_size,
-            compression: self.compression,
-            bias: self.bias,
-            creation_date,
-            creation_time,
-            endian: self.endian,
-        })
-    }
-}
-
-pub struct Decoder {
-    pub encoding: &'static Encoding,
-    pub warn: Box<dyn Fn(Warning)>,
-}
-
-impl Decoder {
-    pub fn new<F>(encoding: &'static Encoding, warn: F) -> Self
-    where
-        F: Fn(Warning) + 'static,
-    {
-        Self {
-            encoding,
-            warn: Box::new(warn),
-        }
-    }
-    fn warn(&self, warning: Warning) {
-        (self.warn)(warning)
-    }
-    fn decode_slice<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
-        let (output, malformed) = self.encoding.decode_without_bom_handling(input);
-        if malformed {
-            self.warn(Warning::MalformedString {
-                encoding: self.encoding.name().into(),
-                text: output.clone().into(),
-            });
-        }
-        output
-    }
-
-    fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> {
-        self.decode_slice(input.0.as_slice())
-    }
-
-    pub fn decode_identifier(&self, input: &RawString) -> Result<Identifier, IdError> {
-        self.new_identifier(&self.decode(input))
-    }
-
-    pub fn new_identifier(&self, name: &str) -> Result<Identifier, IdError> {
-        Identifier::from_encoding(name, self.encoding)
-    }
-}
-
-/// The kind of system file, as identified by the 4-byte magic number at the
-/// start of the file header.
-#[derive(Copy, Clone, PartialEq, Eq, Hash)]
-pub enum Magic {
-    /// Regular system file.
-    Sav,
-
-    /// System file with Zlib-compressed data.
-    Zsav,
-
-    /// EBCDIC-encoded system file.
-    Ebcdic,
-}
-
-impl Magic {
-    /// Magic number for a regular system file.
-    pub const SAV: [u8; 4] = *b"$FL2";
-
-    /// Magic number for a system file that contains zlib-compressed data.
-    pub const ZSAV: [u8; 4] = *b"$FL3";
-
-    /// Magic number for an EBCDIC-encoded system file.  This is `$FL2` encoded
-    /// in EBCDIC.
-    pub const EBCDIC: [u8; 4] = [0x5b, 0xc6, 0xd3, 0xf2];
-}
-
-impl Debug for Magic {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        let s = match *self {
-            Magic::Sav => "$FL2",
-            Magic::Zsav => "$FL3",
-            Magic::Ebcdic => "($FL2 in EBCDIC)",
-        };
-        write!(f, "{s}")
-    }
-}
-
-impl TryFrom<[u8; 4]> for Magic {
-    type Error = Error;
-
-    fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
-        match value {
-            Magic::SAV => Ok(Magic::Sav),
-            Magic::ZSAV => Ok(Magic::Zsav),
-            Magic::EBCDIC => Ok(Magic::Ebcdic),
-            _ => Err(Error::BadMagic(value)),
-        }
-    }
-}
-
-/// The basic type of a variable: numeric or string.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum VarType {
-    /// Numeric variable; displayed as `numeric`.
-    Numeric,
-    /// String variable; displayed as `string`.
-    String,
-}
-
-impl VarType {
-    pub fn opposite(self) -> VarType {
-        match self {
-            Self::Numeric => Self::String,
-            Self::String => Self::Numeric,
-        }
-    }
-}
-
-impl Display for VarType {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        match self {
-            VarType::Numeric => write!(f, "numeric"),
-            VarType::String => write!(f, "string"),
-        }
-    }
-}
-
-impl TryFrom<RawWidth> for VarType {
-    type Error = ();
-
-    fn try_from(value: RawWidth) -> Result<Self, Self::Error> {
-        match value {
-            RawWidth::Continuation => Err(()),
-            RawWidth::Numeric => Ok(VarType::Numeric),
-            RawWidth::String(_) => Ok(VarType::String),
-        }
-    }
-}
-
-impl TryFrom<RawWidth> for VarWidth {
-    type Error = ();
-
-    fn try_from(value: RawWidth) -> Result<Self, Self::Error> {
-        match value {
-            RawWidth::Continuation => Err(()),
-            RawWidth::Numeric => Ok(Self::Numeric),
-            RawWidth::String(width) => Ok(Self::String(width.get() as u16)),
-        }
-    }
-}
-
-type RawValue = Value<RawStrArray<8>>;
-
-impl RawValue {
-    pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Self {
-        match var_type {
-            VarType::String => Value::String(RawStrArray(raw.0)),
-            VarType::Numeric => Value::Number(endian.parse(raw.0)),
-        }
-    }
-
-    fn read_case<R: Read + Seek>(
-        reader: &mut R,
-        var_types: &VarTypes,
-        endian: Endian,
-    ) -> Result<Option<Vec<Self>>, Error> {
-        let case_start = reader.stream_position()?;
-        let mut values = Vec::with_capacity(var_types.n_values());
-        for (i, var_type) in var_types.iter().enumerate() {
-            let Some(raw) = try_read_bytes(reader)? else {
-                if i == 0 {
-                    return Ok(None);
-                } else {
-                    let offset = reader.stream_position()?;
-                    return Err(Error::EofInCase {
-                        offset,
-                        case_ofs: offset - case_start,
-                        case_len: var_types.n_values() * 8,
-                    });
-                }
-            };
-            values.push(Value::from_raw(&UntypedValue(raw), var_type, endian));
-        }
-        Ok(Some(values))
-    }
-
-    /// Reads one case from bytecode-compressed data in `reader`.
-    ///
-    /// `codes` holds opcode bytes that were read but not yet consumed.  When it
-    /// runs dry, another 8 opcode bytes are read from `reader`; leftovers stay
-    /// in `codes` for the next call.  Each opcode yields at most one value:
-    ///
-    /// - `0`: ignored.
-    /// - `1..=251`: the value `code - bias`.
-    /// - `252`: end of data.
-    /// - `253`: the value is the next 8 literal bytes in `reader`.
-    /// - `254`: a string of 8 spaces (an error for a numeric variable).
-    /// - `255`: the number `None` (an error for a string variable).
-    ///
-    /// Returns `Ok(None)` if the data ends (end of input or opcode 252) before
-    /// the first value of a case, and an error if it ends partway through one.
-    fn read_compressed_case<R: Read + Seek>(
-        reader: &mut R,
-        var_types: &VarTypes,
-        codes: &mut VecDeque<u8>,
-        endian: Endian,
-        bias: f64,
-    ) -> Result<Option<Vec<Self>>, Error> {
-        let case_start = reader.stream_position()?;
-        let mut values = Vec::with_capacity(var_types.n_values());
-        for (i, var_type) in var_types.iter().enumerate() {
-            // Keep consuming opcodes until one of them produces a value.
-            let value = loop {
-                let Some(code) = codes.pop_front() else {
-                    // Out of opcodes: fetch the next group of 8.
-                    let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
-                        if i == 0 {
-                            return Ok(None);
-                        } else {
-                            let offset = reader.stream_position()?;
-                            return Err(Error::EofInCompressedCase {
-                                offset,
-                                case_ofs: offset - case_start,
-                            });
-                        }
-                    };
-                    codes.extend(new_codes.into_iter());
-                    continue;
-                };
-                match code {
-                    0 => (),
-                    1..=251 => match var_type {
-                        VarType::Numeric => break Self::Number(Some(code as f64 - bias)),
-                        VarType::String => {
-                            break Self::String(RawStrArray(endian.to_bytes(code as f64 - bias)))
-                        }
-                    },
-                    252 => {
-                        if i == 0 {
-                            return Ok(None);
-                        } else {
-                            let offset = reader.stream_position()?;
-                            return Err(Error::PartialCompressedCase {
-                                offset,
-                                case_ofs: offset - case_start,
-                            });
-                        }
-                    }
-                    253 => {
-                        break Self::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian)
-                    }
-                    254 => match var_type {
-                        VarType::String => break Self::String(RawStrArray(*b"        ")), // XXX EBCDIC
-                        VarType::Numeric => {
-                            // Unlike the end-of-data errors above, `offset` here
-                            // is the start of the case, not the current position.
-                            return Err(Error::CompressedStringExpected {
-                                offset: case_start,
-                                case_ofs: reader.stream_position()? - case_start,
-                            })
-                        }
-                    },
-                    255 => match var_type {
-                        VarType::Numeric => break Self::Number(None),
-                        VarType::String => {
-                            return Err(Error::CompressedNumberExpected {
-                                offset: case_start,
-                                case_ofs: reader.stream_position()? - case_start,
-                            })
-                        }
-                    },
-                }
-            };
-            values.push(value);
-        }
-        Ok(Some(values))
-    }
-
-    pub fn decode(&self, width: VarWidth) -> Value {
-        match self {
-            Self::Number(x) => Value::Number(*x),
-            Self::String(s) => {
-                let width = width.as_string_width().unwrap();
-                Value::String(RawString::from(&s.0[..width]))
-            }
-        }
-    }
-}
-
-/// A reader that decompresses a series of zlib streams stored back to back
-/// in the underlying reader, presenting them as one continuous byte stream.
-struct ZlibDecodeMultiple<R>
-where
-    R: Read + Seek,
-{
-    /// Decoder for the current zlib stream.  It is an `Option` only so that
-    /// the decoder can be moved out temporarily while it is being replaced.
-    reader: Option<ZlibDecoder<R>>,
-}
-
-impl<R> ZlibDecodeMultiple<R>
-where
-    R: Read + Seek,
-{
-    fn new(reader: R) -> ZlibDecodeMultiple<R> {
-        ZlibDecodeMultiple {
-            reader: Some(ZlibDecoder::new(reader)),
-        }
-    }
-}
-
-impl<R> Read for ZlibDecodeMultiple<R>
-where
-    R: Read + Seek,
-{
-    fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
-        loop {
-            match self.reader.as_mut().unwrap().read(buf)? {
-                0 => {
-                    let inner = self.reader.take().unwrap().into_inner();
-                    self.reader = Some(ZlibDecoder::new(inner));
-                }
-                n => return Ok(n),
-            };
-        }
-    }
-}
-
-impl<R> Seek for ZlibDecodeMultiple<R>
-where
-    R: Read + Seek,
-{
-    /// Forwards the seek to the underlying (compressed) reader, so positions
-    /// are offsets in the compressed file rather than in the decompressed
-    /// data.
-    ///
-    /// NOTE(review): the decoder itself is not reset, so this looks intended
-    /// for position queries such as `stream_position` rather than for real
-    /// repositioning -- confirm against callers.
-    fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
-        self.reader.as_mut().unwrap().get_mut().seek(pos)
-    }
-}
-
-/// What a [Reader] will produce next.
-enum ReaderState {
-    /// Nothing has been returned yet; the header record comes next.
-    Start,
-    /// Reading the records that follow the header, up to the end-of-headers
-    /// record.
-    Headers,
-    /// The zlib header comes next (only for zlib-compressed files).
-    ZlibHeader,
-    /// The zlib trailer comes next, located using the values taken from the
-    /// zlib header.
-    ZlibTrailer {
-        ztrailer_offset: u64,
-        ztrailer_len: u64,
-    },
-    /// The cases come next.
-    Cases,
-    /// Nothing more will be returned.
-    End,
-}
-
-/// An iterator over the records in a system file.
-pub struct Reader<R>
-where
-    R: Read + Seek + 'static,
-{
-    /// The input.  It is moved into the [Cases] once the cases are reached,
-    /// leaving `None` behind.
-    reader: Option<R>,
-    /// Callback invoked for each warning encountered while reading records.
-    warn: Box<dyn Fn(Warning)>,
-
-    /// The file header, read when the reader is constructed.
-    header: HeaderRecord<RawString>,
-    /// Widths of the variable records read so far.
-    var_types: VarTypes,
-
-    /// What to produce next.
-    state: ReaderState,
-}
-
-impl<R> Reader<R>
-where
-    R: Read + Seek + 'static,
-{
-    pub fn new<F>(mut reader: R, warn: F) -> Result<Self, Error>
-    where
-        F: Fn(Warning) + 'static,
-    {
-        let header = HeaderRecord::read(&mut reader)?;
-        Ok(Self {
-            reader: Some(reader),
-            warn: Box::new(warn),
-            header,
-            var_types: VarTypes::new(),
-            state: ReaderState::Start,
-        })
-    }
-    fn cases(&mut self) -> Cases {
-        self.state = ReaderState::End;
-        Cases::new(
-            self.reader.take().unwrap(),
-            take(&mut self.var_types),
-            &self.header,
-        )
-    }
-    fn _next(&mut self) -> Option<<Self as Iterator>::Item> {
-        match self.state {
-            ReaderState::Start => {
-                self.state = ReaderState::Headers;
-                Some(Ok(Record::Header(self.header.clone())))
-            }
-            ReaderState::Headers => {
-                let record = loop {
-                    match Record::read(
-                        self.reader.as_mut().unwrap(),
-                        self.header.endian,
-                        &self.var_types,
-                        &self.warn,
-                    ) {
-                        Ok(Some(record)) => break record,
-                        Ok(None) => (),
-                        Err(error) => return Some(Err(error)),
-                    }
-                };
-                match record {
-                    Record::Variable(VariableRecord { width, .. }) => self.var_types.push(width),
-                    Record::EndOfHeaders(_) => {
-                        self.state = if let Some(Compression::ZLib) = self.header.compression {
-                            ReaderState::ZlibHeader
-                        } else {
-                            ReaderState::Cases
-                        };
-                    }
-                    _ => (),
-                };
-                Some(Ok(record))
-            }
-            ReaderState::ZlibHeader => {
-                let zheader = match ZHeader::read(self.reader.as_mut().unwrap(), self.header.endian)
-                {
-                    Ok(zheader) => zheader,
-                    Err(error) => return Some(Err(error)),
-                };
-                self.state = ReaderState::ZlibTrailer {
-                    ztrailer_offset: zheader.ztrailer_offset,
-                    ztrailer_len: zheader.ztrailer_len,
-                };
-                Some(Ok(Record::ZHeader(zheader)))
-            }
-            ReaderState::ZlibTrailer {
-                ztrailer_offset,
-                ztrailer_len,
-            } => {
-                match ZTrailer::read(
-                    self.reader.as_mut().unwrap(),
-                    self.header.endian,
-                    ztrailer_offset,
-                    ztrailer_len,
-                ) {
-                    Ok(None) => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
-                    Ok(Some(ztrailer)) => Some(Ok(Record::ZTrailer(ztrailer))),
-                    Err(error) => Some(Err(error)),
-                }
-            }
-            ReaderState::Cases => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
-            ReaderState::End => None,
-        }
-    }
-}
-
-impl<R> Iterator for Reader<R>
-where
-    R: Read + Seek + 'static,
-{
-    type Item = Result<Record, Error>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let retval = self._next();
-        if matches!(retval, Some(Err(_))) {
-            self.state = ReaderState::End;
-        }
-        retval
-    }
-}
-
-/// Combination of [Read] and [Seek], so that a reader supporting both can
-/// be stored as a single trait object (`Box<dyn ReadSeek>`).
-trait ReadSeek: Read + Seek {}
-// Every type that is both `Read` and `Seek` is automatically `ReadSeek`.
-impl<T> ReadSeek for T where T: Read + Seek {}
-
-/// An iterator over the cases (rows of data) in a system file.
-pub struct Cases {
-    /// The input, wrapped in a zlib decoder for zlib-compressed files.
-    reader: Box<dyn ReadSeek>,
-    /// The type of each value in a case.
-    var_types: VarTypes,
-    /// The file's compression, from the file header.
-    compression: Option<Compression>,
-    /// The compression bias, from the file header.
-    bias: f64,
-    /// The file's byte order, from the file header.
-    endian: Endian,
-    /// Compression opcodes that have been read but not yet consumed.
-    codes: VecDeque<u8>,
-    /// Set once the data is exhausted or an error has been returned.
-    eof: bool,
-}
-
-impl Debug for Cases {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        write!(f, "Cases")
-    }
-}
-
-impl Cases {
-    fn new<R>(reader: R, var_types: VarTypes, header: &HeaderRecord<RawString>) -> Self
-    where
-        R: Read + Seek + 'static,
-    {
-        Self {
-            reader: if header.compression == Some(Compression::ZLib) {
-                Box::new(ZlibDecodeMultiple::new(reader))
-            } else {
-                Box::new(reader)
-            },
-            var_types,
-            compression: header.compression,
-            bias: header.bias,
-            endian: header.endian,
-            codes: VecDeque::with_capacity(8),
-            eof: false,
-        }
-    }
-}
-
-impl Iterator for Cases {
-    type Item = Result<Vec<RawValue>, Error>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.eof {
-            return None;
-        }
-
-        let retval = if self.compression.is_some() {
-            Value::read_compressed_case(
-                &mut self.reader,
-                &self.var_types,
-                &mut self.codes,
-                self.endian,
-                self.bias,
-            )
-            .transpose()
-        } else {
-            Value::read_case(&mut self.reader, &self.var_types, self.endian).transpose()
-        };
-        self.eof = matches!(retval, None | Some(Err(_)));
-        retval
-    }
-}
-
-/// A format specification in its raw 32-bit form: the format type code is
-/// in the bits above bit 15, the width in bits 8 to 15, and the number of
-/// decimals in bits 0 to 7.
-#[derive(Copy, Clone, PartialEq, Eq, Hash)]
-pub struct Spec(pub u32);
-
-impl Debug for Spec {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        let type_ = format_name(self.0 >> 16);
-        let w = (self.0 >> 8) & 0xff;
-        let d = self.0 & 0xff;
-        write!(f, "{:06x} ({type_}{w}.{d})", self.0)
-    }
-}
-
-fn format_name(type_: u32) -> Cow<'static, str> {
-    match type_ {
-        1 => "A",
-        2 => "AHEX",
-        3 => "COMMA",
-        4 => "DOLLAR",
-        5 => "F",
-        6 => "IB",
-        7 => "PIBHEX",
-        8 => "P",
-        9 => "PIB",
-        10 => "PK",
-        11 => "RB",
-        12 => "RBHEX",
-        15 => "Z",
-        16 => "N",
-        17 => "E",
-        20 => "DATE",
-        21 => "TIME",
-        22 => "DATETIME",
-        23 => "ADATE",
-        24 => "JDATE",
-        25 => "DTIME",
-        26 => "WKDAY",
-        27 => "MONTH",
-        28 => "MOYR",
-        29 => "QYR",
-        30 => "WKYR",
-        31 => "PCT",
-        32 => "DOT",
-        33 => "CCA",
-        34 => "CCB",
-        35 => "CCC",
-        36 => "CCD",
-        37 => "CCE",
-        38 => "EDATE",
-        39 => "SDATE",
-        40 => "MTIME",
-        41 => "YMDHMS",
-        _ => return format!("<unknown format {type_}>").into(),
-    }
-    .into()
-}
-
-/// The user-missing values of a variable: a list of individual values and,
-/// optionally, a range of values.
-#[derive(Clone)]
-pub struct MissingValues<S = Box<[u8]>>
-where
-    S: Debug,
-{
-    /// Individual missing values, up to 3 of them.
-    pub values: Vec<Value<S>>,
-
-    /// Optional range of missing values.
-    pub range: Option<(Value<S>, Value<S>)>,
-}
-
-impl<S> Debug for MissingValues<S>
-where
-    S: Debug,
-{
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        for (i, value) in self.values.iter().enumerate() {
-            if i > 0 {
-                write!(f, ", ")?;
-            }
-            write!(f, "{value:?}")?;
-        }
-
-        if let Some((low, high)) = &self.range {
-            if !self.values.is_empty() {
-                write!(f, ", ")?;
-            }
-            write!(f, "{low:?} THRU {high:?}")?;
-        }
-
-        if self.is_empty() {
-            write!(f, "none")?;
-        }
-
-        Ok(())
-    }
-}
-
-impl<S> MissingValues<S>
-where
-    S: Debug,
-{
-    fn is_empty(&self) -> bool {
-        self.values.is_empty() && self.range.is_none()
-    }
-}
-
-impl<S> Default for MissingValues<S>
-where
-    S: Debug,
-{
-    fn default() -> Self {
-        Self {
-            values: Vec::new(),
-            range: None,
-        }
-    }
-}
-
-impl MissingValues {
-    /// Reads the missing values of a variable record from `r`.
-    ///
-    /// `code` says what follows: 0 to 3 is that many individual values, -2
-    /// is a range, and -3 is a range plus one individual value.  Each value
-    /// occupies 8 bytes.  `offset` is used only for error reporting.
-    ///
-    /// The values are always consumed from `r`.  If they cannot be
-    /// interpreted for a variable of the given `width`, a warning is
-    /// reported through `warn` and an empty set is returned.
-    ///
-    /// # Errors
-    ///
-    /// Fails if `code` is not one of the values above or on a read error.
-    fn read<R: Read + Seek>(
-        r: &mut R,
-        offset: u64,
-        width: RawWidth,
-        code: i32,
-        endian: Endian,
-        warn: &dyn Fn(Warning),
-    ) -> Result<Self, Error> {
-        let (individual_values, has_range) = match code {
-            0..=3 => (code as usize, false),
-            -2 => (0, true),
-            -3 => (1, true),
-            _ => return Err(Error::BadMissingValueCode { offset, code }),
-        };
-
-        // Read everything in raw form first; how to interpret the bytes
-        // depends on the variable's width.
-        let mut values = Vec::with_capacity(individual_values);
-        for _ in 0..individual_values {
-            values.push(read_bytes::<8, _>(r)?);
-        }
-        let range = if has_range {
-            let low = read_bytes::<8, _>(r)?;
-            let high = read_bytes::<8, _>(r)?;
-            Some((low, high))
-        } else {
-            None
-        };
-
-        match VarWidth::try_from(width) {
-            Ok(VarWidth::Numeric) => {
-                let values = values
-                    .into_iter()
-                    .map(|v| Value::Number(endian.parse(v)))
-                    .collect();
-                let range = range.map(|(low, high)| {
-                    (
-                        Value::Number(endian.parse(low)),
-                        Value::Number(endian.parse(high)),
-                    )
-                });
-                return Ok(Self { values, range });
-            }
-            // Strings of up to 8 bytes may have individual missing values,
-            // but not a range.
-            Ok(VarWidth::String(width)) if width <= 8 && range.is_none() => {
-                let values = values
-                    .into_iter()
-                    .map(|value| Value::String(Box::from(&value[..width as usize])))
-                    .collect();
-                return Ok(Self {
-                    values,
-                    range: None,
-                });
-            }
-            // String wider than 8 bytes.
-            Ok(VarWidth::String(width)) if width > 8 => warn(Warning::TBD),
-            // String of up to 8 bytes with a range.
-            Ok(VarWidth::String(_)) => warn(Warning::TBD),
-            // `width` does not convert to a variable width.
-            Err(()) => warn(Warning::TBD),
-        }
-        Ok(Self::default())
-    }
-}
-
-/// A variable record.  `S` is the type used for strings: raw bytes as read
-/// from the file, or text after decoding.
-#[derive(Clone)]
-pub struct VariableRecord<S>
-where
-    S: Debug,
-{
-    /// Range of offsets in file.
-    pub offsets: Range<u64>,
-
-    /// Variable width, in the range -1..=255.
-    pub width: RawWidth,
-
-    /// Variable name, padded on the right with spaces.
-    pub name: S,
-
-    /// Print format.
-    pub print_format: Spec,
-
-    /// Write format.
-    pub write_format: Spec,
-
-    /// Missing values.
-    pub missing_values: MissingValues,
-
-    /// Optional variable label.
-    pub label: Option<S>,
-}
-
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub enum RawWidth {
-    Continuation,
-    Numeric,
-    String(NonZeroU8),
-}
-
-impl RawWidth {
-    pub fn n_values(&self) -> Option<usize> {
-        match self {
-            RawWidth::Numeric => Some(1),
-            RawWidth::String(width) => Some((width.get() as usize).div_ceil(8)),
-            _ => None,
-        }
-    }
-}
-
-impl TryFrom<i32> for RawWidth {
-    type Error = ();
-
-    fn try_from(value: i32) -> Result<Self, Self::Error> {
-        match value {
-            -1 => Ok(Self::Continuation),
-            0 => Ok(Self::Numeric),
-            1..=255 => Ok(Self::String(NonZeroU8::new(value as u8).unwrap())),
-            _ => Err(()),
-        }
-    }
-}
-
-impl Display for RawWidth {
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        match self {
-            RawWidth::Continuation => write!(f, "long string continuation"),
-            RawWidth::Numeric => write!(f, "numeric"),
-            RawWidth::String(width) => write!(f, "{width}-byte string"),
-        }
-    }
-}
-
-impl<S> Debug for VariableRecord<S>
-where
-    S: Debug,
-{
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        writeln!(f, "Width: {}", self.width,)?;
-        writeln!(f, "Print format: {:?}", self.print_format)?;
-        writeln!(f, "Write format: {:?}", self.write_format)?;
-        writeln!(f, "Name: {:?}", &self.name)?;
-        writeln!(f, "Variable label: {:?}", self.label)?;
-        writeln!(f, "Missing values: {:?}", self.missing_values)
-    }
-}
-
-impl VariableRecord<RawString> {
-    fn read<R: Read + Seek>(
-        r: &mut R,
-        endian: Endian,
-        warn: &dyn Fn(Warning),
-    ) -> Result<Record, Error> {
-        let start_offset = r.stream_position()?;
-        let width: i32 = endian.parse(read_bytes(r)?);
-        let width: RawWidth = width.try_into().map_err(|_| Error::BadVariableWidth {
-            start_offset,
-            width,
-        })?;
-        let code_offset = r.stream_position()?;
-        let has_variable_label: u32 = endian.parse(read_bytes(r)?);
-        let missing_value_code: i32 = endian.parse(read_bytes(r)?);
-        let print_format = Spec(endian.parse(read_bytes(r)?));
-        let write_format = Spec(endian.parse(read_bytes(r)?));
-        let name = RawString(read_vec(r, 8)?);
-
-        let label = match has_variable_label {
-            0 => None,
-            1 => {
-                let len: u32 = endian.parse(read_bytes(r)?);
-                let read_len = len.min(65535) as usize;
-                let label = RawString(read_vec(r, read_len)?);
-
-                let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
-                let _ = read_vec(r, padding_bytes as usize)?;
-
-                Some(label)
-            }
-            _ => {
-                return Err(Error::BadVariableLabelCode {
-                    start_offset,
-                    code_offset,
-                    code: has_variable_label,
-                })
-            }
-        };
-
-        let missing_values =
-            MissingValues::read(r, start_offset, width, missing_value_code, endian, warn)?;
-
-        let end_offset = r.stream_position()?;
-
-        Ok(Record::Variable(VariableRecord {
-            offsets: start_offset..end_offset,
-            width,
-            name,
-            print_format,
-            write_format,
-            missing_values,
-            label,
-        }))
-    }
-
-    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
-        DecodedRecord::Variable(VariableRecord {
-            offsets: self.offsets.clone(),
-            width: self.width,
-            name: decoder.decode(&self.name).to_string(),
-            print_format: self.print_format,
-            write_format: self.write_format,
-            missing_values: self.missing_values,
-            label: self
-                .label
-                .as_ref()
-                .map(|label| decoder.decode(label).to_string()),
-        })
-    }
-}
-
-/// An 8-byte value whose type (number or string) is not yet known.
-#[derive(Copy, Clone)]
-pub struct UntypedValue(pub [u8; 8]);
-
-impl Debug for UntypedValue {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        let little: f64 = Endian::Little.parse(self.0);
-        let little = format!("{:?}", little);
-        let big: f64 = Endian::Big.parse(self.0);
-        let big = format!("{:?}", big);
-        let number = if little.len() <= big.len() {
-            little
-        } else {
-            big
-        };
-        write!(f, "{number}")?;
-
-        let string = default_decode(&self.0);
-        let string = string
-            .split(|c: char| c == '\0' || c.is_control())
-            .next()
-            .unwrap();
-        write!(f, "{string:?}")?;
-        Ok(())
-    }
-}
-
-#[derive(Clone, PartialEq, Default, Eq, PartialOrd, Ord, Hash)]
-pub struct RawString(pub Vec<u8>);
-
-impl RawString {
-    pub fn spaces(n: usize) -> Self {
-        Self(std::iter::repeat_n(b' ', n).collect())
-    }
-    pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
-        EncodedStr::new(&self.0, encoding)
-    }
-}
-
-impl From<Cow<'_, [u8]>> for RawString {
-    fn from(value: Cow<'_, [u8]>) -> Self {
-        Self(value.into_owned())
-    }
-}
-
-impl From<Vec<u8>> for RawString {
-    fn from(source: Vec<u8>) -> Self {
-        Self(source)
-    }
-}
-
-impl From<&[u8]> for RawString {
-    fn from(source: &[u8]) -> Self {
-        Self(source.into())
-    }
-}
-
-impl Debug for RawString {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        write!(f, "{:?}", default_decode(self.0.as_slice()))
-    }
-}
-
-#[derive(Copy, Clone)]
-pub struct RawStrArray<const N: usize>(pub [u8; N]);
-
-impl<const N: usize> From<[u8; N]> for RawStrArray<N> {
-    fn from(source: [u8; N]) -> Self {
-        Self(source)
-    }
-}
-
-impl<const N: usize> Debug for RawStrArray<N> {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        write!(f, "{:?}", default_decode(&self.0))
-    }
-}
-
-#[derive(Clone, Debug)]
-pub enum EncodedString {
-    Encoded {
-        bytes: Vec<u8>,
-        encoding: &'static Encoding,
-    },
-    Utf8 {
-        s: String,
-    },
-}
-
-impl EncodedString {
-    pub fn borrowed(&self) -> EncodedStr<'_> {
-        match self {
-            EncodedString::Encoded { bytes, encoding } => EncodedStr::Encoded { bytes, encoding },
-            EncodedString::Utf8 { s } => EncodedStr::Utf8 { s },
-        }
-    }
-}
-
-impl<'a> From<EncodedStr<'a>> for EncodedString {
-    fn from(value: EncodedStr<'a>) -> Self {
-        match value {
-            EncodedStr::Encoded { bytes, encoding } => Self::Encoded {
-                bytes: bytes.into(),
-                encoding,
-            },
-            EncodedStr::Utf8 { s } => Self::Utf8 { s: s.into() },
-        }
-    }
-}
-
-/// A borrowed string that is either bytes in a known encoding or UTF-8
-/// text.
-pub enum EncodedStr<'a> {
-    /// Bytes in the given encoding.
-    Encoded {
-        bytes: &'a [u8],
-        encoding: &'static Encoding,
-    },
-    /// UTF-8 text.
-    Utf8 {
-        s: &'a str,
-    },
-}
-
-impl<'a> EncodedStr<'a> {
-    pub fn new(bytes: &'a [u8], encoding: &'static Encoding) -> Self {
-        Self::Encoded { bytes, encoding }
-    }
-    pub fn as_str(&self) -> Cow<'_, str> {
-        match self {
-            EncodedStr::Encoded { bytes, encoding } => {
-                encoding.decode_without_bom_handling(bytes).0
-            }
-            EncodedStr::Utf8 { s } => Cow::from(*s),
-        }
-    }
-    pub fn as_bytes(&self) -> &[u8] {
-        match self {
-            EncodedStr::Encoded { bytes, .. } => bytes,
-            EncodedStr::Utf8 { s } => s.as_bytes(),
-        }
-    }
-    pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
-        match self {
-            EncodedStr::Encoded { bytes, encoding } => {
-                let utf8 = encoding.decode_without_bom_handling(bytes).0;
-                match encoding.encode(&utf8).0 {
-                    Cow::Borrowed(_) => {
-                        // Recoding into UTF-8 and then back did not change anything.
-                        Cow::from(*bytes)
-                    }
-                    Cow::Owned(owned) => Cow::Owned(owned),
-                }
-            }
-            EncodedStr::Utf8 { s } => encoding.encode(s).0,
-        }
-    }
-    pub fn is_empty(&self) -> bool {
-        match self {
-            EncodedStr::Encoded { bytes, .. } => bytes.is_empty(),
-            EncodedStr::Utf8 { s } => s.is_empty(),
-        }
-    }
-    pub fn quoted(&self) -> QuotedEncodedStr {
-        QuotedEncodedStr(self)
-    }
-}
-
-impl<'a> From<&'a str> for EncodedStr<'a> {
-    fn from(s: &'a str) -> Self {
-        Self::Utf8 { s }
-    }
-}
-
-impl<'a> From<&'a String> for EncodedStr<'a> {
-    fn from(s: &'a String) -> Self {
-        Self::Utf8 { s: s.as_str() }
-    }
-}
-
-pub struct QuotedEncodedStr<'a>(&'a EncodedStr<'a>);
-
-impl Display for QuotedEncodedStr<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:?}", self.0.as_str())
-    }
-}
-
-/// A value and the label assigned to it.
-#[derive(Clone, Debug)]
-pub struct ValueLabel<V, S>
-where
-    V: Debug,
-    S: Debug,
-{
-    /// The value being labeled.
-    pub value: Value<V>,
-    /// The label for the value.
-    pub label: S,
-}
-
-/// A value label record: a set of value labels together with the variables
-/// that they apply to.
-#[derive(Clone)]
-pub struct ValueLabelRecord<V, S>
-where
-    V: Debug,
-    S: Debug,
-{
-    /// Range of offsets in file.
-    pub offsets: Range<u64>,
-
-    /// The labels.
-    pub labels: Vec<ValueLabel<V, S>>,
-
-    /// The 1-based dictionary indexes of the variables that the labels
-    /// apply to.
-    pub dict_indexes: Vec<u32>,
-
-    /// The type shared by all of the variables.
-    pub var_type: VarType,
-}
-
-impl<V, S> Debug for ValueLabelRecord<V, S>
-where
-    V: Debug,
-    S: Debug,
-{
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        writeln!(f, "labels: ")?;
-        for label in self.labels.iter() {
-            writeln!(f, "{label:?}")?;
-        }
-        write!(f, "apply to {} variables", self.var_type)?;
-        for dict_index in self.dict_indexes.iter() {
-            write!(f, " #{dict_index}")?;
-        }
-        Ok(())
-    }
-}
-
-/// Limits on the counts accepted when reading a value label record.
-impl<V, S> ValueLabelRecord<V, S>
-where
-    V: Debug,
-    S: Debug,
-{
-    /// Maximum number of value labels in a record.
-    pub const MAX_LABELS: u32 = u32::MAX / 8;
-
-    /// Maximum number of variable indexes in a record.
-    pub const MAX_INDEXES: u32 = u32::MAX / 8;
-}
-
-impl ValueLabelRecord<RawStrArray<8>, RawString> {
-    /// Reads a value label record, together with the variable index record
-    /// (type 4) that must follow it, from `r`.
-    ///
-    /// Variable indexes that are invalid according to `var_types`, or whose
-    /// variable type differs from that of the first valid index, are
-    /// dropped and reported through `warn`.  Returns `Ok(None)` if no
-    /// variable indexes remain.
-    ///
-    /// # Errors
-    ///
-    /// Fails on a read error, if either count exceeds its limit, or if the
-    /// value labels are not followed by a variable index record.
-    fn read<R: Read + Seek>(
-        r: &mut R,
-        endian: Endian,
-        var_types: &VarTypes,
-        warn: &dyn Fn(Warning),
-    ) -> Result<Option<Record>, Error> {
-        let label_offset = r.stream_position()?;
-        let n: u32 = endian.parse(read_bytes(r)?);
-        if n > Self::MAX_LABELS {
-            return Err(Error::BadNumberOfValueLabels {
-                offset: label_offset,
-                n,
-                max: Self::MAX_LABELS,
-            });
-        }
-
-        let mut labels = Vec::new();
-        for _ in 0..n {
-            let value = UntypedValue(read_bytes(r)?);
-            let label_len: u8 = endian.parse(read_bytes(r)?);
-            let label_len = label_len as usize;
-            // The length byte plus the label are padded to a multiple of 8
-            // bytes.
-            let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
-
-            let mut label = read_vec(r, padded_len - 1)?;
-            label.truncate(label_len);
-            labels.push((value, RawString(label)));
-        }
-
-        let index_offset = r.stream_position()?;
-        let rec_type: u32 = endian.parse(read_bytes(r)?);
-        if rec_type != 4 {
-            return Err(Error::ExpectedVarIndexRecord {
-                offset: index_offset,
-                rec_type,
-            });
-        }
-
-        let n: u32 = endian.parse(read_bytes(r)?);
-        if n > Self::MAX_INDEXES {
-            return Err(Error::TooManyVarIndexes {
-                offset: index_offset,
-                n,
-                max: Self::MAX_INDEXES,
-            });
-        } else if n == 0 {
-            warn(Warning::NoVarIndexes {
-                offset: index_offset,
-            });
-            return Ok(None);
-        }
-
-        let index_offset = r.stream_position()?;
-        let mut dict_indexes = Vec::with_capacity(n as usize);
-        let mut invalid_indexes = Vec::new();
-        for _ in 0..n {
-            let index: u32 = endian.parse(read_bytes(r)?);
-            if var_types.is_valid_index(index as usize) {
-                dict_indexes.push(index);
-            } else {
-                invalid_indexes.push(index);
-            }
-        }
-        if !invalid_indexes.is_empty() {
-            warn(Warning::InvalidVarIndexes {
-                offset: index_offset,
-                max: var_types.n_values(),
-                invalid: invalid_indexes,
-            });
-        }
-
-        // The first valid index determines the type that all of the
-        // variables must have.  Indexes are 1-based.
-        let Some(&first_index) = dict_indexes.first() else {
-            return Ok(None);
-        };
-        let var_type = var_types.types[first_index as usize - 1].unwrap();
-        let mut wrong_type_indexes = Vec::new();
-        dict_indexes.retain(|&index| {
-            if var_types.types[index as usize - 1] != Some(var_type) {
-                wrong_type_indexes.push(index);
-                false
-            } else {
-                true
-            }
-        });
-        if !wrong_type_indexes.is_empty() {
-            warn(Warning::MixedVarTypes {
-                offset: index_offset,
-                var_type,
-                wrong_types: wrong_type_indexes,
-            });
-        }
-
-        // Now that the variable type is known, the values can be
-        // interpreted.
-        let labels = labels
-            .into_iter()
-            .map(|(value, label)| ValueLabel {
-                value: Value::from_raw(&value, var_type, endian),
-                label,
-            })
-            .collect();
-
-        let end_offset = r.stream_position()?;
-        Ok(Some(Record::ValueLabel(ValueLabelRecord {
-            offsets: label_offset..end_offset,
-            labels,
-            dict_indexes,
-            var_type,
-        })))
-    }
-
-    /// Converts the labels to text using `decoder`.  The values and all
-    /// other fields are carried over unchanged.
-    fn decode(self, decoder: &Decoder) -> ValueLabelRecord<RawStrArray<8>, String> {
-        let labels = self
-            .labels
-            .iter()
-            .map(|ValueLabel { value, label }| ValueLabel {
-                value: value.clone(),
-                label: decoder.decode(label).to_string(),
-            })
-            .collect();
-        ValueLabelRecord {
-            offsets: self.offsets.clone(),
-            labels,
-            dict_indexes: self.dict_indexes.clone(),
-            var_type: self.var_type,
-        }
-    }
-}
-
-/// A document record, holding the lines of a document.  `S` is the type
-/// used for each line.
-#[derive(Clone, Debug)]
-pub struct DocumentRecord<S>
-where
-    S: Debug,
-{
-    /// Range of offsets in file.
-    pub offsets: Range<u64>,
-
-    /// The document, as an array of lines.  Raw lines are exactly 80 bytes long
-    /// and are right-padded with spaces without any new-line termination.
-    pub lines: Vec<S>,
-}
-
-/// A document line as stored in the file: exactly [DOC_LINE_LEN] bytes.
-pub type RawDocumentLine = RawStrArray<DOC_LINE_LEN>;
-
-/// Length of a line in a document.  Document lines are fixed-length and
-/// padded on the right with spaces.
-pub const DOC_LINE_LEN: usize = 80;
-
-impl DocumentRecord<RawDocumentLine> {
-    /// Maximum number of lines we will accept in a document.  This is simply
-    /// the maximum number that will fit in a 32-bit space.
-    pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
-
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
-        let start_offset = r.stream_position()?;
-        let n: u32 = endian.parse(read_bytes(r)?);
-        let n = n as usize;
-        if n > Self::MAX_LINES {
-            Err(Error::BadDocumentLength {
-                offset: start_offset,
-                n,
-                max: Self::MAX_LINES,
-            })
-        } else {
-            let mut lines = Vec::with_capacity(n);
-            for _ in 0..n {
-                lines.push(RawStrArray(read_bytes(r)?));
-            }
-            let end_offset = r.stream_position()?;
-            Ok(Record::Document(DocumentRecord {
-                offsets: start_offset..end_offset,
-                lines,
-            }))
-        }
-    }
-
-    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
-        DecodedRecord::Document(DocumentRecord {
-            offsets: self.offsets.clone(),
-            lines: self
-                .lines
-                .iter()
-                .map(|s| decoder.decode_slice(&s.0).to_string())
-                .collect(),
-        })
-    }
-}
-
-/// A kind of record that is parsed from the contents of an [Extension].
-trait ExtensionRecord {
-    /// The extension subtype that identifies this kind of record.
-    const SUBTYPE: u32;
-    /// The required size of each data element, if it is fixed.
-    const SIZE: Option<u32>;
-    /// The required number of data elements, if it is fixed.
-    const COUNT: Option<u32>;
-    /// Human-readable name for this kind of record.
-    const NAME: &'static str;
-    /// Parses `ext` into a record, interpreting its data with the given
-    /// byte order.  Returns a warning if the extension cannot be parsed.
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning>;
-}
-
-/// The "integer record" extension (subtype 3), which holds eight 32-bit
-/// integers.
-#[derive(Clone, Debug)]
-pub struct IntegerInfoRecord {
-    /// Range of offsets in file.
-    pub offsets: Range<u64>,
-    /// The first three integers, as a three-part version number.
-    pub version: (i32, i32, i32),
-    /// The fourth integer.
-    pub machine_code: i32,
-    /// The fifth integer.
-    pub floating_point_rep: i32,
-    /// The sixth integer.
-    pub compression_code: i32,
-    /// The seventh integer.
-    pub endianness: i32,
-    /// The eighth integer.
-    pub character_code: i32,
-}
-
-impl ExtensionRecord for IntegerInfoRecord {
-    const SUBTYPE: u32 = 3;
-    const SIZE: Option<u32> = Some(4);
-    const COUNT: Option<u32> = Some(8);
-    const NAME: &'static str = "integer record";
-
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
-        ext.check_size::<Self>()?;
-
-        let mut input = &ext.data[..];
-        let data: Vec<i32> = (0..8)
-            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
-            .collect();
-        Ok(Record::IntegerInfo(IntegerInfoRecord {
-            offsets: ext.offsets.clone(),
-            version: (data[0], data[1], data[2]),
-            machine_code: data[3],
-            floating_point_rep: data[4],
-            compression_code: data[5],
-            endianness: data[6],
-            character_code: data[7],
-        }))
-    }
-}
-
-/// The "floating point record" extension (subtype 4), which holds three
-/// 64-bit floating-point numbers.
-#[derive(Clone, Debug)]
-pub struct FloatInfoRecord {
-    /// The first number.
-    pub sysmis: f64,
-    /// The second number.
-    pub highest: f64,
-    /// The third number.
-    pub lowest: f64,
-}
-
-impl ExtensionRecord for FloatInfoRecord {
-    const SUBTYPE: u32 = 4;
-    const SIZE: Option<u32> = Some(8);
-    const COUNT: Option<u32> = Some(3);
-    const NAME: &'static str = "floating point record";
-
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
-        ext.check_size::<Self>()?;
-
-        let mut input = &ext.data[..];
-        let data: Vec<f64> = (0..3)
-            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
-            .collect();
-        Ok(Record::FloatInfo(FloatInfoRecord {
-            sysmis: data[0],
-            highest: data[1],
-            lowest: data[2],
-        }))
-    }
-}
-
-/// Category label setting for a multiple dichotomy set.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum CategoryLabels {
-    /// Used for `D` sets and for `E` sets flagged with ` 11 `.
-    VarLabels,
-    /// Used for `E` sets flagged with ` 1 `.
-    CountedValues,
-}
-
-/// Kind of a multiple response set.
-#[derive(Clone, Debug)]
-pub enum MultipleResponseType {
-    /// Multiple dichotomy set (type code `D` or `E` in the record).
-    MultipleDichotomy {
-        /// Value string that follows the type code.
-        value: RawString,
-        /// Category label setting for the set.
-        labels: CategoryLabels,
-    },
-    /// Multiple category set (type code `C` in the record).
-    MultipleCategory,
-}
-
-impl MultipleResponseType {
-    fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Warning> {
-        let (mr_type, input) = match input.split_first() {
-            Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input),
-            Some((b'D', input)) => {
-                let (value, input) = parse_counted_string(input)?;
-                (
-                    MultipleResponseType::MultipleDichotomy {
-                        value,
-                        labels: CategoryLabels::VarLabels,
-                    },
-                    input,
-                )
-            }
-            Some((b'E', input)) => {
-                let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
-                    (CategoryLabels::CountedValues, rest)
-                } else if let Some(rest) = input.strip_prefix(b" 11 ") {
-                    (CategoryLabels::VarLabels, rest)
-                } else {
-                    return Err(Warning::TBD);
-                };
-                let (value, input) = parse_counted_string(input)?;
-                (
-                    MultipleResponseType::MultipleDichotomy { value, labels },
-                    input,
-                )
-            }
-            _ => return Err(Warning::TBD),
-        };
-        Ok((mr_type, input))
-    }
-}
-
-/// One multiple response set.
-///
-/// `I` is the type used for identifiers and `S` the type used for the label;
-/// both are raw byte strings before decoding.
-#[derive(Clone, Debug)]
-pub struct MultipleResponseSet<I, S>
-where
-    I: Debug,
-    S: Debug,
-{
-    /// Name of the set.
-    pub name: I,
-    /// Label of the set.
-    pub label: S,
-    /// Kind of set.
-    pub mr_type: MultipleResponseType,
-    /// Names listed after the label in the record.
-    pub short_names: Vec<I>,
-}
-
-impl MultipleResponseSet<RawString, RawString> {
-    fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> {
-        let Some(equals) = input.iter().position(|&b| b == b'=') else {
-            return Err(Warning::TBD);
-        };
-        let (name, input) = input.split_at(equals);
-        let (mr_type, input) = MultipleResponseType::parse(input)?;
-        let Some(input) = input.strip_prefix(b" ") else {
-            return Err(Warning::TBD);
-        };
-        let (label, mut input) = parse_counted_string(input)?;
-        let mut vars = Vec::new();
-        while input.first() != Some(&b'\n') {
-            match input.split_first() {
-                Some((b' ', rest)) => {
-                    let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else {
-                        return Err(Warning::TBD);
-                    };
-                    let (var, rest) = rest.split_at(length);
-                    if !var.is_empty() {
-                        vars.push(var.into());
-                    }
-                    input = rest;
-                }
-                _ => return Err(Warning::TBD),
-            }
-        }
-        while input.first() == Some(&b'\n') {
-            input = &input[1..];
-        }
-        Ok((
-            MultipleResponseSet {
-                name: name.into(),
-                label,
-                mr_type,
-                short_names: vars,
-            },
-            input,
-        ))
-    }
-
-    fn decode(
-        &self,
-        decoder: &Decoder,
-    ) -> Result<MultipleResponseSet<Identifier, String>, Warning> {
-        let mut short_names = Vec::with_capacity(self.short_names.len());
-        for short_name in self.short_names.iter() {
-            if let Some(short_name) = decoder
-                .decode_identifier(short_name)
-                .map_err(Warning::InvalidMrSetName)
-                .issue_warning(&decoder.warn)
-            {
-                short_names.push(short_name);
-            }
-        }
-        Ok(MultipleResponseSet {
-            name: decoder
-                .decode_identifier(&self.name)
-                .map_err(Warning::InvalidMrSetVariableName)?,
-            label: decoder.decode(&self.label).to_string(),
-            mr_type: self.mr_type.clone(),
-            short_names,
-        })
-    }
-}
-
-/// Extension record containing a list of multiple response sets.
-///
-/// `I` and `S` are the identifier and label types of the contained sets.
-#[derive(Clone, Debug)]
-pub struct MultipleResponseRecord<I, S>(pub Vec<MultipleResponseSet<I, S>>)
-where
-    I: Debug,
-    S: Debug;
-
-impl ExtensionRecord for MultipleResponseRecord<RawString, RawString> {
-    const SUBTYPE: u32 = 7;
-    const SIZE: Option<u32> = Some(1);
-    const COUNT: Option<u32> = None;
-    const NAME: &'static str = "multiple response set record";
-
-    fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
-        ext.check_size::<Self>()?;
-
-        let mut input = &ext.data[..];
-        let mut sets = Vec::new();
-        while !input.is_empty() {
-            let (set, rest) = MultipleResponseSet::parse(input)?;
-            sets.push(set);
-            input = rest;
-        }
-        Ok(Record::MultipleResponse(MultipleResponseRecord(sets)))
-    }
-}
-
-impl MultipleResponseRecord<RawString, RawString> {
-    fn decode(self, decoder: &Decoder) -> DecodedRecord {
-        let mut sets = Vec::new();
-        for set in self.0.iter() {
-            if let Some(set) = set.decode(decoder).issue_warning(&decoder.warn) {
-                sets.push(set);
-            }
-        }
-        DecodedRecord::MultipleResponse(MultipleResponseRecord(sets))
-    }
-}
-
-fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> {
-    let Some(space) = input.iter().position(|&b| b == b' ') else {
-        return Err(Warning::TBD);
-    };
-    let Ok(length) = from_utf8(&input[..space]) else {
-        return Err(Warning::TBD);
-    };
-    let Ok(length): Result<usize, _> = length.parse() else {
-        return Err(Warning::TBD);
-    };
-
-    let input = &input[space + 1..];
-    if input.len() < length {
-        return Err(Warning::TBD);
-    };
-
-    let (string, rest) = input.split_at(length);
-    Ok((string.into(), rest))
-}
-
-/// [Level of measurement](https://en.wikipedia.org/wiki/Level_of_measurement).
-///
-/// In a variable display record, the levels are encoded as 1 (nominal),
-/// 2 (ordinal), and 3 (scale).
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum Measure {
-    /// Nominal values can only be compared for equality.
-    Nominal,
-
-    /// Ordinal values can be meaningfully ordered.
-    Ordinal,
-
-    /// Scale values can be meaningfully compared for the degree of difference.
-    Scale,
-}
-
-impl Measure {
-    pub fn default_for_type(var_type: VarType) -> Option<Measure> {
-        match var_type {
-            VarType::Numeric => None,
-            VarType::String => Some(Self::Nominal),
-        }
-    }
-
-    fn try_decode(source: u32) -> Result<Option<Measure>, Warning> {
-        match source {
-            0 => Ok(None),
-            1 => Ok(Some(Measure::Nominal)),
-            2 => Ok(Some(Measure::Ordinal)),
-            3 => Ok(Some(Measure::Scale)),
-            _ => Err(Warning::InvalidMeasurement(source)),
-        }
-    }
-}
-
-/// Alignment of a variable's values for display.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum Alignment {
-    /// Left-aligned.
-    Left,
-    /// Right-aligned.
-    Right,
-    /// Centered.
-    Center,
-}
-
-impl Alignment {
-    fn try_decode(source: u32) -> Result<Option<Alignment>, Warning> {
-        match source {
-            0 => Ok(None),
-            1 => Ok(Some(Alignment::Left)),
-            2 => Ok(Some(Alignment::Right)),
-            3 => Ok(Some(Alignment::Center)),
-            _ => Err(Warning::InvalidAlignment(source)),
-        }
-    }
-
-    pub fn default_for_type(var_type: VarType) -> Self {
-        match var_type {
-            VarType::Numeric => Self::Right,
-            VarType::String => Self::Left,
-        }
-    }
-}
-
-/// Display parameters for one variable.
-#[derive(Clone, Debug)]
-pub struct VarDisplay {
-    /// Measurement level, if one was decoded.
-    pub measure: Option<Measure>,
-    /// Display width, present only if the record includes widths.
-    pub width: Option<u32>,
-    /// Alignment, if one was decoded.
-    pub alignment: Option<Alignment>,
-}
-
-/// Extension record holding one [VarDisplay] per variable.
-#[derive(Clone, Debug)]
-pub struct VarDisplayRecord(pub Vec<VarDisplay>);
-
-impl VarDisplayRecord {
-    const SUBTYPE: u32 = 11;
-
-    fn parse(
-        ext: &Extension,
-        n_vars: usize,
-        endian: Endian,
-        warn: &dyn Fn(Warning),
-    ) -> Result<Record, Warning> {
-        if ext.size != 4 {
-            return Err(Warning::BadRecordSize {
-                offset: ext.offsets.start,
-                record: String::from("variable display record"),
-                size: ext.size,
-                expected_size: 4,
-            });
-        }
-
-        let has_width = if ext.count as usize == 3 * n_vars {
-            true
-        } else if ext.count as usize == 2 * n_vars {
-            false
-        } else {
-            return Err(Warning::TBD);
-        };
-
-        let mut var_displays = Vec::new();
-        let mut input = &ext.data[..];
-        for _ in 0..n_vars {
-            let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
-                .issue_warning(&warn)
-                .flatten();
-            let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap()));
-            let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
-                .issue_warning(&warn)
-                .flatten();
-            var_displays.push(VarDisplay {
-                measure,
-                width,
-                alignment,
-            });
-        }
-        Ok(Record::VarDisplay(VarDisplayRecord(var_displays)))
-    }
-}
-
-/// Missing values for one long string variable.
-///
-/// `N` is the type of the variable name: a raw string before decoding and an
-/// identifier afterward.
-#[derive(Clone, Debug)]
-pub struct LongStringMissingValues<N>
-where
-    N: Debug,
-{
-    /// Variable name.
-    pub var_name: N,
-
-    /// Missing values.
-    pub missing_values: Vec<RawStrArray<8>>,
-}
-
-impl LongStringMissingValues<RawString> {
-    fn decode(&self, decoder: &Decoder) -> Result<LongStringMissingValues<Identifier>, IdError> {
-        Ok(LongStringMissingValues {
-            var_name: decoder.decode_identifier(&self.var_name)?,
-            missing_values: self.missing_values.clone(),
-        })
-    }
-}
-
-/// Extension record listing missing values for long string variables.
-#[derive(Clone, Debug)]
-pub struct LongStringMissingValueRecord<N>(pub Vec<LongStringMissingValues<N>>)
-where
-    N: Debug;
-
-impl ExtensionRecord for LongStringMissingValueRecord<RawString> {
-    const SUBTYPE: u32 = 22;
-    const SIZE: Option<u32> = Some(1);
-    const COUNT: Option<u32> = None;
-    const NAME: &'static str = "long string missing values record";
-
-    /// Parses the extension payload as a sequence of entries, each made of a
-    /// length-prefixed variable name, a one-byte count of missing values, a
-    /// 32-bit value length (which must be 8), and the 8-byte values.
-    ///
-    /// A read that runs past the end of the payload, or a value length other
-    /// than 8, aborts the whole record with a warning.
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
-        ext.check_size::<Self>()?;
-
-        let mut input = &ext.data[..];
-        let mut missing_value_set = Vec::new();
-        while !input.is_empty() {
-            let var_name = read_string(&mut input, endian)?;
-            let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
-            let value_len: u32 = endian.parse(read_bytes(&mut input)?);
-            if value_len != 8 {
-                // NOTE(review): this offset is 8 bytes before the current read
-                // position, although `value_len` itself occupies only the last
-                // 4 bytes read -- confirm which field the offset should name.
-                let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
-                return Err(Warning::BadLongMissingValueLength {
-                    record_offset: ext.offsets.start,
-                    offset,
-                    value_len,
-                });
-            }
-            let mut missing_values = Vec::new();
-            for i in 0..n_missing_values {
-                let value: [u8; 8] = read_bytes(&mut input)?;
-                let numeric_value: u64 = endian.parse(value);
-                let value = if i > 0 && numeric_value == 8 {
-                    // Tolerate files written by old, buggy versions of PSPP
-                    // where we believed that the value_length was repeated
-                    // before each missing value.
-                    read_bytes(&mut input)?
-                } else {
-                    value
-                };
-                missing_values.push(RawStrArray(value));
-            }
-            missing_value_set.push(LongStringMissingValues {
-                var_name,
-                missing_values,
-            });
-        }
-        Ok(Record::LongStringMissingValues(
-            LongStringMissingValueRecord(missing_value_set),
-        ))
-    }
-}
-
-impl LongStringMissingValueRecord<RawString> {
-    pub fn decode(self, decoder: &Decoder) -> LongStringMissingValueRecord<Identifier> {
-        let mut mvs = Vec::with_capacity(self.0.len());
-        for mv in self.0.iter() {
-            if let Some(mv) = mv
-                .decode(decoder)
-                .map_err(Warning::InvalidLongStringMissingValueVariableName)
-                .issue_warning(&decoder.warn)
-            {
-                mvs.push(mv);
-            }
-        }
-        LongStringMissingValueRecord(mvs)
-    }
-}
-
-/// Extension record (subtype 20) whose payload is an encoding name, stored
-/// as a UTF-8 string.
-#[derive(Clone, Debug)]
-pub struct EncodingRecord(pub String);
-
-impl ExtensionRecord for EncodingRecord {
-    const SUBTYPE: u32 = 20;
-    const SIZE: Option<u32> = Some(1);
-    const COUNT: Option<u32> = None;
-    const NAME: &'static str = "encoding record";
-
-    fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
-        ext.check_size::<Self>()?;
-
-        Ok(Record::Encoding(EncodingRecord(
-            String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName {
-                offset: ext.offsets.start,
-            })?,
-        )))
-    }
-}
-
-/// Extension record (subtype 16) holding two 64-bit values, the second of
-/// which is the number of cases.
-#[derive(Clone, Debug)]
-pub struct NumberOfCasesRecord {
-    /// Always observed as 1.
-    pub one: u64,
-
-    /// Number of cases.
-    pub n_cases: u64,
-}
-
-impl ExtensionRecord for NumberOfCasesRecord {
-    const SUBTYPE: u32 = 16;
-    const SIZE: Option<u32> = Some(8);
-    const COUNT: Option<u32> = Some(2);
-    const NAME: &'static str = "extended number of cases record";
-
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
-        ext.check_size::<Self>()?;
-
-        let mut input = &ext.data[..];
-        let one = endian.parse(read_bytes(&mut input)?);
-        let n_cases = endian.parse(read_bytes(&mut input)?);
-
-        Ok(Record::NumberOfCases(NumberOfCasesRecord { one, n_cases }))
-    }
-}
-
-/// An extension record whose payload is text that has not been decoded yet.
-#[derive(Clone, Debug)]
-pub struct TextRecord {
-    /// File offsets of the record's data.
-    pub offsets: Range<u64>,
-
-    /// Type of record.
-    pub rec_type: TextRecordType,
-
-    /// The text content of the record.
-    pub text: RawString,
-}
-
-/// The kinds of text record, which determine how a [TextRecord] is decoded.
-#[derive(Clone, Copy, Debug)]
-pub enum TextRecordType {
-    /// Extension subtype 5.
-    VariableSets,
-    /// Extension subtype 10.
-    ProductInfo,
-    /// Extension subtype 13.
-    LongNames,
-    /// Extension subtype 14.
-    VeryLongStrings,
-    /// Extension subtype 17.
-    FileAttributes,
-    /// Extension subtype 18.
-    VariableAttributes,
-}
-
-impl TextRecord {
-    fn new(extension: Extension, rec_type: TextRecordType) -> Self {
-        Self {
-            offsets: extension.offsets,
-            rec_type,
-            text: extension.data.into(),
-        }
-    }
-    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
-        match self.rec_type {
-            TextRecordType::VariableSets => {
-                DecodedRecord::VariableSets(VariableSetRecord::decode(&self, decoder))
-            }
-            TextRecordType::ProductInfo => {
-                DecodedRecord::ProductInfo(ProductInfoRecord::decode(&self, decoder))
-            }
-            TextRecordType::LongNames => {
-                DecodedRecord::LongNames(LongNamesRecord::decode(&self, decoder))
-            }
-            TextRecordType::VeryLongStrings => {
-                DecodedRecord::VeryLongStrings(VeryLongStringsRecord::decode(&self, decoder))
-            }
-            TextRecordType::FileAttributes => {
-                DecodedRecord::FileAttributes(FileAttributeRecord::decode(&self, decoder))
-            }
-            TextRecordType::VariableAttributes => {
-                DecodedRecord::VariableAttributes(VariableAttributeRecord::decode(&self, decoder))
-            }
-        }
-    }
-}
-
-/// One `NAME=LENGTH` entry from a very long strings record.
-#[derive(Clone, Debug)]
-pub struct VeryLongString {
-    /// Name before the `=`.
-    pub short_name: Identifier,
-    /// Number after the `=`.
-    pub length: u16,
-}
-
-impl VeryLongString {
-    fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Warning> {
-        let Some((short_name, length)) = input.split_once('=') else {
-            return Err(Warning::TBD);
-        };
-        let short_name = decoder
-            .new_identifier(short_name)
-            .and_then(Identifier::must_be_ordinary)
-            .map_err(Warning::InvalidLongStringName)?;
-        let length = length.parse().map_err(|_| Warning::TBD)?;
-        Ok(VeryLongString { short_name, length })
-    }
-}
-
-/// Decoded very long strings record: a list of [VeryLongString] entries.
-#[derive(Clone, Debug)]
-pub struct VeryLongStringsRecord(pub Vec<VeryLongString>);
-
-impl VeryLongStringsRecord {
-    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
-        let input = decoder.decode(&source.text);
-        let mut very_long_strings = Vec::new();
-        for tuple in input
-            .split('\0')
-            .map(|s| s.trim_end_matches('\t'))
-            .filter(|s| !s.is_empty())
-        {
-            if let Some(vls) = VeryLongString::parse(decoder, tuple).issue_warning(&decoder.warn) {
-                very_long_strings.push(vls)
-            }
-        }
-        VeryLongStringsRecord(very_long_strings)
-    }
-}
-
-/// A named attribute with one or more string values.
-#[derive(Clone, Debug)]
-pub struct Attribute {
-    /// Attribute name.
-    pub name: Identifier,
-    /// Attribute values, in the order they appear.
-    pub values: Vec<String>,
-}
-
-impl Attribute {
-    fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> {
-        let Some((name, mut input)) = input.split_once('(') else {
-            return Err(Warning::TBD);
-        };
-        let name = decoder
-            .new_identifier(name)
-            .map_err(Warning::InvalidAttributeName)?;
-        let mut values = Vec::new();
-        loop {
-            let Some((value, rest)) = input.split_once('\n') else {
-                return Err(Warning::TBD);
-            };
-            if let Some(stripped) = value
-                .strip_prefix('\'')
-                .and_then(|value| value.strip_suffix('\''))
-            {
-                values.push(stripped.into());
-            } else {
-                decoder.warn(Warning::TBD);
-                values.push(value.into());
-            }
-            if let Some(rest) = rest.strip_prefix(')') {
-                let attribute = Attribute { name, values };
-                return Ok((attribute, rest));
-            };
-            input = rest;
-        }
-    }
-}
-
-impl Attributes {
-    fn parse<'a>(
-        decoder: &Decoder,
-        mut input: &'a str,
-        sentinel: Option<char>,
-    ) -> Result<(Attributes, &'a str), Warning> {
-        let mut attributes = HashMap::new();
-        let rest = loop {
-            match input.chars().next() {
-                None => break input,
-                c if c == sentinel => break &input[1..],
-                _ => {
-                    let (attribute, rest) = Attribute::parse(decoder, input)?;
-                    // XXX report duplicate name
-                    attributes.insert(attribute.name, attribute.values);
-                    input = rest;
-                }
-            }
-        };
-        Ok((Attributes(attributes), rest))
-    }
-}
-
-/// Decoded file attributes record: the attributes attached to the file.
-#[derive(Clone, Debug, Default)]
-pub struct FileAttributeRecord(pub Attributes);
-
-impl FileAttributeRecord {
-    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
-        let input = decoder.decode(&source.text);
-        match Attributes::parse(decoder, &input, None).issue_warning(&decoder.warn) {
-            Some((set, rest)) => {
-                if !rest.is_empty() {
-                    decoder.warn(Warning::TBD);
-                }
-                FileAttributeRecord(set)
-            }
-            None => FileAttributeRecord::default(),
-        }
-    }
-}
-
-/// The attributes attached to one variable.
-#[derive(Clone, Debug)]
-pub struct VarAttributes {
-    /// Name of the variable, taken from before the `:`.
-    pub long_var_name: Identifier,
-    /// The variable's attributes.
-    pub attributes: Attributes,
-}
-
-impl VarAttributes {
-    fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(VarAttributes, &'a str), Warning> {
-        let Some((long_var_name, rest)) = input.split_once(':') else {
-            return Err(Warning::TBD);
-        };
-        let long_var_name = decoder
-            .new_identifier(long_var_name)
-            .and_then(Identifier::must_be_ordinary)
-            .map_err(Warning::InvalidAttributeVariableName)?;
-        let (attributes, rest) = Attributes::parse(decoder, rest, Some('/'))?;
-        let var_attribute = VarAttributes {
-            long_var_name,
-            attributes,
-        };
-        Ok((var_attribute, rest))
-    }
-}
-
-/// Decoded variable attributes record: one [VarAttributes] per variable
-/// listed in the record.
-#[derive(Clone, Debug)]
-pub struct VariableAttributeRecord(pub Vec<VarAttributes>);
-
-impl VariableAttributeRecord {
-    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
-        let decoded = decoder.decode(&source.text);
-        let mut input = decoded.as_ref();
-        let mut var_attribute_sets = Vec::new();
-        while !input.is_empty() {
-            let Some((var_attribute, rest)) =
-                VarAttributes::parse(decoder, input).issue_warning(&decoder.warn)
-            else {
-                break;
-            };
-            var_attribute_sets.push(var_attribute);
-            input = rest;
-        }
-        VariableAttributeRecord(var_attribute_sets)
-    }
-}
-
-/// One `SHORT=LONG` pair from a long names record.
-#[derive(Clone, Debug)]
-pub struct LongName {
-    /// Name before the `=`.
-    pub short_name: Identifier,
-    /// Name after the `=`.
-    pub long_name: Identifier,
-}
-
-impl LongName {
-    fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
-        let Some((short_name, long_name)) = input.split_once('=') else {
-            return Err(Warning::TBD);
-        };
-        let short_name = decoder
-            .new_identifier(short_name)
-            .and_then(Identifier::must_be_ordinary)
-            .map_err(Warning::InvalidShortName)?;
-        let long_name = decoder
-            .new_identifier(long_name)
-            .and_then(Identifier::must_be_ordinary)
-            .map_err(Warning::InvalidLongName)?;
-        Ok(LongName {
-            short_name,
-            long_name,
-        })
-    }
-}
-
-/// Decoded long names record: a list of [LongName] pairs.
-#[derive(Clone, Debug)]
-pub struct LongNamesRecord(pub Vec<LongName>);
-
-impl LongNamesRecord {
-    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
-        let input = decoder.decode(&source.text);
-        let mut names = Vec::new();
-        for pair in input.split('\t').filter(|s| !s.is_empty()) {
-            if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&decoder.warn) {
-                names.push(long_name);
-            }
-        }
-        LongNamesRecord(names)
-    }
-}
-
-/// Decoded product info record: the record's text as a string.
-#[derive(Clone, Debug)]
-pub struct ProductInfoRecord(pub String);
-
-impl ProductInfoRecord {
-    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
-        Self(decoder.decode(&source.text).into())
-    }
-}
-/// A named set of variables from a variable sets record.
-#[derive(Clone, Debug)]
-pub struct VariableSet {
-    /// Name of the set.
-    pub name: Identifier,
-    /// Names of the variables in the set.
-    pub variable_names: Vec<Identifier>,
-}
-
-impl VariableSet {
-    fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
-        let (name, input) = input.split_once('=').ok_or(Warning::TBD)?;
-        let name = decoder.new_identifier(name).map_err(|_| Warning::TBD)?;
-        let mut vars = Vec::new();
-        for var in input.split_ascii_whitespace() {
-            if let Some(identifier) = decoder
-                .new_identifier(var)
-                .and_then(Identifier::must_be_ordinary)
-                .map_err(Warning::InvalidVariableSetName)
-                .issue_warning(&decoder.warn)
-            {
-                vars.push(identifier);
-            }
-        }
-        Ok(VariableSet {
-            name,
-            variable_names: vars,
-        })
-    }
-}
-
-/// Decoded variable sets record.
-#[derive(Clone, Debug)]
-pub struct VariableSetRecord {
-    /// File offsets of the record's data.
-    pub offsets: Range<u64>,
-    /// The variable sets, one per successfully parsed line.
-    pub sets: Vec<VariableSet>,
-}
-
-impl VariableSetRecord {
-    fn decode(source: &TextRecord, decoder: &Decoder) -> VariableSetRecord {
-        let mut sets = Vec::new();
-        let input = decoder.decode(&source.text);
-        for line in input.lines() {
-            if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&decoder.warn) {
-                sets.push(set)
-            }
-        }
-        VariableSetRecord {
-            offsets: source.offsets.clone(),
-            sets,
-        }
-    }
-}
-
-/// Extension trait for turning a failed result into a reported warning.
-trait IssueWarning<T> {
-    /// Returns the success value, if any; otherwise passes the warning to
-    /// `warn` and returns `None`.
-    fn issue_warning<F>(self, warn: &F) -> Option<T>
-    where
-        F: Fn(Warning);
-}
-impl<T> IssueWarning<T> for Result<T, Warning> {
-    fn issue_warning<F>(self, warn: &F) -> Option<T>
-    where
-        F: Fn(Warning),
-    {
-        match self {
-            Ok(result) => Some(result),
-            Err(error) => {
-                warn(error);
-                None
-            }
-        }
-    }
-}
-
-/// A raw extension record: a subtype number plus `count` data elements of
-/// `size` bytes each.
-#[derive(Clone, Debug)]
-pub struct Extension {
-    /// File offsets of the record's data (excluding the subtype, size, and
-    /// count fields that precede it).
-    pub offsets: Range<u64>,
-
-    /// Record subtype.
-    pub subtype: u32,
-
-    /// Size of each data element.
-    pub size: u32,
-
-    /// Number of data elements.
-    pub count: u32,
-
-    /// `size * count` bytes of data.
-    pub data: Vec<u8>,
-}
-
-impl Extension {
-    /// Checks this extension's element size and count against the values
-    /// that record type `E` requires.
-    ///
-    /// Returns `Warning::BadRecordSize` or `Warning::BadRecordCount` on a
-    /// mismatch.  A requirement of `None` is not checked.
-    fn check_size<E: ExtensionRecord>(&self) -> Result<(), Warning> {
-        if let Some(expected_size) = E::SIZE {
-            if self.size != expected_size {
-                return Err(Warning::BadRecordSize {
-                    offset: self.offsets.start,
-                    record: E::NAME.into(),
-                    size: self.size,
-                    expected_size,
-                });
-            }
-        }
-        if let Some(expected_count) = E::COUNT {
-            if self.count != expected_count {
-                return Err(Warning::BadRecordCount {
-                    offset: self.offsets.start,
-                    record: E::NAME.into(),
-                    count: self.count,
-                    expected_count,
-                });
-            }
-        }
-        Ok(())
-    }
-
-    /// Reads one extension record from `r` and parses it according to its
-    /// subtype.
-    ///
-    /// I/O failures and a `size * count` that overflows `u32` are returned as
-    /// errors.  A record that is read successfully but fails to parse is
-    /// reported through `warn`, and `Ok(None)` is returned.  Subtypes without
-    /// a dedicated parser are returned as `Record::OtherExtension`.
-    fn read<R: Read + Seek>(
-        r: &mut R,
-        endian: Endian,
-        n_vars: usize,
-        warn: &dyn Fn(Warning),
-    ) -> Result<Option<Record>, Error> {
-        let subtype = endian.parse(read_bytes(r)?);
-        // Offset of the size field, used when reporting an oversized record.
-        let header_offset = r.stream_position()?;
-        let size: u32 = endian.parse(read_bytes(r)?);
-        let count = endian.parse(read_bytes(r)?);
-        let Some(product) = size.checked_mul(count) else {
-            return Err(Error::ExtensionRecordTooLarge {
-                offset: header_offset,
-                subtype,
-                size,
-                count,
-            });
-        };
-        let start_offset = r.stream_position()?;
-        let data = read_vec(r, product as usize)?;
-        let end_offset = start_offset + product as u64;
-        let extension = Extension {
-            offsets: start_offset..end_offset,
-            subtype,
-            size,
-            count,
-            data,
-        };
-        let result = match subtype {
-            IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian),
-            FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian),
-            VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, n_vars, endian, warn),
-            // Subtype 19 is parsed the same way as the multiple response
-            // record's own subtype.
-            MultipleResponseRecord::SUBTYPE | 19 => {
-                MultipleResponseRecord::parse(&extension, endian)
-            }
-            LongStringValueLabelRecord::SUBTYPE => {
-                LongStringValueLabelRecord::parse(&extension, endian)
-            }
-            EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian),
-            NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian),
-            // Text records are kept raw here and decoded later.
-            5 => Ok(Record::Text(TextRecord::new(
-                extension,
-                TextRecordType::VariableSets,
-            ))),
-            10 => Ok(Record::Text(TextRecord::new(
-                extension,
-                TextRecordType::ProductInfo,
-            ))),
-            13 => Ok(Record::Text(TextRecord::new(
-                extension,
-                TextRecordType::LongNames,
-            ))),
-            14 => Ok(Record::Text(TextRecord::new(
-                extension,
-                TextRecordType::VeryLongStrings,
-            ))),
-            17 => Ok(Record::Text(TextRecord::new(
-                extension,
-                TextRecordType::FileAttributes,
-            ))),
-            18 => Ok(Record::Text(TextRecord::new(
-                extension,
-                TextRecordType::VariableAttributes,
-            ))),
-            _ => Ok(Record::OtherExtension(extension)),
-        };
-        // A parse failure is downgraded to a warning so that reading can
-        // continue with the next record.
-        match result {
-            Ok(result) => Ok(Some(result)),
-            Err(error) => {
-                warn(error);
-                Ok(None)
-            }
-        }
-    }
-}
-
-/// Header for ZLIB-compressed data, made up of three 64-bit values.
-#[derive(Clone, Debug)]
-pub struct ZHeader {
-    /// File offset to the start of the record.
-    pub offset: u64,
-
-    /// File offset to the ZLIB data header.
-    pub zheader_offset: u64,
-
-    /// File offset to the ZLIB trailer.
-    pub ztrailer_offset: u64,
-
-    /// Length of the ZLIB trailer in bytes.
-    pub ztrailer_len: u64,
-}
-
-impl ZHeader {
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
-        let offset = r.stream_position()?;
-        let zheader_offset: u64 = endian.parse(read_bytes(r)?);
-        let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
-        let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
-
-        Ok(ZHeader {
-            offset,
-            zheader_offset,
-            ztrailer_offset,
-            ztrailer_len,
-        })
-    }
-}
-
-/// Trailer for ZLIB-compressed data: a fixed part followed by one descriptor
-/// per compressed block.
-#[derive(Clone, Debug)]
-pub struct ZTrailer {
-    /// File offset to the start of the record.
-    pub offset: u64,
-
-    /// Compression bias as a negative integer, e.g. -100.
-    pub int_bias: i64,
-
-    /// Always observed as zero.
-    pub zero: u64,
-
-    /// Uncompressed size of each block, except possibly the last.  Only
-    /// `0x3ff000` has been observed so far.
-    pub block_size: u32,
-
-    /// Block descriptors, always `(ztrailer_len - 24) / 24` of them.
-    pub blocks: Vec<ZBlock>,
-}
-
-/// Descriptor for one block of ZLIB-compressed data.
-#[derive(Clone, Debug)]
-pub struct ZBlock {
-    /// Offset of block of data if simple compression were used.
-    pub uncompressed_ofs: u64,
-
-    /// Actual offset within the file of the compressed data block.
-    pub compressed_ofs: u64,
-
-    /// The number of bytes in this data block after decompression.  This is
-    /// `block_size` in every data block but the last, which may be smaller.
-    pub uncompressed_size: u32,
-
-    /// The number of bytes in this data block, as stored compressed in this
-    /// file.
-    pub compressed_size: u32,
-}
-
-impl ZBlock {
-    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZBlock, Error> {
-        Ok(ZBlock {
-            uncompressed_ofs: endian.parse(read_bytes(r)?),
-            compressed_ofs: endian.parse(read_bytes(r)?),
-            uncompressed_size: endian.parse(read_bytes(r)?),
-            compressed_size: endian.parse(read_bytes(r)?),
-        })
-    }
-}
-
-impl ZTrailer {
-    fn read<R: Read + Seek>(
-        reader: &mut R,
-        endian: Endian,
-        ztrailer_ofs: u64,
-        ztrailer_len: u64,
-    ) -> Result<Option<ZTrailer>, Error> {
-        let start_offset = reader.stream_position()?;
-        if reader.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
-            return Ok(None);
-        }
-        let int_bias = endian.parse(read_bytes(reader)?);
-        let zero = endian.parse(read_bytes(reader)?);
-        let block_size = endian.parse(read_bytes(reader)?);
-        let n_blocks: u32 = endian.parse(read_bytes(reader)?);
-        let expected_n_blocks = (ztrailer_len - 24) / 24;
-        if n_blocks as u64 != expected_n_blocks {
-            return Err(Error::BadZlibTrailerNBlocks {
-                offset: ztrailer_ofs,
-                n_blocks,
-                expected_n_blocks,
-                ztrailer_len,
-            });
-        }
-        let blocks = (0..n_blocks)
-            .map(|_| ZBlock::read(reader, endian))
-            .collect::<Result<Vec<_>, _>>()?;
-        reader.seek(SeekFrom::Start(start_offset))?;
-        Ok(Some(ZTrailer {
-            offset: ztrailer_ofs,
-            int_bias,
-            zero,
-            block_size,
-            blocks,
-        }))
-    }
-}
-
-fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
-    let mut buf = [0; N];
-    let n = r.read(&mut buf)?;
-    if n > 0 {
-        if n < N {
-            r.read_exact(&mut buf[n..])?;
-        }
-        Ok(Some(buf))
-    } else {
-        Ok(None)
-    }
-}
-
-fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
-    let mut buf = [0; N];
-    r.read_exact(&mut buf)?;
-    Ok(buf)
-}
-
-fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
-    let mut vec = vec![0; n];
-    r.read_exact(&mut vec)?;
-    Ok(vec)
-}
-
-fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<RawString, IoError> {
-    let length: u32 = endian.parse(read_bytes(r)?);
-    Ok(read_vec(r, length as usize)?.into())
-}
-
-#[derive(Clone, Debug)]
-pub struct LongStringValueLabels<N, S>
-where
-    S: Debug,
-{
-    pub var_name: N,
-    pub width: u32,
-
-    /// `(value, label)` pairs, where each value is `width` bytes.
-    pub labels: Vec<(RawString, S)>,
-}
-
-impl LongStringValueLabels<RawString, RawString> {
-    fn decode(
-        &self,
-        decoder: &Decoder,
-    ) -> Result<LongStringValueLabels<Identifier, String>, Warning> {
-        let var_name = decoder.decode(&self.var_name);
-        let var_name = Identifier::from_encoding(var_name.trim_end(), decoder.encoding)
-            .map_err(Warning::InvalidLongStringValueLabelName)?;
-
-        let mut labels = Vec::with_capacity(self.labels.len());
-        for (value, label) in self.labels.iter() {
-            let label = decoder.decode(label).to_string();
-            labels.push((value.clone(), label));
-        }
-
-        Ok(LongStringValueLabels {
-            var_name,
-            width: self.width,
-            labels,
-        })
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct LongStringValueLabelRecord<N, S>(pub Vec<LongStringValueLabels<N, S>>)
-where
-    N: Debug,
-    S: Debug;
-
-impl ExtensionRecord for LongStringValueLabelRecord<RawString, RawString> {
-    const SUBTYPE: u32 = 21;
-    const SIZE: Option<u32> = Some(1);
-    const COUNT: Option<u32> = None;
-    const NAME: &'static str = "long string value labels record";
-
-    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
-        ext.check_size::<Self>()?;
-
-        let mut input = &ext.data[..];
-        let mut label_set = Vec::new();
-        while !input.is_empty() {
-            let var_name = read_string(&mut input, endian)?;
-            let width: u32 = endian.parse(read_bytes(&mut input)?);
-            let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
-            let mut labels = Vec::new();
-            for _ in 0..n_labels {
-                let value = read_string(&mut input, endian)?;
-                let label = read_string(&mut input, endian)?;
-                labels.push((value, label));
-            }
-            label_set.push(LongStringValueLabels {
-                var_name,
-                width,
-                labels,
-            })
-        }
-        Ok(Record::LongStringValueLabels(LongStringValueLabelRecord(
-            label_set,
-        )))
-    }
-}
-
-impl LongStringValueLabelRecord<RawString, RawString> {
-    fn decode(self, decoder: &Decoder) -> LongStringValueLabelRecord<Identifier, String> {
-        let mut labels = Vec::with_capacity(self.0.len());
-        for label in &self.0 {
-            match label.decode(decoder) {
-                Ok(set) => labels.push(set),
-                Err(error) => decoder.warn(error),
-            }
-        }
-        LongStringValueLabelRecord(labels)
-    }
-}
-
-#[derive(Default)]
-pub struct VarTypes {
-    pub types: Vec<Option<VarType>>,
-}
-
-impl VarTypes {
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    pub fn push(&mut self, width: RawWidth) {
-        if let Ok(var_type) = VarType::try_from(width) {
-            self.types.push(Some(var_type));
-            for _ in 1..width.n_values().unwrap() {
-                self.types.push(None);
-            }
-        }
-    }
-
-    pub fn n_values(&self) -> usize {
-        self.types.len()
-    }
-
-    pub fn is_valid_index(&self, index: usize) -> bool {
-        self.var_type_at(index).is_some()
-    }
-
-    pub fn var_type_at(&self, index: usize) -> Option<VarType> {
-        if index >= 1 && index <= self.types.len() {
-            self.types[index - 1]
-        } else {
-            None
-        }
-    }
-
-    pub fn iter(&self) -> impl Iterator<Item = VarType> + use<'_> {
-        self.types
-            .iter()
-            .map(|var_type| var_type.unwrap_or(VarType::String))
-    }
-}
diff --git a/rust/pspp/src/sack.rs b/rust/pspp/src/sack.rs
deleted file mode 100644 (file)
index 8eec1eb..0000000
+++ /dev/null
@@ -1,633 +0,0 @@
-use float_next_after::NextAfter;
-use num::{Bounded, Zero};
-use ordered_float::OrderedFloat;
-use std::{
-    collections::{hash_map::Entry, HashMap},
-    error::Error as StdError,
-    fmt::{Display, Formatter, Result as FmtResult},
-    iter::repeat_n,
-};
-
-use crate::endian::{Endian, ToBytes};
-
-pub type Result<T, F = Error> = std::result::Result<T, F>;
-
-#[derive(Debug)]
-pub struct Error {
-    pub file_name: Option<String>,
-    pub line_number: Option<usize>,
-    pub token: Option<String>,
-    pub message: String,
-}
-
-impl Error {
-    fn new(
-        file_name: Option<&str>,
-        line_number: Option<usize>,
-        token: Option<&str>,
-        message: String,
-    ) -> Error {
-        Error {
-            file_name: file_name.map(String::from),
-            line_number,
-            token: token.map(String::from),
-            message,
-        }
-    }
-}
-
-impl StdError for Error {}
-
-impl Display for Error {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        match (self.file_name.as_ref(), self.line_number) {
-            (Some(ref file_name), Some(line_number)) => write!(f, "{file_name}:{line_number}: ")?,
-            (Some(ref file_name), None) => write!(f, "{file_name}: ")?,
-            (None, Some(line_number)) => write!(f, "line {line_number}: ")?,
-            (None, None) => (),
-        }
-        if let Some(ref token) = self.token {
-            write!(f, "at '{token}': ")?;
-        }
-        write!(f, "{}", self.message)
-    }
-}
-
-pub fn sack(input: &str, input_file_name: Option<&str>, endian: Endian) -> Result<Vec<u8>> {
-    let mut symbol_table = HashMap::new();
-    let output = _sack(input, input_file_name, endian, &mut symbol_table)?;
-    let output = if !symbol_table.is_empty() {
-        for (k, v) in symbol_table.iter() {
-            println!("{k} => {v:?}");
-        }
-        for (k, v) in symbol_table.iter() {
-            if v.is_none() {
-                Err(Error::new(
-                    input_file_name,
-                    None,
-                    None,
-                    format!("label {k} used but never defined"),
-                ))?
-            }
-        }
-        _sack(input, input_file_name, endian, &mut symbol_table)?
-    } else {
-        output
-    };
-    Ok(output)
-}
-
-fn _sack(
-    input: &str,
-    input_file_name: Option<&str>,
-    endian: Endian,
-    symbol_table: &mut HashMap<String, Option<u32>>,
-) -> Result<Vec<u8>> {
-    let mut lexer = Lexer::new(input, input_file_name, endian)?;
-    let mut output = Vec::new();
-    while parse_data_item(&mut lexer, &mut output, symbol_table)? {}
-    Ok(output)
-}
-
-fn parse_data_item(
-    lexer: &mut Lexer,
-    output: &mut Vec<u8>,
-    symbol_table: &mut HashMap<String, Option<u32>>,
-) -> Result<bool> {
-    if lexer.token.is_none() {
-        return Ok(false);
-    };
-
-    let initial_len = output.len();
-    match lexer.take()? {
-        Token::Integer(integer) => {
-            if let Ok(integer) = TryInto::<i32>::try_into(integer) {
-                output.extend_from_slice(&lexer.endian.to_bytes(integer));
-            } else if let Ok(integer) = TryInto::<u32>::try_into(integer) {
-                output.extend_from_slice(&lexer.endian.to_bytes(integer));
-            } else {
-                Err(lexer.error(format!(
-                    "{integer} is not in the valid range [{},{}]",
-                    i32::MIN,
-                    u32::MAX
-                )))?;
-            };
-        }
-        Token::Float(float) => output.extend_from_slice(&lexer.endian.to_bytes(float.0)),
-        Token::PcSysmis => {
-            output.extend_from_slice(&[0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff])
-        }
-        Token::I8 => put_integers::<u8, 1>(lexer, "i8", output)?,
-        Token::I16 => put_integers::<u16, 2>(lexer, "i16", output)?,
-        Token::I64 => put_integers::<i64, 8>(lexer, "i64", output)?,
-        Token::String(string) => output.extend_from_slice(string.as_bytes()),
-        Token::S(size) => {
-            let Some((Token::String(ref string), _)) = lexer.token else {
-                Err(lexer.error(format!("string expected after 's{size}'")))?
-            };
-            let len = string.len();
-            if len > size {
-                Err(lexer.error(format!(
-                    "{len}-byte string is longer than pad length {size}"
-                )))?
-            }
-            output.extend_from_slice(string.as_bytes());
-            output.extend(repeat_n(b' ', size - len));
-            lexer.get()?;
-        }
-        Token::LParen => {
-            while !matches!(lexer.token, Some((Token::RParen, _))) {
-                parse_data_item(lexer, output, symbol_table)?;
-            }
-            lexer.get()?;
-        }
-        Token::Count => put_counted_items::<u32, 4>(lexer, "COUNT", output, symbol_table)?,
-        Token::Count8 => put_counted_items::<u8, 1>(lexer, "COUNT8", output, symbol_table)?,
-        Token::Hex => {
-            let Some((Token::String(ref string), _)) = lexer.token else {
-                Err(lexer.error(String::from("string expected after 'hex'")))?
-            };
-            let mut string = &string[..];
-            loop {
-                string = string.trim_start();
-                if string.is_empty() {
-                    break;
-                };
-
-                let mut i = string.chars();
-                let Some(c0) = i.next() else { return Ok(true) };
-                let Some(c1) = i.next() else {
-                    Err(lexer.error(String::from("hex string has odd number of characters")))?
-                };
-
-                let (Some(digit0), Some(digit1)) = (c0.to_digit(16), c1.to_digit(16)) else {
-                    Err(lexer.error(String::from("invalid digit in hex string")))?
-                };
-                let byte = digit0 * 16 + digit1;
-                output.push(byte as u8);
-
-                string = i.as_str();
-            }
-            lexer.get()?;
-        }
-        Token::Label(name) => {
-            println!("define {name}");
-            let value = output.len() as u32;
-            match symbol_table.entry(name.clone()) {
-                Entry::Vacant(v) => {
-                    v.insert(Some(value));
-                }
-                Entry::Occupied(mut o) => {
-                    match o.get() {
-                        Some(v) => {
-                            if *v != value {
-                                Err(lexer.error(format!("{name}: can't redefine label for offset {:#x} with offset {:#x}", *v, value)))?
-                            }
-                        }
-                        None => drop(o.insert(Some(value))),
-                    }
-                }
-            };
-            return Ok(true);
-        }
-        Token::At(name) => {
-            let mut value = *symbol_table.entry(name.clone()).or_insert(None);
-            loop {
-                let plus = match lexer.token {
-                    Some((Token::Plus, _)) => true,
-                    Some((Token::Minus, _)) => false,
-                    _ => break,
-                };
-                lexer.get()?;
-
-                let operand = match lexer.token {
-                    Some((Token::At(ref name), _)) => {
-                        *symbol_table.entry(name.clone()).or_insert(None)
-                    }
-                    Some((Token::Integer(integer), _)) => Some(
-                        integer
-                            .try_into()
-                            .map_err(|msg| lexer.error(format!("bad offset literal ({msg})")))?,
-                    ),
-                    _ => Err(lexer.error(String::from("expecting @label or integer literal")))?,
-                };
-                lexer.get()?;
-
-                value = match (value, operand) {
-                    (Some(a), Some(b)) => Some(
-                        if plus {
-                            a.checked_add(b)
-                        } else {
-                            a.checked_sub(b)
-                        }
-                        .ok_or_else(|| {
-                            lexer.error(String::from("overflow in offset arithmetic"))
-                        })?,
-                    ),
-                    _ => None,
-                };
-            }
-            let value = value.unwrap_or(0);
-            output.extend_from_slice(&lexer.endian.to_bytes(value));
-        }
-        _ => (),
-    };
-    if let Some((Token::Asterisk, _)) = lexer.token {
-        lexer.get()?;
-        let Token::Integer(count) = lexer.take()? else {
-            Err(lexer.error(String::from("positive integer expected after '*'")))?
-        };
-        if count < 1 {
-            Err(lexer.error(String::from("positive integer expected after '*'")))?
-        };
-        let final_len = output.len();
-        for _ in 1..count {
-            output.extend_from_within(initial_len..final_len);
-        }
-    }
-    match lexer.token {
-        Some((Token::Semicolon, _)) => {
-            lexer.get()?;
-        }
-        Some((Token::RParen, _)) => (),
-        _ => Err(lexer.error(String::from("';' expected")))?,
-    }
-    Ok(true)
-}
-
-fn put_counted_items<T, const N: usize>(
-    lexer: &mut Lexer,
-    name: &str,
-    output: &mut Vec<u8>,
-    symbol_table: &mut HashMap<String, Option<u32>>,
-) -> Result<()>
-where
-    T: Zero + TryFrom<usize>,
-    Endian: ToBytes<T, N>,
-{
-    let old_size = output.len();
-    output.extend_from_slice(&lexer.endian.to_bytes(T::zero()));
-    let start = output.len();
-    if !matches!(lexer.token, Some((Token::LParen, _))) {
-        Err(lexer.error(format!("'(' expected after '{name}'")))?
-    }
-    lexer.get()?;
-    while !matches!(lexer.token, Some((Token::RParen, _))) {
-        parse_data_item(lexer, output, symbol_table)?;
-    }
-    lexer.get()?;
-    let delta = output.len() - start;
-    let Ok(delta): Result<T, _> = delta.try_into() else {
-        Err(lexer.error(format!("{delta} bytes is too much for '{name}'")))?
-    };
-    let dest = &mut output[old_size..old_size + N];
-    dest.copy_from_slice(&lexer.endian.to_bytes(delta));
-    Ok(())
-}
-
-fn put_integers<T, const N: usize>(
-    lexer: &mut Lexer,
-    name: &str,
-    output: &mut Vec<u8>,
-) -> Result<()>
-where
-    T: Bounded + Display + TryFrom<i64> + Copy,
-    Endian: ToBytes<T, N>,
-{
-    println!("put_integers {:?}", lexer.token);
-    let mut n = 0;
-    while let Some(integer) = lexer.take_if(|t| match t {
-        Token::Integer(integer) => Some(*integer),
-        _ => None,
-    })? {
-        println!("got integer {integer}");
-        let Ok(integer) = integer.try_into() else {
-            Err(lexer.error(format!(
-                "{integer} is not in the valid range [{},{}]",
-                T::min_value(),
-                T::max_value()
-            )))?
-        };
-        output.extend_from_slice(&lexer.endian.to_bytes(integer));
-        n += 1;
-    }
-    println!("put_integers {:?} {n}", lexer.token);
-    if n == 0 {
-        Err(lexer.error(format!("integer expected after '{name}'")))?
-    }
-    Ok(())
-}
-
-#[derive(PartialEq, Eq, Clone, Debug)]
-enum Token {
-    Integer(i64),
-    Float(OrderedFloat<f64>),
-    PcSysmis,
-    String(String),
-    Semicolon,
-    Asterisk,
-    LParen,
-    RParen,
-    I8,
-    I16,
-    I64,
-    S(usize),
-    Count,
-    Count8,
-    Hex,
-    Label(String),
-    At(String),
-    Minus,
-    Plus,
-}
-
-struct Lexer<'a> {
-    input: &'a str,
-    token: Option<(Token, &'a str)>,
-    input_file_name: Option<&'a str>,
-    line_number: usize,
-    endian: Endian,
-}
-
-fn skip_comments(mut s: &str) -> (&str, usize) {
-    let mut n_newlines = 0;
-    let s = loop {
-        s = s.trim_start_matches([' ', '\t', '\r', '<', '>']);
-        if let Some(remainder) = s.strip_prefix('#') {
-            let Some((_, remainder)) = remainder.split_once('\n') else {
-                break "";
-            };
-            s = remainder;
-            n_newlines += 1;
-        } else if let Some(remainder) = s.strip_prefix('\n') {
-            s = remainder;
-            n_newlines += 1;
-        } else {
-            break s;
-        }
-    };
-    (s, n_newlines)
-}
-
-impl<'a> Lexer<'a> {
-    fn new(input: &'a str, input_file_name: Option<&'a str>, endian: Endian) -> Result<Lexer<'a>> {
-        let mut lexer = Lexer {
-            input,
-            token: None,
-            input_file_name,
-            line_number: 1,
-            endian,
-        };
-        lexer.token = lexer.next()?;
-        Ok(lexer)
-    }
-    fn error(&self, message: String) -> Error {
-        let repr = self.token.as_ref().map(|(_, repr)| *repr);
-        Error::new(self.input_file_name, Some(self.line_number), repr, message)
-    }
-    fn take(&mut self) -> Result<Token> {
-        let Some(token) = self.token.take() else {
-            Err(self.error(String::from("unexpected end of input")))?
-        };
-        self.token = self.next()?;
-        Ok(token.0)
-    }
-    fn take_if<F, T>(&mut self, condition: F) -> Result<Option<T>>
-    where
-        F: FnOnce(&Token) -> Option<T>,
-    {
-        let Some(ref token) = self.token else {
-            return Ok(None);
-        };
-        match condition(&token.0) {
-            Some(value) => {
-                self.token = self.next()?;
-                Ok(Some(value))
-            }
-            None => Ok(None),
-        }
-    }
-    fn get(&mut self) -> Result<Option<&Token>> {
-        if self.token.is_none() {
-            Err(self.error(String::from("unexpected end of input")))?
-        } else {
-            self.token = self.next()?;
-            match self.token {
-                Some((ref token, _)) => Ok(Some(token)),
-                None => Ok(None),
-            }
-        }
-    }
-
-    fn next(&mut self) -> Result<Option<(Token, &'a str)>> {
-        // Get the first character of the token, skipping past white space and
-        // comments.
-        let (s, n_newlines) = skip_comments(self.input);
-        self.line_number += n_newlines;
-        self.input = s;
-
-        let start = s;
-        let mut iter = s.chars();
-        let Some(c) = iter.next() else {
-            return Ok(None);
-        };
-        let (token, rest) = match c {
-            c if c.is_ascii_digit() || c == '-' => {
-                let len = s
-                    .find(|c: char| {
-                        !(c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '-')
-                    })
-                    .unwrap_or(s.len());
-                let (number, rest) = s.split_at(len);
-                let token = if number == "-" {
-                    Token::Minus
-                } else if let Some(digits) = number.strip_prefix("0x") {
-                    Token::Integer(i64::from_str_radix(digits, 16).map_err(|msg| {
-                        self.error(format!("bad integer literal '{number}' ({msg})"))
-                    })?)
-                } else if !number.contains('.') {
-                    Token::Integer(number.parse().map_err(|msg| {
-                        self.error(format!("bad integer literal '{number}' ({msg})"))
-                    })?)
-                } else {
-                    Token::Float(number.parse().map_err(|msg| {
-                        self.error(format!("bad float literal '{number}' ({msg})"))
-                    })?)
-                };
-                (token, rest)
-            }
-            '"' => {
-                let s = iter.as_str();
-                let Some(len) = s.find(['\n', '"']) else {
-                    Err(self.error(String::from("end-of-file inside string")))?
-                };
-                let (string, rest) = s.split_at(len);
-                let Some(rest) = rest.strip_prefix('"') else {
-                    Err(self.error(format!("new-line inside string ({string}...{rest})")))?
-                };
-                (Token::String(string.into()), rest)
-            }
-            ';' => (Token::Semicolon, iter.as_str()),
-            '*' => (Token::Asterisk, iter.as_str()),
-            '+' => (Token::Plus, iter.as_str()),
-            '(' => (Token::LParen, iter.as_str()),
-            ')' => (Token::RParen, iter.as_str()),
-            c if c.is_alphabetic() || c == '@' || c == '_' => {
-                let len = s
-                    .find(|c: char| {
-                        !(c.is_ascii_digit()
-                            || c.is_alphabetic()
-                            || c == '@'
-                            || c == '.'
-                            || c == '_')
-                    })
-                    .unwrap_or(s.len());
-                let (s, rest) = s.split_at(len);
-                if let Some(rest) = rest.strip_prefix(':') {
-                    (Token::Label(s.into()), rest)
-                } else if let Some(name) = s.strip_prefix('@') {
-                    (Token::At(name.into()), rest)
-                } else if let Some(count) = s.strip_prefix('s') {
-                    let token =
-                        Token::S(count.parse().map_err(|msg| {
-                            self.error(format!("bad counted string '{s}' ({msg})"))
-                        })?);
-                    (token, rest)
-                } else {
-                    let token = match s {
-                        "i8" => Token::I8,
-                        "i16" => Token::I16,
-                        "i64" => Token::I64,
-                        "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
-                        "PCSYSMIS" => Token::PcSysmis,
-                        "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
-                        "HIGHEST" => Token::Float(f64::MAX.into()),
-                        "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
-                        "COUNT" => Token::Count,
-                        "COUNT8" => Token::Count8,
-                        "hex" => Token::Hex,
-                        _ => Err(self.error(format!("invalid token '{s}'")))?,
-                    };
-                    (token, rest)
-                }
-            }
-            _ => Err(self.error(format!("invalid input byte '{c}'")))?,
-        };
-        self.input = rest;
-        let repr = &start[..start.len() - rest.len()];
-        println!("{token:?} {repr}");
-        Ok(Some((token, repr)))
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use crate::endian::Endian;
-    use crate::sack::sack;
-    use anyhow::Result;
-    use hexplay::HexView;
-
-    #[test]
-    fn basic_sack() -> Result<()> {
-        let input = r#"
-"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
-2; # Layout code
-28; # Nominal case size
-0; # Not compressed
-0; # Not weighted
-1; # 1 case.
-100.0; # Bias.
-"01 Jan 11"; "20:53:52";
-"PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 "";
-i8 0 *3;
-"#;
-        let output = sack(input, None, Endian::Big)?;
-        HexView::new(&output).print()?;
-        Ok(())
-    }
-
-    #[test]
-    fn pcp_sack() -> Result<()> {
-        let input = r#"
-# File header.
-2; 0;
-@MAIN; @MAIN_END - @MAIN;
-@VARS; @VARS_END - @VARS;
-@LABELS; @LABELS_END - @LABELS;
-@DATA; @DATA_END - @DATA;
-(0; 0) * 11;
-i8 0 * 128;
-
-MAIN:
-    i16 1;         # Fixed.
-    s62 "PCSPSS PSPP synthetic test product";
-    PCSYSMIS;
-    0; 0; i16 1;   # Fixed.
-    i16 0;
-    i16 15;
-    1;
-    i16 0;         # Fixed.
-    1;
-    s8 "11/28/14";
-    s8 "15:11:00";
-    s64 "PSPP synthetic test file";
-MAIN_END:
-
-VARS:
-    0; 0; 0; 0x050800; s8 "$CASENUM"; PCSYSMIS;
-    0; 0; 0; 0x010800; s8 "$DATE"; PCSYSMIS;
-    0; 0; 0; 0x050802; s8 "$WEIGHT"; PCSYSMIS;
-
-    # Numeric variable, no label or missing values.
-    0; 0; 0; 0x050800; s8 "NUM1"; PCSYSMIS;
-
-    # Numeric variable, variable label.
-    0; 0; @NUM2_LABEL - @LABELS_OFS; 0x050800; s8 "NUM2"; PCSYSMIS;
-
-    # Numeric variable with missing value.
-    0; 0; 0; 0x050800; s8 "NUM3"; 1.0;
-
-    # Numeric variable, variable label and missing value.
-    0; 0; @NUM4_LABEL - @LABELS_OFS; 0x050800; s8 "NUM4"; 2.0;
-
-    # String variable, no label or missing values.
-    0; 0; 0; 0x010800; s8 "STR1"; PCSYSMIS;
-
-    # String variable, variable label.
-    0; 0; @STR2_LABEL - @LABELS_OFS; 0x010400; s8 "STR2"; PCSYSMIS;
-
-    # String variable with missing value.
-    0; 0; 0; 0x010500; s8 "STR3"; s8 "MISS";
-
-    # String variable, variable label and missing value.
-    0; 0; @STR4_LABEL - @LABELS_OFS; 0x010100; s8 "STR4"; s8 "OTHR";
-
-    # Long string variable
-    0; 0; 0; 0x010b00; s8 "STR5"; PCSYSMIS;
-    0 * 8;
-
-    # Long string variable with variable label
-    0; 0; @STR6_LABEL - @LABELS_OFS; 0x010b00; s8 "STR6"; PCSYSMIS;
-    0 * 8;
-VARS_END:
-
-LABELS:
-    3; i8 0 0 0; LABELS_OFS: i8 0;
-    NUM2_LABEL: COUNT8("Numeric variable 2's label");
-    NUM4_LABEL: COUNT8("Another numeric variable label");
-    STR2_LABEL: COUNT8("STR2's variable label");
-    STR4_LABEL: COUNT8("STR4's variable label");
-    STR6_LABEL: COUNT8("Another string variable's label");
-LABELS_END:
-
-DATA:
-    0.0; "11/28/14"; 1.0;
-    0.0; 1.0; 2.0; PCSYSMIS; s8 "abcdefgh"; s8 "ijkl"; s8 "mnopq"; s8 "r";
-    s16 "stuvwxyzAB"; s16 "CDEFGHIJKLM";
-DATA_END:
-"#;
-        let output = sack(input, None, Endian::Big)?;
-        HexView::new(&output).print()?;
-        Ok(())
-    }
-}
index 6aad3406058f20011b4f1077b3ab7767f69092ef..3bfb4f014186690dbb747b7c4c719420ae45903c 100644 (file)
@@ -128,7 +128,7 @@ impl Default for Settings {
 impl Settings {
     pub fn global() -> &'static Settings {
         static GLOBAL: OnceLock<Settings> = OnceLock::new();
-        GLOBAL.get_or_init( Settings::default)
+        GLOBAL.get_or_init(Settings::default)
     }
 }
 
diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs
new file mode 100644 (file)
index 0000000..43e4aa3
--- /dev/null
@@ -0,0 +1,904 @@
+use core::str;
+use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc};
+
+use crate::{
+    dictionary::{
+        Dictionary, InvalidRole, MultipleResponseSet, MultipleResponseType, Value, VarWidth,
+        Variable, VariableSet,
+    },
+    endian::Endian,
+    format::{Error as FormatError, Format, UncheckedFormat},
+    identifier::{ByIdentifier, Error as IdError, Identifier},
+    sys::encoding::Error as EncodingError,
+    sys::raw::{
+        self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension, FileAttributeRecord,
+        FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, LongNamesRecord,
+        LongStringMissingValueRecord, LongStringValueLabelRecord, MissingValues,
+        MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord, RawStrArray, RawWidth,
+        ValueLabel, ValueLabelRecord, VarDisplayRecord, VariableAttributeRecord, VariableRecord,
+        VariableSetRecord, VeryLongStringsRecord, ZHeader, ZTrailer,
+    },
+};
+use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
+use encoding_rs::Encoding;
+use indexmap::set::MutableValues;
+use thiserror::Error as ThisError;
+
+pub use crate::sys::raw::{CategoryLabels, Compression};
+
+#[derive(ThisError, Debug)]
+pub enum Error {
+    #[error("Missing header record")]
+    MissingHeaderRecord,
+
+    // XXX this is an internal error
+    #[error("More than one file header record")]
+    DuplicateHeaderRecord,
+
+    #[error("{0}")]
+    EncodingError(EncodingError),
+
+    #[error("Using default encoding {0}.")]
+    UsingDefaultEncoding(String),
+
+    #[error("Variable record from offset {:x} to {:x} specifies width {width} not in valid range [-1,255).", offsets.start, offsets.end)]
+    InvalidVariableWidth { offsets: Range<u64>, width: i32 },
+
+    #[error("This file has corrupted metadata written by a buggy version of PSPP.  To ensure that other software can read it correctly, save a new copy of the file.")]
+    InvalidLongMissingValueFormat,
+
+    #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format.  Using 01 Jan 1970.")]
+    InvalidCreationDate { creation_date: String },
+
+    #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format.  Using midnight.")]
+    InvalidCreationTime { creation_time: String },
+
+    #[error("{id_error}  Renaming variable to {new_name}.")]
+    InvalidVariableName {
+        id_error: IdError,
+        new_name: Identifier,
+    },
+
+    #[error(
+        "Substituting {new_spec} for invalid print format on variable {variable}.  {format_error}"
+    )]
+    InvalidPrintFormat {
+        new_spec: Format,
+        variable: Identifier,
+        format_error: FormatError,
+    },
+
+    #[error(
+        "Substituting {new_spec} for invalid write format on variable {variable}.  {format_error}"
+    )]
+    InvalidWriteFormat {
+        new_spec: Format,
+        variable: Identifier,
+        format_error: FormatError,
+    },
+
+    #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
+    DuplicateVariableName {
+        duplicate_name: Identifier,
+        new_name: Identifier,
+    },
+
+    #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
+    InvalidDictIndex { dict_index: usize, max_index: usize },
+
+    #[error("Dictionary index {0} refers to a long string continuation.")]
+    DictIndexIsContinuation(usize),
+
+    #[error("At offset {offset:#x}, one or more variable indexes for value labels referred to long string continuation records: {indexes:?}")]
+    LongStringContinuationIndexes { offset: u64, indexes: Vec<u32> },
+
+    #[error(
+        "At offsets {:#x}...{:#x}, record types 3 and 4 may not add value labels to one or more long string variables: {variables:?}", .offsets.start, .offsets.end
+    )]
+    InvalidLongStringValueLabels {
+        offsets: Range<u64>,
+        variables: Vec<Identifier>,
+    },
+
+    #[error("Variables associated with value label are not all of identical type.  Variable {numeric_var} is numeric, but variable {string_var} is string.")]
+    ValueLabelsDifferentTypes {
+        numeric_var: Identifier,
+        string_var: Identifier,
+    },
+
+    #[error("Invalid multiple response set name.  {0}")]
+    InvalidMrSetName(IdError),
+
+    #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
+    UnknownMrSetVariable {
+        mr_set: Identifier,
+        short_name: Identifier,
+    },
+
+    #[error("Multiple response set {0} has no variables.")]
+    EmptyMrSet(Identifier),
+
+    #[error("Multiple response set {0} has only one variable.")]
+    OneVarMrSet(Identifier),
+
+    #[error("Multiple response set {0} contains both string and numeric variables.")]
+    MixedMrSet(Identifier),
+
+    #[error(
+        "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
+    )]
+    InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
+
+    #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
+    TooWideMDGroupCountedValue {
+        mr_set: Identifier,
+        value: String,
+        width: usize,
+        max_width: u16,
+    },
+
+    #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
+    InvalidLongValueLabelWidth {
+        name: Identifier,
+        width: u32,
+        min_width: u16,
+        max_width: u16,
+    },
+
+    #[error("Invalid attribute name.  {0}")]
+    InvalidAttributeName(IdError),
+
+    #[error("Invalid short name in long variable name record.  {0}")]
+    InvalidShortName(IdError),
+
+    #[error("Invalid name in long variable name record.  {0}")]
+    InvalidLongName(IdError),
+
+    #[error("Invalid variable name in very long string record.  {0}")]
+    InvalidLongStringName(IdError),
+
+    #[error("Invalid variable name in long string value label record.  {0}")]
+    InvalidLongStringValueLabelName(IdError),
+
+    #[error("Invalid variable name in attribute record.  {0}")]
+    InvalidAttributeVariableName(IdError),
+
+    // XXX This is risky because `text` might be arbitarily long.
+    #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
+    MalformedString { encoding: String, text: String },
+
+    #[error("Details TBD")]
+    TBD,
+}
+
+/// The decoded records of a system file, grouped by record type.
+///
+/// Record types held in a `Vec` keep every record that was supplied; record
+/// types held in an `Option` keep at most one.
+#[derive(Clone, Debug)]
+pub struct Headers {
+    /// The file header record.
+    pub header: HeaderRecord<String>,
+    /// Variable records, including long string continuation records.
+    pub variable: Vec<VariableRecord<String>>,
+    pub value_label: Vec<ValueLabelRecord<RawStrArray<8>, String>>,
+    pub document: Vec<DocumentRecord<String>>,
+    pub integer_info: Option<IntegerInfoRecord>,
+    pub float_info: Option<FloatInfoRecord>,
+    pub var_display: Option<VarDisplayRecord>,
+    pub multiple_response: Vec<MultipleResponseRecord<Identifier, String>>,
+    pub long_string_value_labels: Vec<LongStringValueLabelRecord<Identifier, String>>,
+    pub long_string_missing_values: Vec<LongStringMissingValueRecord<Identifier>>,
+    pub encoding: Option<EncodingRecord>,
+    pub number_of_cases: Option<NumberOfCasesRecord>,
+    pub variable_sets: Vec<VariableSetRecord>,
+    pub product_info: Option<ProductInfoRecord>,
+    pub long_names: Vec<LongNamesRecord>,
+    pub very_long_strings: Vec<VeryLongStringsRecord>,
+    pub file_attributes: Vec<FileAttributeRecord>,
+    pub variable_attributes: Vec<VariableAttributeRecord>,
+    /// Extension records that have no dedicated field above.
+    pub other_extension: Vec<Extension>,
+    pub end_of_headers: Option<u32>,
+    pub z_header: Option<ZHeader>,
+    pub z_trailer: Option<ZTrailer>,
+    /// Shared handle to the case data, if a cases record was supplied.
+    pub cases: Option<Rc<RefCell<Cases>>>,
+}
+
+fn take_first<T, F>(mut vec: Vec<T>, more_than_one: F) -> Option<T>
+where
+    F: FnOnce(),
+{
+    if vec.len() > 1 {
+        more_than_one();
+    }
+    vec.drain(..).next()
+}
+
+impl Headers {
+    pub fn new(headers: Vec<raw::DecodedRecord>, warn: &impl Fn(Error)) -> Result<Headers, Error> {
+        let mut file_header = Vec::new();
+        let mut variable = Vec::new();
+        let mut value_label = Vec::new();
+        let mut document = Vec::new();
+        let mut integer_info = Vec::new();
+        let mut float_info = Vec::new();
+        let mut var_display = Vec::new();
+        let mut multiple_response = Vec::new();
+        let mut long_string_value_labels = Vec::new();
+        let mut long_string_missing_values = Vec::new();
+        let mut encoding = Vec::new();
+        let mut number_of_cases = Vec::new();
+        let mut variable_sets = Vec::new();
+        let mut product_info = Vec::new();
+        let mut long_names = Vec::new();
+        let mut very_long_strings = Vec::new();
+        let mut file_attributes = Vec::new();
+        let mut variable_attributes = Vec::new();
+        let mut other_extension = Vec::new();
+        let mut end_of_headers = Vec::new();
+        let mut z_header = Vec::new();
+        let mut z_trailer = Vec::new();
+        let mut cases = Vec::new();
+
+        for header in headers {
+            match header {
+                DecodedRecord::Header(record) => {
+                    file_header.push(record);
+                }
+                DecodedRecord::Variable(record) => {
+                    variable.push(record);
+                }
+                DecodedRecord::ValueLabel(record) => {
+                    value_label.push(record);
+                }
+                DecodedRecord::Document(record) => {
+                    document.push(record);
+                }
+                DecodedRecord::IntegerInfo(record) => {
+                    integer_info.push(record);
+                }
+                DecodedRecord::FloatInfo(record) => {
+                    float_info.push(record);
+                }
+                DecodedRecord::VariableSets(record) => {
+                    variable_sets.push(record);
+                }
+                DecodedRecord::VarDisplay(record) => {
+                    var_display.push(record);
+                }
+                DecodedRecord::MultipleResponse(record) => {
+                    multiple_response.push(record);
+                }
+                DecodedRecord::LongStringValueLabels(record) => {
+                    long_string_value_labels.push(record)
+                }
+                DecodedRecord::LongStringMissingValues(record) => {
+                    long_string_missing_values.push(record);
+                }
+                DecodedRecord::Encoding(record) => {
+                    encoding.push(record);
+                }
+                DecodedRecord::NumberOfCases(record) => {
+                    number_of_cases.push(record);
+                }
+                DecodedRecord::ProductInfo(record) => {
+                    product_info.push(record);
+                }
+                DecodedRecord::LongNames(record) => {
+                    long_names.push(record);
+                }
+                DecodedRecord::VeryLongStrings(record) => {
+                    very_long_strings.push(record);
+                }
+                DecodedRecord::FileAttributes(record) => {
+                    file_attributes.push(record);
+                }
+                DecodedRecord::VariableAttributes(record) => {
+                    variable_attributes.push(record);
+                }
+                DecodedRecord::OtherExtension(record) => {
+                    other_extension.push(record);
+                }
+                DecodedRecord::EndOfHeaders(record) => {
+                    end_of_headers.push(record);
+                }
+                DecodedRecord::ZHeader(record) => {
+                    z_header.push(record);
+                }
+                DecodedRecord::ZTrailer(record) => {
+                    z_trailer.push(record);
+                }
+                DecodedRecord::Cases(record) => {
+                    cases.push(record);
+                }
+            }
+        }
+
+        let Some(file_header) = take_first(file_header, || warn(Error::DuplicateHeaderRecord))
+        else {
+            return Err(Error::MissingHeaderRecord);
+        };
+
+        Ok(Headers {
+            header: file_header,
+            variable,
+            value_label,
+            document,
+            integer_info: take_first(integer_info, || warn(Error::TBD)),
+            float_info: take_first(float_info, || warn(Error::TBD)),
+            var_display: take_first(var_display, || warn(Error::TBD)),
+            multiple_response,
+            long_string_value_labels,
+            long_string_missing_values,
+            encoding: take_first(encoding, || warn(Error::TBD)),
+            number_of_cases: take_first(number_of_cases, || warn(Error::TBD)),
+            variable_sets,
+            product_info: take_first(product_info, || warn(Error::TBD)),
+            long_names,
+            very_long_strings,
+            file_attributes,
+            variable_attributes,
+            other_extension,
+            end_of_headers: take_first(end_of_headers, || warn(Error::TBD)),
+            z_header: take_first(z_header, || warn(Error::TBD)),
+            z_trailer: take_first(z_trailer, || warn(Error::TBD)),
+            cases: take_first(cases, || warn(Error::TBD)),
+        })
+    }
+}
+
+/// File-level information taken from a system file's headers that is not
+/// part of the dictionary itself.
+#[derive(Debug)]
+pub struct Metadata {
+    /// Creation date and time from the file header.
+    pub creation: NaiveDateTime,
+    pub endian: Endian,
+    pub compression: Option<Compression>,
+    /// Number of cases according to the file header, if it gives one.
+    pub n_cases: Option<u64>,
+    /// Product name: the header's eye-catcher string with its fixed
+    /// `@(#) SPSS DATA FILE` prefix and trailing whitespace removed.
+    pub product: String,
+    /// Contents of the product info record, if present, with line ends
+    /// normalized.
+    pub product_ext: Option<String>,
+    /// Version from the integer info record, if present.
+    pub version: Option<(i32, i32, i32)>,
+}
+
+impl Metadata {
+    fn decode(headers: &Headers, warn: impl Fn(Error)) -> Self {
+        let header = &headers.header;
+        let creation_date = NaiveDate::parse_from_str(&header.creation_date, "%e %b %Y")
+            .unwrap_or_else(|_| {
+                warn(Error::InvalidCreationDate {
+                    creation_date: header.creation_date.to_string(),
+                });
+                Default::default()
+            });
+        let creation_time = NaiveTime::parse_from_str(&header.creation_time, "%H:%M:%S")
+            .unwrap_or_else(|_| {
+                warn(Error::InvalidCreationTime {
+                    creation_time: header.creation_time.to_string(),
+                });
+                Default::default()
+            });
+        let creation = NaiveDateTime::new(creation_date, creation_time);
+
+        let product = header
+            .eye_catcher
+            .trim_start_matches("@(#) SPSS DATA FILE")
+            .trim_end()
+            .to_string();
+
+        Self {
+            creation,
+            endian: header.endian,
+            compression: header.compression,
+            n_cases: header.n_cases.map(|n| n as u64),
+            product,
+            product_ext: headers.product_info.as_ref().map(|pe| fix_line_ends(&pe.0)),
+            version: headers.integer_info.as_ref().map(|ii| ii.version),
+        }
+    }
+}
+
+/// State used to invent replacement variable names while decoding.
+struct Decoder {
+    /// Encoding passed to `Identifier::from_encoding` for generated names.
+    pub encoding: &'static Encoding,
+    /// Counter behind the numeric suffix of generated names; it is
+    /// incremented before each candidate name is formed.
+    n_generated_names: usize,
+}
+
+impl Decoder {
+    fn generate_name(&mut self, dictionary: &Dictionary) -> Identifier {
+        loop {
+            self.n_generated_names += 1;
+            let name = Identifier::from_encoding(
+                format!("VAR{:03}", self.n_generated_names),
+                self.encoding,
+            )
+            .unwrap();
+            if !dictionary.variables.contains(&name.0) {
+                return name;
+            }
+            assert!(self.n_generated_names < usize::MAX);
+        }
+    }
+}
+
+/// Builds a [`Dictionary`] and [`Metadata`] from the sorted records in
+/// `headers`, interpreting names in `encoding`.
+///
+/// Problems found in the records are reported through `warn` and decoding
+/// continues.  As written, every path that returns yields `Ok`; the
+/// `unwrap()` and `unreachable!()` calls below panic if their assumptions
+/// about the input do not hold.
+pub fn decode(
+    mut headers: Headers,
+    encoding: &'static Encoding,
+    warn: impl Fn(Error),
+) -> Result<(Dictionary, Metadata), Error> {
+    let mut dictionary = Dictionary::new(encoding);
+
+    let file_label = fix_line_ends(headers.header.file_label.trim_end_matches(' '));
+    if !file_label.is_empty() {
+        dictionary.file_label = Some(file_label);
+    }
+
+    for mut attributes in headers.file_attributes.drain(..) {
+        dictionary.attributes.append(&mut attributes.0)
+    }
+
+    // Concatenate all the document records (really there should only be one)
+    // and trim off the trailing spaces that pad them to 80 bytes.
+    dictionary.documents = headers
+        .document
+        .drain(..)
+        .flat_map(|record| record.lines)
+        .map(trim_end_spaces)
+        .collect();
+
+    // XXX warn for weird integer format
+    // XXX warn for weird floating-point format, etc.
+
+    let mut decoder = Decoder {
+        encoding,
+        n_generated_names: 0,
+    };
+
+    // Create one dictionary variable for each variable record that is not a
+    // long string continuation.  `var_index_map` maps the 0-based value
+    // position of each such variable (advanced by `n_values` per variable) to
+    // its index in the dictionary.
+    let mut var_index_map = HashMap::new();
+    let mut value_index = 0;
+    for (index, input) in headers
+        .variable
+        .iter()
+        .enumerate()
+        .filter(|(_index, record)| record.width != RawWidth::Continuation)
+    {
+        let name = trim_end_spaces(input.name.to_string());
+        // Invalid and duplicate names are replaced by generated ones, with a
+        // warning.
+        let name = match Identifier::from_encoding(name, encoding) {
+            Ok(name) => {
+                if !dictionary.variables.contains(&name.0) {
+                    name
+                } else {
+                    let new_name = decoder.generate_name(&dictionary);
+                    warn(Error::DuplicateVariableName {
+                        duplicate_name: name.clone(),
+                        new_name: new_name.clone(),
+                    });
+                    new_name
+                }
+            }
+            Err(id_error) => {
+                let new_name = decoder.generate_name(&dictionary);
+                warn(Error::InvalidVariableName {
+                    id_error,
+                    new_name: new_name.clone(),
+                });
+                new_name
+            }
+        };
+        let mut variable = Variable::new(name.clone(), VarWidth::try_from(input.width).unwrap());
+
+        // Set the short name the same as the long name (even if we renamed it).
+        variable.short_names = vec![name];
+
+        variable.label = input.label.clone();
+
+        variable.missing_values = input.missing_values.clone();
+
+        variable.print_format = decode_format(
+            input.print_format,
+            variable.width,
+            |new_spec, format_error| {
+                warn(Error::InvalidPrintFormat {
+                    new_spec,
+                    variable: variable.name.clone(),
+                    format_error,
+                })
+            },
+        );
+        variable.write_format = decode_format(
+            input.write_format,
+            variable.width,
+            |new_spec, format_error| {
+                warn(Error::InvalidWriteFormat {
+                    new_spec,
+                    variable: variable.name.clone(),
+                    format_error,
+                })
+            },
+        );
+
+        // Check for long string continuation records.
+        let n_values = input.width.n_values().unwrap();
+        for offset in 1..n_values {
+            if headers
+                .variable
+                .get(index + offset)
+                .is_none_or(|record| record.width != RawWidth::Continuation)
+            {
+                warn(Error::TBD);
+                break;
+            }
+        }
+
+        let dict_index = dictionary.add_var(variable).unwrap();
+        assert_eq!(var_index_map.insert(value_index, dict_index), None);
+        value_index += n_values;
+    }
+
+    // Weighting variable.  `weight_index` is 1-based, hence the `- 1`; it
+    // must name a numeric variable.
+    if let Some(weight_index) = headers.header.weight_index {
+        if let Some(dict_index) = var_index_map.get(&(weight_index as usize - 1)) {
+            let variable = &dictionary.variables[*dict_index];
+            if variable.is_numeric() {
+                dictionary.weight = Some(*dict_index);
+            } else {
+                warn(Error::TBD);
+            }
+        } else {
+            warn(Error::TBD);
+        }
+    }
+
+    // Value labels for numeric and short string variables.
+    for record in headers.value_label.drain(..) {
+        let mut dict_indexes = Vec::with_capacity(record.dict_indexes.len());
+        let mut long_string_variables = Vec::new();
+        for value_index in record.dict_indexes.iter() {
+            // NOTE(review): an index missing from `var_index_map` panics
+            // here; presumably such indexes are rejected before this point --
+            // confirm against the raw record decoder.
+            let Some(dict_index) = var_index_map.get(&(*value_index as usize - 1)) else {
+                unreachable!()
+            };
+            let variable = &dictionary.variables[*dict_index];
+            if variable.width.is_long_string() {
+                long_string_variables.push(variable.name.clone());
+            } else {
+                dict_indexes.push(*dict_index);
+            }
+        }
+        if !long_string_variables.is_empty() {
+            warn(Error::InvalidLongStringValueLabels {
+                offsets: record.offsets.clone(),
+                variables: long_string_variables,
+            });
+        }
+
+        for dict_index in dict_indexes {
+            let variable = dictionary.variables.get_index_mut2(dict_index).unwrap();
+            for ValueLabel { value, label } in record.labels.iter().cloned() {
+                let value = value.decode(variable.width);
+                variable.value_labels.insert(value, label);
+            }
+        }
+    }
+
+    // Display parameters: entry `index` applies to dictionary variable
+    // `index`.
+    if let Some(display) = &headers.var_display {
+        for (index, display) in display.0.iter().enumerate() {
+            if let Some(variable) = dictionary.variables.get_index_mut2(index) {
+                if let Some(width) = display.width {
+                    variable.display_width = width;
+                }
+                if let Some(alignment) = display.alignment {
+                    variable.alignment = alignment;
+                }
+                if let Some(measure) = display.measure {
+                    variable.measure = Some(measure);
+                }
+            } else {
+                warn(Error::TBD);
+            }
+        }
+    }
+
+    // Multiple response sets.  Sets that fail to decode are dropped with a
+    // warning.
+    for record in headers
+        .multiple_response
+        .iter()
+        .flat_map(|record| record.0.iter())
+    {
+        match MultipleResponseSet::decode(&dictionary, record, &warn) {
+            Ok(mrset) => {
+                dictionary.mrsets.insert(ByIdentifier::new(mrset));
+            }
+            Err(error) => warn(error),
+        }
+    }
+
+    // Very long strings: merge the segment variables that follow each named
+    // variable into that variable, after checking that every segment has the
+    // expected allocated width.
+    'outer: for record in headers
+        .very_long_strings
+        .drain(..)
+        .flat_map(|record| record.0.into_iter())
+    {
+        let Some(index) = dictionary.variables.get_index_of(&record.short_name.0) else {
+            warn(Error::TBD);
+            continue;
+        };
+        let width = VarWidth::String(record.length);
+        let n_segments = width.n_segments();
+        if n_segments == 1 {
+            warn(Error::TBD);
+            continue;
+        }
+        if index + n_segments > dictionary.variables.len() {
+            warn(Error::TBD);
+            continue;
+        }
+        let mut short_names = Vec::with_capacity(n_segments);
+        for i in 0..n_segments {
+            let alloc_width = width.segment_alloc_width(i);
+            let segment = &dictionary.variables[index + i];
+            short_names.push(segment.short_names[0].clone());
+            let segment_width = segment.width.as_string_width().unwrap_or(0);
+            if segment_width.next_multiple_of(8) != alloc_width.next_multiple_of(8) {
+                warn(Error::TBD);
+                continue 'outer;
+            }
+        }
+        dictionary.delete_vars(index + 1..index + n_segments);
+        let variable = dictionary.variables.get_index_mut2(index).unwrap();
+        variable.short_names = short_names;
+        variable.width = width;
+    }
+
+    if headers.long_names.is_empty() {
+        // There are no long variable names.  Use the short variable names,
+        // converted to lowercase, as the long variable names.
+        for index in 0..dictionary.variables.len() {
+            let lower = dictionary.variables[index].name.0.as_ref().to_lowercase();
+            if let Ok(new_name) = Identifier::from_encoding(lower, dictionary.encoding) {
+                dictionary.try_rename_var(index, new_name);
+            }
+        }
+    } else {
+        // Rename each of the variables, one by one.  (In a correctly
+        // constructed system file, this cannot create any intermediate
+        // duplicate variable names, because all of the new variable names are
+        // longer than any of the old variable names and thus there cannot be
+        // any overlaps.)
+        for renaming in headers
+            .long_names
+            .iter()
+            .flat_map(|record| record.0.iter().cloned())
+        {
+            let LongName {
+                short_name,
+                long_name,
+            } = renaming;
+            if let Some(index) = dictionary.variables.get_index_of(&short_name.0) {
+                dictionary.try_rename_var(index, long_name);
+                dictionary
+                    .variables
+                    .get_index_mut2(index)
+                    .unwrap()
+                    .short_names = vec![short_name];
+            } else {
+                warn(Error::TBD);
+            }
+        }
+    }
+
+    // Variable attributes, looked up by long variable name (so this must run
+    // after the renaming above).
+    for mut attr_set in headers
+        .variable_attributes
+        .drain(..)
+        .flat_map(|record| record.0.into_iter())
+    {
+        if let Some((_, variable)) = dictionary
+            .variables
+            .get_full_mut2(&attr_set.long_var_name.0)
+        {
+            variable.attributes.append(&mut attr_set.attributes);
+        } else {
+            warn(Error::TBD);
+        }
+    }
+
+    // Assign variable roles.
+    for index in 0..dictionary.variables.len() {
+        let variable = dictionary.variables.get_index_mut2(index).unwrap();
+        match variable.attributes.role() {
+            Ok(role) => variable.role = role,
+            Err(InvalidRole) => warn(Error::TBD),
+        }
+    }
+
+    // Long string value labels.
+    for record in headers
+        .long_string_value_labels
+        .drain(..)
+        .flat_map(|record| record.0.into_iter())
+    {
+        let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
+            warn(Error::TBD);
+            continue;
+        };
+        let Some(width) = variable.width.as_string_width() else {
+            warn(Error::TBD);
+            continue;
+        };
+        for (mut value, label) in record.labels.into_iter() {
+            // XXX warn about too-long value?
+            value.0.resize(width, b' ');
+            // XXX warn about duplicate value labels?
+            variable.value_labels.insert(Value::String(value), label);
+        }
+    }
+
+    // Long string missing values.  `value` is a scratch buffer reused for
+    // each missing value, which is resized to the variable's width (padding
+    // with spaces when it is shorter).
+    let mut value = Vec::new();
+    for record in headers
+        .long_string_missing_values
+        .drain(..)
+        .flat_map(|record| record.0.into_iter())
+    {
+        let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
+            warn(Error::TBD);
+            continue;
+        };
+        let values = record
+            .missing_values
+            .into_iter()
+            .map(|v| {
+                value.clear();
+                value.extend_from_slice(v.0.as_slice());
+                value.resize(variable.width.as_string_width().unwrap(), b' ');
+                Value::String(Box::from(value.as_slice()))
+            })
+            .collect::<Vec<_>>();
+        variable.missing_values = MissingValues {
+            values,
+            range: None,
+        };
+    }
+
+    // Variable sets.  Unknown variable names are skipped with a warning, and
+    // a set that ends up with no variables is not added.
+    for record in headers
+        .variable_sets
+        .drain(..)
+        .flat_map(|record| record.sets.into_iter())
+    {
+        let mut variables = Vec::with_capacity(record.variable_names.len());
+        for variable_name in record.variable_names {
+            let Some((dict_index, _)) = dictionary.variables.get_full_mut2(&variable_name.0) else {
+                warn(Error::TBD);
+                continue;
+            };
+            variables.push(dict_index);
+        }
+        if !variables.is_empty() {
+            let variable_set = VariableSet {
+                name: record.name,
+                variables,
+            };
+            dictionary
+                .variable_sets
+                .insert(ByIdentifier::new(variable_set));
+        }
+    }
+
+    let metadata = Metadata::decode(&headers, warn);
+    Ok((dictionary, metadata))
+}
+
+impl MultipleResponseSet {
+    fn decode(
+        dictionary: &Dictionary,
+        input: &raw::MultipleResponseSet<Identifier, String>,
+        warn: &impl Fn(Error),
+    ) -> Result<Self, Error> {
+        let mr_set_name = input.name.clone();
+        let mut variables = Vec::with_capacity(input.short_names.len());
+        for short_name in input.short_names.iter() {
+            let Some(dict_index) = dictionary.variables.get_index_of(&short_name.0) else {
+                warn(Error::UnknownMrSetVariable {
+                    mr_set: mr_set_name.clone(),
+                    short_name: short_name.clone(),
+                });
+                continue;
+            };
+            variables.push(dict_index);
+        }
+
+        match variables.len() {
+            0 => return Err(Error::EmptyMrSet(mr_set_name)),
+            1 => return Err(Error::OneVarMrSet(mr_set_name)),
+            _ => (),
+        }
+
+        let Some((Some(min_width), Some(max_width))) = variables
+            .iter()
+            .copied()
+            .map(|dict_index| dictionary.variables[dict_index].width)
+            .map(|w| (Some(w), Some(w)))
+            .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
+        else {
+            return Err(Error::MixedMrSet(mr_set_name));
+        };
+
+        let mr_type = MultipleResponseType::decode(&mr_set_name, &input.mr_type, min_width)?;
+
+        Ok(MultipleResponseSet {
+            name: mr_set_name,
+            width: min_width..=max_width,
+            label: input.label.to_string(),
+            mr_type,
+            variables,
+        })
+    }
+}
+
+fn trim_end_spaces(mut s: String) -> String {
+    s.truncate(s.trim_end_matches(' ').len());
+    s
+}
+
+/// Returns a copy of `s` in which all lone CR and CR LF pairs have been
+/// replaced by LF.
+///
+/// (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system
+/// files that use CR-only line ends in the file label and extra product info.)
+fn fix_line_ends(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    let mut s = s.chars().peekable();
+    while let Some(c) = s.next() {
+        match c {
+            '\r' => {
+                s.next_if_eq(&'\n');
+                out.push('\n')
+            }
+            c => out.push(c),
+        }
+    }
+    out
+}
+
+fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Format, FormatError)) -> Format {
+    UncheckedFormat::try_from(raw)
+        .and_then(Format::try_from)
+        .and_then(|x| x.check_width_compatibility(width))
+        .unwrap_or_else(|error| {
+            let new_format = Format::default_for_width(width);
+            warn(new_format, error);
+            new_format
+        })
+}
+
+impl MultipleResponseType {
+    fn decode(
+        mr_set: &Identifier,
+        input: &raw::MultipleResponseType,
+        min_width: VarWidth,
+    ) -> Result<Self, Error> {
+        match input {
+            raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
+                let value = match min_width {
+                    VarWidth::Numeric => {
+                        let string = String::from_utf8_lossy(&value.0);
+                        let number: f64 = string.trim().parse().map_err(|_| {
+                            Error::InvalidMDGroupCountedValue {
+                                mr_set: mr_set.clone(),
+                                number: string.into(),
+                            }
+                        })?;
+                        Value::Number(Some(number))
+                    }
+                    VarWidth::String(max_width) => {
+                        let mut value = value.0.as_slice();
+                        while value.ends_with(b" ") {
+                            value = &value[..value.len() - 1];
+                        }
+                        let width = value.len();
+                        if width > max_width as usize {
+                            return Err(Error::TooWideMDGroupCountedValue {
+                                mr_set: mr_set.clone(),
+                                value: String::from_utf8_lossy(value).into(),
+                                width,
+                                max_width,
+                            });
+                        };
+                        Value::String(value.into())
+                    }
+                };
+                Ok(MultipleResponseType::MultipleDichotomy {
+                    value,
+                    labels: *labels,
+                })
+            }
+            raw::MultipleResponseType::MultipleCategory => {
+                Ok(MultipleResponseType::MultipleCategory)
+            }
+        }
+    }
+}
diff --git a/rust/pspp/src/sys/encoding.rs b/rust/pspp/src/sys/encoding.rs
new file mode 100644 (file)
index 0000000..c408bf5
--- /dev/null
@@ -0,0 +1,95 @@
+use crate::locale_charset::locale_charset;
+use encoding_rs::{Encoding, UTF_8};
+
+include!(concat!(env!("OUT_DIR"), "/encodings.rs"));
+
+pub fn codepage_from_encoding(encoding: &str) -> Option<u32> {
+    CODEPAGE_NAME_TO_NUMBER
+        .get(encoding.to_ascii_lowercase().as_str())
+        .copied()
+}
+
+use thiserror::Error as ThisError;
+
+/// Reasons why [`get_encoding`] could not determine a character encoding.
+#[derive(ThisError, Debug)]
+pub enum Error {
+    /// No encoding name was given and the character code was absent or one
+    /// of the values (2 or 3) that `get_encoding` deliberately ignores.
+    #[error("This system file does not indicate its own character encoding.  For best results, specify an encoding explicitly.  Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")]
+    NoEncoding,
+
+    /// The character code was not found in `CODEPAGE_NUMBER_TO_NAME`.
+    #[error("This system file encodes text strings with unknown code page {0}.")]
+    UnknownCodepage(i32),
+
+    /// `Encoding::for_label` did not recognize the encoding name.
+    #[error("This system file encodes text strings with unknown encoding {0}.")]
+    UnknownEncoding(String),
+
+    /// The character code was 1, which `get_encoding` treats as EBCDIC.
+    #[error("This system file is encoded in EBCDIC, which is not supported.")]
+    Ebcdic,
+}
+
+/// Returns the encoding to fall back on when a file does not identify its
+/// own.
+///
+/// This is the encoding whose label matches `locale_charset()`, or UTF-8 if
+/// `Encoding::for_label` does not recognize that label.  The value is held in
+/// a `lazy_static!` static, so it is computed on first use.
+pub fn default_encoding() -> &'static Encoding {
+    lazy_static! {
+        static ref DEFAULT_ENCODING: &'static Encoding =
+            Encoding::for_label(locale_charset().as_bytes()).unwrap_or(UTF_8);
+    }
+    &DEFAULT_ENCODING
+}
+
+pub fn get_encoding(
+    encoding: Option<&str>,
+    character_code: Option<i32>,
+) -> Result<&'static Encoding, Error> {
+    let label = if let Some(encoding) = encoding {
+        encoding
+    } else if let Some(codepage) = character_code {
+        match codepage {
+            1 => return Err(Error::Ebcdic),
+            2 | 3 => {
+                // These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+                // respectively.  However, many files have character code 2 but
+                // data which are clearly not ASCII.  Therefore, ignore these
+                // values.
+                return Err(Error::NoEncoding);
+            }
+            4 => "MS_KANJI",
+            _ => CODEPAGE_NUMBER_TO_NAME
+                .get(&codepage)
+                .copied()
+                .ok_or(Error::UnknownCodepage(codepage))?,
+        }
+    } else {
+        return Err(Error::NoEncoding);
+    };
+
+    Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into()))
+}
+
+/*
+#[cfg(test)]
+mod tests {
+    use std::thread::spawn;
+
+    use encoding_rs::{EUC_JP, UTF_8, WINDOWS_1252};
+
+    #[test]
+    fn round_trip() {
+        let mut threads = Vec::new();
+        for thread in 0..128 {
+            let start: u32 = thread << 25;
+            let end = start + ((1 << 25) - 1);
+            threads.push(spawn(move || {
+                for i in start..=end {
+                    let s = i.to_le_bytes();
+                    let (utf8, replacement) = EUC_JP.decode_without_bom_handling(&s);
+                    if !replacement {
+                        let s2 = UTF_8.encode(&utf8).0;
+                        assert_eq!(s.as_slice(), &*s2);
+                    }
+                }
+            }));
+        }
+        for thread in threads {
+            thread.join().unwrap();
+        }
+    }
+}
+*/
diff --git a/rust/pspp/src/sys/mod.rs b/rust/pspp/src/sys/mod.rs
new file mode 100644 (file)
index 0000000..57a1d00
--- /dev/null
@@ -0,0 +1,4 @@
+pub mod cooked;
+pub mod encoding;
+pub mod raw;
+pub mod sack;
diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs
new file mode 100644 (file)
index 0000000..7a0af59
--- /dev/null
@@ -0,0 +1,3008 @@
+use crate::{
+    dictionary::{Attributes, Value, VarWidth},
+    endian::{Endian, Parse, ToBytes},
+    identifier::{Error as IdError, Identifier},
+    sys::encoding::{default_encoding, get_encoding, Error as EncodingError},
+};
+
+use encoding_rs::{mem::decode_latin1, Encoding};
+use flate2::read::ZlibDecoder;
+use num::Integer;
+use std::{
+    borrow::Cow,
+    cell::RefCell,
+    collections::{HashMap, VecDeque},
+    fmt::{Debug, Display, Formatter, Result as FmtResult},
+    io::{Error as IoError, Read, Seek, SeekFrom},
+    mem::take,
+    num::NonZeroU8,
+    ops::Range,
+    rc::Rc,
+    str::from_utf8,
+};
+use thiserror::Error as ThisError;
+
+/// Errors produced while reading raw records and cases from a system file.
+///
+/// Most variants carry the file offset at which the problem was detected so
+/// that it can be included in the message.
+#[derive(ThisError, Debug)]
+pub enum Error {
+    #[error("Not an SPSS system file")]
+    NotASystemFile,
+
+    /// The first four bytes of the file are not one of the known magic
+    /// numbers.
+    #[error("Invalid magic number {0:?}")]
+    BadMagic([u8; 4]),
+
+    /// Wraps an underlying I/O error; `?` converts `IoError` into this
+    /// variant automatically.
+    #[error("I/O error ({0})")]
+    Io(#[from] IoError),
+
+    #[error("Invalid SAV compression code {0}")]
+    InvalidSavCompression(u32),
+
+    #[error("Invalid ZSAV compression code {0}")]
+    InvalidZsavCompression(u32),
+
+    #[error("Document record at offset {offset:#x} has document line count ({n}) greater than the maximum number {max}.")]
+    BadDocumentLength { offset: u64, n: usize, max: usize },
+
+    #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
+    BadRecordType { offset: u64, rec_type: u32 },
+
+    // NOTE(review): `width` is carried but not shown in the message.
+    #[error("In variable record starting at offset {start_offset:#x}, variable width is not in the valid range -1 to 255.")]
+    BadVariableWidth { start_offset: u64, width: i32 },
+
+    #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")]
+    BadVariableLabelCode {
+        start_offset: u64,
+        code_offset: u64,
+        code: u32,
+    },
+
+    #[error("At offset {offset:#x}, missing value code ({code}) is not -3, -2, 0, 1, 2, or 3.")]
+    BadMissingValueCode { offset: u64, code: i32 },
+
+    #[error(
+        "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
+    )]
+    BadNumericMissingValueCode { offset: u64, code: i32 },
+
+    #[error("At offset {offset:#x}, string missing value code ({code}) is not 0, 1, 2, or 3.")]
+    BadStringMissingValueCode { offset: u64, code: i32 },
+
+    #[error("At offset {offset:#x}, number of value labels ({n}) is greater than the maximum number {max}.")]
+    BadNumberOfValueLabels { offset: u64, n: u32, max: u32 },
+
+    #[error("At offset {offset:#x}, following value label record, found record type {rec_type} instead of expected type 4 for variable index record")]
+    ExpectedVarIndexRecord { offset: u64, rec_type: u32 },
+
+    #[error("At offset {offset:#x}, number of variables indexes for value labels ({n}) is greater than the maximum number ({max}).")]
+    TooManyVarIndexes { offset: u64, n: u32, max: u32 },
+
+    #[error("At offset {offset:#x}, record type 7 subtype {subtype} is too large with element size {size} and {count} elements.")]
+    ExtensionRecordTooLarge {
+        offset: u64,
+        subtype: u32,
+        size: u32,
+        count: u32,
+    },
+
+    /// End of file was reached partway through an uncompressed case.
+    #[error("Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a {case_len}-byte case.")]
+    EofInCase {
+        offset: u64,
+        case_ofs: u64,
+        case_len: usize,
+    },
+
+    /// End of file was reached partway through a compressed case.
+    #[error(
+        "Unexpected end of file at offset {offset:#x}, {case_ofs} bytes into a compressed case."
+    )]
+    EofInCompressedCase { offset: u64, case_ofs: u64 },
+
+    /// The end-of-data code (252) appeared partway through a compressed case.
+    #[error("Data ends at offset {offset:#x}, {case_ofs} bytes into a compressed case.")]
+    PartialCompressedCase { offset: u64, case_ofs: u64 },
+
+    #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a string was found where a number was expected.")]
+    CompressedNumberExpected { offset: u64, case_ofs: u64 },
+
+    #[error("At {case_ofs} bytes into compressed case starting at offset {offset:#x}, a number was found where a string was expected.")]
+    CompressedStringExpected { offset: u64, case_ofs: u64 },
+
+    #[error("Block count {n_blocks} in ZLIB trailer at offset {offset:#x} differs from expected block count {expected_n_blocks} calculated from trailer length {ztrailer_len}.")]
+    BadZlibTrailerNBlocks {
+        offset: u64,
+        n_blocks: u32,
+        expected_n_blocks: u64,
+        ztrailer_len: u64,
+    },
+
+    /// An encoding problem that is treated as fatal (see
+    /// `encoding_from_headers`, which uses this for EBCDIC files).
+    #[error("{0}")]
+    EncodingError(EncodingError),
+}
+
+/// Non-fatal problems found while reading or decoding a system file.
+///
+/// These are delivered through the `warn` callbacks rather than returned as
+/// errors.
+#[derive(ThisError, Debug)]
+pub enum Warning {
+    /// Also produced from any `IoError` via `From<IoError> for Warning`.
+    #[error("Unexpected end of data inside extension record.")]
+    UnexpectedEndOfData,
+
+    #[error("At offset {offset:#x}, at least one valid variable index for value labels is required but none were specified.")]
+    NoVarIndexes { offset: u64 },
+
+    #[error("At offset {offset:#x}, the first variable index is for a {var_type} variable but the following variable indexes are for {} variables: {wrong_types:?}", var_type.opposite())]
+    MixedVarTypes {
+        offset: u64,
+        var_type: VarType,
+        wrong_types: Vec<u32>,
+    },
+
+    #[error("At offset {offset:#x}, one or more variable indexes for value labels were not in the valid range [1,{max}] or referred to string continuations: {invalid:?}")]
+    InvalidVarIndexes {
+        offset: u64,
+        max: usize,
+        invalid: Vec<u32>,
+    },
+
+    #[error("At offset {offset:#x}, {record} has bad size {size} bytes instead of the expected {expected_size}.")]
+    BadRecordSize {
+        offset: u64,
+        record: String,
+        size: u32,
+        expected_size: u32,
+    },
+
+    #[error("At offset {offset:#x}, {record} has bad count {count} instead of the expected {expected_count}.")]
+    BadRecordCount {
+        offset: u64,
+        record: String,
+        count: u32,
+        expected_count: u32,
+    },
+
+    #[error("In long string missing values record starting at offset {record_offset:#x}, value length at offset {offset:#x} is {value_len} instead of the expected 8.")]
+    BadLongMissingValueLength {
+        record_offset: u64,
+        offset: u64,
+        value_len: u32,
+    },
+
+    #[error("The encoding record at offset {offset:#x} contains an encoding name that is not valid UTF-8.")]
+    BadEncodingName { offset: u64 },
+
+    // XXX This is risky because `text` might be arbitrarily long.
+    #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
+    MalformedString { encoding: String, text: String },
+
+    #[error("Invalid variable measurement level value {0}")]
+    InvalidMeasurement(u32),
+
+    #[error("Invalid variable display alignment value {0}")]
+    InvalidAlignment(u32),
+
+    #[error("Invalid attribute name.  {0}")]
+    InvalidAttributeName(IdError),
+
+    #[error("Invalid variable name in attribute record.  {0}")]
+    InvalidAttributeVariableName(IdError),
+
+    #[error("Invalid short name in long variable name record.  {0}")]
+    InvalidShortName(IdError),
+
+    #[error("Invalid name in long variable name record.  {0}")]
+    InvalidLongName(IdError),
+
+    #[error("Invalid variable name in very long string record.  {0}")]
+    InvalidLongStringName(IdError),
+
+    #[error("Invalid variable name in variable set record.  {0}")]
+    InvalidVariableSetName(IdError),
+
+    #[error("Invalid multiple response set name.  {0}")]
+    InvalidMrSetName(IdError),
+
+    #[error("Invalid multiple response set variable name.  {0}")]
+    InvalidMrSetVariableName(IdError),
+
+    #[error("Invalid variable name in long string missing values record.  {0}")]
+    InvalidLongStringMissingValueVariableName(IdError),
+
+    #[error("Invalid variable name in long string value label record.  {0}")]
+    InvalidLongStringValueLabelName(IdError),
+
+    /// An encoding problem that was downgraded to a warning (see
+    /// `encoding_from_headers`).
+    #[error("{0}")]
+    EncodingError(EncodingError),
+
+    /// Placeholder for warnings whose details have not been filled in yet.
+    #[error("Details TBD")]
+    TBD,
+}
+
+impl From<IoError> for Warning {
+    fn from(_source: IoError) -> Self {
+        Self::UnexpectedEndOfData
+    }
+}
+
+/// A record as read from a system file, before character decoding.
+///
+/// Text is still held in raw byte form (`RawString`, `RawStrArray`,
+/// `RawDocumentLine`).  [`Record::decode`] converts a value of this type into
+/// a [`DecodedRecord`].
+#[derive(Clone, Debug)]
+pub enum Record {
+    Header(HeaderRecord<RawString>),
+    Variable(VariableRecord<RawString>),
+    ValueLabel(ValueLabelRecord<RawStrArray<8>, RawString>),
+    Document(DocumentRecord<RawDocumentLine>),
+    IntegerInfo(IntegerInfoRecord),
+    FloatInfo(FloatInfoRecord),
+    VarDisplay(VarDisplayRecord),
+    MultipleResponse(MultipleResponseRecord<RawString, RawString>),
+    LongStringValueLabels(LongStringValueLabelRecord<RawString, RawString>),
+    LongStringMissingValues(LongStringMissingValueRecord<RawString>),
+    Encoding(EncodingRecord),
+    NumberOfCases(NumberOfCasesRecord),
+    /// A text-based record that has not been interpreted yet; decoding turns
+    /// it into some other `DecodedRecord` variant.
+    Text(TextRecord),
+    OtherExtension(Extension),
+    /// Record type 999, carrying the 32-bit value that follows the type code.
+    EndOfHeaders(u32),
+    ZHeader(ZHeader),
+    ZTrailer(ZTrailer),
+    Cases(Rc<RefCell<Cases>>),
+}
+
+/// A record after character decoding, as produced by [`Record::decode`].
+///
+/// Compared with [`Record`], text is held as `String` and names as
+/// `Identifier`; records with no text payload keep the same type.
+#[derive(Clone, Debug)]
+pub enum DecodedRecord {
+    Header(HeaderRecord<String>),
+    Variable(VariableRecord<String>),
+    ValueLabel(ValueLabelRecord<RawStrArray<8>, String>),
+    Document(DocumentRecord<String>),
+    IntegerInfo(IntegerInfoRecord),
+    FloatInfo(FloatInfoRecord),
+    VarDisplay(VarDisplayRecord),
+    MultipleResponse(MultipleResponseRecord<Identifier, String>),
+    LongStringValueLabels(LongStringValueLabelRecord<Identifier, String>),
+    LongStringMissingValues(LongStringMissingValueRecord<Identifier>),
+    Encoding(EncodingRecord),
+    NumberOfCases(NumberOfCasesRecord),
+    VariableSets(VariableSetRecord),
+    ProductInfo(ProductInfoRecord),
+    LongNames(LongNamesRecord),
+    VeryLongStrings(VeryLongStringsRecord),
+    FileAttributes(FileAttributeRecord),
+    VariableAttributes(VariableAttributeRecord),
+    OtherExtension(Extension),
+    EndOfHeaders(u32),
+    ZHeader(ZHeader),
+    ZTrailer(ZTrailer),
+    Cases(Rc<RefCell<Cases>>),
+}
+
+impl Record {
+    fn read<R>(
+        reader: &mut R,
+        endian: Endian,
+        var_types: &VarTypes,
+        warn: &dyn Fn(Warning),
+    ) -> Result<Option<Record>, Error>
+    where
+        R: Read + Seek,
+    {
+        let rec_type: u32 = endian.parse(read_bytes(reader)?);
+        match rec_type {
+            2 => Ok(Some(VariableRecord::read(reader, endian, warn)?)),
+            3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
+            6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
+            7 => Extension::read(reader, endian, var_types.n_values(), warn),
+            999 => Ok(Some(Record::EndOfHeaders(
+                endian.parse(read_bytes(reader)?),
+            ))),
+            _ => Err(Error::BadRecordType {
+                offset: reader.stream_position()?,
+                rec_type,
+            }),
+        }
+    }
+
+    pub fn decode(self, decoder: &Decoder) -> Result<DecodedRecord, Error> {
+        Ok(match self {
+            Record::Header(record) => record.decode(decoder),
+            Record::Variable(record) => record.decode(decoder),
+            Record::ValueLabel(record) => DecodedRecord::ValueLabel(record.decode(decoder)),
+            Record::Document(record) => record.decode(decoder),
+            Record::IntegerInfo(record) => DecodedRecord::IntegerInfo(record.clone()),
+            Record::FloatInfo(record) => DecodedRecord::FloatInfo(record.clone()),
+            Record::VarDisplay(record) => DecodedRecord::VarDisplay(record.clone()),
+            Record::MultipleResponse(record) => record.decode(decoder),
+            Record::LongStringValueLabels(record) => {
+                DecodedRecord::LongStringValueLabels(record.decode(decoder))
+            }
+            Record::LongStringMissingValues(record) => {
+                DecodedRecord::LongStringMissingValues(record.decode(decoder))
+            }
+            Record::Encoding(record) => DecodedRecord::Encoding(record.clone()),
+            Record::NumberOfCases(record) => DecodedRecord::NumberOfCases(record.clone()),
+            Record::Text(record) => record.decode(decoder),
+            Record::OtherExtension(record) => DecodedRecord::OtherExtension(record.clone()),
+            Record::EndOfHeaders(record) => DecodedRecord::EndOfHeaders(record),
+            Record::ZHeader(record) => DecodedRecord::ZHeader(record.clone()),
+            Record::ZTrailer(record) => DecodedRecord::ZTrailer(record.clone()),
+            Record::Cases(record) => DecodedRecord::Cases(record.clone()),
+        })
+    }
+}
+
+pub fn encoding_from_headers(
+    headers: &Vec<Record>,
+    warn: &impl Fn(Warning),
+) -> Result<&'static Encoding, Error> {
+    let mut encoding_record = None;
+    let mut integer_info_record = None;
+    for record in headers {
+        match record {
+            Record::Encoding(record) => encoding_record = Some(record),
+            Record::IntegerInfo(record) => integer_info_record = Some(record),
+            _ => (),
+        }
+    }
+    let encoding = encoding_record.map(|record| record.0.as_str());
+    let character_code = integer_info_record.map(|record| record.character_code);
+    match get_encoding(encoding, character_code) {
+        Ok(encoding) => Ok(encoding),
+        Err(err @ EncodingError::Ebcdic) => Err(Error::EncodingError(err)),
+        Err(err) => {
+            warn(Warning::EncodingError(err));
+            // Warn that we're using the default encoding.
+            Ok(default_encoding())
+        }
+    }
+}
+
+// If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it
+// decoded as Latin-1 (actually bytes interpreted as Unicode code points).
+fn default_decode(s: &[u8]) -> Cow<str> {
+    from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
+}
+
+/// How case data in a system file is compressed.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Compression {
+    /// Compression code 1 in the file header.
+    Simple,
+    /// Compression code 2 in the file header of a ZSAV file.
+    ZLib,
+}
+
+/// The file header record at the start of a system file.
+///
+/// `S` is the string representation: `RawString` as read from the file, or
+/// `String` after decoding.
+#[derive(Clone)]
+pub struct HeaderRecord<S>
+where
+    S: Debug,
+{
+    /// Range of file offsets occupied by the record.
+    pub offsets: Range<u64>,
+
+    /// Magic number.
+    pub magic: Magic,
+
+    /// Eye-catcher string, product name, in the file's encoding.  Padded
+    /// on the right with spaces.
+    pub eye_catcher: S,
+
+    /// Layout code, normally either 2 or 3.
+    pub layout_code: u32,
+
+    /// Number of variable positions, or `None` if the value in the file is
+    /// questionably trustworthy.
+    pub nominal_case_size: Option<u32>,
+
+    /// Compression type, if any.
+    pub compression: Option<Compression>,
+
+    /// 1-based variable index of the weight variable, or `None` if the file is
+    /// unweighted.
+    pub weight_index: Option<u32>,
+
+    /// Claimed number of cases, if known.
+    pub n_cases: Option<u32>,
+
+    /// Compression bias, usually 100.0.
+    pub bias: f64,
+
+    /// `dd mmm yy` in the file's encoding.
+    pub creation_date: S,
+
+    /// `HH:MM:SS` in the file's encoding.
+    pub creation_time: S,
+
+    /// File label, in the file's encoding.  Padded on the right with spaces.
+    pub file_label: S,
+
+    /// Endianness of the data in the file header.
+    pub endian: Endian,
+}
+
+impl<S> HeaderRecord<S>
+where
+    S: Debug,
+{
+    fn debug_field<T>(&self, f: &mut Formatter, name: &str, value: T) -> FmtResult
+    where
+        T: Debug,
+    {
+        writeln!(f, "{name:>17}: {:?}", value)
+    }
+}
+
+/// Multi-line, human-readable dump of the header: a title line followed by
+/// one aligned `name: value` line per field (`offsets` is not shown).
+impl<S> Debug for HeaderRecord<S>
+where
+    S: Debug,
+{
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        writeln!(f, "File header record:")?;
+        self.debug_field(f, "Magic", self.magic)?;
+        self.debug_field(f, "Product name", &self.eye_catcher)?;
+        self.debug_field(f, "Layout code", self.layout_code)?;
+        self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
+        self.debug_field(f, "Compression", self.compression)?;
+        self.debug_field(f, "Weight index", self.weight_index)?;
+        self.debug_field(f, "Number of cases", self.n_cases)?;
+        self.debug_field(f, "Compression bias", self.bias)?;
+        self.debug_field(f, "Creation date", &self.creation_date)?;
+        self.debug_field(f, "Creation time", &self.creation_time)?;
+        self.debug_field(f, "File label", &self.file_label)?;
+        self.debug_field(f, "Endianness", self.endian)
+    }
+}
+
+impl HeaderRecord<RawString> {
+    fn read<R: Read + Seek>(r: &mut R) -> Result<Self, Error> {
+        let start = r.stream_position()?;
+
+        let magic: [u8; 4] = read_bytes(r)?;
+        let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
+
+        let eye_catcher = RawString(read_vec(r, 60)?);
+        let layout_code: [u8; 4] = read_bytes(r)?;
+        let endian = Endian::identify_u32(2, layout_code)
+            .or_else(|| Endian::identify_u32(2, layout_code))
+            .ok_or(Error::NotASystemFile)?;
+        let layout_code = endian.parse(layout_code);
+
+        let nominal_case_size: u32 = endian.parse(read_bytes(r)?);
+        let nominal_case_size =
+            (nominal_case_size <= i32::MAX as u32 / 16).then_some(nominal_case_size);
+
+        let compression_code: u32 = endian.parse(read_bytes(r)?);
+        let compression = match (magic, compression_code) {
+            (Magic::Zsav, 2) => Some(Compression::ZLib),
+            (Magic::Zsav, code) => return Err(Error::InvalidZsavCompression(code)),
+            (_, 0) => None,
+            (_, 1) => Some(Compression::Simple),
+            (_, code) => return Err(Error::InvalidSavCompression(code)),
+        };
+
+        let weight_index: u32 = endian.parse(read_bytes(r)?);
+        let weight_index = (weight_index > 0).then_some(weight_index);
+
+        let n_cases: u32 = endian.parse(read_bytes(r)?);
+        let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
+
+        let bias: f64 = endian.parse(read_bytes(r)?);
+
+        let creation_date = RawString(read_vec(r, 9)?);
+        let creation_time = RawString(read_vec(r, 8)?);
+        let file_label = RawString(read_vec(r, 64)?);
+        let _: [u8; 3] = read_bytes(r)?;
+
+        Ok(HeaderRecord {
+            offsets: start..r.stream_position()?,
+            magic,
+            layout_code,
+            nominal_case_size,
+            compression,
+            weight_index,
+            n_cases,
+            bias,
+            creation_date,
+            creation_time,
+            eye_catcher,
+            file_label,
+            endian,
+        })
+    }
+
+    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
+        let eye_catcher = decoder.decode(&self.eye_catcher).to_string();
+        let file_label = decoder.decode(&self.file_label).to_string();
+        let creation_date = decoder.decode(&self.creation_date).to_string();
+        let creation_time = decoder.decode(&self.creation_time).to_string();
+        DecodedRecord::Header(HeaderRecord {
+            eye_catcher,
+            weight_index: self.weight_index,
+            n_cases: self.n_cases,
+            file_label,
+            offsets: self.offsets.clone(),
+            magic: self.magic,
+            layout_code: self.layout_code,
+            nominal_case_size: self.nominal_case_size,
+            compression: self.compression,
+            bias: self.bias,
+            creation_date,
+            creation_time,
+            endian: self.endian,
+        })
+    }
+}
+
+/// Converts raw text from a system file into Rust strings and identifiers.
+pub struct Decoder {
+    /// Character encoding used to decode text.
+    pub encoding: &'static Encoding,
+    /// Callback that receives warnings raised during decoding.
+    pub warn: Box<dyn Fn(Warning)>,
+}
+
+impl Decoder {
+    pub fn new<F>(encoding: &'static Encoding, warn: F) -> Self
+    where
+        F: Fn(Warning) + 'static,
+    {
+        Self {
+            encoding,
+            warn: Box::new(warn),
+        }
+    }
+    fn warn(&self, warning: Warning) {
+        (self.warn)(warning)
+    }
+    fn decode_slice<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
+        let (output, malformed) = self.encoding.decode_without_bom_handling(input);
+        if malformed {
+            self.warn(Warning::MalformedString {
+                encoding: self.encoding.name().into(),
+                text: output.clone().into(),
+            });
+        }
+        output
+    }
+
+    fn decode<'a>(&self, input: &'a RawString) -> Cow<'a, str> {
+        self.decode_slice(input.0.as_slice())
+    }
+
+    pub fn decode_identifier(&self, input: &RawString) -> Result<Identifier, IdError> {
+        self.new_identifier(&self.decode(input))
+    }
+
+    pub fn new_identifier(&self, name: &str) -> Result<Identifier, IdError> {
+        Identifier::from_encoding(name, self.encoding)
+    }
+}
+
+/// The kind of system file, as identified by the 4-byte magic number at the
+/// start of the file header.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub enum Magic {
+    /// Regular system file.
+    Sav,
+
+    /// System file with Zlib-compressed data.
+    Zsav,
+
+    /// EBCDIC-encoded system file.
+    Ebcdic,
+}
+
+/// Byte patterns of the magic numbers, as matched by
+/// `TryFrom<[u8; 4]> for Magic`.
+impl Magic {
+    /// Magic number for a regular system file.
+    pub const SAV: [u8; 4] = *b"$FL2";
+
+    /// Magic number for a system file that contains zlib-compressed data.
+    pub const ZSAV: [u8; 4] = *b"$FL3";
+
+    /// Magic number for an EBCDIC-encoded system file.  This is `$FL2` encoded
+    /// in EBCDIC.
+    pub const EBCDIC: [u8; 4] = [0x5b, 0xc6, 0xd3, 0xf2];
+}
+
+impl Debug for Magic {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let s = match *self {
+            Magic::Sav => "$FL2",
+            Magic::Zsav => "$FL3",
+            Magic::Ebcdic => "($FL2 in EBCDIC)",
+        };
+        write!(f, "{s}")
+    }
+}
+
+impl TryFrom<[u8; 4]> for Magic {
+    type Error = Error;
+
+    fn try_from(value: [u8; 4]) -> Result<Self, Self::Error> {
+        match value {
+            Magic::SAV => Ok(Magic::Sav),
+            Magic::ZSAV => Ok(Magic::Zsav),
+            Magic::EBCDIC => Ok(Magic::Ebcdic),
+            _ => Err(Error::BadMagic(value)),
+        }
+    }
+}
+
+/// The basic type of a variable: numeric or string.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum VarType {
+    Numeric,
+    String,
+}
+
+impl VarType {
+    pub fn opposite(self) -> VarType {
+        match self {
+            Self::Numeric => Self::String,
+            Self::String => Self::Numeric,
+        }
+    }
+}
+
+impl Display for VarType {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        match self {
+            VarType::Numeric => write!(f, "numeric"),
+            VarType::String => write!(f, "string"),
+        }
+    }
+}
+
+impl TryFrom<RawWidth> for VarType {
+    type Error = ();
+
+    fn try_from(value: RawWidth) -> Result<Self, Self::Error> {
+        match value {
+            RawWidth::Continuation => Err(()),
+            RawWidth::Numeric => Ok(VarType::Numeric),
+            RawWidth::String(_) => Ok(VarType::String),
+        }
+    }
+}
+
+impl TryFrom<RawWidth> for VarWidth {
+    type Error = ();
+
+    fn try_from(value: RawWidth) -> Result<Self, Self::Error> {
+        match value {
+            RawWidth::Continuation => Err(()),
+            RawWidth::Numeric => Ok(Self::Numeric),
+            RawWidth::String(width) => Ok(Self::String(width.get() as u16)),
+        }
+    }
+}
+
+/// A value as stored in a system file: a number, or a string held as an
+/// 8-byte raw array.
+type RawValue = Value<RawStrArray<8>>;
+
+impl RawValue {
+    /// Interprets the 8 raw bytes in `raw` as a value of type `var_type`.
+    ///
+    /// String bytes are kept as they are; numeric bytes are parsed using
+    /// byte order `endian`.
+    pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Self {
+        match var_type {
+            VarType::String => Value::String(RawStrArray(raw.0)),
+            VarType::Numeric => Value::Number(endian.parse(raw.0)),
+        }
+    }
+
+    /// Reads one uncompressed case, with one value per entry in `var_types`.
+    ///
+    /// Returns `Ok(None)` if the input ends before the first value of the
+    /// case, and `Error::EofInCase` if it ends partway through the case.
+    fn read_case<R: Read + Seek>(
+        reader: &mut R,
+        var_types: &VarTypes,
+        endian: Endian,
+    ) -> Result<Option<Vec<Self>>, Error> {
+        let case_start = reader.stream_position()?;
+        let mut values = Vec::with_capacity(var_types.n_values());
+        for (i, var_type) in var_types.iter().enumerate() {
+            let Some(raw) = try_read_bytes(reader)? else {
+                if i == 0 {
+                    // Clean end of data: no part of a new case was read.
+                    return Ok(None);
+                } else {
+                    let offset = reader.stream_position()?;
+                    return Err(Error::EofInCase {
+                        offset,
+                        case_ofs: offset - case_start,
+                        case_len: var_types.n_values() * 8,
+                    });
+                }
+            };
+            values.push(Value::from_raw(&UntypedValue(raw), var_type, endian));
+        }
+        Ok(Some(values))
+    }
+
+    /// Reads one case in the simple compressed format.
+    ///
+    /// `codes` holds command codes that have been read but not yet used; it
+    /// is refilled 8 bytes at a time from `reader` and carries over between
+    /// calls.  `bias` is subtracted from codes 1 through 251 to produce a
+    /// number.
+    ///
+    /// Returns `Ok(None)` if the data ends (end of input or code 252) before
+    /// the first value of the case, and an error if it ends partway through
+    /// the case or a code does not suit the variable type.
+    fn read_compressed_case<R: Read + Seek>(
+        reader: &mut R,
+        var_types: &VarTypes,
+        codes: &mut VecDeque<u8>,
+        endian: Endian,
+        bias: f64,
+    ) -> Result<Option<Vec<Self>>, Error> {
+        let case_start = reader.stream_position()?;
+        let mut values = Vec::with_capacity(var_types.n_values());
+        for (i, var_type) in var_types.iter().enumerate() {
+            // Consume codes until one of them yields a value.
+            let value = loop {
+                let Some(code) = codes.pop_front() else {
+                    // Out of codes: read the next block of 8.
+                    let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
+                        if i == 0 {
+                            return Ok(None);
+                        } else {
+                            let offset = reader.stream_position()?;
+                            return Err(Error::EofInCompressedCase {
+                                offset,
+                                case_ofs: offset - case_start,
+                            });
+                        }
+                    };
+                    codes.extend(new_codes.into_iter());
+                    continue;
+                };
+                match code {
+                    // Code 0 produces no value and is skipped.
+                    0 => (),
+                    // Codes 1 through 251 encode the number `code - bias`.
+                    // For a string variable the bytes of that number are
+                    // stored as the string.
+                    1..=251 => match var_type {
+                        VarType::Numeric => break Self::Number(Some(code as f64 - bias)),
+                        VarType::String => {
+                            break Self::String(RawStrArray(endian.to_bytes(code as f64 - bias)))
+                        }
+                    },
+                    // Code 252 marks the end of the data.
+                    252 => {
+                        if i == 0 {
+                            return Ok(None);
+                        } else {
+                            let offset = reader.stream_position()?;
+                            return Err(Error::PartialCompressedCase {
+                                offset,
+                                case_ofs: offset - case_start,
+                            });
+                        }
+                    }
+                    // Code 253: the value follows as 8 literal bytes.
+                    253 => {
+                        break Self::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian)
+                    }
+                    // Code 254: a string of 8 spaces.
+                    254 => match var_type {
+                        VarType::String => break Self::String(RawStrArray(*b"        ")), // XXX EBCDIC
+                        VarType::Numeric => {
+                            return Err(Error::CompressedStringExpected {
+                                offset: case_start,
+                                case_ofs: reader.stream_position()? - case_start,
+                            })
+                        }
+                    },
+                    // Code 255: a number with no value (`Number(None)`).
+                    255 => match var_type {
+                        VarType::Numeric => break Self::Number(None),
+                        VarType::String => {
+                            return Err(Error::CompressedNumberExpected {
+                                offset: case_start,
+                                case_ofs: reader.stream_position()? - case_start,
+                            })
+                        }
+                    },
+                }
+            };
+            values.push(value);
+        }
+        Ok(Some(values))
+    }
+
+    /// Converts this raw value into a `Value`, truncating a string to the
+    /// first `width` bytes.
+    ///
+    /// NOTE(review): for a string value this appears to panic if `width` is
+    /// not a string width (the `unwrap`) or is greater than 8 (the slice) --
+    /// confirm that callers rule out both.
+    pub fn decode(&self, width: VarWidth) -> Value {
+        match self {
+            Self::Number(x) => Value::Number(*x),
+            Self::String(s) => {
+                let width = width.as_string_width().unwrap();
+                Value::String(RawString::from(&s.0[..width]))
+            }
+        }
+    }
+}
+
+/// Reader that decompresses a sequence of back-to-back zlib streams from `R`
+/// as if they were one continuous stream.
+struct ZlibDecodeMultiple<R>
+where
+    R: Read + Seek,
+{
+    /// Decoder for the current zlib stream.  It is held in an `Option` so
+    /// that it can be taken apart and rebuilt when a stream ends; it is
+    /// `Some` except briefly during that swap.
+    reader: Option<ZlibDecoder<R>>,
+}
+
+impl<R> ZlibDecodeMultiple<R>
+where
+    R: Read + Seek,
+{
+    fn new(reader: R) -> ZlibDecodeMultiple<R> {
+        ZlibDecodeMultiple {
+            reader: Some(ZlibDecoder::new(reader)),
+        }
+    }
+}
+
+impl<R> Read for ZlibDecodeMultiple<R>
+where
+    R: Read + Seek,
+{
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
+        loop {
+            match self.reader.as_mut().unwrap().read(buf)? {
+                0 => {
+                    let inner = self.reader.take().unwrap().into_inner();
+                    self.reader = Some(ZlibDecoder::new(inner));
+                }
+                n => return Ok(n),
+            };
+        }
+    }
+}
+
+/// Seeking is forwarded directly to the underlying reader; the decoder
+/// itself is not reset or otherwise informed here.
+impl<R> Seek for ZlibDecodeMultiple<R>
+where
+    R: Read + Seek,
+{
+    fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
+        self.reader.as_mut().unwrap().get_mut().seek(pos)
+    }
+}
+
+/// Tracks which part of the system file [Reader] will produce next.
+enum ReaderState {
+    /// Nothing has been produced yet; the file header record comes next.
+    Start,
+    /// Header records are being read, up to the end-of-headers record.
+    Headers,
+    /// The ZLIB header comes next (entered only for ZLIB-compressed files).
+    ZlibHeader,
+    /// The ZLIB trailer comes next, located using values copied from the ZLIB
+    /// header.
+    ZlibTrailer {
+        /// Trailer offset as reported by the ZLIB header.
+        ztrailer_offset: u64,
+        /// Trailer length as reported by the ZLIB header.
+        ztrailer_len: u64,
+    },
+    /// The record that gives access to the cases comes next.
+    Cases,
+    /// Nothing further will be produced.
+    End,
+}
+
+/// Iterator over the records of a system file read from `R`.
+pub struct Reader<R>
+where
+    R: Read + Seek + 'static,
+{
+    /// The underlying input.  It becomes `None` once it has been handed over
+    /// to the [Cases] reader.
+    reader: Option<R>,
+    /// Callback passed to the record parser for reporting warnings.
+    warn: Box<dyn Fn(Warning)>,
+
+    /// The file header record, read when the reader is created.
+    header: HeaderRecord<RawString>,
+    /// Variable widths collected from the variable records read so far.
+    var_types: VarTypes,
+
+    /// What the iterator will produce next.
+    state: ReaderState,
+}
+
+impl<R> Reader<R>
+where
+    R: Read + Seek + 'static,
+{
+    pub fn new<F>(mut reader: R, warn: F) -> Result<Self, Error>
+    where
+        F: Fn(Warning) + 'static,
+    {
+        let header = HeaderRecord::read(&mut reader)?;
+        Ok(Self {
+            reader: Some(reader),
+            warn: Box::new(warn),
+            header,
+            var_types: VarTypes::new(),
+            state: ReaderState::Start,
+        })
+    }
+    fn cases(&mut self) -> Cases {
+        self.state = ReaderState::End;
+        Cases::new(
+            self.reader.take().unwrap(),
+            take(&mut self.var_types),
+            &self.header,
+        )
+    }
+    fn _next(&mut self) -> Option<<Self as Iterator>::Item> {
+        match self.state {
+            ReaderState::Start => {
+                self.state = ReaderState::Headers;
+                Some(Ok(Record::Header(self.header.clone())))
+            }
+            ReaderState::Headers => {
+                let record = loop {
+                    match Record::read(
+                        self.reader.as_mut().unwrap(),
+                        self.header.endian,
+                        &self.var_types,
+                        &self.warn,
+                    ) {
+                        Ok(Some(record)) => break record,
+                        Ok(None) => (),
+                        Err(error) => return Some(Err(error)),
+                    }
+                };
+                match record {
+                    Record::Variable(VariableRecord { width, .. }) => self.var_types.push(width),
+                    Record::EndOfHeaders(_) => {
+                        self.state = if let Some(Compression::ZLib) = self.header.compression {
+                            ReaderState::ZlibHeader
+                        } else {
+                            ReaderState::Cases
+                        };
+                    }
+                    _ => (),
+                };
+                Some(Ok(record))
+            }
+            ReaderState::ZlibHeader => {
+                let zheader = match ZHeader::read(self.reader.as_mut().unwrap(), self.header.endian)
+                {
+                    Ok(zheader) => zheader,
+                    Err(error) => return Some(Err(error)),
+                };
+                self.state = ReaderState::ZlibTrailer {
+                    ztrailer_offset: zheader.ztrailer_offset,
+                    ztrailer_len: zheader.ztrailer_len,
+                };
+                Some(Ok(Record::ZHeader(zheader)))
+            }
+            ReaderState::ZlibTrailer {
+                ztrailer_offset,
+                ztrailer_len,
+            } => {
+                match ZTrailer::read(
+                    self.reader.as_mut().unwrap(),
+                    self.header.endian,
+                    ztrailer_offset,
+                    ztrailer_len,
+                ) {
+                    Ok(None) => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
+                    Ok(Some(ztrailer)) => Some(Ok(Record::ZTrailer(ztrailer))),
+                    Err(error) => Some(Err(error)),
+                }
+            }
+            ReaderState::Cases => Some(Ok(Record::Cases(Rc::new(RefCell::new(self.cases()))))),
+            ReaderState::End => None,
+        }
+    }
+}
+
+impl<R> Iterator for Reader<R>
+where
+    R: Read + Seek + 'static,
+{
+    type Item = Result<Record, Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let retval = self._next();
+        if matches!(retval, Some(Err(_))) {
+            self.state = ReaderState::End;
+        }
+        retval
+    }
+}
+
+trait ReadSeek: Read + Seek {}
+impl<T> ReadSeek for T where T: Read + Seek {}
+
+/// Iterator over the cases (rows of data) in a system file.
+pub struct Cases {
+    /// Source of case data; wrapped in a zlib decoder for ZLIB-compressed
+    /// files.
+    reader: Box<dyn ReadSeek>,
+    /// Variable widths collected while reading the headers.
+    var_types: VarTypes,
+    /// Compression recorded in the file header, if any.
+    compression: Option<Compression>,
+    /// Bias copied from the file header; passed to the compressed-case reader.
+    bias: f64,
+    /// Byte order copied from the file header.
+    endian: Endian,
+    /// Scratch queue handed to the compressed-case reader on every call.
+    /// Presumably holds compression codes not yet consumed; confirm against
+    /// `read_compressed_case`.
+    codes: VecDeque<u8>,
+    /// Set once the data has run out or an error has been returned, after
+    /// which the iterator yields nothing more.
+    eof: bool,
+}
+
+impl Debug for Cases {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "Cases")
+    }
+}
+
+impl Cases {
+    fn new<R>(reader: R, var_types: VarTypes, header: &HeaderRecord<RawString>) -> Self
+    where
+        R: Read + Seek + 'static,
+    {
+        Self {
+            reader: if header.compression == Some(Compression::ZLib) {
+                Box::new(ZlibDecodeMultiple::new(reader))
+            } else {
+                Box::new(reader)
+            },
+            var_types,
+            compression: header.compression,
+            bias: header.bias,
+            endian: header.endian,
+            codes: VecDeque::with_capacity(8),
+            eof: false,
+        }
+    }
+}
+
+impl Iterator for Cases {
+    type Item = Result<Vec<RawValue>, Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.eof {
+            return None;
+        }
+
+        let retval = if self.compression.is_some() {
+            Value::read_compressed_case(
+                &mut self.reader,
+                &self.var_types,
+                &mut self.codes,
+                self.endian,
+                self.bias,
+            )
+            .transpose()
+        } else {
+            Value::read_case(&mut self.reader, &self.var_types, self.endian).transpose()
+        };
+        self.eof = matches!(retval, None | Some(Err(_)));
+        retval
+    }
+}
+
+/// A print or write format exactly as stored in a variable record, packed
+/// into a single 32-bit integer.
+///
+/// The `Debug` output splits it into three parts: a format type in the bits
+/// above bit 15, then two 8-bit fields (presumably width and number of
+/// decimals, judging by how they are printed).
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct Spec(pub u32);
+
+impl Debug for Spec {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let type_ = format_name(self.0 >> 16);
+        let w = (self.0 >> 8) & 0xff;
+        let d = self.0 & 0xff;
+        write!(f, "{:06x} ({type_}{w}.{d})", self.0)
+    }
+}
+
+fn format_name(type_: u32) -> Cow<'static, str> {
+    match type_ {
+        1 => "A",
+        2 => "AHEX",
+        3 => "COMMA",
+        4 => "DOLLAR",
+        5 => "F",
+        6 => "IB",
+        7 => "PIBHEX",
+        8 => "P",
+        9 => "PIB",
+        10 => "PK",
+        11 => "RB",
+        12 => "RBHEX",
+        15 => "Z",
+        16 => "N",
+        17 => "E",
+        20 => "DATE",
+        21 => "TIME",
+        22 => "DATETIME",
+        23 => "ADATE",
+        24 => "JDATE",
+        25 => "DTIME",
+        26 => "WKDAY",
+        27 => "MONTH",
+        28 => "MOYR",
+        29 => "QYR",
+        30 => "WKYR",
+        31 => "PCT",
+        32 => "DOT",
+        33 => "CCA",
+        34 => "CCB",
+        35 => "CCC",
+        36 => "CCD",
+        37 => "CCE",
+        38 => "EDATE",
+        39 => "SDATE",
+        40 => "MTIME",
+        41 => "YMDHMS",
+        _ => return format!("<unknown format {type_}>").into(),
+    }
+    .into()
+}
+
+/// The user-missing values declared for a variable: some individual values
+/// and, optionally, one range.  `S` is the type used for string values.
+#[derive(Clone)]
+pub struct MissingValues<S = Box<[u8]>>
+where
+    S: Debug,
+{
+    /// Individual missing values, up to 3 of them.
+    pub values: Vec<Value<S>>,
+
+    /// Optional range of missing values.
+    pub range: Option<(Value<S>, Value<S>)>,
+}
+
+impl<S> Debug for MissingValues<S>
+where
+    S: Debug,
+{
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        for (i, value) in self.values.iter().enumerate() {
+            if i > 0 {
+                write!(f, ", ")?;
+            }
+            write!(f, "{value:?}")?;
+        }
+
+        if let Some((low, high)) = &self.range {
+            if !self.values.is_empty() {
+                write!(f, ", ")?;
+            }
+            write!(f, "{low:?} THRU {high:?}")?;
+        }
+
+        if self.is_empty() {
+            write!(f, "none")?;
+        }
+
+        Ok(())
+    }
+}
+
+impl<S> MissingValues<S>
+where
+    S: Debug,
+{
+    fn is_empty(&self) -> bool {
+        self.values.is_empty() && self.range.is_none()
+    }
+}
+
+impl<S> Default for MissingValues<S>
+where
+    S: Debug,
+{
+    fn default() -> Self {
+        Self {
+            values: Vec::new(),
+            range: None,
+        }
+    }
+}
+
+impl MissingValues {
+    fn read<R: Read + Seek>(
+        r: &mut R,
+        offset: u64,
+        width: RawWidth,
+        code: i32,
+        endian: Endian,
+        warn: &dyn Fn(Warning),
+    ) -> Result<Self, Error> {
+        let (individual_values, has_range) = match code {
+            0..=3 => (code as usize, false),
+            -2 => (0, true),
+            -3 => (1, true),
+            _ => return Err(Error::BadMissingValueCode { offset, code }),
+        };
+
+        let mut values = Vec::with_capacity(individual_values);
+        for _ in 0..individual_values {
+            values.push(read_bytes::<8, _>(r)?);
+        }
+        let range = if has_range {
+            let low = read_bytes::<8, _>(r)?;
+            let high = read_bytes::<8, _>(r)?;
+            Some((low, high))
+        } else {
+            None
+        };
+
+        match VarWidth::try_from(width) {
+            Ok(VarWidth::Numeric) => {
+                let values = values
+                    .into_iter()
+                    .map(|v| Value::Number(endian.parse(v)))
+                    .collect();
+                let range = range.map(|(low, high)| {
+                    (
+                        Value::Number(endian.parse(low)),
+                        Value::Number(endian.parse(high)),
+                    )
+                });
+                return Ok(Self { values, range });
+            }
+            Ok(VarWidth::String(width)) if width <= 8 && range.is_none() => {
+                let values = values
+                    .into_iter()
+                    .map(|value| Value::String(Box::from(&value[..width as usize])))
+                    .collect();
+                return Ok(Self {
+                    values,
+                    range: None,
+                });
+            }
+            Ok(VarWidth::String(width)) if width > 8 => warn(Warning::TBD),
+            Ok(VarWidth::String(_)) => warn(Warning::TBD),
+            Err(()) => warn(Warning::TBD),
+        }
+        Ok(Self::default())
+    }
+}
+
+/// A variable record from a system file.  `S` is the string type used for the
+/// name and label: raw bytes as read from the file, or text after decoding.
+#[derive(Clone)]
+pub struct VariableRecord<S>
+where
+    S: Debug,
+{
+    /// Range of offsets in file.
+    pub offsets: Range<u64>,
+
+    /// Variable width, in the range -1..=255.
+    pub width: RawWidth,
+
+    /// Variable name, padded on the right with spaces.
+    pub name: S,
+
+    /// Print format.
+    pub print_format: Spec,
+
+    /// Write format.
+    pub write_format: Spec,
+
+    /// Missing values.
+    pub missing_values: MissingValues,
+
+    /// Optional variable label.
+    pub label: Option<S>,
+}
+
+/// The width field of a variable record, as stored in the file.
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum RawWidth {
+    /// Stored as -1: the record continues a long string variable.
+    Continuation,
+    /// Stored as 0: a numeric variable.
+    Numeric,
+    /// Stored as 1 through 255: a string variable of that many bytes.
+    String(NonZeroU8),
+}
+
+impl RawWidth {
+    pub fn n_values(&self) -> Option<usize> {
+        match self {
+            RawWidth::Numeric => Some(1),
+            RawWidth::String(width) => Some((width.get() as usize).div_ceil(8)),
+            _ => None,
+        }
+    }
+}
+
+impl TryFrom<i32> for RawWidth {
+    type Error = ();
+
+    fn try_from(value: i32) -> Result<Self, Self::Error> {
+        match value {
+            -1 => Ok(Self::Continuation),
+            0 => Ok(Self::Numeric),
+            1..=255 => Ok(Self::String(NonZeroU8::new(value as u8).unwrap())),
+            _ => Err(()),
+        }
+    }
+}
+
+impl Display for RawWidth {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        match self {
+            RawWidth::Continuation => write!(f, "long string continuation"),
+            RawWidth::Numeric => write!(f, "numeric"),
+            RawWidth::String(width) => write!(f, "{width}-byte string"),
+        }
+    }
+}
+
+impl<S> Debug for VariableRecord<S>
+where
+    S: Debug,
+{
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        writeln!(f, "Width: {}", self.width,)?;
+        writeln!(f, "Print format: {:?}", self.print_format)?;
+        writeln!(f, "Write format: {:?}", self.write_format)?;
+        writeln!(f, "Name: {:?}", &self.name)?;
+        writeln!(f, "Variable label: {:?}", self.label)?;
+        writeln!(f, "Missing values: {:?}", self.missing_values)
+    }
+}
+
+impl VariableRecord<RawString> {
+    fn read<R: Read + Seek>(
+        r: &mut R,
+        endian: Endian,
+        warn: &dyn Fn(Warning),
+    ) -> Result<Record, Error> {
+        let start_offset = r.stream_position()?;
+        let width: i32 = endian.parse(read_bytes(r)?);
+        let width: RawWidth = width.try_into().map_err(|_| Error::BadVariableWidth {
+            start_offset,
+            width,
+        })?;
+        let code_offset = r.stream_position()?;
+        let has_variable_label: u32 = endian.parse(read_bytes(r)?);
+        let missing_value_code: i32 = endian.parse(read_bytes(r)?);
+        let print_format = Spec(endian.parse(read_bytes(r)?));
+        let write_format = Spec(endian.parse(read_bytes(r)?));
+        let name = RawString(read_vec(r, 8)?);
+
+        let label = match has_variable_label {
+            0 => None,
+            1 => {
+                let len: u32 = endian.parse(read_bytes(r)?);
+                let read_len = len.min(65535) as usize;
+                let label = RawString(read_vec(r, read_len)?);
+
+                let padding_bytes = Integer::next_multiple_of(&len, &4) - len;
+                let _ = read_vec(r, padding_bytes as usize)?;
+
+                Some(label)
+            }
+            _ => {
+                return Err(Error::BadVariableLabelCode {
+                    start_offset,
+                    code_offset,
+                    code: has_variable_label,
+                })
+            }
+        };
+
+        let missing_values =
+            MissingValues::read(r, start_offset, width, missing_value_code, endian, warn)?;
+
+        let end_offset = r.stream_position()?;
+
+        Ok(Record::Variable(VariableRecord {
+            offsets: start_offset..end_offset,
+            width,
+            name,
+            print_format,
+            write_format,
+            missing_values,
+            label,
+        }))
+    }
+
+    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
+        DecodedRecord::Variable(VariableRecord {
+            offsets: self.offsets.clone(),
+            width: self.width,
+            name: decoder.decode(&self.name).to_string(),
+            print_format: self.print_format,
+            write_format: self.write_format,
+            missing_values: self.missing_values,
+            label: self
+                .label
+                .as_ref()
+                .map(|label| decoder.decode(label).to_string()),
+        })
+    }
+}
+
+/// Eight raw bytes holding a value whose type (number or string) is not yet
+/// known.
+#[derive(Copy, Clone)]
+pub struct UntypedValue(pub [u8; 8]);
+
+impl Debug for UntypedValue {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let little: f64 = Endian::Little.parse(self.0);
+        let little = format!("{:?}", little);
+        let big: f64 = Endian::Big.parse(self.0);
+        let big = format!("{:?}", big);
+        let number = if little.len() <= big.len() {
+            little
+        } else {
+            big
+        };
+        write!(f, "{number}")?;
+
+        let string = default_decode(&self.0);
+        let string = string
+            .split(|c: char| c == '\0' || c.is_control())
+            .next()
+            .unwrap();
+        write!(f, "{string:?}")?;
+        Ok(())
+    }
+}
+
+/// An owned string of bytes.  The value itself does not record which
+/// character encoding the bytes are in.
+#[derive(Clone, PartialEq, Default, Eq, PartialOrd, Ord, Hash)]
+pub struct RawString(pub Vec<u8>);
+
+impl RawString {
+    pub fn spaces(n: usize) -> Self {
+        Self(std::iter::repeat_n(b' ', n).collect())
+    }
+    pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
+        EncodedStr::new(&self.0, encoding)
+    }
+}
+
+impl From<Cow<'_, [u8]>> for RawString {
+    fn from(value: Cow<'_, [u8]>) -> Self {
+        Self(value.into_owned())
+    }
+}
+
+impl From<Vec<u8>> for RawString {
+    fn from(source: Vec<u8>) -> Self {
+        Self(source)
+    }
+}
+
+impl From<&[u8]> for RawString {
+    fn from(source: &[u8]) -> Self {
+        Self(source.into())
+    }
+}
+
+impl Debug for RawString {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "{:?}", default_decode(self.0.as_slice()))
+    }
+}
+
+/// A string of exactly `N` bytes.  The value itself does not record which
+/// character encoding the bytes are in.
+#[derive(Copy, Clone)]
+pub struct RawStrArray<const N: usize>(pub [u8; N]);
+
+impl<const N: usize> From<[u8; N]> for RawStrArray<N> {
+    fn from(source: [u8; N]) -> Self {
+        Self(source)
+    }
+}
+
+impl<const N: usize> Debug for RawStrArray<N> {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "{:?}", default_decode(&self.0))
+    }
+}
+
+/// An owned string that is either bytes in a known encoding or UTF-8 text.
+/// [EncodedStr] is the borrowed counterpart.
+#[derive(Clone, Debug)]
+pub enum EncodedString {
+    /// Bytes together with the encoding they are in.
+    Encoded {
+        bytes: Vec<u8>,
+        encoding: &'static Encoding,
+    },
+    /// UTF-8 text.
+    Utf8 {
+        s: String,
+    },
+}
+
+impl EncodedString {
+    pub fn borrowed(&self) -> EncodedStr<'_> {
+        match self {
+            EncodedString::Encoded { bytes, encoding } => EncodedStr::Encoded { bytes, encoding },
+            EncodedString::Utf8 { s } => EncodedStr::Utf8 { s },
+        }
+    }
+}
+
+impl<'a> From<EncodedStr<'a>> for EncodedString {
+    fn from(value: EncodedStr<'a>) -> Self {
+        match value {
+            EncodedStr::Encoded { bytes, encoding } => Self::Encoded {
+                bytes: bytes.into(),
+                encoding,
+            },
+            EncodedStr::Utf8 { s } => Self::Utf8 { s: s.into() },
+        }
+    }
+}
+
+/// A borrowed string that is either bytes in a known encoding or UTF-8 text.
+/// [EncodedString] is the owned counterpart.
+pub enum EncodedStr<'a> {
+    /// Bytes together with the encoding they are in.
+    Encoded {
+        bytes: &'a [u8],
+        encoding: &'static Encoding,
+    },
+    /// UTF-8 text.
+    Utf8 {
+        s: &'a str,
+    },
+}
+
+impl<'a> EncodedStr<'a> {
+    pub fn new(bytes: &'a [u8], encoding: &'static Encoding) -> Self {
+        Self::Encoded { bytes, encoding }
+    }
+    pub fn as_str(&self) -> Cow<'_, str> {
+        match self {
+            EncodedStr::Encoded { bytes, encoding } => {
+                encoding.decode_without_bom_handling(bytes).0
+            }
+            EncodedStr::Utf8 { s } => Cow::from(*s),
+        }
+    }
+    pub fn as_bytes(&self) -> &[u8] {
+        match self {
+            EncodedStr::Encoded { bytes, .. } => bytes,
+            EncodedStr::Utf8 { s } => s.as_bytes(),
+        }
+    }
+    pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
+        match self {
+            EncodedStr::Encoded { bytes, encoding } => {
+                let utf8 = encoding.decode_without_bom_handling(bytes).0;
+                match encoding.encode(&utf8).0 {
+                    Cow::Borrowed(_) => {
+                        // Recoding into UTF-8 and then back did not change anything.
+                        Cow::from(*bytes)
+                    }
+                    Cow::Owned(owned) => Cow::Owned(owned),
+                }
+            }
+            EncodedStr::Utf8 { s } => encoding.encode(s).0,
+        }
+    }
+    pub fn is_empty(&self) -> bool {
+        match self {
+            EncodedStr::Encoded { bytes, .. } => bytes.is_empty(),
+            EncodedStr::Utf8 { s } => s.is_empty(),
+        }
+    }
+    pub fn quoted(&self) -> QuotedEncodedStr {
+        QuotedEncodedStr(self)
+    }
+}
+
+impl<'a> From<&'a str> for EncodedStr<'a> {
+    fn from(s: &'a str) -> Self {
+        Self::Utf8 { s }
+    }
+}
+
+impl<'a> From<&'a String> for EncodedStr<'a> {
+    fn from(s: &'a String) -> Self {
+        Self::Utf8 { s: s.as_str() }
+    }
+}
+
+pub struct QuotedEncodedStr<'a>(&'a EncodedStr<'a>);
+
+impl Display for QuotedEncodedStr<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?}", self.0.as_str())
+    }
+}
+
+/// A value paired with its label.  `V` is the type used for string values and
+/// `S` is the string type of the label.
+#[derive(Clone, Debug)]
+pub struct ValueLabel<V, S>
+where
+    V: Debug,
+    S: Debug,
+{
+    /// The value being labeled.
+    pub value: Value<V>,
+    /// The label for the value.
+    pub label: S,
+}
+
+/// A set of value labels together with the variables they apply to.
+#[derive(Clone)]
+pub struct ValueLabelRecord<V, S>
+where
+    V: Debug,
+    S: Debug,
+{
+    /// Range of offsets in file.
+    pub offsets: Range<u64>,
+
+    /// The labels.
+    pub labels: Vec<ValueLabel<V, S>>,
+
+    /// The 1-based dictionary indexes of the variables that the labels apply
+    /// to.
+    pub dict_indexes: Vec<u32>,
+
+    /// The type shared by all of the variables.
+    pub var_type: VarType,
+}
+
+impl<V, S> Debug for ValueLabelRecord<V, S>
+where
+    V: Debug,
+    S: Debug,
+{
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        writeln!(f, "labels: ")?;
+        for label in self.labels.iter() {
+            writeln!(f, "{label:?}")?;
+        }
+        write!(f, "apply to {} variables", self.var_type)?;
+        for dict_index in self.dict_indexes.iter() {
+            write!(f, " #{dict_index}")?;
+        }
+        Ok(())
+    }
+}
+
+impl<V, S> ValueLabelRecord<V, S>
+where
+    V: Debug,
+    S: Debug,
+{
+    /// Maximum number of value labels in a record.  Reading a record that
+    /// declares more than this fails with an error.
+    pub const MAX_LABELS: u32 = u32::MAX / 8;
+
+    /// Maximum number of variable indexes in a record.  Reading a record that
+    /// declares more than this fails with an error.
+    pub const MAX_INDEXES: u32 = u32::MAX / 8;
+}
+
+impl ValueLabelRecord<RawStrArray<8>, RawString> {
+    fn read<R: Read + Seek>(
+        r: &mut R,
+        endian: Endian,
+        var_types: &VarTypes,
+        warn: &dyn Fn(Warning),
+    ) -> Result<Option<Record>, Error> {
+        let label_offset = r.stream_position()?;
+        let n: u32 = endian.parse(read_bytes(r)?);
+        if n > Self::MAX_LABELS {
+            return Err(Error::BadNumberOfValueLabels {
+                offset: label_offset,
+                n,
+                max: Self::MAX_LABELS,
+            });
+        }
+
+        let mut labels = Vec::new();
+        for _ in 0..n {
+            let value = UntypedValue(read_bytes(r)?);
+            let label_len: u8 = endian.parse(read_bytes(r)?);
+            let label_len = label_len as usize;
+            let padded_len = Integer::next_multiple_of(&(label_len + 1), &8);
+
+            let mut label = read_vec(r, padded_len - 1)?;
+            label.truncate(label_len);
+            labels.push((value, RawString(label)));
+        }
+
+        let index_offset = r.stream_position()?;
+        let rec_type: u32 = endian.parse(read_bytes(r)?);
+        if rec_type != 4 {
+            return Err(Error::ExpectedVarIndexRecord {
+                offset: index_offset,
+                rec_type,
+            });
+        }
+
+        let n: u32 = endian.parse(read_bytes(r)?);
+        if n > Self::MAX_INDEXES {
+            return Err(Error::TooManyVarIndexes {
+                offset: index_offset,
+                n,
+                max: Self::MAX_INDEXES,
+            });
+        } else if n == 0 {
+            warn(Warning::NoVarIndexes {
+                offset: index_offset,
+            });
+            return Ok(None);
+        }
+
+        let index_offset = r.stream_position()?;
+        let mut dict_indexes = Vec::with_capacity(n as usize);
+        let mut invalid_indexes = Vec::new();
+        for _ in 0..n {
+            let index: u32 = endian.parse(read_bytes(r)?);
+            if var_types.is_valid_index(index as usize) {
+                dict_indexes.push(index);
+            } else {
+                invalid_indexes.push(index);
+            }
+        }
+        if !invalid_indexes.is_empty() {
+            warn(Warning::InvalidVarIndexes {
+                offset: index_offset,
+                max: var_types.n_values(),
+                invalid: invalid_indexes,
+            });
+        }
+
+        let Some(&first_index) = dict_indexes.first() else {
+            return Ok(None);
+        };
+        let var_type = var_types.types[first_index as usize - 1].unwrap();
+        let mut wrong_type_indexes = Vec::new();
+        dict_indexes.retain(|&index| {
+            if var_types.types[index as usize - 1] != Some(var_type) {
+                wrong_type_indexes.push(index);
+                false
+            } else {
+                true
+            }
+        });
+        if !wrong_type_indexes.is_empty() {
+            warn(Warning::MixedVarTypes {
+                offset: index_offset,
+                var_type,
+                wrong_types: wrong_type_indexes,
+            });
+        }
+
+        let labels = labels
+            .into_iter()
+            .map(|(value, label)| ValueLabel {
+                value: Value::from_raw(&value, var_type, endian),
+                label,
+            })
+            .collect();
+
+        let end_offset = r.stream_position()?;
+        Ok(Some(Record::ValueLabel(ValueLabelRecord {
+            offsets: label_offset..end_offset,
+            labels,
+            dict_indexes,
+            var_type,
+        })))
+    }
+
+    fn decode(self, decoder: &Decoder) -> ValueLabelRecord<RawStrArray<8>, String> {
+        let labels = self
+            .labels
+            .iter()
+            .map(|ValueLabel { value, label }| ValueLabel {
+                value: value.clone(),
+                label: decoder.decode(label).to_string(),
+            })
+            .collect();
+        ValueLabelRecord {
+            offsets: self.offsets.clone(),
+            labels,
+            dict_indexes: self.dict_indexes.clone(),
+            var_type: self.var_type,
+        }
+    }
+}
+
+/// A document record, holding the lines of the document attached to a system
+/// file.  `S` is the type of each line.
+#[derive(Clone, Debug)]
+pub struct DocumentRecord<S>
+where
+    S: Debug,
+{
+    /// Range of offsets in file.
+    pub offsets: Range<u64>,
+
+    /// The document, as an array of lines.  Raw lines are exactly 80 bytes long
+    /// and are right-padded with spaces without any new-line termination.
+    pub lines: Vec<S>,
+}
+
+/// A document line as stored in the file: exactly [DOC_LINE_LEN] raw bytes.
+pub type RawDocumentLine = RawStrArray<DOC_LINE_LEN>;
+
+/// Length of a line in a document.  Document lines are fixed-length and
+/// padded on the right with spaces.
+pub const DOC_LINE_LEN: usize = 80;
+
+impl DocumentRecord<RawDocumentLine> {
+    /// Maximum number of lines we will accept in a document.  This is simply
+    /// the maximum number that will fit in a 32-bit space.
+    pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
+
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
+        let start_offset = r.stream_position()?;
+        let n: u32 = endian.parse(read_bytes(r)?);
+        let n = n as usize;
+        if n > Self::MAX_LINES {
+            Err(Error::BadDocumentLength {
+                offset: start_offset,
+                n,
+                max: Self::MAX_LINES,
+            })
+        } else {
+            let mut lines = Vec::with_capacity(n);
+            for _ in 0..n {
+                lines.push(RawStrArray(read_bytes(r)?));
+            }
+            let end_offset = r.stream_position()?;
+            Ok(Record::Document(DocumentRecord {
+                offsets: start_offset..end_offset,
+                lines,
+            }))
+        }
+    }
+
+    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
+        DecodedRecord::Document(DocumentRecord {
+            offsets: self.offsets.clone(),
+            lines: self
+                .lines
+                .iter()
+                .map(|s| decoder.decode_slice(&s.0).to_string())
+                .collect(),
+        })
+    }
+}
+
+/// Describes one kind of extension record and how to parse it from the
+/// generic [Extension] container.
+trait ExtensionRecord {
+    /// Subtype number that identifies this kind of extension record.
+    const SUBTYPE: u32;
+    /// Required size of each data element, in bytes, if it is fixed.
+    /// NOTE(review): presumably enforced by `Extension::check_size`; confirm.
+    const SIZE: Option<u32>;
+    /// Required number of data elements, if it is fixed.
+    /// NOTE(review): presumably enforced by `Extension::check_size`; confirm.
+    const COUNT: Option<u32>;
+    /// Human-readable name for this kind of record.
+    const NAME: &'static str;
+    /// Parses `ext` into a record of this kind, reading numbers in byte order
+    /// `endian`.  A problem with the contents is reported as a [Warning].
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning>;
+}
+
+/// The integer info extension record: eight 32-bit integers, stored here in
+/// the order in which they appear in the file.
+#[derive(Clone, Debug)]
+pub struct IntegerInfoRecord {
+    /// Range of offsets in file.
+    pub offsets: Range<u64>,
+    /// The first three integers, taken together as a three-part version.
+    pub version: (i32, i32, i32),
+    /// Fourth integer.
+    pub machine_code: i32,
+    /// Fifth integer.
+    pub floating_point_rep: i32,
+    /// Sixth integer.
+    pub compression_code: i32,
+    /// Seventh integer.
+    pub endianness: i32,
+    /// Eighth integer.
+    pub character_code: i32,
+}
+
+impl ExtensionRecord for IntegerInfoRecord {
+    const SUBTYPE: u32 = 3;
+    const SIZE: Option<u32> = Some(4);
+    const COUNT: Option<u32> = Some(8);
+    const NAME: &'static str = "integer record";
+
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+        ext.check_size::<Self>()?;
+
+        let mut input = &ext.data[..];
+        let data: Vec<i32> = (0..8)
+            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
+            .collect();
+        Ok(Record::IntegerInfo(IntegerInfoRecord {
+            offsets: ext.offsets.clone(),
+            version: (data[0], data[1], data[2]),
+            machine_code: data[3],
+            floating_point_rep: data[4],
+            compression_code: data[5],
+            endianness: data[6],
+            character_code: data[7],
+        }))
+    }
+}
+
+/// The floating point info extension record: three 64-bit floating point
+/// numbers, stored here in the order in which they appear in the file.
+#[derive(Clone, Debug)]
+pub struct FloatInfoRecord {
+    /// First number.
+    pub sysmis: f64,
+    /// Second number.
+    pub highest: f64,
+    /// Third number.
+    pub lowest: f64,
+}
+
+impl ExtensionRecord for FloatInfoRecord {
+    const SUBTYPE: u32 = 4;
+    const SIZE: Option<u32> = Some(8);
+    const COUNT: Option<u32> = Some(3);
+    const NAME: &'static str = "floating point record";
+
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+        ext.check_size::<Self>()?;
+
+        let mut input = &ext.data[..];
+        let data: Vec<f64> = (0..3)
+            .map(|_| endian.parse(read_bytes(&mut input).unwrap()))
+            .collect();
+        Ok(Record::FloatInfo(FloatInfoRecord {
+            sysmis: data[0],
+            highest: data[1],
+            lowest: data[2],
+        }))
+    }
+}
+
+/// Where the category labels of a multiple dichotomy set come from.
+/// NOTE(review): meaning inferred from the variant names; confirm against the
+/// file format documentation.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum CategoryLabels {
+    /// Presumably: use the variable labels.
+    VarLabels,
+    /// Presumably: use the value labels of the counted value.
+    CountedValues,
+}
+
+/// The type of a multiple response set.
+#[derive(Clone, Debug)]
+pub enum MultipleResponseType {
+    /// A multiple dichotomy set, written in the file with type `D` or `E`.
+    MultipleDichotomy {
+        /// The counted string that follows the type in the file.
+        value: RawString,
+        /// Source of category labels.  Type `D` always uses
+        /// [CategoryLabels::VarLabels]; type `E` specifies it explicitly.
+        labels: CategoryLabels,
+    },
+    /// A multiple category set, written in the file with type `C`.
+    MultipleCategory,
+}
+
+impl MultipleResponseType {
+    fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Warning> {
+        let (mr_type, input) = match input.split_first() {
+            Some((b'C', input)) => (MultipleResponseType::MultipleCategory, input),
+            Some((b'D', input)) => {
+                let (value, input) = parse_counted_string(input)?;
+                (
+                    MultipleResponseType::MultipleDichotomy {
+                        value,
+                        labels: CategoryLabels::VarLabels,
+                    },
+                    input,
+                )
+            }
+            Some((b'E', input)) => {
+                let (labels, input) = if let Some(rest) = input.strip_prefix(b" 1 ") {
+                    (CategoryLabels::CountedValues, rest)
+                } else if let Some(rest) = input.strip_prefix(b" 11 ") {
+                    (CategoryLabels::VarLabels, rest)
+                } else {
+                    return Err(Warning::TBD);
+                };
+                let (value, input) = parse_counted_string(input)?;
+                (
+                    MultipleResponseType::MultipleDichotomy { value, labels },
+                    input,
+                )
+            }
+            _ => return Err(Warning::TBD),
+        };
+        Ok((mr_type, input))
+    }
+}
+
+/// One multiple response set.  `I` is the type used for names and `S` is the
+/// string type of the label.
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSet<I, S>
+where
+    I: Debug,
+    S: Debug,
+{
+    /// Name of the set: the text before the `=` in the file.
+    pub name: I,
+    /// Label of the set.
+    pub label: S,
+    /// Type of the set.
+    pub mr_type: MultipleResponseType,
+    /// Names of the variables in the set, which are space-separated in the
+    /// file.
+    pub short_names: Vec<I>,
+}
+
+impl MultipleResponseSet<RawString, RawString> {
+    fn parse(input: &[u8]) -> Result<(Self, &[u8]), Warning> {
+        let Some(equals) = input.iter().position(|&b| b == b'=') else {
+            return Err(Warning::TBD);
+        };
+        let (name, input) = input.split_at(equals);
+        let (mr_type, input) = MultipleResponseType::parse(input)?;
+        let Some(input) = input.strip_prefix(b" ") else {
+            return Err(Warning::TBD);
+        };
+        let (label, mut input) = parse_counted_string(input)?;
+        let mut vars = Vec::new();
+        while input.first() != Some(&b'\n') {
+            match input.split_first() {
+                Some((b' ', rest)) => {
+                    let Some(length) = rest.iter().position(|b| b" \n".contains(b)) else {
+                        return Err(Warning::TBD);
+                    };
+                    let (var, rest) = rest.split_at(length);
+                    if !var.is_empty() {
+                        vars.push(var.into());
+                    }
+                    input = rest;
+                }
+                _ => return Err(Warning::TBD),
+            }
+        }
+        while input.first() == Some(&b'\n') {
+            input = &input[1..];
+        }
+        Ok((
+            MultipleResponseSet {
+                name: name.into(),
+                label,
+                mr_type,
+                short_names: vars,
+            },
+            input,
+        ))
+    }
+
+    fn decode(
+        &self,
+        decoder: &Decoder,
+    ) -> Result<MultipleResponseSet<Identifier, String>, Warning> {
+        let mut short_names = Vec::with_capacity(self.short_names.len());
+        for short_name in self.short_names.iter() {
+            if let Some(short_name) = decoder
+                .decode_identifier(short_name)
+                .map_err(Warning::InvalidMrSetName)
+                .issue_warning(&decoder.warn)
+            {
+                short_names.push(short_name);
+            }
+        }
+        Ok(MultipleResponseSet {
+            name: decoder
+                .decode_identifier(&self.name)
+                .map_err(Warning::InvalidMrSetVariableName)?,
+            label: decoder.decode(&self.label).to_string(),
+            mr_type: self.mr_type.clone(),
+            short_names,
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct MultipleResponseRecord<I, S>(pub Vec<MultipleResponseSet<I, S>>)
+where
+    I: Debug,
+    S: Debug;
+
+impl ExtensionRecord for MultipleResponseRecord<RawString, RawString> {
+    const SUBTYPE: u32 = 7;
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "multiple response set record";
+
+    fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
+        ext.check_size::<Self>()?;
+
+        let mut input = &ext.data[..];
+        let mut sets = Vec::new();
+        while !input.is_empty() {
+            let (set, rest) = MultipleResponseSet::parse(input)?;
+            sets.push(set);
+            input = rest;
+        }
+        Ok(Record::MultipleResponse(MultipleResponseRecord(sets)))
+    }
+}
+
+impl MultipleResponseRecord<RawString, RawString> {
+    fn decode(self, decoder: &Decoder) -> DecodedRecord {
+        let mut sets = Vec::new();
+        for set in self.0.iter() {
+            if let Some(set) = set.decode(decoder).issue_warning(&decoder.warn) {
+                sets.push(set);
+            }
+        }
+        DecodedRecord::MultipleResponse(MultipleResponseRecord(sets))
+    }
+}
+
+fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), Warning> {
+    let Some(space) = input.iter().position(|&b| b == b' ') else {
+        return Err(Warning::TBD);
+    };
+    let Ok(length) = from_utf8(&input[..space]) else {
+        return Err(Warning::TBD);
+    };
+    let Ok(length): Result<usize, _> = length.parse() else {
+        return Err(Warning::TBD);
+    };
+
+    let input = &input[space + 1..];
+    if input.len() < length {
+        return Err(Warning::TBD);
+    };
+
+    let (string, rest) = input.split_at(length);
+    Ok((string.into(), rest))
+}
+
+/// [Level of measurement](https://en.wikipedia.org/wiki/Level_of_measurement).
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Measure {
+    /// Nominal values can only be compared for equality.
+    Nominal,
+
+    /// Ordinal values can be meaningfully ordered.
+    Ordinal,
+
+    /// Scale values can be meaningfully compared for the degree of difference.
+    Scale,
+}
+
+impl Measure {
+    pub fn default_for_type(var_type: VarType) -> Option<Measure> {
+        match var_type {
+            VarType::Numeric => None,
+            VarType::String => Some(Self::Nominal),
+        }
+    }
+
+    fn try_decode(source: u32) -> Result<Option<Measure>, Warning> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Measure::Nominal)),
+            2 => Ok(Some(Measure::Ordinal)),
+            3 => Ok(Some(Measure::Scale)),
+            _ => Err(Warning::InvalidMeasurement(source)),
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Alignment {
+    Left,
+    Right,
+    Center,
+}
+
+impl Alignment {
+    fn try_decode(source: u32) -> Result<Option<Alignment>, Warning> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Alignment::Left)),
+            2 => Ok(Some(Alignment::Right)),
+            3 => Ok(Some(Alignment::Center)),
+            _ => Err(Warning::InvalidAlignment(source)),
+        }
+    }
+
+    pub fn default_for_type(var_type: VarType) -> Self {
+        match var_type {
+            VarType::Numeric => Self::Right,
+            VarType::String => Self::Left,
+        }
+    }
+}
+
+/// Display parameters for one variable, as read from a variable display
+/// record.
+#[derive(Clone, Debug)]
+pub struct VarDisplay {
+    /// Measurement level, if the record gave a valid one.
+    pub measure: Option<Measure>,
+    /// Display width; present only when the record includes widths.
+    pub width: Option<u32>,
+    /// Alignment, if the record gave a valid one.
+    pub alignment: Option<Alignment>,
+}
+
+#[derive(Clone, Debug)]
+pub struct VarDisplayRecord(pub Vec<VarDisplay>);
+
+impl VarDisplayRecord {
+    const SUBTYPE: u32 = 11;
+
+    fn parse(
+        ext: &Extension,
+        n_vars: usize,
+        endian: Endian,
+        warn: &dyn Fn(Warning),
+    ) -> Result<Record, Warning> {
+        if ext.size != 4 {
+            return Err(Warning::BadRecordSize {
+                offset: ext.offsets.start,
+                record: String::from("variable display record"),
+                size: ext.size,
+                expected_size: 4,
+            });
+        }
+
+        let has_width = if ext.count as usize == 3 * n_vars {
+            true
+        } else if ext.count as usize == 2 * n_vars {
+            false
+        } else {
+            return Err(Warning::TBD);
+        };
+
+        let mut var_displays = Vec::new();
+        let mut input = &ext.data[..];
+        for _ in 0..n_vars {
+            let measure = Measure::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
+                .issue_warning(&warn)
+                .flatten();
+            let width = has_width.then(|| endian.parse(read_bytes(&mut input).unwrap()));
+            let alignment = Alignment::try_decode(endian.parse(read_bytes(&mut input).unwrap()))
+                .issue_warning(&warn)
+                .flatten();
+            var_displays.push(VarDisplay {
+                measure,
+                width,
+                alignment,
+            });
+        }
+        Ok(Record::VarDisplay(VarDisplayRecord(var_displays)))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValues<N>
+where
+    N: Debug,
+{
+    /// Variable name.
+    pub var_name: N,
+
+    /// Missing values.
+    pub missing_values: Vec<RawStrArray<8>>,
+}
+
+impl LongStringMissingValues<RawString> {
+    fn decode(&self, decoder: &Decoder) -> Result<LongStringMissingValues<Identifier>, IdError> {
+        Ok(LongStringMissingValues {
+            var_name: decoder.decode_identifier(&self.var_name)?,
+            missing_values: self.missing_values.clone(),
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValueRecord<N>(pub Vec<LongStringMissingValues<N>>)
+where
+    N: Debug;
+
+impl ExtensionRecord for LongStringMissingValueRecord<RawString> {
+    const SUBTYPE: u32 = 22;
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "long string missing values record";
+
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+        ext.check_size::<Self>()?;
+
+        let mut input = &ext.data[..];
+        let mut missing_value_set = Vec::new();
+        while !input.is_empty() {
+            let var_name = read_string(&mut input, endian)?;
+            let n_missing_values: u8 = endian.parse(read_bytes(&mut input)?);
+            let value_len: u32 = endian.parse(read_bytes(&mut input)?);
+            if value_len != 8 {
+                let offset = (ext.data.len() - input.len() - 8) as u64 + ext.offsets.start;
+                return Err(Warning::BadLongMissingValueLength {
+                    record_offset: ext.offsets.start,
+                    offset,
+                    value_len,
+                });
+            }
+            let mut missing_values = Vec::new();
+            for i in 0..n_missing_values {
+                let value: [u8; 8] = read_bytes(&mut input)?;
+                let numeric_value: u64 = endian.parse(value);
+                let value = if i > 0 && numeric_value == 8 {
+                    // Tolerate files written by old, buggy versions of PSPP
+                    // where we believed that the value_length was repeated
+                    // before each missing value.
+                    read_bytes(&mut input)?
+                } else {
+                    value
+                };
+                missing_values.push(RawStrArray(value));
+            }
+            missing_value_set.push(LongStringMissingValues {
+                var_name,
+                missing_values,
+            });
+        }
+        Ok(Record::LongStringMissingValues(
+            LongStringMissingValueRecord(missing_value_set),
+        ))
+    }
+}
+
+impl LongStringMissingValueRecord<RawString> {
+    pub fn decode(self, decoder: &Decoder) -> LongStringMissingValueRecord<Identifier> {
+        let mut mvs = Vec::with_capacity(self.0.len());
+        for mv in self.0.iter() {
+            if let Some(mv) = mv
+                .decode(decoder)
+                .map_err(Warning::InvalidLongStringMissingValueVariableName)
+                .issue_warning(&decoder.warn)
+            {
+                mvs.push(mv);
+            }
+        }
+        LongStringMissingValueRecord(mvs)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct EncodingRecord(pub String);
+
+impl ExtensionRecord for EncodingRecord {
+    const SUBTYPE: u32 = 20;
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "encoding record";
+
+    fn parse(ext: &Extension, _endian: Endian) -> Result<Record, Warning> {
+        ext.check_size::<Self>()?;
+
+        Ok(Record::Encoding(EncodingRecord(
+            String::from_utf8(ext.data.clone()).map_err(|_| Warning::BadEncodingName {
+                offset: ext.offsets.start,
+            })?,
+        )))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct NumberOfCasesRecord {
+    /// Always observed as 1.
+    pub one: u64,
+
+    /// Number of cases.
+    pub n_cases: u64,
+}
+
+impl ExtensionRecord for NumberOfCasesRecord {
+    const SUBTYPE: u32 = 16;
+    const SIZE: Option<u32> = Some(8);
+    const COUNT: Option<u32> = Some(2);
+    const NAME: &'static str = "extended number of cases record";
+
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+        ext.check_size::<Self>()?;
+
+        let mut input = &ext.data[..];
+        let one = endian.parse(read_bytes(&mut input)?);
+        let n_cases = endian.parse(read_bytes(&mut input)?);
+
+        Ok(Record::NumberOfCases(NumberOfCasesRecord { one, n_cases }))
+    }
+}
+
+/// An extension record whose content is text that has not yet been decoded
+/// from the file's character encoding.
+#[derive(Clone, Debug)]
+pub struct TextRecord {
+    /// File offsets of the extension data that this record was built from.
+    pub offsets: Range<u64>,
+
+    /// Type of record.
+    pub rec_type: TextRecordType,
+
+    /// The text content of the record.
+    pub text: RawString,
+}
+
+/// The kind of content held by a [`TextRecord`], which determines how the
+/// text is parsed once decoded.
+#[derive(Clone, Copy, Debug)]
+pub enum TextRecordType {
+    /// Variable sets (extension subtype 5).
+    VariableSets,
+    /// Product info (extension subtype 10).
+    ProductInfo,
+    /// Long variable names (extension subtype 13).
+    LongNames,
+    /// Very long strings (extension subtype 14).
+    VeryLongStrings,
+    /// File attributes (extension subtype 17).
+    FileAttributes,
+    /// Variable attributes (extension subtype 18).
+    VariableAttributes,
+}
+
+impl TextRecord {
+    fn new(extension: Extension, rec_type: TextRecordType) -> Self {
+        Self {
+            offsets: extension.offsets,
+            rec_type,
+            text: extension.data.into(),
+        }
+    }
+    pub fn decode(self, decoder: &Decoder) -> DecodedRecord {
+        match self.rec_type {
+            TextRecordType::VariableSets => {
+                DecodedRecord::VariableSets(VariableSetRecord::decode(&self, decoder))
+            }
+            TextRecordType::ProductInfo => {
+                DecodedRecord::ProductInfo(ProductInfoRecord::decode(&self, decoder))
+            }
+            TextRecordType::LongNames => {
+                DecodedRecord::LongNames(LongNamesRecord::decode(&self, decoder))
+            }
+            TextRecordType::VeryLongStrings => {
+                DecodedRecord::VeryLongStrings(VeryLongStringsRecord::decode(&self, decoder))
+            }
+            TextRecordType::FileAttributes => {
+                DecodedRecord::FileAttributes(FileAttributeRecord::decode(&self, decoder))
+            }
+            TextRecordType::VariableAttributes => {
+                DecodedRecord::VariableAttributes(VariableAttributeRecord::decode(&self, decoder))
+            }
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VeryLongString {
+    pub short_name: Identifier,
+    pub length: u16,
+}
+
+impl VeryLongString {
+    fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Warning> {
+        let Some((short_name, length)) = input.split_once('=') else {
+            return Err(Warning::TBD);
+        };
+        let short_name = decoder
+            .new_identifier(short_name)
+            .and_then(Identifier::must_be_ordinary)
+            .map_err(Warning::InvalidLongStringName)?;
+        let length = length.parse().map_err(|_| Warning::TBD)?;
+        Ok(VeryLongString { short_name, length })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VeryLongStringsRecord(pub Vec<VeryLongString>);
+
+impl VeryLongStringsRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
+        let input = decoder.decode(&source.text);
+        let mut very_long_strings = Vec::new();
+        for tuple in input
+            .split('\0')
+            .map(|s| s.trim_end_matches('\t'))
+            .filter(|s| !s.is_empty())
+        {
+            if let Some(vls) = VeryLongString::parse(decoder, tuple).issue_warning(&decoder.warn) {
+                very_long_strings.push(vls)
+            }
+        }
+        VeryLongStringsRecord(very_long_strings)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct Attribute {
+    pub name: Identifier,
+    pub values: Vec<String>,
+}
+
+impl Attribute {
+    fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(Attribute, &'a str), Warning> {
+        let Some((name, mut input)) = input.split_once('(') else {
+            return Err(Warning::TBD);
+        };
+        let name = decoder
+            .new_identifier(name)
+            .map_err(Warning::InvalidAttributeName)?;
+        let mut values = Vec::new();
+        loop {
+            let Some((value, rest)) = input.split_once('\n') else {
+                return Err(Warning::TBD);
+            };
+            if let Some(stripped) = value
+                .strip_prefix('\'')
+                .and_then(|value| value.strip_suffix('\''))
+            {
+                values.push(stripped.into());
+            } else {
+                decoder.warn(Warning::TBD);
+                values.push(value.into());
+            }
+            if let Some(rest) = rest.strip_prefix(')') {
+                let attribute = Attribute { name, values };
+                return Ok((attribute, rest));
+            };
+            input = rest;
+        }
+    }
+}
+
+impl Attributes {
+    fn parse<'a>(
+        decoder: &Decoder,
+        mut input: &'a str,
+        sentinel: Option<char>,
+    ) -> Result<(Attributes, &'a str), Warning> {
+        let mut attributes = HashMap::new();
+        let rest = loop {
+            match input.chars().next() {
+                None => break input,
+                c if c == sentinel => break &input[1..],
+                _ => {
+                    let (attribute, rest) = Attribute::parse(decoder, input)?;
+                    // XXX report duplicate name
+                    attributes.insert(attribute.name, attribute.values);
+                    input = rest;
+                }
+            }
+        };
+        Ok((Attributes(attributes), rest))
+    }
+}
+
+#[derive(Clone, Debug, Default)]
+pub struct FileAttributeRecord(pub Attributes);
+
+impl FileAttributeRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
+        let input = decoder.decode(&source.text);
+        match Attributes::parse(decoder, &input, None).issue_warning(&decoder.warn) {
+            Some((set, rest)) => {
+                if !rest.is_empty() {
+                    decoder.warn(Warning::TBD);
+                }
+                FileAttributeRecord(set)
+            }
+            None => FileAttributeRecord::default(),
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VarAttributes {
+    pub long_var_name: Identifier,
+    pub attributes: Attributes,
+}
+
+impl VarAttributes {
+    fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(VarAttributes, &'a str), Warning> {
+        let Some((long_var_name, rest)) = input.split_once(':') else {
+            return Err(Warning::TBD);
+        };
+        let long_var_name = decoder
+            .new_identifier(long_var_name)
+            .and_then(Identifier::must_be_ordinary)
+            .map_err(Warning::InvalidAttributeVariableName)?;
+        let (attributes, rest) = Attributes::parse(decoder, rest, Some('/'))?;
+        let var_attribute = VarAttributes {
+            long_var_name,
+            attributes,
+        };
+        Ok((var_attribute, rest))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VariableAttributeRecord(pub Vec<VarAttributes>);
+
+impl VariableAttributeRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
+        let decoded = decoder.decode(&source.text);
+        let mut input = decoded.as_ref();
+        let mut var_attribute_sets = Vec::new();
+        while !input.is_empty() {
+            let Some((var_attribute, rest)) =
+                VarAttributes::parse(decoder, input).issue_warning(&decoder.warn)
+            else {
+                break;
+            };
+            var_attribute_sets.push(var_attribute);
+            input = rest;
+        }
+        VariableAttributeRecord(var_attribute_sets)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongName {
+    pub short_name: Identifier,
+    pub long_name: Identifier,
+}
+
+impl LongName {
+    fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
+        let Some((short_name, long_name)) = input.split_once('=') else {
+            return Err(Warning::TBD);
+        };
+        let short_name = decoder
+            .new_identifier(short_name)
+            .and_then(Identifier::must_be_ordinary)
+            .map_err(Warning::InvalidShortName)?;
+        let long_name = decoder
+            .new_identifier(long_name)
+            .and_then(Identifier::must_be_ordinary)
+            .map_err(Warning::InvalidLongName)?;
+        Ok(LongName {
+            short_name,
+            long_name,
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongNamesRecord(pub Vec<LongName>);
+
+impl LongNamesRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
+        let input = decoder.decode(&source.text);
+        let mut names = Vec::new();
+        for pair in input.split('\t').filter(|s| !s.is_empty()) {
+            if let Some(long_name) = LongName::parse(pair, decoder).issue_warning(&decoder.warn) {
+                names.push(long_name);
+            }
+        }
+        LongNamesRecord(names)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct ProductInfoRecord(pub String);
+
+impl ProductInfoRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> Self {
+        Self(decoder.decode(&source.text).into())
+    }
+}
+#[derive(Clone, Debug)]
+pub struct VariableSet {
+    pub name: Identifier,
+    pub variable_names: Vec<Identifier>,
+}
+
+impl VariableSet {
+    fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
+        let (name, input) = input.split_once('=').ok_or(Warning::TBD)?;
+        let name = decoder.new_identifier(name).map_err(|_| Warning::TBD)?;
+        let mut vars = Vec::new();
+        for var in input.split_ascii_whitespace() {
+            if let Some(identifier) = decoder
+                .new_identifier(var)
+                .and_then(Identifier::must_be_ordinary)
+                .map_err(Warning::InvalidVariableSetName)
+                .issue_warning(&decoder.warn)
+            {
+                vars.push(identifier);
+            }
+        }
+        Ok(VariableSet {
+            name,
+            variable_names: vars,
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VariableSetRecord {
+    pub offsets: Range<u64>,
+    pub sets: Vec<VariableSet>,
+}
+
+impl VariableSetRecord {
+    fn decode(source: &TextRecord, decoder: &Decoder) -> VariableSetRecord {
+        let mut sets = Vec::new();
+        let input = decoder.decode(&source.text);
+        for line in input.lines() {
+            if let Some(set) = VariableSet::parse(line, decoder).issue_warning(&decoder.warn) {
+                sets.push(set)
+            }
+        }
+        VariableSetRecord {
+            offsets: source.offsets.clone(),
+            sets,
+        }
+    }
+}
+
+trait IssueWarning<T> {
+    fn issue_warning<F>(self, warn: &F) -> Option<T>
+    where
+        F: Fn(Warning);
+}
+impl<T> IssueWarning<T> for Result<T, Warning> {
+    fn issue_warning<F>(self, warn: &F) -> Option<T>
+    where
+        F: Fn(Warning),
+    {
+        match self {
+            Ok(result) => Some(result),
+            Err(error) => {
+                warn(error);
+                None
+            }
+        }
+    }
+}
+
+/// An extension record in raw form: its header fields plus its data, not yet
+/// interpreted according to the subtype.
+#[derive(Clone, Debug)]
+pub struct Extension {
+    /// File offsets of `data`, excluding the record's header fields.
+    pub offsets: Range<u64>,
+
+    /// Record subtype.
+    pub subtype: u32,
+
+    /// Size of each data element.
+    pub size: u32,
+
+    /// Number of data elements.
+    pub count: u32,
+
+    /// `size * count` bytes of data.
+    pub data: Vec<u8>,
+}
+
+impl Extension {
+    fn check_size<E: ExtensionRecord>(&self) -> Result<(), Warning> {
+        if let Some(expected_size) = E::SIZE {
+            if self.size != expected_size {
+                return Err(Warning::BadRecordSize {
+                    offset: self.offsets.start,
+                    record: E::NAME.into(),
+                    size: self.size,
+                    expected_size,
+                });
+            }
+        }
+        if let Some(expected_count) = E::COUNT {
+            if self.count != expected_count {
+                return Err(Warning::BadRecordCount {
+                    offset: self.offsets.start,
+                    record: E::NAME.into(),
+                    count: self.count,
+                    expected_count,
+                });
+            }
+        }
+        Ok(())
+    }
+
+    fn read<R: Read + Seek>(
+        r: &mut R,
+        endian: Endian,
+        n_vars: usize,
+        warn: &dyn Fn(Warning),
+    ) -> Result<Option<Record>, Error> {
+        let subtype = endian.parse(read_bytes(r)?);
+        let header_offset = r.stream_position()?;
+        let size: u32 = endian.parse(read_bytes(r)?);
+        let count = endian.parse(read_bytes(r)?);
+        let Some(product) = size.checked_mul(count) else {
+            return Err(Error::ExtensionRecordTooLarge {
+                offset: header_offset,
+                subtype,
+                size,
+                count,
+            });
+        };
+        let start_offset = r.stream_position()?;
+        let data = read_vec(r, product as usize)?;
+        let end_offset = start_offset + product as u64;
+        let extension = Extension {
+            offsets: start_offset..end_offset,
+            subtype,
+            size,
+            count,
+            data,
+        };
+        let result = match subtype {
+            IntegerInfoRecord::SUBTYPE => IntegerInfoRecord::parse(&extension, endian),
+            FloatInfoRecord::SUBTYPE => FloatInfoRecord::parse(&extension, endian),
+            VarDisplayRecord::SUBTYPE => VarDisplayRecord::parse(&extension, n_vars, endian, warn),
+            MultipleResponseRecord::SUBTYPE | 19 => {
+                MultipleResponseRecord::parse(&extension, endian)
+            }
+            LongStringValueLabelRecord::SUBTYPE => {
+                LongStringValueLabelRecord::parse(&extension, endian)
+            }
+            EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian),
+            NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian),
+            5 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::VariableSets,
+            ))),
+            10 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::ProductInfo,
+            ))),
+            13 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::LongNames,
+            ))),
+            14 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::VeryLongStrings,
+            ))),
+            17 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::FileAttributes,
+            ))),
+            18 => Ok(Record::Text(TextRecord::new(
+                extension,
+                TextRecordType::VariableAttributes,
+            ))),
+            _ => Ok(Record::OtherExtension(extension)),
+        };
+        match result {
+            Ok(result) => Ok(Some(result)),
+            Err(error) => {
+                warn(error);
+                Ok(None)
+            }
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct ZHeader {
+    /// File offset to the start of the record.
+    pub offset: u64,
+
+    /// File offset to the ZLIB data header.
+    pub zheader_offset: u64,
+
+    /// File offset to the ZLIB trailer.
+    pub ztrailer_offset: u64,
+
+    /// Length of the ZLIB trailer in bytes.
+    pub ztrailer_len: u64,
+}
+
+impl ZHeader {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
+        let offset = r.stream_position()?;
+        let zheader_offset: u64 = endian.parse(read_bytes(r)?);
+        let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
+        let ztrailer_len: u64 = endian.parse(read_bytes(r)?);
+
+        Ok(ZHeader {
+            offset,
+            zheader_offset,
+            ztrailer_offset,
+            ztrailer_len,
+        })
+    }
+}
+
+/// The ZLIB trailer, which describes the compressed data blocks.
+#[derive(Clone, Debug)]
+pub struct ZTrailer {
+    /// File offset to the start of the record.
+    pub offset: u64,
+
+    /// Compression bias as a negative integer, e.g. -100.
+    pub int_bias: i64,
+
+    /// Always observed as zero.
+    pub zero: u64,
+
+    /// Uncompressed size of each block, except possibly the last.  Only
+    /// `0x3ff000` has been observed so far.
+    pub block_size: u32,
+
+    /// Block descriptors, always `(ztrailer_len - 24) / 24` of them.
+    pub blocks: Vec<ZBlock>,
+}
+
+#[derive(Clone, Debug)]
+pub struct ZBlock {
+    /// Offset of block of data if simple compression were used.
+    pub uncompressed_ofs: u64,
+
+    /// Actual offset within the file of the compressed data block.
+    pub compressed_ofs: u64,
+
+    /// The number of bytes in this data block after decompression.  This is
+    /// `block_size` in every data block but the last, which may be smaller.
+    pub uncompressed_size: u32,
+
+    /// The number of bytes in this data block, as stored compressed in this
+    /// file.
+    pub compressed_size: u32,
+}
+
+impl ZBlock {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZBlock, Error> {
+        Ok(ZBlock {
+            uncompressed_ofs: endian.parse(read_bytes(r)?),
+            compressed_ofs: endian.parse(read_bytes(r)?),
+            uncompressed_size: endian.parse(read_bytes(r)?),
+            compressed_size: endian.parse(read_bytes(r)?),
+        })
+    }
+}
+
+impl ZTrailer {
+    fn read<R: Read + Seek>(
+        reader: &mut R,
+        endian: Endian,
+        ztrailer_ofs: u64,
+        ztrailer_len: u64,
+    ) -> Result<Option<ZTrailer>, Error> {
+        let start_offset = reader.stream_position()?;
+        if reader.seek(SeekFrom::Start(ztrailer_ofs)).is_err() {
+            return Ok(None);
+        }
+        let int_bias = endian.parse(read_bytes(reader)?);
+        let zero = endian.parse(read_bytes(reader)?);
+        let block_size = endian.parse(read_bytes(reader)?);
+        let n_blocks: u32 = endian.parse(read_bytes(reader)?);
+        let expected_n_blocks = (ztrailer_len - 24) / 24;
+        if n_blocks as u64 != expected_n_blocks {
+            return Err(Error::BadZlibTrailerNBlocks {
+                offset: ztrailer_ofs,
+                n_blocks,
+                expected_n_blocks,
+                ztrailer_len,
+            });
+        }
+        let blocks = (0..n_blocks)
+            .map(|_| ZBlock::read(reader, endian))
+            .collect::<Result<Vec<_>, _>>()?;
+        reader.seek(SeekFrom::Start(start_offset))?;
+        Ok(Some(ZTrailer {
+            offset: ztrailer_ofs,
+            int_bias,
+            zero,
+            block_size,
+            blocks,
+        }))
+    }
+}
+
+fn try_read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<Option<[u8; N]>, IoError> {
+    let mut buf = [0; N];
+    let n = r.read(&mut buf)?;
+    if n > 0 {
+        if n < N {
+            r.read_exact(&mut buf[n..])?;
+        }
+        Ok(Some(buf))
+    } else {
+        Ok(None)
+    }
+}
+
+fn read_bytes<const N: usize, R: Read>(r: &mut R) -> Result<[u8; N], IoError> {
+    let mut buf = [0; N];
+    r.read_exact(&mut buf)?;
+    Ok(buf)
+}
+
+fn read_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>, IoError> {
+    let mut vec = vec![0; n];
+    r.read_exact(&mut vec)?;
+    Ok(vec)
+}
+
+fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<RawString, IoError> {
+    let length: u32 = endian.parse(read_bytes(r)?);
+    Ok(read_vec(r, length as usize)?.into())
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabels<N, S>
+where
+    S: Debug,
+{
+    pub var_name: N,
+    pub width: u32,
+
+    /// `(value, label)` pairs, where each value is `width` bytes.
+    pub labels: Vec<(RawString, S)>,
+}
+
+impl LongStringValueLabels<RawString, RawString> {
+    fn decode(
+        &self,
+        decoder: &Decoder,
+    ) -> Result<LongStringValueLabels<Identifier, String>, Warning> {
+        let var_name = decoder.decode(&self.var_name);
+        let var_name = Identifier::from_encoding(var_name.trim_end(), decoder.encoding)
+            .map_err(Warning::InvalidLongStringValueLabelName)?;
+
+        let mut labels = Vec::with_capacity(self.labels.len());
+        for (value, label) in self.labels.iter() {
+            let label = decoder.decode(label).to_string();
+            labels.push((value.clone(), label));
+        }
+
+        Ok(LongStringValueLabels {
+            var_name,
+            width: self.width,
+            labels,
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabelRecord<N, S>(pub Vec<LongStringValueLabels<N, S>>)
+where
+    N: Debug,
+    S: Debug;
+
+impl ExtensionRecord for LongStringValueLabelRecord<RawString, RawString> {
+    const SUBTYPE: u32 = 21;
+    const SIZE: Option<u32> = Some(1);
+    const COUNT: Option<u32> = None;
+    const NAME: &'static str = "long string value labels record";
+
+    fn parse(ext: &Extension, endian: Endian) -> Result<Record, Warning> {
+        ext.check_size::<Self>()?;
+
+        let mut input = &ext.data[..];
+        let mut label_set = Vec::new();
+        while !input.is_empty() {
+            let var_name = read_string(&mut input, endian)?;
+            let width: u32 = endian.parse(read_bytes(&mut input)?);
+            let n_labels: u32 = endian.parse(read_bytes(&mut input)?);
+            let mut labels = Vec::new();
+            for _ in 0..n_labels {
+                let value = read_string(&mut input, endian)?;
+                let label = read_string(&mut input, endian)?;
+                labels.push((value, label));
+            }
+            label_set.push(LongStringValueLabels {
+                var_name,
+                width,
+                labels,
+            })
+        }
+        Ok(Record::LongStringValueLabels(LongStringValueLabelRecord(
+            label_set,
+        )))
+    }
+}
+
+impl LongStringValueLabelRecord<RawString, RawString> {
+    fn decode(self, decoder: &Decoder) -> LongStringValueLabelRecord<Identifier, String> {
+        let mut labels = Vec::with_capacity(self.0.len());
+        for label in &self.0 {
+            match label.decode(decoder) {
+                Ok(set) => labels.push(set),
+                Err(error) => decoder.warn(error),
+            }
+        }
+        LongStringValueLabelRecord(labels)
+    }
+}
+
+#[derive(Default)]
+pub struct VarTypes {
+    pub types: Vec<Option<VarType>>,
+}
+
+impl VarTypes {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn push(&mut self, width: RawWidth) {
+        if let Ok(var_type) = VarType::try_from(width) {
+            self.types.push(Some(var_type));
+            for _ in 1..width.n_values().unwrap() {
+                self.types.push(None);
+            }
+        }
+    }
+
+    pub fn n_values(&self) -> usize {
+        self.types.len()
+    }
+
+    pub fn is_valid_index(&self, index: usize) -> bool {
+        self.var_type_at(index).is_some()
+    }
+
+    pub fn var_type_at(&self, index: usize) -> Option<VarType> {
+        if index >= 1 && index <= self.types.len() {
+            self.types[index - 1]
+        } else {
+            None
+        }
+    }
+
+    pub fn iter(&self) -> impl Iterator<Item = VarType> + use<'_> {
+        self.types
+            .iter()
+            .map(|var_type| var_type.unwrap_or(VarType::String))
+    }
+}
diff --git a/rust/pspp/src/sys/sack.rs b/rust/pspp/src/sys/sack.rs
new file mode 100644 (file)
index 0000000..103a9be
--- /dev/null
@@ -0,0 +1,633 @@
+use float_next_after::NextAfter;
+use num::{Bounded, Zero};
+use ordered_float::OrderedFloat;
+use std::{
+    collections::{hash_map::Entry, HashMap},
+    error::Error as StdError,
+    fmt::{Display, Formatter, Result as FmtResult},
+    iter::repeat_n,
+};
+
+use crate::endian::{Endian, ToBytes};
+
+pub type Result<T, F = Error> = std::result::Result<T, F>;
+
+/// An error detected while translating `sack` input, with as much location
+/// information as was available when it was raised.
+#[derive(Debug)]
+pub struct Error {
+    /// Name of the input file, if the caller supplied one.
+    pub file_name: Option<String>,
+    /// Line number at which the error was reported, if known.
+    pub line_number: Option<usize>,
+    /// Source text of the token the lexer was holding when the error was
+    /// reported, if any.
+    pub token: Option<String>,
+    /// Description of the problem.
+    pub message: String,
+}
+
+impl Error {
+    fn new(
+        file_name: Option<&str>,
+        line_number: Option<usize>,
+        token: Option<&str>,
+        message: String,
+    ) -> Error {
+        Error {
+            file_name: file_name.map(String::from),
+            line_number,
+            token: token.map(String::from),
+            message,
+        }
+    }
+}
+
+// The default `std::error::Error` methods are used as-is; the `Display` impl
+// supplies the message text.
+impl StdError for Error {}
+
+impl Display for Error {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        match (self.file_name.as_ref(), self.line_number) {
+            (Some(ref file_name), Some(line_number)) => write!(f, "{file_name}:{line_number}: ")?,
+            (Some(ref file_name), None) => write!(f, "{file_name}: ")?,
+            (None, Some(line_number)) => write!(f, "line {line_number}: ")?,
+            (None, None) => (),
+        }
+        if let Some(ref token) = self.token {
+            write!(f, "at '{token}': ")?;
+        }
+        write!(f, "{}", self.message)
+    }
+}
+
+pub fn sack(input: &str, input_file_name: Option<&str>, endian: Endian) -> Result<Vec<u8>> {
+    let mut symbol_table = HashMap::new();
+    let output = _sack(input, input_file_name, endian, &mut symbol_table)?;
+    let output = if !symbol_table.is_empty() {
+        for (k, v) in symbol_table.iter() {
+            println!("{k} => {v:?}");
+        }
+        for (k, v) in symbol_table.iter() {
+            if v.is_none() {
+                Err(Error::new(
+                    input_file_name,
+                    None,
+                    None,
+                    format!("label {k} used but never defined"),
+                ))?
+            }
+        }
+        _sack(input, input_file_name, endian, &mut symbol_table)?
+    } else {
+        output
+    };
+    Ok(output)
+}
+
+fn _sack(
+    input: &str,
+    input_file_name: Option<&str>,
+    endian: Endian,
+    symbol_table: &mut HashMap<String, Option<u32>>,
+) -> Result<Vec<u8>> {
+    let mut lexer = Lexer::new(input, input_file_name, endian)?;
+    let mut output = Vec::new();
+    while parse_data_item(&mut lexer, &mut output, symbol_table)? {}
+    Ok(output)
+}
+
+fn parse_data_item(
+    lexer: &mut Lexer,
+    output: &mut Vec<u8>,
+    symbol_table: &mut HashMap<String, Option<u32>>,
+) -> Result<bool> {
+    if lexer.token.is_none() {
+        return Ok(false);
+    };
+
+    let initial_len = output.len();
+    match lexer.take()? {
+        Token::Integer(integer) => {
+            if let Ok(integer) = TryInto::<i32>::try_into(integer) {
+                output.extend_from_slice(&lexer.endian.to_bytes(integer));
+            } else if let Ok(integer) = TryInto::<u32>::try_into(integer) {
+                output.extend_from_slice(&lexer.endian.to_bytes(integer));
+            } else {
+                Err(lexer.error(format!(
+                    "{integer} is not in the valid range [{},{}]",
+                    i32::MIN,
+                    u32::MAX
+                )))?;
+            };
+        }
+        Token::Float(float) => output.extend_from_slice(&lexer.endian.to_bytes(float.0)),
+        Token::PcSysmis => {
+            output.extend_from_slice(&[0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff])
+        }
+        Token::I8 => put_integers::<u8, 1>(lexer, "i8", output)?,
+        Token::I16 => put_integers::<u16, 2>(lexer, "i16", output)?,
+        Token::I64 => put_integers::<i64, 8>(lexer, "i64", output)?,
+        Token::String(string) => output.extend_from_slice(string.as_bytes()),
+        Token::S(size) => {
+            let Some((Token::String(ref string), _)) = lexer.token else {
+                Err(lexer.error(format!("string expected after 's{size}'")))?
+            };
+            let len = string.len();
+            if len > size {
+                Err(lexer.error(format!(
+                    "{len}-byte string is longer than pad length {size}"
+                )))?
+            }
+            output.extend_from_slice(string.as_bytes());
+            output.extend(repeat_n(b' ', size - len));
+            lexer.get()?;
+        }
+        Token::LParen => {
+            while !matches!(lexer.token, Some((Token::RParen, _))) {
+                parse_data_item(lexer, output, symbol_table)?;
+            }
+            lexer.get()?;
+        }
+        Token::Count => put_counted_items::<u32, 4>(lexer, "COUNT", output, symbol_table)?,
+        Token::Count8 => put_counted_items::<u8, 1>(lexer, "COUNT8", output, symbol_table)?,
+        Token::Hex => {
+            let Some((Token::String(ref string), _)) = lexer.token else {
+                Err(lexer.error(String::from("string expected after 'hex'")))?
+            };
+            let mut string = &string[..];
+            loop {
+                string = string.trim_start();
+                if string.is_empty() {
+                    break;
+                };
+
+                let mut i = string.chars();
+                let Some(c0) = i.next() else { return Ok(true) };
+                let Some(c1) = i.next() else {
+                    Err(lexer.error(String::from("hex string has odd number of characters")))?
+                };
+
+                let (Some(digit0), Some(digit1)) = (c0.to_digit(16), c1.to_digit(16)) else {
+                    Err(lexer.error(String::from("invalid digit in hex string")))?
+                };
+                let byte = digit0 * 16 + digit1;
+                output.push(byte as u8);
+
+                string = i.as_str();
+            }
+            lexer.get()?;
+        }
+        Token::Label(name) => {
+            println!("define {name}");
+            let value = output.len() as u32;
+            match symbol_table.entry(name.clone()) {
+                Entry::Vacant(v) => {
+                    v.insert(Some(value));
+                }
+                Entry::Occupied(mut o) => {
+                    match o.get() {
+                        Some(v) => {
+                            if *v != value {
+                                Err(lexer.error(format!("{name}: can't redefine label for offset {:#x} with offset {:#x}", *v, value)))?
+                            }
+                        }
+                        None => drop(o.insert(Some(value))),
+                    }
+                }
+            };
+            return Ok(true);
+        }
+        Token::At(name) => {
+            let mut value = *symbol_table.entry(name.clone()).or_insert(None);
+            loop {
+                let plus = match lexer.token {
+                    Some((Token::Plus, _)) => true,
+                    Some((Token::Minus, _)) => false,
+                    _ => break,
+                };
+                lexer.get()?;
+
+                let operand = match lexer.token {
+                    Some((Token::At(ref name), _)) => {
+                        *symbol_table.entry(name.clone()).or_insert(None)
+                    }
+                    Some((Token::Integer(integer), _)) => Some(
+                        integer
+                            .try_into()
+                            .map_err(|msg| lexer.error(format!("bad offset literal ({msg})")))?,
+                    ),
+                    _ => Err(lexer.error(String::from("expecting @label or integer literal")))?,
+                };
+                lexer.get()?;
+
+                value = match (value, operand) {
+                    (Some(a), Some(b)) => Some(
+                        if plus {
+                            a.checked_add(b)
+                        } else {
+                            a.checked_sub(b)
+                        }
+                        .ok_or_else(|| {
+                            lexer.error(String::from("overflow in offset arithmetic"))
+                        })?,
+                    ),
+                    _ => None,
+                };
+            }
+            let value = value.unwrap_or(0);
+            output.extend_from_slice(&lexer.endian.to_bytes(value));
+        }
+        _ => (),
+    };
+    if let Some((Token::Asterisk, _)) = lexer.token {
+        lexer.get()?;
+        let Token::Integer(count) = lexer.take()? else {
+            Err(lexer.error(String::from("positive integer expected after '*'")))?
+        };
+        if count < 1 {
+            Err(lexer.error(String::from("positive integer expected after '*'")))?
+        };
+        let final_len = output.len();
+        for _ in 1..count {
+            output.extend_from_within(initial_len..final_len);
+        }
+    }
+    match lexer.token {
+        Some((Token::Semicolon, _)) => {
+            lexer.get()?;
+        }
+        Some((Token::RParen, _)) => (),
+        _ => Err(lexer.error(String::from("';' expected")))?,
+    }
+    Ok(true)
+}
+
+fn put_counted_items<T, const N: usize>(
+    lexer: &mut Lexer,
+    name: &str,
+    output: &mut Vec<u8>,
+    symbol_table: &mut HashMap<String, Option<u32>>,
+) -> Result<()>
+where
+    T: Zero + TryFrom<usize>,
+    Endian: ToBytes<T, N>,
+{
+    let old_size = output.len();
+    output.extend_from_slice(&lexer.endian.to_bytes(T::zero()));
+    let start = output.len();
+    if !matches!(lexer.token, Some((Token::LParen, _))) {
+        Err(lexer.error(format!("'(' expected after '{name}'")))?
+    }
+    lexer.get()?;
+    while !matches!(lexer.token, Some((Token::RParen, _))) {
+        parse_data_item(lexer, output, symbol_table)?;
+    }
+    lexer.get()?;
+    let delta = output.len() - start;
+    let Ok(delta): Result<T, _> = delta.try_into() else {
+        Err(lexer.error(format!("{delta} bytes is too much for '{name}'")))?
+    };
+    let dest = &mut output[old_size..old_size + N];
+    dest.copy_from_slice(&lexer.endian.to_bytes(delta));
+    Ok(())
+}
+
+fn put_integers<T, const N: usize>(
+    lexer: &mut Lexer,
+    name: &str,
+    output: &mut Vec<u8>,
+) -> Result<()>
+where
+    T: Bounded + Display + TryFrom<i64> + Copy,
+    Endian: ToBytes<T, N>,
+{
+    println!("put_integers {:?}", lexer.token);
+    let mut n = 0;
+    while let Some(integer) = lexer.take_if(|t| match t {
+        Token::Integer(integer) => Some(*integer),
+        _ => None,
+    })? {
+        println!("got integer {integer}");
+        let Ok(integer) = integer.try_into() else {
+            Err(lexer.error(format!(
+                "{integer} is not in the valid range [{},{}]",
+                T::min_value(),
+                T::max_value()
+            )))?
+        };
+        output.extend_from_slice(&lexer.endian.to_bytes(integer));
+        n += 1;
+    }
+    println!("put_integers {:?} {n}", lexer.token);
+    if n == 0 {
+        Err(lexer.error(format!("integer expected after '{name}'")))?
+    }
+    Ok(())
+}
+
+/// A lexical token of the `sack` input language.
+#[derive(PartialEq, Eq, Clone, Debug)]
+enum Token {
+    /// Integer literal (decimal or `0x` hexadecimal), or the `ENDIAN` keyword.
+    Integer(i64),
+    /// Floating-point literal, or one of the keywords `SYSMIS`, `LOWEST`,
+    /// `HIGHEST`.
+    Float(OrderedFloat<f64>),
+    /// The `PCSYSMIS` keyword.
+    PcSysmis,
+    /// Double-quoted string, without the quotes.
+    String(String),
+    /// `;`
+    Semicolon,
+    /// `*`
+    Asterisk,
+    /// `(`
+    LParen,
+    /// `)`
+    RParen,
+    /// The `i8` keyword.
+    I8,
+    /// The `i16` keyword.
+    I16,
+    /// The `i64` keyword.
+    I64,
+    /// `sN`, where `N` is the pad length for the string that follows.
+    S(usize),
+    /// The `COUNT` keyword.
+    Count,
+    /// The `COUNT8` keyword.
+    Count8,
+    /// The `hex` keyword.
+    Hex,
+    /// `NAME:`, a label definition.
+    Label(String),
+    /// `@NAME`, a label reference.
+    At(String),
+    /// A `-` that stands alone rather than starting a number.
+    Minus,
+    /// `+`
+    Plus,
+}
+
+/// Tokenizer for `sack` input that always holds one token of lookahead.
+struct Lexer<'a> {
+    /// Input that has not been tokenized yet.
+    input: &'a str,
+    /// The lookahead token together with its source text, or `None` at end of
+    /// input.
+    token: Option<(Token, &'a str)>,
+    /// Name of the input file, used only in error messages.
+    input_file_name: Option<&'a str>,
+    /// Line number, starting at 1, used in error messages.
+    line_number: usize,
+    /// Byte order for output; also determines the value of `ENDIAN`.
+    endian: Endian,
+}
+
+fn skip_comments(mut s: &str) -> (&str, usize) {
+    let mut n_newlines = 0;
+    let s = loop {
+        s = s.trim_start_matches([' ', '\t', '\r', '<', '>']);
+        if let Some(remainder) = s.strip_prefix('#') {
+            let Some((_, remainder)) = remainder.split_once('\n') else {
+                break "";
+            };
+            s = remainder;
+            n_newlines += 1;
+        } else if let Some(remainder) = s.strip_prefix('\n') {
+            s = remainder;
+            n_newlines += 1;
+        } else {
+            break s;
+        }
+    };
+    (s, n_newlines)
+}
+
+impl<'a> Lexer<'a> {
+    fn new(input: &'a str, input_file_name: Option<&'a str>, endian: Endian) -> Result<Lexer<'a>> {
+        let mut lexer = Lexer {
+            input,
+            token: None,
+            input_file_name,
+            line_number: 1,
+            endian,
+        };
+        lexer.token = lexer.next()?;
+        Ok(lexer)
+    }
+    fn error(&self, message: String) -> Error {
+        let repr = self.token.as_ref().map(|(_, repr)| *repr);
+        Error::new(self.input_file_name, Some(self.line_number), repr, message)
+    }
+    fn take(&mut self) -> Result<Token> {
+        let Some(token) = self.token.take() else {
+            Err(self.error(String::from("unexpected end of input")))?
+        };
+        self.token = self.next()?;
+        Ok(token.0)
+    }
+    fn take_if<F, T>(&mut self, condition: F) -> Result<Option<T>>
+    where
+        F: FnOnce(&Token) -> Option<T>,
+    {
+        let Some(ref token) = self.token else {
+            return Ok(None);
+        };
+        match condition(&token.0) {
+            Some(value) => {
+                self.token = self.next()?;
+                Ok(Some(value))
+            }
+            None => Ok(None),
+        }
+    }
+    fn get(&mut self) -> Result<Option<&Token>> {
+        if self.token.is_none() {
+            Err(self.error(String::from("unexpected end of input")))?
+        } else {
+            self.token = self.next()?;
+            match self.token {
+                Some((ref token, _)) => Ok(Some(token)),
+                None => Ok(None),
+            }
+        }
+    }
+
+    fn next(&mut self) -> Result<Option<(Token, &'a str)>> {
+        // Get the first character of the token, skipping past white space and
+        // comments.
+        let (s, n_newlines) = skip_comments(self.input);
+        self.line_number += n_newlines;
+        self.input = s;
+
+        let start = s;
+        let mut iter = s.chars();
+        let Some(c) = iter.next() else {
+            return Ok(None);
+        };
+        let (token, rest) = match c {
+            c if c.is_ascii_digit() || c == '-' => {
+                let len = s
+                    .find(|c: char| {
+                        !(c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '-')
+                    })
+                    .unwrap_or(s.len());
+                let (number, rest) = s.split_at(len);
+                let token = if number == "-" {
+                    Token::Minus
+                } else if let Some(digits) = number.strip_prefix("0x") {
+                    Token::Integer(i64::from_str_radix(digits, 16).map_err(|msg| {
+                        self.error(format!("bad integer literal '{number}' ({msg})"))
+                    })?)
+                } else if !number.contains('.') {
+                    Token::Integer(number.parse().map_err(|msg| {
+                        self.error(format!("bad integer literal '{number}' ({msg})"))
+                    })?)
+                } else {
+                    Token::Float(number.parse().map_err(|msg| {
+                        self.error(format!("bad float literal '{number}' ({msg})"))
+                    })?)
+                };
+                (token, rest)
+            }
+            '"' => {
+                let s = iter.as_str();
+                let Some(len) = s.find(['\n', '"']) else {
+                    Err(self.error(String::from("end-of-file inside string")))?
+                };
+                let (string, rest) = s.split_at(len);
+                let Some(rest) = rest.strip_prefix('"') else {
+                    Err(self.error(format!("new-line inside string ({string}...{rest})")))?
+                };
+                (Token::String(string.into()), rest)
+            }
+            ';' => (Token::Semicolon, iter.as_str()),
+            '*' => (Token::Asterisk, iter.as_str()),
+            '+' => (Token::Plus, iter.as_str()),
+            '(' => (Token::LParen, iter.as_str()),
+            ')' => (Token::RParen, iter.as_str()),
+            c if c.is_alphabetic() || c == '@' || c == '_' => {
+                let len = s
+                    .find(|c: char| {
+                        !(c.is_ascii_digit()
+                            || c.is_alphabetic()
+                            || c == '@'
+                            || c == '.'
+                            || c == '_')
+                    })
+                    .unwrap_or(s.len());
+                let (s, rest) = s.split_at(len);
+                if let Some(rest) = rest.strip_prefix(':') {
+                    (Token::Label(s.into()), rest)
+                } else if let Some(name) = s.strip_prefix('@') {
+                    (Token::At(name.into()), rest)
+                } else if let Some(count) = s.strip_prefix('s') {
+                    let token =
+                        Token::S(count.parse().map_err(|msg| {
+                            self.error(format!("bad counted string '{s}' ({msg})"))
+                        })?);
+                    (token, rest)
+                } else {
+                    let token = match s {
+                        "i8" => Token::I8,
+                        "i16" => Token::I16,
+                        "i64" => Token::I64,
+                        "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)),
+                        "PCSYSMIS" => Token::PcSysmis,
+                        "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()),
+                        "HIGHEST" => Token::Float(f64::MAX.into()),
+                        "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }),
+                        "COUNT" => Token::Count,
+                        "COUNT8" => Token::Count8,
+                        "hex" => Token::Hex,
+                        _ => Err(self.error(format!("invalid token '{s}'")))?,
+                    };
+                    (token, rest)
+                }
+            }
+            _ => Err(self.error(format!("invalid input byte '{c}'")))?,
+        };
+        self.input = rest;
+        let repr = &start[..start.len() - rest.len()];
+        println!("{token:?} {repr}");
+        Ok(Some((token, repr)))
+    }
+}
+
+// Smoke tests for `sack`.  Each test only checks that translation succeeds
+// and prints a hex dump of the result; the output bytes are not compared
+// against expected values.
+#[cfg(test)]
+mod test {
+    use crate::endian::Endian;
+    use crate::sys::sack::sack;
+    use anyhow::Result;
+    use hexplay::HexView;
+
+    // Exercises strings, padded strings, integers, floats, `i8` lists, and
+    // `*` repetition.
+    #[test]
+    fn basic_sack() -> Result<()> {
+        let input = r#"
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; # Layout code
+28; # Nominal case size
+0; # Not compressed
+0; # Not weighted
+1; # 1 case.
+100.0; # Bias.
+"01 Jan 11"; "20:53:52";
+"PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 "";
+i8 0 *3;
+"#;
+        let output = sack(input, None, Endian::Big)?;
+        HexView::new(&output).print()?;
+        Ok(())
+    }
+
+    // Exercises labels, forward `@label` references with arithmetic,
+    // parenthesized groups, `COUNT8`, and `PCSYSMIS`.
+    #[test]
+    fn pcp_sack() -> Result<()> {
+        let input = r#"
+# File header.
+2; 0;
+@MAIN; @MAIN_END - @MAIN;
+@VARS; @VARS_END - @VARS;
+@LABELS; @LABELS_END - @LABELS;
+@DATA; @DATA_END - @DATA;
+(0; 0) * 11;
+i8 0 * 128;
+
+MAIN:
+    i16 1;         # Fixed.
+    s62 "PCSPSS PSPP synthetic test product";
+    PCSYSMIS;
+    0; 0; i16 1;   # Fixed.
+    i16 0;
+    i16 15;
+    1;
+    i16 0;         # Fixed.
+    1;
+    s8 "11/28/14";
+    s8 "15:11:00";
+    s64 "PSPP synthetic test file";
+MAIN_END:
+
+VARS:
+    0; 0; 0; 0x050800; s8 "$CASENUM"; PCSYSMIS;
+    0; 0; 0; 0x010800; s8 "$DATE"; PCSYSMIS;
+    0; 0; 0; 0x050802; s8 "$WEIGHT"; PCSYSMIS;
+
+    # Numeric variable, no label or missing values.
+    0; 0; 0; 0x050800; s8 "NUM1"; PCSYSMIS;
+
+    # Numeric variable, variable label.
+    0; 0; @NUM2_LABEL - @LABELS_OFS; 0x050800; s8 "NUM2"; PCSYSMIS;
+
+    # Numeric variable with missing value.
+    0; 0; 0; 0x050800; s8 "NUM3"; 1.0;
+
+    # Numeric variable, variable label and missing value.
+    0; 0; @NUM4_LABEL - @LABELS_OFS; 0x050800; s8 "NUM4"; 2.0;
+
+    # String variable, no label or missing values.
+    0; 0; 0; 0x010800; s8 "STR1"; PCSYSMIS;
+
+    # String variable, variable label.
+    0; 0; @STR2_LABEL - @LABELS_OFS; 0x010400; s8 "STR2"; PCSYSMIS;
+
+    # String variable with missing value.
+    0; 0; 0; 0x010500; s8 "STR3"; s8 "MISS";
+
+    # String variable, variable label and missing value.
+    0; 0; @STR4_LABEL - @LABELS_OFS; 0x010100; s8 "STR4"; s8 "OTHR";
+
+    # Long string variable
+    0; 0; 0; 0x010b00; s8 "STR5"; PCSYSMIS;
+    0 * 8;
+
+    # Long string variable with variable label
+    0; 0; @STR6_LABEL - @LABELS_OFS; 0x010b00; s8 "STR6"; PCSYSMIS;
+    0 * 8;
+VARS_END:
+
+LABELS:
+    3; i8 0 0 0; LABELS_OFS: i8 0;
+    NUM2_LABEL: COUNT8("Numeric variable 2's label");
+    NUM4_LABEL: COUNT8("Another numeric variable label");
+    STR2_LABEL: COUNT8("STR2's variable label");
+    STR4_LABEL: COUNT8("STR4's variable label");
+    STR6_LABEL: COUNT8("Another string variable's label");
+LABELS_END:
+
+DATA:
+    0.0; "11/28/14"; 1.0;
+    0.0; 1.0; 2.0; PCSYSMIS; s8 "abcdefgh"; s8 "ijkl"; s8 "mnopq"; s8 "r";
+    s16 "stuvwxyzAB"; s16 "CDEFGHIJKLM";
+DATA_END:
+"#;
+        let output = sack(input, None, Endian::Big)?;
+        HexView::new(&output).print()?;
+        Ok(())
+    }
+}
index 49b10e77acc52b8c423f4a09ed60f72d44dd4266..5be80ea45fc612e9c34c8c02292ee4ffbe105cb0 100644 (file)
@@ -4,7 +4,7 @@ use std::path::PathBuf;
 use anyhow::{anyhow, Result};
 use clap::Parser;
 use pspp::endian::Endian;
-use pspp::sack::sack;
+use pspp::sys::sack::sack;
 
 /// SAv Construction Kit
 ///
index d7bc1b611f5c0165bffac24203fb7210d4bb49ee..9536727a0f78d08b047feaeb1b639597462934cc 100644 (file)
@@ -357,10 +357,19 @@ classify_token (enum token_type type)
   NOT_REACHED ();
 }
 
-/* Appends syntax for the tokens in MTS to S. */
+/* Appends syntax for the tokens in MTS to S.  If OFS and LEN are nonnull, sets
+   OFS[i] to the offset within S of the start of token 'i' in MTS and LEN[i] to
+   its length.  OFS[i] + LEN[i] is not necessarily OFS[i + 1] because some
+   tokens are separated by white space.  */
 void
-macro_tokens_to_syntax (struct macro_tokens *mts, struct string *s)
+macro_tokens_to_syntax (struct macro_tokens *mts, struct string *s,
+                        size_t *ofs, size_t *len)
 {
+  assert ((ofs != NULL) == (len != NULL));
+
+  if (!mts->n)
+    return;
+
   for (size_t i = 0; i < mts->n; i++)
     {
       if (i > 0)
@@ -379,7 +388,11 @@ macro_tokens_to_syntax (struct macro_tokens *mts, struct string *s)
             }
         }
 
+      if (ofs)
+        ofs[i] = s->ss.length;
       macro_token_to_syntax (&mts->mts[i], s);
+      if (len)
+        len[i] = s->ss.length - ofs[i];
     }
 }
 
@@ -925,7 +938,7 @@ parse_function_arg (const struct macro_expander *me,
       if (param)
         {
           size_t param_idx = param - me->macro->params;
-          macro_tokens_to_syntax (me->args[param_idx], farg);
+          macro_tokens_to_syntax (me->args[param_idx], farg, NULL, NULL);
           return 1;
         }
 
@@ -937,7 +950,7 @@ parse_function_arg (const struct macro_expander *me,
                 break;
               if (i)
                 ds_put_byte (farg, ' ');
-              macro_tokens_to_syntax (me->args[i], farg);
+              macro_tokens_to_syntax (me->args[i], farg, NULL, NULL);
             }
           return 1;
         }
@@ -1254,7 +1267,7 @@ expand_macro_function (const struct macro_expander *me,
         if (mts.n > 1)
           {
             struct macro_tokens tail = { .mts = mts.mts + 1, .n = mts.n - 1 };
-            macro_tokens_to_syntax (&tail, output);
+            macro_tokens_to_syntax (&tail, output, NULL, NULL);
           }
         macro_tokens_uninit (&mts);
         ds_destroy (&tmp);
@@ -1293,7 +1306,7 @@ expand_macro_function (const struct macro_expander *me,
         subme.stack = &stack;
 
         macro_expand (mts.mts, mts.n, &subme, &exp);
-        macro_tokens_to_syntax (&exp, output);
+        macro_tokens_to_syntax (&exp, output, NULL, NULL);
         macro_tokens_uninit (&exp);
         macro_tokens_uninit (&mts);
       }
index e0ab8f872ead36f7826bbf274cd4e1b6695e1d0a..e4fe405d47c340963666604f3c43a4059fd2a456 100644 (file)
@@ -222,6 +222,12 @@ scan_punct2__ (char c0, char c1)
 
     case '~':
       return T_NE;
+
+    case '&':
+      return T_AND;
+
+    case '|':
+      return T_OR;
     }
 
   NOT_REACHED ();
index a52d0b095aeb4a4b793b97178104dd3f2b27df5f..1dbffd8270aa37d3bee3deffadae794fedb14ffb 100644 (file)
@@ -246,7 +246,7 @@ void
 xr_pager_destroy (struct xr_pager *p)
 {
   if (p)
-    {x
+    {
       free (p->nodes);
 
       xr_page_style_unref (p->page_style);
index ecccbffe4e6366a11d0225e38e5e1397419775dc..a3d1fab78f0381ac0996fcc489d91e624f6adea3 100644 (file)
@@ -47,6 +47,7 @@ struct table_cell
 
     unsigned char options;       /* TABLE_CELL_*. */
     const struct pivot_value *value;
+    const struct font_style *font_style;
     const struct cell_style *cell_style;
   };