cleanup
[pspp] / rust / src / cooked.rs
index c84ffd65de177cc97b091db6e059aeac7c33777a..d00f3f3c34f7a6087570c6315e98b0539f82c2ab 100644 (file)
-use std::borrow::Cow;
-
-use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
-use encoding_rs::Encoding;
+use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat, ops::Range};
 
 use crate::{
-    Error,
-    {endian::Endian, CategoryLabels, Compression},
-    format::UncheckedFormat,
+    encoding::{default_encoding, get_encoding, Error as EncodingError},
+    endian::Endian,
+    format::{Error as FormatError, Spec, UncheckedSpec},
+    identifier::{Error as IdError, Identifier},
+    raw::{self, UnencodedStr, VarType},
 };
+use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
+use encoding_rs::{DecoderResult, Encoding};
+use num::integer::div_ceil;
+use ordered_float::OrderedFloat;
+use thiserror::Error as ThisError;
+
+pub use crate::raw::{CategoryLabels, Compression};
+
+#[derive(ThisError, Debug)]
+pub enum Error {
+    // XXX this is really an internal error and maybe we should change the
+    // interfaces to make it impossible
+    #[error("Missing header record")]
+    MissingHeaderRecord,
+
+    #[error("{0}")]
+    EncodingError(EncodingError),
+
+    #[error("Using default encoding {0}.")]
+    UsingDefaultEncoding(String),
+
+    #[error("Variable record from offset {:x} to {:x} specifies width {width} not in valid range [-1,255).", offsets.start, offsets.end)]
+    InvalidVariableWidth { offsets: Range<u64>, width: i32 },
+
+    #[error("This file has corrupted metadata written by a buggy version of PSPP.  To ensure that other software can read it correctly, save a new copy of the file.")]
+    InvalidLongMissingValueFormat,
+
+    #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format.  Using 01 Jan 1970.")]
+    InvalidCreationDate { creation_date: String },
+
+    #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format.  Using midnight.")]
+    InvalidCreationTime { creation_time: String },
+
+    #[error("{id_error}  Renaming variable to {new_name}.")]
+    InvalidVariableName {
+        id_error: IdError,
+        new_name: Identifier,
+    },
+
+    #[error(
+        "Substituting {new_spec} for invalid print format on variable {variable}.  {format_error}"
+    )]
+    InvalidPrintFormat {
+        new_spec: Spec,
+        variable: Identifier,
+        format_error: FormatError,
+    },
+
+    #[error(
+        "Substituting {new_spec} for invalid write format on variable {variable}.  {format_error}"
+    )]
+    InvalidWriteFormat {
+        new_spec: Spec,
+        variable: Identifier,
+        format_error: FormatError,
+    },
+
+    #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
+    DuplicateVariableName {
+        duplicate_name: Identifier,
+        new_name: Identifier,
+    },
+
+    #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
+    InvalidDictIndex { dict_index: usize, max_index: usize },
+
+    #[error("Dictionary index {0} refers to a long string continuation.")]
+    DictIndexIsContinuation(usize),
+
+    #[error("Variables associated with value label are not all of identical type.  Variable {numeric_var} is numeric, but variable {string_var} is string.")]
+    ValueLabelsDifferentTypes {
+        numeric_var: Identifier,
+        string_var: Identifier,
+    },
+
+    #[error(
+        "Value labels may not be added to long string variable {0} using record types 3 or 4."
+    )]
+    InvalidLongStringValueLabel(Identifier),
+
+    #[error("Invalid multiple response set name.  {0}")]
+    InvalidMrSetName(IdError),
+
+    #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
+    UnknownMrSetVariable {
+        mr_set: Identifier,
+        short_name: Identifier,
+    },
+
+    #[error("Multiple response set {0} has no variables.")]
+    EmptyMrSet(Identifier),
+
+    #[error("Multiple response set {0} has only one variable.")]
+    OneVarMrSet(Identifier),
+
+    #[error("Multiple response set {0} contains both string and numeric variables.")]
+    MixedMrSet(Identifier),
+
+    #[error(
+        "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
+    )]
+    InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
+
+    #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
+    TooWideMDGroupCountedValue {
+        mr_set: Identifier,
+        value: String,
+        width: usize,
+        max_width: u16,
+    },
+
+    #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
+    InvalidLongValueLabelWidth {
+        name: Identifier,
+        width: u32,
+        min_width: u16,
+        max_width: u16,
+    },
+
+    #[error("Invalid attribute name.  {0}")]
+    InvalidAttributeName(IdError),
+
+    #[error("Invalid short name in long variable name record.  {0}")]
+    InvalidShortName(IdError),
+
+    #[error("Invalid name in long variable name record.  {0}")]
+    InvalidLongName(IdError),
+
+    #[error("Invalid variable name in very long string record.  {0}")]
+    InvalidLongStringName(IdError),
+
+    #[error("Invalid variable name in long string value label record.  {0}")]
+    InvalidLongStringValueLabelName(IdError),
+
+    #[error("Invalid variable name in attribute record.  {0}")]
+    InvalidAttributeVariableName(IdError),
+
+    // XXX This is risky because `text` might be arbitarily long.
+    #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
+    MalformedString { encoding: String, text: String },
+
+    #[error("Invalid variable measurement level value {0}")]
+    InvalidMeasurement(u32),
+
+    #[error("Invalid variable display alignment value {0}")]
+    InvalidAlignment(u32),
+
+    #[error("Details TBD")]
+    TBD,
+}
+
+#[derive(Clone, Debug)]
+pub enum Record { // Decoded counterpart of `raw::Record`; `decode` emits one value per raw header record it keeps.
+    Header(HeaderRecord),
+    Variable(VariableRecord),
+    ValueLabel(ValueLabelRecord),
+    Document(DocumentRecord),
+    IntegerInfo(IntegerInfoRecord), // Raw type re-exported as is; `decode` clones it through unchanged.
+    FloatInfo(FloatInfoRecord), // Raw type re-exported as is; `decode` clones it through unchanged.
+    VariableSets(VariableSetRecord),
+    VarDisplay(VarDisplayRecord),
+    MultipleResponse(MultipleResponseRecord),
+    LongStringMissingValues(LongStringMissingValuesRecord),
+    LongStringValueLabels(LongStringValueLabelRecord),
+    Encoding(EncodingRecord), // Raw type re-exported as is; `decode` clones it through unchanged.
+    NumberOfCases(NumberOfCasesRecord), // Raw type re-exported as is; `decode` clones it through unchanged.
+    ProductInfo(ProductInfoRecord),
+    LongNames(LongNameRecord),
+    VeryLongStrings(VeryLongStringRecord),
+    FileAttributes(FileAttributeRecord),
+    VariableAttributes(VariableAttributeRecord),
+    OtherExtension(Extension), // Raw type re-exported as is; `decode` clones it through unchanged.
+    //EndOfHeaders(u32), // The raw variants commented out from here down are dropped by `decode`.
+    //ZHeader(ZHeader),
+    //ZTrailer(ZTrailer),
+    //Case(Vec<Value>),
+}
+
+pub use crate::raw::EncodingRecord;
+pub use crate::raw::Extension;
+pub use crate::raw::FloatInfoRecord;
+pub use crate::raw::IntegerInfoRecord;
+pub use crate::raw::NumberOfCasesRecord;
+
+type DictIndex = usize;
+
+pub struct Variable {
+    pub dict_index: DictIndex,
+    pub short_name: Identifier,
+    pub long_name: Option<Identifier>,
+    pub width: VarWidth,
+}
 
 pub struct Decoder {
     pub compression: Option<Compression>,
     pub endian: Endian,
     pub encoding: &'static Encoding,
+    pub variables: HashMap<DictIndex, Variable>, // Variables decoded so far, keyed by the 0-based index of their first dictionary slot.
+    pub var_names: HashMap<Identifier, DictIndex>, // Short name -> dictionary index; consulted to detect duplicate names.
+    n_dict_indexes: usize, // Dictionary slots consumed by the variable records decoded so far.
+    n_generated_names: usize, // Counter behind the `VARnnn` names made by `generate_name`.
+}
+
+pub fn decode(
+    headers: Vec<raw::Record>,
+    encoding: Option<&'static Encoding>,
+    warn: &impl Fn(Error),
+) -> Result<Vec<Record>, Error> {
+    let Some(header_record) = headers.iter().find_map(|rec| {
+        if let raw::Record::Header(header) = rec {
+            Some(header)
+        } else {
+            None
+        }
+    }) else {
+        return Err(Error::MissingHeaderRecord);
+    };
+    let encoding = match encoding {
+        Some(encoding) => encoding,
+        None => {
+            let encoding = headers.iter().find_map(|rec| {
+                if let raw::Record::Encoding(ref e) = rec {
+                    Some(e.0.as_str())
+                } else {
+                    None
+                }
+            });
+            let character_code = headers.iter().find_map(|rec| {
+                if let raw::Record::IntegerInfo(ref r) = rec {
+                    Some(r.character_code)
+                } else {
+                    None
+                }
+            });
+            match get_encoding(encoding, character_code) {
+                Ok(encoding) => encoding,
+                Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)),
+                Err(err) => {
+                    warn(Error::EncodingError(err));
+                    // Warn that we're using the default encoding.
+                    default_encoding()
+                }
+            }
+        }
+    };
+
+    let mut decoder = Decoder {
+        compression: header_record.compression,
+        endian: header_record.endian,
+        encoding,
+        variables: HashMap::new(),
+        var_names: HashMap::new(),
+        n_dict_indexes: 0,
+        n_generated_names: 0,
+    };
+
+    let mut output = Vec::with_capacity(headers.len());
+    for header in &headers {
+        match header {
+            raw::Record::Header(ref input) => {
+                if let Some(header) = HeaderRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::Header(header))
+                }
+            }
+            raw::Record::Variable(ref input) => {
+                if let Some(variable) = VariableRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::Variable(variable));
+                }
+            }
+            raw::Record::ValueLabel(ref input) => {
+                if let Some(value_label) = ValueLabelRecord::try_decode(&mut decoder, input, warn)?
+                {
+                    output.push(Record::ValueLabel(value_label));
+                }
+            }
+            raw::Record::Document(ref input) => {
+                if let Some(document) = DocumentRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::Document(document))
+                }
+            }
+            raw::Record::IntegerInfo(ref input) => output.push(Record::IntegerInfo(input.clone())),
+            raw::Record::FloatInfo(ref input) => output.push(Record::FloatInfo(input.clone())),
+            raw::Record::VariableSets(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::VariableSets(VariableSetRecord::parse(&s, warn)?));
+            }
+            raw::Record::VarDisplay(ref input) => {
+                if let Some(vdr) = VarDisplayRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::VarDisplay(vdr))
+                }
+            }
+            raw::Record::MultipleResponse(ref input) => {
+                if let Some(mrr) = MultipleResponseRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::MultipleResponse(mrr))
+                }
+            }
+            raw::Record::LongStringMissingValues(ref input) => {
+                if let Some(mrr) = LongStringMissingValuesRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::LongStringMissingValues(mrr))
+                }
+            }
+            raw::Record::LongStringValueLabels(ref input) => {
+                if let Some(mrr) =
+                    LongStringValueLabelRecord::try_decode(&mut decoder, input, warn)?
+                {
+                    output.push(Record::LongStringValueLabels(mrr))
+                }
+            }
+            raw::Record::Encoding(ref input) => output.push(Record::Encoding(input.clone())),
+            raw::Record::NumberOfCases(ref input) => {
+                output.push(Record::NumberOfCases(input.clone()))
+            }
+            raw::Record::ProductInfo(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::ProductInfo(ProductInfoRecord::parse(&s, warn)?));
+            }
+            raw::Record::LongNames(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::LongNames(LongNameRecord::parse(
+                    &mut decoder,
+                    &s,
+                    warn,
+                )?));
+            }
+            raw::Record::VeryLongStrings(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::VeryLongStrings(VeryLongStringRecord::parse(
+                    &decoder, &s, warn,
+                )?));
+            }
+            raw::Record::FileAttributes(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::FileAttributes(FileAttributeRecord::parse(
+                    &decoder, &s, warn,
+                )?));
+            }
+            raw::Record::VariableAttributes(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::VariableAttributes(VariableAttributeRecord::parse(
+                    &decoder, &s, warn,
+                )?));
+            }
+            raw::Record::OtherExtension(ref input) => {
+                output.push(Record::OtherExtension(input.clone()))
+            }
+            raw::Record::EndOfHeaders(_) => (),
+            raw::Record::ZHeader(_) => (),
+            raw::Record::ZTrailer(_) => (),
+            raw::Record::Case(_) => (),
+        };
+    }
+    Ok(output)
 }
 
 impl Decoder {
-    fn decode_string<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
+    fn generate_name(&mut self) -> Identifier { // Returns the next `VARnnn` name that is not already a key of `var_names`.
+        loop {
+            self.n_generated_names += 1;
+            let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
+                .unwrap(); // NOTE(review): presumably `VARnnn` is always a valid identifier in any supported encoding - confirm.
+            if !self.var_names.contains_key(&name) {
+                return name;
+            }
+            assert!(self.n_generated_names < usize::MAX); // Ensures the next increment cannot overflow the counter.
+        }
+    }
+    fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> { // Decodes `input` from `self.encoding`; malformed input is reported via `warn` and the decoded text is still returned.
         let (output, malformed) = self.encoding.decode_without_bom_handling(input);
         if malformed {
-            warn(Error::TBD);
+            warn(Error::MalformedString {
+                encoding: self.encoding.name().into(),
+                text: output.clone().into(), // The warning carries its own owned copy of the decoded text.
+            });
         }
         output
     }
+    fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String { // Owned-`String` convenience wrapper around `decode_string_cow`.
+        self.decode_string_cow(input, warn).into()
+    }
+    pub fn decode_identifier( // Decodes `input` and validates the result as an `Identifier` for `self.encoding`.
+        &self,
+        input: &[u8],
+        warn: &impl Fn(Error),
+    ) -> Result<Identifier, IdError> {
+        let s = self.decode_string_cow(input, warn);
+        Identifier::new(&s, self.encoding)
+    }
+    fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> { // Looks up a variable by 1-based `dict_index`.
+        let max_index = self.n_dict_indexes;
+        if dict_index == 0 || dict_index > max_index {
+            return Err(Error::InvalidDictIndex {
+                dict_index,
+                max_index,
+            });
+        }
+        let Some(variable) = self.variables.get(&(dict_index - 1)) else { // `variables` is keyed 0-based, hence the `- 1`.
+            return Err(Error::DictIndexIsContinuation(dict_index)); // In range but not the first slot of any variable.
+        };
+        Ok(variable)
+    }
+
+    /// Returns `input` decoded from `self.encoding` into UTF-8 such that
+    /// re-encoding the result back into `self.encoding` will have exactly the
+    /// same length in bytes.
+    ///
+    /// XXX warn about errors?
+    fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
+        if let (s, false) = self.encoding.decode_without_bom_handling(input) {
+            // This is the common case.  Usually there will be no errors.
+            s
+        } else {
+            // Unusual case.  Don't bother to optimize it much.
+            let mut decoder = self.encoding.new_decoder_without_bom_handling();
+            let mut output = String::with_capacity(
+                decoder
+                    .max_utf8_buffer_length_without_replacement(input.len())
+                    .unwrap(),
+            );
+            let mut rest = input;
+            while !rest.is_empty() {
+                match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
+                    (DecoderResult::InputEmpty, _) => break,
+                    (DecoderResult::OutputFull, _) => unreachable!(), // `output` was sized from the decoder's own worst-case estimate above.
+                    (DecoderResult::Malformed(a, b), consumed) => {
+                        let skipped = a as usize + b as usize;
+                        output.extend(repeat('?').take(skipped)); // One `?` per byte counted in `a + b`, in place of the malformed input.
+                        rest = &rest[consumed..]; // Resume decoding after the input consumed so far.
+                    }
+                }
+            }
+            assert_eq!(self.encoding.encode(&output).0.len(), input.len()); // Checks the documented same-length guarantee.
+            output.into()
+        }
+    }
 }
 
-pub trait Decode: Sized {
+pub trait TryDecode: Sized { // Fallible decoding of one raw record; takes the `Decoder` mutably so implementors can update its state.
     type Input;
-    fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Self;
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<Self>, Error>; // `Ok(None)`: the record yields no output (`decode` then pushes nothing for it).
+}
+
+pub trait Decode<Input>: Sized { // Infallible decoding of an `Input` value using a shared (read-only) `Decoder`.
+    fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self; // Problems can only be reported through `warn`.
+}
+
+impl<const N: usize> Decode<UnencodedStr<N>> for String { // Decodes the bytes of an `UnencodedStr` into an owned `String`.
+    fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
+        decoder.decode_string(&input.0, &warn) // Malformed bytes are reported through `warn` by `decode_string`.
+    }
 }
 
-#[derive(Clone)]
-pub struct Header {
+#[derive(Clone, Debug)]
+pub struct HeaderRecord {
     pub eye_catcher: String,
     pub weight_index: Option<usize>,
     pub n_cases: Option<u64>,
@@ -39,20 +471,30 @@ pub struct Header {
     pub file_label: String,
 }
 
-impl Decode for Header {
-    type Input = crate::raw::Header;
+fn trim_end_spaces(mut s: String) -> String {
+    s.truncate(s.trim_end_matches(' ').len());
+    s
+}
+
+impl TryDecode for HeaderRecord {
+    type Input = crate::raw::HeaderRecord;
 
-    fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Self {
-        let eye_catcher = decoder.decode_string(&input.eye_catcher, &warn);
-        let file_label = decoder.decode_string(&input.file_label, &warn);
-        let creation_date = decoder.decode_string(&input.creation_date, &warn);
-        let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
-            warn(Error::InvalidCreationDate {
-                creation_date: creation_date.into(),
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<Self>, Error> {
+        let eye_catcher = trim_end_spaces(decoder.decode_string(&input.eye_catcher.0, &warn));
+        let file_label = trim_end_spaces(decoder.decode_string(&input.file_label.0, &warn));
+        let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
+        let creation_date =
+            NaiveDate::parse_from_str(&creation_date, "%e %b %Y").unwrap_or_else(|_| {
+                warn(Error::InvalidCreationDate {
+                    creation_date: creation_date.into(),
+                });
+                Default::default()
             });
-            Default::default()
-        });
-        let creation_time = decoder.decode_string(&input.creation_time, &warn);
+        let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
         let creation_time =
             NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
                 warn(Error::InvalidCreationTime {
@@ -60,81 +502,977 @@ impl Decode for Header {
                 });
                 Default::default()
             });
-        Header {
-            eye_catcher: eye_catcher.into(),
+        Ok(Some(HeaderRecord {
+            eye_catcher,
             weight_index: input.weight_index.map(|n| n as usize),
             n_cases: input.n_cases.map(|n| n as u64),
             creation: NaiveDateTime::new(creation_date, creation_time),
-            file_label: file_label.into(),
+            file_label,
+        }))
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum VarWidth { // Type and width of a variable.
+    Numeric,
+    String(u16), // String width; the variable record decoder only produces 1..=255 here.
+}
+
+impl PartialOrd for VarWidth { // Partial order: only widths of the same kind are comparable.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        match (self, other) {
+            (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
+            (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)), // Strings order by width.
+            _ => None, // Numeric versus string is incomparable.
         }
     }
 }
 
-pub struct Variable {
-    pub width: i32,
-    pub name: String,
-    pub print_format: UncheckedFormat,
-    pub write_format: UncheckedFormat,
+impl VarWidth {
+    const MAX_STRING: u16 = 32767;
+
+    fn n_dict_indexes(self) -> usize {
+        match self {
+            VarWidth::Numeric => 1,
+            VarWidth::String(w) => div_ceil(w as usize, 8),
+        }
+    }
+
+    fn width_predicate(
+        a: Option<VarWidth>,
+        b: Option<VarWidth>,
+        f: impl Fn(u16, u16) -> u16,
+    ) -> Option<VarWidth> {
+        match (a, b) {
+            (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
+            (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
+                Some(VarWidth::String(f(a, b)))
+            }
+            _ => None,
+        }
+    }
+
+    /// Returns the wider of `self` and `other`:
+    /// - Numerical variable widths are equally wide.
+    /// - Longer strings are wider than shorter strings.
+    /// - Numerical and string types are incomparable, so result in `None`.
+    /// - Any `None` in the input yields `None` in the output.
+    pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+        Self::width_predicate(a, b, |a, b| a.max(b))
+    }
+
+    /// Returns the narrower of `self` and `other` (see [`Self::wider`]).
+    pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+        Self::width_predicate(a, b, |a, b| a.min(b))
+    }
+}
+
+impl From<VarWidth> for VarType {
+    fn from(source: VarWidth) -> Self {
+        match source {
+            VarWidth::Numeric => VarType::Numeric,
+            VarWidth::String(_) => VarType::String,
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VariableRecord { // Decoded form of `raw::VariableRecord`.
+    pub width: VarWidth,
+    pub name: Identifier, // Name from the record, or a generated `VARnnn` replacement if it was invalid or a duplicate.
+    pub print_format: Spec, // Validated format; replaced by the default for `width` if the raw one was invalid.
+    pub write_format: Spec, // Validated format; replaced by the default for `width` if the raw one was invalid.
+    pub missing_values: MissingValues,
+    pub label: Option<String>, // Decoded label, if the raw record had one.
+}
+
+#[derive(Clone, Debug)]
+pub struct MissingValues { // Decoded missing values: discrete values plus an optional range.
+    /// Individual missing values, up to 3 of them.
+    pub values: Vec<Value>,
+
+    /// Optional range of missing values.
+    pub range: Option<(Value, Value)>, // Built from the raw record's (low, high) pair, in that order.
+}
+
+impl Decode<raw::MissingValues> for MissingValues {
+    fn decode(decoder: &Decoder, input: &raw::MissingValues, _warn: impl Fn(Error)) -> Self {
+        MissingValues {
+            values: input
+                .values
+                .iter()
+                .map(|value| Value::decode(value, decoder))
+                .collect(),
+            range: input
+                .range
+                .as_ref()
+                .map(|(low, high)| (Value::decode(low, decoder), Value::decode(high, decoder))),
+        }
+    }
 }
 
-#[derive(Clone)]
-pub struct Document(Vec<String>);
+fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
+    UncheckedSpec::try_from(raw)
+        .and_then(Spec::try_from)
+        .and_then(|x| x.check_width_compatibility(width))
+        .unwrap_or_else(|error| {
+            let new_format = Spec::default_for_width(width);
+            warn(new_format, error);
+            new_format
+        })
+}
 
-impl Decode for Document {
-    type Input = crate::raw::Document;
+impl TryDecode for VariableRecord { // Decodes a variable record and registers the variable with the decoder.
+    type Input = raw::VariableRecord;
 
-    fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Self {
-        Document(
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &crate::raw::VariableRecord,
+        warn: impl Fn(Error),
+    ) -> Result<Option<VariableRecord>, Error> {
+        let width = match input.width {
+            0 => VarWidth::Numeric,
+            w @ 1..=255 => VarWidth::String(w as u16), // Lossless: `w` is within 1..=255.
+            -1 => return Ok(None), // NOTE(review): presumably a long-string continuation record; it registers nothing and yields no output.
+            _ => {
+                return Err(Error::InvalidVariableWidth {
+                    offsets: input.offsets.clone(),
+                    width: input.width,
+                })
+            }
+        };
+        let name = trim_end_spaces(decoder.decode_string(&input.name.0, &warn));
+        let name = match Identifier::new(&name, decoder.encoding) {
+            Ok(name) => {
+                if !decoder.var_names.contains_key(&name) {
+                    name
+                } else {
+                    let new_name = decoder.generate_name(); // Duplicate name: substitute a generated one and warn.
+                    warn(Error::DuplicateVariableName {
+                        duplicate_name: name.clone(),
+                        new_name: new_name.clone(),
+                    });
+                    new_name
+                }
+            }
+            Err(id_error) => {
+                let new_name = decoder.generate_name(); // Invalid name: substitute a generated one and warn.
+                warn(Error::InvalidVariableName {
+                    id_error,
+                    new_name: new_name.clone(),
+                });
+                new_name
+            }
+        };
+        let variable = Variable {
+            dict_index: decoder.n_dict_indexes, // 0-based index of this variable's first dictionary slot.
+            short_name: name.clone(),
+            long_name: None,
+            width,
+        };
+        decoder.n_dict_indexes += width.n_dict_indexes(); // Account for every slot this variable occupies.
+        assert!(decoder // Cannot fail: `name` was either checked to be absent or freshly generated.
+            .var_names
+            .insert(name.clone(), variable.dict_index)
+            .is_none());
+        assert!(decoder
+            .variables
+            .insert(variable.dict_index, variable)
+            .is_none());
+
+        let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
+            warn(Error::InvalidPrintFormat {
+                new_spec,
+                variable: name.clone(),
+                format_error,
+            })
+        });
+        let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
+            warn(Error::InvalidWriteFormat {
+                new_spec,
+                variable: name.clone(),
+                format_error,
+            })
+        });
+        let label = input
+            .label
+            .as_ref()
+            .map(|label| decoder.decode_string(&label.0, &warn));
+        Ok(Some(VariableRecord {
+            width,
+            name,
+            print_format,
+            write_format,
+            missing_values: MissingValues::decode(decoder, &input.missing_values, warn),
+            label,
+        }))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct DocumentRecord(Vec<String>); // Decoded document lines, each with trailing spaces removed.
+
+impl TryDecode for DocumentRecord { // Decodes every raw document line; always yields `Ok(Some(..))`.
+    type Input = crate::raw::DocumentRecord;
+
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<Self>, Error> {
+        Ok(Some(DocumentRecord(
             input
                 .lines
                 .iter()
-                .map(|s| decoder.decode_string(s, &warn).into())
+                .map(|s| trim_end_spaces(decoder.decode_string(&s.0, &warn))) // Decode each line, then strip trailing spaces.
                 .collect(),
-        )
+        )))
     }
 }
 
-pub use crate::raw::FloatInfo;
-pub use crate::raw::IntegerInfo;
+trait TextRecord // A record parsed from already-decoded text.
+where
+    Self: Sized,
+{
+    const NAME: &'static str; // Human-readable name of the record kind.
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>; // Parses `input`; problems may be reported via `warn`.
+}
 
 #[derive(Clone, Debug)]
-pub enum MultipleResponseType {
-    MultipleDichotomy {
-        value: String,
-        labels: CategoryLabels,
-    },
-    MultipleCategory,
+pub struct VariableSet { // One named set of variables, parsed from a `NAME=VAR ...` line.
+    pub name: String, // Text before the first `=`.
+    pub vars: Vec<String>, // Whitespace-separated names after the `=`, stored unvalidated.
+}
+
+impl VariableSet {
+    fn parse(input: &str) -> Result<Self, Error> {
+        let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
+        let vars = input.split_ascii_whitespace().map(String::from).collect();
+        Ok(VariableSet {
+            name: name.into(),
+            vars,
+        })
+    }
+}
+
+trait WarnOnError<T> {
+    fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
+}
+impl<T> WarnOnError<T> for Result<T, Error> {
+    fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
+        match self {
+            Ok(result) => Some(result),
+            Err(error) => {
+                warn(error);
+                None
+            }
+        }
+    }
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Value {
+    Number(Option<OrderedFloat<f64>>),
+    String(String),
+}
+
+impl Value {
+    pub fn decode(raw: &raw::Value, decoder: &Decoder) -> Self {
+        match raw {
+            raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
+            raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
+        }
+    }
 }
+
 #[derive(Clone, Debug)]
-pub struct MultipleResponseSet {
-    pub name: String,
+pub struct ValueLabel { // A decoded value paired with its decoded label text.
+    pub value: Value,
     pub label: String,
-    pub mr_type: MultipleResponseType,
-    pub vars: Vec<String>,
 }
 
 #[derive(Clone, Debug)]
-pub struct MultipleResponseRecord(Vec<MultipleResponseSet>);
+pub struct ValueLabelRecord { // Value labels shared by a group of variables of one type.
+    pub var_type: VarType, // Type of the first usable variable; the decoder rejects records that mix types.
+    pub labels: Vec<ValueLabel>,
+    pub variables: Vec<Identifier>, // Short names of the variables the labels apply to.
+}
+
+impl TryDecode for ValueLabelRecord { // Resolves the record's dictionary indexes to variables and decodes its labels.
+    type Input = crate::raw::ValueLabelRecord;
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<ValueLabelRecord>, Error> {
+        let variables: Vec<&Variable> = input
+            .dict_indexes
+            .iter()
+            .filter_map(|&dict_index| { // Indexes that do not resolve are warned about and skipped.
+                decoder
+                    .get_var_by_index(dict_index as usize)
+                    .warn_on_error(&warn)
+            })
+            .filter(|&variable| match variable.width {
+                VarWidth::String(width) if width > 8 => { // Strings wider than 8 are warned about and excluded.
+                    warn(Error::InvalidLongStringValueLabel(
+                        variable.short_name.clone(),
+                    ));
+                    false
+                }
+                _ => true,
+            })
+            .collect();
+        let mut i = variables.iter();
+        let Some(&first_var) = i.next() else {
+            return Ok(None); // No usable variables remain, so the record is dropped.
+        };
+        let var_type: VarType = first_var.width.into();
+        for &variable in i {
+            let this_type: VarType = variable.width.into();
+            if var_type != this_type {
+                let (numeric_var, string_var) = match var_type { // Order the pair so the warning names each variable correctly.
+                    VarType::Numeric => (first_var, variable),
+                    VarType::String => (variable, first_var),
+                };
+                warn(Error::ValueLabelsDifferentTypes {
+                    numeric_var: numeric_var.short_name.clone(),
+                    string_var: string_var.short_name.clone(),
+                });
+                return Ok(None); // Mixed types: the whole record is dropped.
+            }
+        }
+        let labels = input
+            .labels
+            .iter()
+            .map(|(value, label)| {
+                let label = decoder.decode_string(&label.0, &warn);
+                let value = Value::decode(
+                    &raw::Value::from_raw(value, var_type, decoder.endian), // Interpret the raw value according to the variables' type.
+                    decoder,
+                );
+                ValueLabel { value, label }
+            })
+            .collect();
+        let variables = variables
+            .iter()
+            .map(|&variable| variable.short_name.clone())
+            .collect();
+        Ok(Some(ValueLabelRecord {
+            var_type,
+            labels,
+            variables,
+        }))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VariableSetRecord(Vec<VariableSet>);
+
+impl TextRecord for VariableSetRecord {
+    const NAME: &'static str = "variable set";
+    fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let mut sets = Vec::new();
+        for line in input.lines() {
+            if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
+                sets.push(set)
+            }
+        }
+        Ok(VariableSetRecord(sets))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct ProductInfoRecord(pub String);
+
+impl TextRecord for ProductInfoRecord {
+    const NAME: &'static str = "extra product info";
+    fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
+        Ok(ProductInfoRecord(input.into()))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongName {
+    pub short_name: Identifier,
+    pub long_name: Identifier,
+}
+
+impl LongName {
+    fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
+        let short_name =
+            Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidShortName)?;
+        let long_name =
+            Identifier::new(long_name, decoder.encoding).map_err(Error::InvalidLongName)?;
+        Ok(LongName {
+            short_name,
+            long_name,
+        })
+    }
+}
 
 #[derive(Clone, Debug)]
-pub struct ProductInfo(String);
+pub struct LongNameRecord(Vec<LongName>);
+
+impl LongNameRecord {
+    pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let mut names = Vec::new();
+        for pair in input.split('\t').filter(|s| !s.is_empty()) {
+            if let Some((short_name, long_name)) = pair.split_once('=') {
+                if let Some(long_name) =
+                    LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
+                {
+                    names.push(long_name);
+                }
+            } else {
+                warn(Error::TBD)
+            }
+        }
+        Ok(LongNameRecord(names))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VeryLongString {
+    pub short_name: Identifier,
+    pub length: u16,
+}
+
+impl VeryLongString {
+    fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
+        let Some((short_name, length)) = input.split_once('=') else {
+            return Err(Error::TBD);
+        };
+        let short_name =
+            Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidLongStringName)?;
+        let length: u16 = length.parse().map_err(|_| Error::TBD)?;
+        if length > VarWidth::MAX_STRING {
+            return Err(Error::TBD);
+        }
+        Ok(VeryLongString { short_name, length })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VeryLongStringRecord(Vec<VeryLongString>);
+
+impl VeryLongStringRecord {
+    pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let mut very_long_strings = Vec::new();
+        for tuple in input
+            .split('\0')
+            .map(|s| s.trim_end_matches('\t'))
+            .filter(|s| !s.is_empty())
+        {
+            if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
+                very_long_strings.push(vls)
+            }
+        }
+        Ok(VeryLongStringRecord(very_long_strings))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct Attribute {
+    pub name: Identifier,
+    pub values: Vec<String>,
+}
 
+impl Attribute {
+    fn parse<'a>(
+        decoder: &Decoder,
+        input: &'a str,
+        warn: &impl Fn(Error),
+    ) -> Result<(Option<Attribute>, &'a str), Error> {
+        let Some((name, mut input)) = input.split_once('(') else {
+            return Err(Error::TBD);
+        };
+        let mut values = Vec::new();
+        loop {
+            let Some((value, rest)) = input.split_once('\n') else {
+                return Err(Error::TBD);
+            };
+            if let Some(stripped) = value
+                .strip_prefix('\'')
+                .and_then(|value| value.strip_suffix('\''))
+            {
+                values.push(stripped.into());
+            } else {
+                warn(Error::TBD);
+                values.push(value.into());
+            }
+            if let Some(rest) = rest.strip_prefix(')') {
+                let attribute = Identifier::new(name, decoder.encoding)
+                    .map_err(Error::InvalidAttributeName)
+                    .warn_on_error(warn)
+                    .map(|name| Attribute { name, values });
+                return Ok((attribute, rest));
+            };
+            input = rest;
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct AttributeSet(pub Vec<Attribute>);
+
+impl AttributeSet {
+    fn parse<'a>(
+        decoder: &Decoder,
+        mut input: &'a str,
+        sentinel: Option<char>,
+        warn: &impl Fn(Error),
+    ) -> Result<(AttributeSet, &'a str), Error> {
+        let mut attributes = Vec::new();
+        let rest = loop {
+            match input.chars().next() {
+                None => break input,
+                c if c == sentinel => break &input[1..],
+                _ => {
+                    let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
+                    if let Some(attribute) = attribute {
+                        attributes.push(attribute);
+                    }
+                    input = rest;
+                }
+            }
+        };
+        Ok((AttributeSet(attributes), rest))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct FileAttributeRecord(AttributeSet);
+
+impl FileAttributeRecord {
+    pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
+        if !rest.is_empty() {
+            warn(Error::TBD);
+        }
+        Ok(FileAttributeRecord(set))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VarAttributeSet {
+    pub long_var_name: Identifier,
+    pub attributes: AttributeSet,
+}
+
+impl VarAttributeSet {
+    fn parse<'a>(
+        decoder: &Decoder,
+        input: &'a str,
+        warn: &impl Fn(Error),
+    ) -> Result<(Option<VarAttributeSet>, &'a str), Error> {
+        let Some((long_var_name, rest)) = input.split_once(':') else {
+            return Err(Error::TBD);
+        };
+        let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
+        let var_attribute = Identifier::new(long_var_name, decoder.encoding)
+            .map_err(Error::InvalidAttributeVariableName)
+            .warn_on_error(warn)
+            .map(|name| VarAttributeSet {
+                long_var_name: name,
+                attributes,
+            });
+        Ok((var_attribute, rest))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
+
+impl VariableAttributeRecord {
+    pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+        let mut var_attribute_sets = Vec::new();
+        while !input.is_empty() {
+            let Some((var_attribute, rest)) =
+                VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
+            else {
+                break;
+            };
+            if let Some(var_attribute) = var_attribute {
+                var_attribute_sets.push(var_attribute);
+            }
+            input = rest;
+        }
+        Ok(VariableAttributeRecord(var_attribute_sets))
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Measure {
     Nominal,
     Ordinal,
     Scale,
 }
 
+impl Measure {
+    fn try_decode(source: u32) -> Result<Option<Measure>, Error> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Measure::Nominal)),
+            2 => Ok(Some(Measure::Ordinal)),
+            3 => Ok(Some(Measure::Scale)),
+            _ => Err(Error::InvalidMeasurement(source)),
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Alignment {
     Left,
     Right,
     Center,
 }
 
+impl Alignment {
+    fn try_decode(source: u32) -> Result<Option<Alignment>, Error> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Alignment::Left)),
+            2 => Ok(Some(Alignment::Right)),
+            3 => Ok(Some(Alignment::Center)),
+            _ => Err(Error::InvalidAlignment(source)),
+        }
+    }
+}
+
+/// Display settings decoded for one variable.
+#[derive(Clone, Debug)]
 pub struct VarDisplay {
+    /// Measurement level; `None` when the raw code was 0 or was rejected.
     pub measure: Option<Measure>,
-    pub width: u32,
-    pub align: Option<Alignment>,
+    /// Display width; `None` when the record carries only two values per
+    /// variable.
+    pub width: Option<u32>,
+    /// Alignment; `None` when the raw code was 0 or was rejected.
+    pub alignment: Option<Alignment>,
 }
 
+#[derive(Clone, Debug)]
 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
+
+impl TryDecode for VarDisplayRecord {
+    type Input = raw::VarDisplayRecord;
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<Self>, Error> {
+        let n_vars = decoder.variables.len();
+        let n_per_var = if input.0.len() == 3 * n_vars {
+            3
+        } else if input.0.len() == 2 * n_vars {
+            2
+        } else {
+            return Err(Error::TBD);
+        };
+
+        let var_displays = input
+            .0
+            .chunks(n_per_var)
+            .map(|chunk| {
+                let (measure, width, alignment) = match n_per_var == 3 {
+                    true => (chunk[0], Some(chunk[1]), chunk[2]),
+                    false => (chunk[0], None, chunk[1]),
+                };
+                let measure = Measure::try_decode(measure).warn_on_error(&warn).flatten();
+                let alignment = Alignment::try_decode(alignment)
+                    .warn_on_error(&warn)
+                    .flatten();
+                VarDisplay {
+                    measure,
+                    width,
+                    alignment,
+                }
+            })
+            .collect();
+        Ok(Some(VarDisplayRecord(var_displays)))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub enum MultipleResponseType {
+    MultipleDichotomy {
+        value: Value,
+        labels: CategoryLabels,
+    },
+    MultipleCategory,
+}
+
+impl MultipleResponseType {
+    fn decode(
+        decoder: &Decoder,
+        mr_set: &Identifier,
+        input: &raw::MultipleResponseType,
+        min_width: VarWidth,
+        warn: &impl Fn(Error),
+    ) -> Result<Self, Error> {
+        let mr_type = match input {
+            raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
+                let value = decoder.decode_string_cow(&value.0, warn);
+                let value = match min_width {
+                    VarWidth::Numeric => {
+                        let number: f64 = value.trim().parse().map_err(|_| {
+                            Error::InvalidMDGroupCountedValue {
+                                mr_set: mr_set.clone(),
+                                number: value.into(),
+                            }
+                        })?;
+                        Value::Number(Some(number.into()))
+                    }
+                    VarWidth::String(max_width) => {
+                        let value = value.trim_end_matches(' ');
+                        let width = value.len();
+                        if width > max_width as usize {
+                            return Err(Error::TooWideMDGroupCountedValue {
+                                mr_set: mr_set.clone(),
+                                value: value.into(),
+                                width,
+                                max_width,
+                            });
+                        };
+                        Value::String(value.into())
+                    }
+                };
+                MultipleResponseType::MultipleDichotomy {
+                    value,
+                    labels: *labels,
+                }
+            }
+            raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
+        };
+        Ok(mr_type)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSet {
+    pub name: Identifier,
+    pub min_width: VarWidth,
+    pub max_width: VarWidth,
+    pub label: String,
+    pub mr_type: MultipleResponseType,
+    pub dict_indexes: Vec<DictIndex>,
+}
+
+impl MultipleResponseSet {
+    fn decode(
+        decoder: &Decoder,
+        input: &raw::MultipleResponseSet,
+        warn: &impl Fn(Error),
+    ) -> Result<Self, Error> {
+        let mr_set_name = decoder
+            .decode_identifier(&input.name.0, warn)
+            .map_err(Error::InvalidMrSetName)?;
+
+        let label = decoder.decode_string(&input.label.0, warn);
+
+        let mut dict_indexes = Vec::with_capacity(input.short_names.len());
+        for short_name in input.short_names.iter() {
+            let short_name = match decoder.decode_identifier(&short_name.0, warn) {
+                Ok(name) => name,
+                Err(error) => {
+                    warn(Error::InvalidMrSetName(error));
+                    continue;
+                }
+            };
+            let Some(&dict_index) = decoder.var_names.get(&short_name) else {
+                warn(Error::UnknownMrSetVariable {
+                    mr_set: mr_set_name.clone(),
+                    short_name: short_name.clone(),
+                });
+                continue;
+            };
+            dict_indexes.push(dict_index);
+        }
+
+        match dict_indexes.len() {
+            0 => return Err(Error::EmptyMrSet(mr_set_name)),
+            1 => return Err(Error::OneVarMrSet(mr_set_name)),
+            _ => (),
+        }
+
+        let Some((Some(min_width), Some(max_width))) = dict_indexes
+            .iter()
+            .map(|dict_index| decoder.variables[dict_index].width)
+            .map(|w| (Some(w), Some(w)))
+            .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
+        else {
+            return Err(Error::MixedMrSet(mr_set_name));
+        };
+
+        let mr_type =
+            MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
+
+        Ok(MultipleResponseSet {
+            name: mr_set_name,
+            min_width,
+            max_width,
+            label,
+            mr_type,
+            dict_indexes,
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
+
+impl TryDecode for MultipleResponseRecord {
+    type Input = raw::MultipleResponseRecord;
+
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<Self>, Error> {
+        let mut sets = Vec::with_capacity(input.0.len());
+        for set in &input.0 {
+            match MultipleResponseSet::decode(decoder, set, &warn) {
+                Ok(set) => sets.push(set),
+                Err(error) => warn(error),
+            }
+        }
+        Ok(Some(MultipleResponseRecord(sets)))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValues {
+    /// Variable name.
+    pub var_name: Identifier,
+
+    /// Missing values.
+    pub missing_values: MissingValues,
+}
+
+impl LongStringMissingValues {
+    fn decode(
+        decoder: &Decoder,
+        input: &raw::LongStringMissingValues,
+        warn: &impl Fn(Error),
+    ) -> Result<Self, Error> {
+        let var_name = decoder.decode_string(&input.var_name.0, warn);
+        let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
+            .map_err(Error::InvalidLongStringValueLabelName)?;
+
+        let missing_values = MissingValues::decode(decoder, &input.missing_values, warn);
+
+        Ok(LongStringMissingValues {
+            var_name,
+            missing_values
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValuesRecord(Vec<LongStringMissingValues>);
+
+impl TryDecode for LongStringMissingValuesRecord {
+    type Input = raw::LongStringMissingValueSet;
+
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<Self>, Error> {
+        let mut labels = Vec::with_capacity(input.0.len());
+        for label in &input.0 {
+            match LongStringMissingValues::decode(decoder, label, &warn) {
+                Ok(set) => labels.push(set),
+                Err(error) => warn(error),
+            }
+        }
+        Ok(Some(LongStringMissingValuesRecord(labels)))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabels {
+    pub var_name: Identifier,
+    pub width: VarWidth,
+    pub labels: Vec<ValueLabel>,
+}
+
+impl LongStringValueLabels {
+    fn decode(
+        decoder: &Decoder,
+        input: &raw::LongStringValueLabels,
+        warn: &impl Fn(Error),
+    ) -> Result<Self, Error> {
+        let var_name = decoder.decode_string(&input.var_name.0, warn);
+        let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
+            .map_err(Error::InvalidLongStringValueLabelName)?;
+
+        let min_width = 9;
+        let max_width = VarWidth::MAX_STRING;
+        if input.width < 9 || input.width > max_width as u32 {
+            return Err(Error::InvalidLongValueLabelWidth {
+                name: var_name,
+                width: input.width,
+                min_width,
+                max_width,
+            });
+        }
+        let width = input.width as u16;
+
+        let mut labels = Vec::with_capacity(input.labels.len());
+        for (value, label) in input.labels.iter() {
+            let value = Value::String(decoder.decode_exact_length(&value.0).into());
+            let label = decoder.decode_string(&label.0, warn);
+            labels.push(ValueLabel { value, label });
+        }
+
+        Ok(LongStringValueLabels {
+            var_name,
+            width: VarWidth::String(width),
+            labels,
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
+
+impl TryDecode for LongStringValueLabelRecord {
+    type Input = raw::LongStringValueLabelRecord;
+
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<Self>, Error> {
+        let mut labels = Vec::with_capacity(input.0.len());
+        for label in &input.0 {
+            match LongStringValueLabels::decode(decoder, label, &warn) {
+                Ok(set) => labels.push(set),
+                Err(error) => warn(error),
+            }
+        }
+        Ok(Some(LongStringValueLabelRecord(labels)))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use encoding_rs::WINDOWS_1252;
+
+    #[test]
+    fn test() {
+        let mut s = String::new();
+        s.push(char::REPLACEMENT_CHARACTER);
+        let encoded = WINDOWS_1252.encode(&s).0;
+        let decoded = WINDOWS_1252.decode(&encoded[..]).0;
+        println!("{:?}", decoded);
+    }
+
+    #[test]
+    fn test2() {
+        let charset: Vec<u8> = (0..=255).collect();
+        println!("{}", charset.len());
+        let decoded = WINDOWS_1252.decode(&charset[..]).0;
+        println!("{}", decoded.len());
+        let encoded = WINDOWS_1252.encode(&decoded[..]).0;
+        println!("{}", encoded.len());
+        assert_eq!(&charset[..], &encoded[..]);
+    }
+}