cleanup
[pspp] / rust / src / cooked.rs
index 9f6f0101b0ce51535221779b362008edcd311a94..d00f3f3c34f7a6087570c6315e98b0539f82c2ab 100644 (file)
@@ -1,10 +1,11 @@
-use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
+use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat, ops::Range};
 
 use crate::{
+    encoding::{default_encoding, get_encoding, Error as EncodingError},
     endian::Endian,
     format::{Error as FormatError, Spec, UncheckedSpec},
     identifier::{Error as IdError, Identifier},
-    raw::{self, MissingValues, UnencodedStr, VarType},
+    raw::{self, UnencodedStr, VarType},
 };
 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
 use encoding_rs::{DecoderResult, Encoding};
@@ -16,8 +17,19 @@ pub use crate::raw::{CategoryLabels, Compression};
 
 #[derive(ThisError, Debug)]
 pub enum Error {
-    #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
-    InvalidVariableWidth { offset: u64, width: i32 },
+    // XXX This is really an internal error; maybe we should change the
+    // interfaces to make it impossible.
+    #[error("Missing header record")]
+    MissingHeaderRecord,
+
+    #[error("{0}")]
+    EncodingError(EncodingError),
+
+    #[error("Using default encoding {0}.")]
+    UsingDefaultEncoding(String),
+
+    #[error("Variable record from offset {:x} to {:x} specifies width {width} not in valid range [-1,255).", offsets.start, offsets.end)]
+    InvalidVariableWidth { offsets: Range<u64>, width: i32 },
 
     #[error("This file has corrupted metadata written by a buggy version of PSPP.  To ensure that other software can read it correctly, save a new copy of the file.")]
     InvalidLongMissingValueFormat,
@@ -132,6 +144,16 @@ pub enum Error {
     #[error("Invalid variable name in attribute record.  {0}")]
     InvalidAttributeVariableName(IdError),
 
+    // XXX This is risky because `text` might be arbitrarily long.
+    #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
+    MalformedString { encoding: String, text: String },
+
+    #[error("Invalid variable measurement level value {0}")]
+    InvalidMeasurement(u32),
+
+    #[error("Invalid variable display alignment value {0}")]
+    InvalidAlignment(u32),
+
     #[error("Details TBD")]
     TBD,
 }
@@ -147,6 +169,7 @@ pub enum Record {
     VariableSets(VariableSetRecord),
     VarDisplay(VarDisplayRecord),
     MultipleResponse(MultipleResponseRecord),
+    LongStringMissingValues(LongStringMissingValuesRecord),
     LongStringValueLabels(LongStringValueLabelRecord),
     Encoding(EncodingRecord),
     NumberOfCases(NumberOfCasesRecord),
@@ -187,24 +210,155 @@ pub struct Decoder {
     n_generated_names: usize,
 }
 
-pub fn decode<T>(headers: Vec<raw::Record>) -> Vec<Record> {
-    let encoding = headers.iter().find_map(|rec| {
-        if let raw::Record::Encoding(ref e) = rec {
-            Some(e.0.as_str())
+pub fn decode(
+    headers: Vec<raw::Record>,
+    encoding: Option<&'static Encoding>,
+    warn: &impl Fn(Error),
+) -> Result<Vec<Record>, Error> {
+    let Some(header_record) = headers.iter().find_map(|rec| {
+        if let raw::Record::Header(header) = rec {
+            Some(header)
         } else {
             None
         }
-    });
-    let character_code = headers.iter().find_map(|rec| {
-        if let raw::Record::IntegerInfo(ref r) = rec {
-            Some(r.character_code)
-        } else {
-            None
+    }) else {
+        return Err(Error::MissingHeaderRecord);
+    };
+    let encoding = match encoding {
+        Some(encoding) => encoding,
+        None => {
+            let encoding = headers.iter().find_map(|rec| {
+                if let raw::Record::Encoding(ref e) = rec {
+                    Some(e.0.as_str())
+                } else {
+                    None
+                }
+            });
+            let character_code = headers.iter().find_map(|rec| {
+                if let raw::Record::IntegerInfo(ref r) = rec {
+                    Some(r.character_code)
+                } else {
+                    None
+                }
+            });
+            match get_encoding(encoding, character_code) {
+                Ok(encoding) => encoding,
+                Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)),
+                Err(err) => {
+                    warn(Error::EncodingError(err));
+                    // XXX Should also warn with `Error::UsingDefaultEncoding`; not done yet.
+                    default_encoding()
+                }
+            }
         }
-    });
-    
-
-    Vec::new()
+    };
+
+    let mut decoder = Decoder {
+        compression: header_record.compression,
+        endian: header_record.endian,
+        encoding,
+        variables: HashMap::new(),
+        var_names: HashMap::new(),
+        n_dict_indexes: 0,
+        n_generated_names: 0,
+    };
+
+    let mut output = Vec::with_capacity(headers.len());
+    for header in &headers {
+        match header {
+            raw::Record::Header(ref input) => {
+                if let Some(header) = HeaderRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::Header(header))
+                }
+            }
+            raw::Record::Variable(ref input) => {
+                if let Some(variable) = VariableRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::Variable(variable));
+                }
+            }
+            raw::Record::ValueLabel(ref input) => {
+                if let Some(value_label) = ValueLabelRecord::try_decode(&mut decoder, input, warn)?
+                {
+                    output.push(Record::ValueLabel(value_label));
+                }
+            }
+            raw::Record::Document(ref input) => {
+                if let Some(document) = DocumentRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::Document(document))
+                }
+            }
+            raw::Record::IntegerInfo(ref input) => output.push(Record::IntegerInfo(input.clone())),
+            raw::Record::FloatInfo(ref input) => output.push(Record::FloatInfo(input.clone())),
+            raw::Record::VariableSets(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::VariableSets(VariableSetRecord::parse(&s, warn)?));
+            }
+            raw::Record::VarDisplay(ref input) => {
+                if let Some(vdr) = VarDisplayRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::VarDisplay(vdr))
+                }
+            }
+            raw::Record::MultipleResponse(ref input) => {
+                if let Some(mrr) = MultipleResponseRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::MultipleResponse(mrr))
+                }
+            }
+            raw::Record::LongStringMissingValues(ref input) => {
+                if let Some(mrr) = LongStringMissingValuesRecord::try_decode(&mut decoder, input, warn)? {
+                    output.push(Record::LongStringMissingValues(mrr))
+                }
+            }
+            raw::Record::LongStringValueLabels(ref input) => {
+                if let Some(mrr) =
+                    LongStringValueLabelRecord::try_decode(&mut decoder, input, warn)?
+                {
+                    output.push(Record::LongStringValueLabels(mrr))
+                }
+            }
+            raw::Record::Encoding(ref input) => output.push(Record::Encoding(input.clone())),
+            raw::Record::NumberOfCases(ref input) => {
+                output.push(Record::NumberOfCases(input.clone()))
+            }
+            raw::Record::ProductInfo(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::ProductInfo(ProductInfoRecord::parse(&s, warn)?));
+            }
+            raw::Record::LongNames(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::LongNames(LongNameRecord::parse(
+                    &mut decoder,
+                    &s,
+                    warn,
+                )?));
+            }
+            raw::Record::VeryLongStrings(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::VeryLongStrings(VeryLongStringRecord::parse(
+                    &decoder, &s, warn,
+                )?));
+            }
+            raw::Record::FileAttributes(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::FileAttributes(FileAttributeRecord::parse(
+                    &decoder, &s, warn,
+                )?));
+            }
+            raw::Record::VariableAttributes(ref input) => {
+                let s = decoder.decode_string_cow(&input.text.0, warn);
+                output.push(Record::VariableAttributes(VariableAttributeRecord::parse(
+                    &decoder, &s, warn,
+                )?));
+            }
+            raw::Record::OtherExtension(ref input) => {
+                output.push(Record::OtherExtension(input.clone()))
+            }
+            raw::Record::EndOfHeaders(_) => (),
+            raw::Record::ZHeader(_) => (),
+            raw::Record::ZTrailer(_) => (),
+            raw::Record::Case(_) => (),
+        };
+    }
+    Ok(output)
 }
 
 impl Decoder {
@@ -222,7 +376,10 @@ impl Decoder {
     fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
         let (output, malformed) = self.encoding.decode_without_bom_handling(input);
         if malformed {
-            warn(Error::TBD);
+            warn(Error::MalformedString {
+                encoding: self.encoding.name().into(),
+                text: output.clone().into(),
+            });
         }
         output
     }
@@ -238,14 +395,14 @@ impl Decoder {
         Identifier::new(&s, self.encoding)
     }
     fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
-        let max_index = self.n_dict_indexes - 1;
-        if dict_index == 0 || dict_index as usize > max_index {
+        let max_index = self.n_dict_indexes;
+        if dict_index == 0 || dict_index > max_index {
             return Err(Error::InvalidDictIndex {
                 dict_index,
                 max_index,
             });
         }
-        let Some(variable) = self.variables.get(&dict_index) else {
+        let Some(variable) = self.variables.get(&(dict_index - 1)) else {
             return Err(Error::DictIndexIsContinuation(dict_index));
         };
         Ok(variable)
@@ -259,7 +416,7 @@ impl Decoder {
     fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
         if let (s, false) = self.encoding.decode_without_bom_handling(input) {
             // This is the common case.  Usually there will be no errors.
-            s.into()
+            s
         } else {
             // Unusual case.  Don't bother to optimize it much.
             let mut decoder = self.encoding.new_decoder_without_bom_handling();
@@ -289,10 +446,10 @@ impl Decoder {
 pub trait TryDecode: Sized {
     type Input;
     fn try_decode(
-        decoder: &Decoder,
+        decoder: &mut Decoder,
         input: &Self::Input,
         warn: impl Fn(Error),
-    ) -> Result<Self, Error>;
+    ) -> Result<Option<Self>, Error>;
 }
 
 pub trait Decode<Input>: Sized {
@@ -314,23 +471,29 @@ pub struct HeaderRecord {
     pub file_label: String,
 }
 
+fn trim_end_spaces(mut s: String) -> String {
+    s.truncate(s.trim_end_matches(' ').len());
+    s
+}
+
 impl TryDecode for HeaderRecord {
     type Input = crate::raw::HeaderRecord;
 
     fn try_decode(
-        decoder: &Decoder,
+        decoder: &mut Decoder,
         input: &Self::Input,
         warn: impl Fn(Error),
-    ) -> Result<Self, Error> {
-        let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
-        let file_label = decoder.decode_string(&input.file_label.0, &warn);
+    ) -> Result<Option<Self>, Error> {
+        let eye_catcher = trim_end_spaces(decoder.decode_string(&input.eye_catcher.0, &warn));
+        let file_label = trim_end_spaces(decoder.decode_string(&input.file_label.0, &warn));
         let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
-        let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
-            warn(Error::InvalidCreationDate {
-                creation_date: creation_date.into(),
+        let creation_date =
+            NaiveDate::parse_from_str(&creation_date, "%e %b %Y").unwrap_or_else(|_| {
+                warn(Error::InvalidCreationDate {
+                    creation_date: creation_date.into(),
+                });
+                Default::default()
             });
-            Default::default()
-        });
         let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
         let creation_time =
             NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
@@ -339,13 +502,13 @@ impl TryDecode for HeaderRecord {
                 });
                 Default::default()
             });
-        Ok(HeaderRecord {
+        Ok(Some(HeaderRecord {
             eye_catcher,
             weight_index: input.weight_index.map(|n| n as usize),
             n_cases: input.n_cases.map(|n| n as u64),
             creation: NaiveDateTime::new(creation_date, creation_time),
             file_label,
-        })
+        }))
     }
 }
 
@@ -423,6 +586,31 @@ pub struct VariableRecord {
     pub label: Option<String>,
 }
 
+#[derive(Clone, Debug)]
+pub struct MissingValues {
+    /// Individual missing values, up to 3 of them.
+    pub values: Vec<Value>,
+
+    /// Optional range of missing values.
+    pub range: Option<(Value, Value)>,
+}
+
+impl Decode<raw::MissingValues> for MissingValues {
+    fn decode(decoder: &Decoder, input: &raw::MissingValues, _warn: impl Fn(Error)) -> Self {
+        MissingValues {
+            values: input
+                .values
+                .iter()
+                .map(|value| Value::decode(value, decoder))
+                .collect(),
+            range: input
+                .range
+                .as_ref()
+                .map(|(low, high)| (Value::decode(low, decoder), Value::decode(high, decoder))),
+        }
+    }
+}
+
 fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
     UncheckedSpec::try_from(raw)
         .and_then(Spec::try_from)
@@ -434,8 +622,10 @@ fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatErro
         })
 }
 
-impl VariableRecord {
-    pub fn decode(
+impl TryDecode for VariableRecord {
+    type Input = raw::VariableRecord;
+
+    fn try_decode(
         decoder: &mut Decoder,
         input: &crate::raw::VariableRecord,
         warn: impl Fn(Error),
@@ -446,12 +636,13 @@ impl VariableRecord {
             -1 => return Ok(None),
             _ => {
                 return Err(Error::InvalidVariableWidth {
-                    offset: input.offset,
+                    offsets: input.offsets.clone(),
                     width: input.width,
                 })
             }
         };
-        let name = match decoder.decode_identifier(&input.name.0, &warn) {
+        let name = trim_end_spaces(decoder.decode_string(&input.name.0, &warn));
+        let name = match Identifier::new(&name, decoder.encoding) {
             Ok(name) => {
                 if !decoder.var_names.contains_key(&name) {
                     name
@@ -512,7 +703,7 @@ impl VariableRecord {
             name,
             print_format,
             write_format,
-            missing_values: input.missing_values.clone(),
+            missing_values: MissingValues::decode(decoder, &input.missing_values, warn),
             label,
         }))
     }
@@ -525,17 +716,17 @@ impl TryDecode for DocumentRecord {
     type Input = crate::raw::DocumentRecord;
 
     fn try_decode(
-        decoder: &Decoder,
+        decoder: &mut Decoder,
         input: &Self::Input,
         warn: impl Fn(Error),
-    ) -> Result<Self, Error> {
-        Ok(DocumentRecord(
+    ) -> Result<Option<Self>, Error> {
+        Ok(Some(DocumentRecord(
             input
                 .lines
                 .iter()
-                .map(|s| decoder.decode_string(&s.0, &warn))
+                .map(|s| trim_end_spaces(decoder.decode_string(&s.0, &warn)))
                 .collect(),
-        ))
+        )))
     }
 }
 
@@ -586,7 +777,7 @@ pub enum Value {
 }
 
 impl Value {
-    pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
+    pub fn decode(raw: &raw::Value, decoder: &Decoder) -> Self {
         match raw {
             raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
             raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
@@ -607,14 +798,14 @@ pub struct ValueLabelRecord {
     pub variables: Vec<Identifier>,
 }
 
-impl ValueLabelRecord {
-    pub fn decode(
+impl TryDecode for ValueLabelRecord {
+    type Input = crate::raw::ValueLabelRecord;
+    fn try_decode(
         decoder: &mut Decoder,
-        raw_value_label: &crate::raw::ValueLabelRecord,
-        dict_indexes: &crate::raw::VarIndexRecord,
+        input: &Self::Input,
         warn: impl Fn(Error),
     ) -> Result<Option<ValueLabelRecord>, Error> {
-        let variables: Vec<&Variable> = dict_indexes
+        let variables: Vec<&Variable> = input
             .dict_indexes
             .iter()
             .filter_map(|&dict_index| {
@@ -651,14 +842,14 @@ impl ValueLabelRecord {
                 return Ok(None);
             }
         }
-        let labels = raw_value_label
+        let labels = input
             .labels
             .iter()
             .map(|(value, label)| {
                 let label = decoder.decode_string(&label.0, &warn);
                 let value = Value::decode(
-                    raw::Value::from_raw(*value, var_type, decoder.endian),
-                    &decoder,
+                    &raw::Value::from_raw(value, var_type, decoder.endian),
+                    decoder,
                 );
                 ValueLabel { value, label }
             })
@@ -709,10 +900,10 @@ pub struct LongName {
 
 impl LongName {
     fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
-        let short_name = Identifier::new(short_name, decoder.encoding)
-            .map_err(|e| Error::InvalidShortName(e))?;
+        let short_name =
+            Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidShortName)?;
         let long_name =
-            Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?;
+            Identifier::new(long_name, decoder.encoding).map_err(Error::InvalidLongName)?;
         Ok(LongName {
             short_name,
             long_name,
@@ -752,16 +943,13 @@ impl VeryLongString {
         let Some((short_name, length)) = input.split_once('=') else {
             return Err(Error::TBD);
         };
-        let short_name = Identifier::new(short_name, decoder.encoding)
-            .map_err(|e| Error::InvalidLongStringName(e))?;
+        let short_name =
+            Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidLongStringName)?;
         let length: u16 = length.parse().map_err(|_| Error::TBD)?;
         if length > VarWidth::MAX_STRING {
             return Err(Error::TBD);
         }
-        Ok(VeryLongString {
-            short_name: short_name.into(),
-            length,
-        })
+        Ok(VeryLongString { short_name, length })
     }
 }
 
@@ -815,7 +1003,7 @@ impl Attribute {
             }
             if let Some(rest) = rest.strip_prefix(')') {
                 let attribute = Identifier::new(name, decoder.encoding)
-                    .map_err(|e| Error::InvalidAttributeName(e))
+                    .map_err(Error::InvalidAttributeName)
                     .warn_on_error(warn)
                     .map(|name| Attribute { name, values });
                 return Ok((attribute, rest));
@@ -883,7 +1071,7 @@ impl VarAttributeSet {
         };
         let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
         let var_attribute = Identifier::new(long_var_name, decoder.encoding)
-            .map_err(|e| Error::InvalidAttributeVariableName(e))
+            .map_err(Error::InvalidAttributeVariableName)
             .warn_on_error(warn)
             .map(|name| VarAttributeSet {
                 long_var_name: name,
@@ -921,6 +1109,18 @@ pub enum Measure {
     Scale,
 }
 
+impl Measure {
+    fn try_decode(source: u32) -> Result<Option<Measure>, Error> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Measure::Nominal)),
+            2 => Ok(Some(Measure::Ordinal)),
+            3 => Ok(Some(Measure::Scale)),
+            _ => Err(Error::InvalidMeasurement(source)),
+        }
+    }
+}
+
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Alignment {
     Left,
@@ -928,16 +1128,67 @@ pub enum Alignment {
     Center,
 }
 
+impl Alignment {
+    fn try_decode(source: u32) -> Result<Option<Alignment>, Error> {
+        match source {
+            0 => Ok(None),
+            1 => Ok(Some(Alignment::Left)),
+            2 => Ok(Some(Alignment::Right)),
+            3 => Ok(Some(Alignment::Center)),
+            _ => Err(Error::InvalidAlignment(source)),
+        }
+    }
+}
+
 #[derive(Clone, Debug)]
 pub struct VarDisplay {
     pub measure: Option<Measure>,
-    pub width: u32,
-    pub align: Option<Alignment>,
+    pub width: Option<u32>,
+    pub alignment: Option<Alignment>,
 }
 
 #[derive(Clone, Debug)]
 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
 
+impl TryDecode for VarDisplayRecord {
+    type Input = raw::VarDisplayRecord;
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<Self>, Error> {
+        let n_vars = decoder.variables.len();
+        let n_per_var = if input.0.len() == 3 * n_vars {
+            3
+        } else if input.0.len() == 2 * n_vars {
+            2
+        } else {
+            return Err(Error::TBD);
+        };
+
+        let var_displays = input
+            .0
+            .chunks(n_per_var)
+            .map(|chunk| {
+                let (measure, width, alignment) = match n_per_var == 3 {
+                    true => (chunk[0], Some(chunk[1]), chunk[2]),
+                    false => (chunk[0], None, chunk[1]),
+                };
+                let measure = Measure::try_decode(measure).warn_on_error(&warn).flatten();
+                let alignment = Alignment::try_decode(alignment)
+                    .warn_on_error(&warn)
+                    .flatten();
+                VarDisplay {
+                    measure,
+                    width,
+                    alignment,
+                }
+            })
+            .collect();
+        Ok(Some(VarDisplayRecord(var_displays)))
+    }
+}
+
 #[derive(Clone, Debug)]
 pub enum MultipleResponseType {
     MultipleDichotomy {
@@ -1011,7 +1262,7 @@ impl MultipleResponseSet {
     ) -> Result<Self, Error> {
         let mr_set_name = decoder
             .decode_identifier(&input.name.0, warn)
-            .map_err(|error| Error::InvalidMrSetName(error))?;
+            .map_err(Error::InvalidMrSetName)?;
 
         let label = decoder.decode_string(&input.label.0, warn);
 
@@ -1070,10 +1321,10 @@ impl TryDecode for MultipleResponseRecord {
     type Input = raw::MultipleResponseRecord;
 
     fn try_decode(
-        decoder: &Decoder,
+        decoder: &mut Decoder,
         input: &Self::Input,
         warn: impl Fn(Error),
-    ) -> Result<Self, Error> {
+    ) -> Result<Option<Self>, Error> {
         let mut sets = Vec::with_capacity(input.0.len());
         for set in &input.0 {
             match MultipleResponseSet::decode(decoder, set, &warn) {
@@ -1081,7 +1332,57 @@ impl TryDecode for MultipleResponseRecord {
                 Err(error) => warn(error),
             }
         }
-        Ok(MultipleResponseRecord(sets))
+        Ok(Some(MultipleResponseRecord(sets)))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValues {
+    /// Variable name.
+    pub var_name: Identifier,
+
+    /// Missing values.
+    pub missing_values: MissingValues,
+}
+
+impl LongStringMissingValues {
+    fn decode(
+        decoder: &Decoder,
+        input: &raw::LongStringMissingValues,
+        warn: &impl Fn(Error),
+    ) -> Result<Self, Error> {
+        let var_name = decoder.decode_string(&input.var_name.0, warn);
+        let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
+            .map_err(Error::InvalidLongStringValueLabelName)?;
+
+        let missing_values = MissingValues::decode(decoder, &input.missing_values, warn);
+
+        Ok(LongStringMissingValues {
+            var_name,
+            missing_values
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringMissingValuesRecord(Vec<LongStringMissingValues>);
+
+impl TryDecode for LongStringMissingValuesRecord {
+    type Input = raw::LongStringMissingValueSet;
+
+    fn try_decode(
+        decoder: &mut Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Option<Self>, Error> {
+        let mut labels = Vec::with_capacity(input.0.len());
+        for label in &input.0 {
+            match LongStringMissingValues::decode(decoder, label, &warn) {
+                Ok(set) => labels.push(set),
+                Err(error) => warn(error),
+            }
+        }
+        Ok(Some(LongStringMissingValuesRecord(labels)))
     }
 }
 
@@ -1098,15 +1399,15 @@ impl LongStringValueLabels {
         input: &raw::LongStringValueLabels,
         warn: &impl Fn(Error),
     ) -> Result<Self, Error> {
-        let var_name = decoder
-            .decode_identifier(&input.var_name.0, warn)
-            .map_err(|e| Error::InvalidLongStringValueLabelName(e))?;
+        let var_name = decoder.decode_string(&input.var_name.0, warn);
+        let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
+            .map_err(Error::InvalidLongStringValueLabelName)?;
 
         let min_width = 9;
         let max_width = VarWidth::MAX_STRING;
         if input.width < 9 || input.width > max_width as u32 {
             return Err(Error::InvalidLongValueLabelWidth {
-                name: var_name.into(),
+                name: var_name,
                 width: input.width,
                 min_width,
                 max_width,
@@ -1136,10 +1437,10 @@ impl TryDecode for LongStringValueLabelRecord {
     type Input = raw::LongStringValueLabelRecord;
 
     fn try_decode(
-        decoder: &Decoder,
+        decoder: &mut Decoder,
         input: &Self::Input,
         warn: impl Fn(Error),
-    ) -> Result<Self, Error> {
+    ) -> Result<Option<Self>, Error> {
         let mut labels = Vec::with_capacity(input.0.len());
         for label in &input.0 {
             match LongStringValueLabels::decode(decoder, label, &warn) {
@@ -1147,7 +1448,7 @@ impl TryDecode for LongStringValueLabelRecord {
                 Err(error) => warn(error),
             }
         }
-        Ok(LongStringValueLabelRecord(labels))
+        Ok(Some(LongStringValueLabelRecord(labels)))
     }
 }