work
authorBen Pfaff <blp@cs.stanford.edu>
Sun, 20 Aug 2023 20:57:25 +0000 (13:57 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Sun, 20 Aug 2023 20:57:25 +0000 (13:57 -0700)
rust/src/cooked.rs
rust/src/format.rs
rust/src/identifier.rs
rust/src/raw.rs

index 93fe21f2bd39edbb0963ea35c94445439dad530b..fc7c44f0cdc18025dc73912229ab38e0d1cd1db9 100644 (file)
@@ -1,23 +1,23 @@
-use std::{borrow::Cow, collections::{HashSet, HashMap}, cmp::Ordering};
+use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
 
-use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
-use encoding_rs::Encoding;
-use num::integer::div_ceil;
 use crate::{
-    format::{Spec, UncheckedSpec},
+    format::{Error as FormatError, Spec, UncheckedSpec},
     identifier::{Error as IdError, Identifier},
     raw::{self, MissingValues, VarType},
     {endian::Endian, Compression},
 };
+use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
+use encoding_rs::{DecoderResult, Encoding};
+use num::integer::div_ceil;
 use thiserror::Error as ThisError;
 
 #[derive(ThisError, Debug)]
 pub enum Error {
     #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
-    BadVariableWidth { offset: u64, width: i32 },
+    InvalidVariableWidth { offset: u64, width: i32 },
 
     #[error("This file has corrupted metadata written by a buggy version of PSPP.  To ensure that other software can read it correctly, save a new copy of the file.")]
-    BadLongMissingValueFormat,
+    InvalidLongMissingValueFormat,
 
     #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format.  Using 01 Jan 1970.")]
     InvalidCreationDate { creation_date: String },
@@ -25,49 +25,88 @@ pub enum Error {
     #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format.  Using midnight.")]
     InvalidCreationTime { creation_time: String },
 
-    #[error("Invalid variable name: {0}")]
-    BadIdentifier(#[from] IdError),
+    #[error("{id_error}  Renaming variable to {new_name}.")]
+    InvalidVariableName {
+        id_error: IdError,
+        new_name: Identifier,
+    },
+
+    #[error(
+        "Substituting {new_spec} for invalid print format on variable {variable}.  {format_error}"
+    )]
+    InvalidPrintFormat {
+        new_spec: Spec,
+        variable: Identifier,
+        format_error: FormatError,
+    },
+
+    #[error(
+        "Substituting {new_spec} for invalid write format on variable {variable}.  {format_error}"
+    )]
+    InvalidWriteFormat {
+        new_spec: Spec,
+        variable: Identifier,
+        format_error: FormatError,
+    },
+
+    #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
+    DuplicateVariableName {
+        duplicate_name: Identifier,
+        new_name: Identifier,
+    },
+
+    #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
+    InvalidDictIndex { dict_index: usize, max_index: usize },
+
+    #[error("Dictionary index {0} refers to a long string continuation.")]
+    DictIndexIsContinuation(usize),
+
+    #[error("Variables associated with value label are not all of identical type.  Variable {numeric_var} is numeric, but variable {string_var} is string.")]
+    ValueLabelsDifferentTypes {
+        numeric_var: Identifier,
+        string_var: Identifier,
+    },
+
+    #[error(
+        "Value labels may not be added to long string variable {0} using record types 3 or 4."
+    )]
+    InvalidLongStringValueLabel(Identifier),
 
     #[error("Details TBD")]
     TBD,
 }
 
+type DictIndex = usize;
+
+pub struct Variable {
+    pub dict_index: DictIndex,
+    pub short_name: Identifier,
+    pub long_name: Option<Identifier>,
+    pub width: VarWidth,
+}
+
 pub struct Decoder {
     pub compression: Option<Compression>,
     pub endian: Endian,
     pub encoding: &'static Encoding,
-    pub var_names: HashSet<Identifier>,
-    pub dict_indexes: HashMap<usize, Identifier>,
+    pub variables: HashMap<DictIndex, Variable>,
+    pub var_names: HashMap<Identifier, DictIndex>,
     n_dict_indexes: usize,
     n_generated_names: usize,
 }
 
 impl Decoder {
-    fn take_name(&mut self, id: &Identifier) -> bool {
-        self.var_names.insert(id.clone())
-    }
     fn generate_name(&mut self) -> Identifier {
         loop {
             self.n_generated_names += 1;
             let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
                 .unwrap();
-            if self.take_name(&name) {
+            if !self.var_names.contains_key(&name) {
                 return name;
             }
             assert!(self.n_generated_names < usize::MAX);
         }
     }
-    fn take_dict_indexes(&mut self, id: &Identifier, width: VarWidth) -> usize {
-        let n = match width {
-            VarWidth::Numeric => 1,
-            VarWidth::String(w) => div_ceil(w as usize, 8),
-        };
-        let dict_index = self.n_dict_indexes;
-        self.dict_indexes.insert(self.n_dict_indexes, id.clone());
-        self.n_dict_indexes += n;
-        dict_index
-        
-    }
     fn decode_string<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
         let (output, malformed) = self.encoding.decode_without_bom_handling(input);
         if malformed {
@@ -75,6 +114,53 @@ impl Decoder {
         }
         output
     }
+    fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
+        let max_index = self.n_dict_indexes - 1;
+        if dict_index == 0 || dict_index as usize > max_index {
+            return Err(Error::InvalidDictIndex {
+                dict_index,
+                max_index,
+            });
+        }
+        let Some(variable) = self.variables.get(&dict_index) else {
+            return Err(Error::DictIndexIsContinuation(dict_index));
+        };
+        Ok(variable)
+    }
+
+    /// Returns `input` decoded from `self.encoding` into UTF-8 such that
+    /// re-encoding the result back into `self.encoding` will have exactly the
+    /// same length in bytes.
+    ///
+    /// XXX warn about errors?
+    fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
+        if let (s, false) = self.encoding.decode_without_bom_handling(input) {
+            // This is the common case.  Usually there will be no errors.
+            s.into()
+        } else {
+            // Unusual case.  Don't bother to optimize it much.
+            let mut decoder = self.encoding.new_decoder_without_bom_handling();
+            let mut output = String::with_capacity(
+                decoder
+                    .max_utf8_buffer_length_without_replacement(input.len())
+                    .unwrap(),
+            );
+            let mut rest = input;
+            while !rest.is_empty() {
+                match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
+                    (DecoderResult::InputEmpty, _) => break,
+                    (DecoderResult::OutputFull, _) => unreachable!(),
+                    (DecoderResult::Malformed(a, b), consumed) => {
+                        let skipped = a as usize + b as usize;
+                        output.extend(repeat('?').take(skipped));
+                        rest = &rest[consumed..];
+                    }
+                }
+            }
+            assert_eq!(self.encoding.encode(&output).0.len(), input.len());
+            output.into()
+        }
+    }
 }
 
 pub trait Decode: Sized {
@@ -138,6 +224,15 @@ impl PartialOrd for VarWidth {
     }
 }
 
+impl VarWidth {
+    fn n_dict_indexes(self) -> usize {
+        match self {
+            VarWidth::Numeric => 1,
+            VarWidth::String(w) => div_ceil(w as usize, 8),
+        }
+    }
+}
+
 impl From<VarWidth> for VarType {
     fn from(source: VarWidth) -> Self {
         match source {
@@ -147,7 +242,7 @@ impl From<VarWidth> for VarType {
     }
 }
 
-pub struct Variable {
+pub struct VariableRecord {
     pub width: VarWidth,
     pub name: Identifier,
     pub print_format: Spec,
@@ -156,28 +251,29 @@ pub struct Variable {
     pub label: Option<String>,
 }
 
-fn decode_format(raw: raw::Spec, name: &str, width: VarWidth) -> Spec {
+fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
     UncheckedSpec::try_from(raw)
         .and_then(Spec::try_from)
-        .and_then(|x| x.check_width_compatibility(Some(name), width))
-        .unwrap_or_else(|_warning| {
-            /*warn(warning);*/
-            Spec::default_for_width(width)
+        .and_then(|x| x.check_width_compatibility(width))
+        .unwrap_or_else(|error| {
+            let new_format = Spec::default_for_width(width);
+            warn(new_format, error);
+            new_format
         })
 }
 
-impl Variable {
+impl VariableRecord {
     pub fn decode(
         decoder: &mut Decoder,
         input: &crate::raw::Variable,
         warn: impl Fn(Error),
-    ) -> Result<Option<Variable>, Error> {
+    ) -> Result<Option<VariableRecord>, Error> {
         let width = match input.width {
             0 => VarWidth::Numeric,
             w @ 1..=255 => VarWidth::String(w as u16),
             -1 => return Ok(None),
             _ => {
-                return Err(Error::BadVariableWidth {
+                return Err(Error::InvalidVariableWidth {
                     offset: input.offset,
                     width: input.width,
                 })
@@ -186,25 +282,61 @@ impl Variable {
         let name = decoder.decode_string(&input.name.0, &warn);
         let name = match Identifier::new(&name, decoder.encoding) {
             Ok(name) => {
-                if !decoder.take_name(&name) {
-                    decoder.generate_name()
-                } else {
+                if !decoder.var_names.contains_key(&name) {
                     name
+                } else {
+                    let new_name = decoder.generate_name();
+                    warn(Error::DuplicateVariableName {
+                        duplicate_name: name.clone(),
+                        new_name: new_name.clone(),
+                    });
+                    new_name
                 }
             }
-            Err(error) => {
-                warn(error.into());
-                decoder.generate_name()
+            Err(id_error) => {
+                let new_name = decoder.generate_name();
+                warn(Error::InvalidVariableName {
+                    id_error,
+                    new_name: new_name.clone(),
+                });
+                new_name
             }
         };
-        let print_format = decode_format(input.print_format, &name.0, width);
-        let write_format = decode_format(input.write_format, &name.0, width);
+        let variable = Variable {
+            dict_index: decoder.n_dict_indexes,
+            short_name: name.clone(),
+            long_name: None,
+            width,
+        };
+        decoder.n_dict_indexes += width.n_dict_indexes();
+        assert!(decoder
+            .var_names
+            .insert(name.clone(), variable.dict_index)
+            .is_none());
+        assert!(decoder
+            .variables
+            .insert(variable.dict_index, variable)
+            .is_none());
+
+        let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
+            warn(Error::InvalidPrintFormat {
+                new_spec,
+                variable: name.clone(),
+                format_error,
+            })
+        });
+        let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
+            warn(Error::InvalidWriteFormat {
+                new_spec,
+                variable: name.clone(),
+                format_error,
+            })
+        });
         let label = input
             .label
             .as_ref()
             .map(|label| decoder.decode_string(&label.0, &warn).into());
-        decoder.take_dict_indexes(&name, width);
-        Ok(Some(Variable {
+        Ok(Some(VariableRecord {
             width,
             name,
             print_format,
@@ -259,11 +391,107 @@ impl VariableSet {
     }
 }
 
-/*
+#[derive(Clone)]
+pub enum Value {
+    Number(Option<f64>),
+    String(String),
+}
+
+impl Value {
+    pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
+        match raw {
+            raw::Value::Number(x) => Value::Number(x),
+            raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
+        }
+    }
+}
+
 pub struct ValueLabelRecord {
-    pub labels: Vec<(
+    pub var_type: VarType,
+    pub labels: Vec<(Value, String)>,
+    pub variables: Vec<Identifier>,
+}
+
+trait WarnOnError<T> {
+    fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
+}
+impl<T> WarnOnError<T> for Result<T, Error> {
+    fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
+        match self {
+            Ok(result) => Some(result),
+            Err(error) => {
+                warn(error);
+                None
+            }
+        }
+    }
+}
+
+impl ValueLabelRecord {
+    pub fn decode(
+        decoder: &mut Decoder,
+        raw_value_label: &crate::raw::ValueLabel,
+        dict_indexes: &crate::raw::VarIndexes,
+        warn: impl Fn(Error),
+    ) -> Result<Option<ValueLabelRecord>, Error> {
+        let variables: Vec<&Variable> = dict_indexes
+            .dict_indexes
+            .iter()
+            .filter_map(|&dict_index| {
+                decoder
+                    .get_var_by_index(dict_index as usize)
+                    .warn_on_error(&warn)
+            })
+            .filter(|&variable| match variable.width {
+                VarWidth::String(width) if width > 8 => {
+                    warn(Error::InvalidLongStringValueLabel(
+                        variable.short_name.clone(),
+                    ));
+                    false
+                }
+                _ => true,
+            })
+            .collect();
+        let mut i = variables.iter();
+        let Some(&first_var) = i.next() else {
+            return Ok(None);
+        };
+        let var_type: VarType = first_var.width.into();
+        for &variable in i {
+            let this_type: VarType = variable.width.into();
+            if var_type != this_type {
+                let (numeric_var, string_var) = match var_type {
+                    VarType::Numeric => (first_var, variable),
+                    VarType::String => (variable, first_var),
+                };
+                warn(Error::ValueLabelsDifferentTypes {
+                    numeric_var: numeric_var.short_name.clone(),
+                    string_var: string_var.short_name.clone(),
+                });
+                return Ok(None);
+            }
+        }
+        let labels = raw_value_label
+            .labels
+            .iter()
+            .map(|(value, label)| {
+                let label = decoder.decode_string(&label.0, &warn);
+                let value = Value::decode(raw::Value::from_raw(*value, var_type, decoder.endian), &decoder);
+                (value, label.into())
+            })
+            .collect();
+        let variables = variables
+            .iter()
+            .map(|&variable| variable.short_name.clone())
+            .collect();
+        Ok(Some(ValueLabelRecord {
+            var_type,
+            labels,
+            variables,
+        }))
+    }
 }
-*/
+
 pub struct VariableSetRecord(Vec<VariableSet>);
 
 impl TextRecord for VariableSetRecord {
@@ -271,9 +499,8 @@ impl TextRecord for VariableSetRecord {
     fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
         let mut sets = Vec::new();
         for line in input.lines() {
-            match VariableSet::parse(line) {
-                Ok(set) => sets.push(set),
-                Err(error) => warn(error),
+            if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
+                sets.push(set)
             }
         }
         Ok(VariableSetRecord(sets))
@@ -344,9 +571,8 @@ impl TextRecord for VeryLongStringRecord {
             .map(|s| s.trim_end_matches('\t'))
             .filter(|s| !s.is_empty())
         {
-            match VeryLongString::parse(tuple) {
-                Ok(vls) => very_long_strings.push(vls),
-                Err(error) => warn(error),
+            if let Some(vls) = VeryLongString::parse(tuple).warn_on_error(&warn) {
+                very_long_strings.push(vls)
             }
         }
         Ok(VeryLongStringRecord(very_long_strings))
@@ -459,16 +685,13 @@ impl TextRecord for VariableAttributeRecord {
     fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
         let mut var_attribute_sets = Vec::new();
         while !input.is_empty() {
-            match VarAttributeSet::parse(input, &warn) {
-                Ok((var_attribute, rest)) => {
-                    var_attribute_sets.push(var_attribute);
-                    input = rest;
-                }
-                Err(error) => {
-                    warn(error);
-                    break;
-                }
-            }
+            let Some((var_attribute, rest)) =
+                VarAttributeSet::parse(input, &warn).warn_on_error(&warn)
+            else {
+                break;
+            };
+            var_attribute_sets.push(var_attribute);
+            input = rest;
         }
         Ok(VariableAttributeRecord(var_attribute_sets))
     }
@@ -493,3 +716,28 @@ pub struct VarDisplay {
 }
 
 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
+
+#[cfg(test)]
+mod test {
+    use encoding_rs::WINDOWS_1252;
+
+    #[test]
+    fn test() {
+        let mut s = String::new();
+        s.push(char::REPLACEMENT_CHARACTER);
+        let encoded = WINDOWS_1252.encode(&s).0;
+        let decoded = WINDOWS_1252.decode(&encoded[..]).0;
+        println!("{:?}", decoded);
+    }
+
+    #[test]
+    fn test2() {
+        let charset: Vec<u8> = (0..=255).collect();
+        println!("{}", charset.len());
+        let decoded = WINDOWS_1252.decode(&charset[..]).0;
+        println!("{}", decoded.len());
+        let encoded = WINDOWS_1252.encode(&decoded[..]).0;
+        println!("{}", encoded.len());
+        assert_eq!(&charset[..], &encoded[..]);
+    }
+}
index 0fc82a3987670b0cf08b79e1d386df78057d0a5d..34798ed65af54c5072f717de94381fdd74827414 100644 (file)
@@ -12,7 +12,7 @@ use crate::{
 
 #[derive(ThisError, Debug)]
 pub enum Error {
-    #[error("Unknown format type {value}")]
+    #[error("Unknown format type {value}.")]
     UnknownFormat { value: u16 },
 
     #[error("Output format {0} specifies width {}, but {} requires an even width.", .0.w, .0.format)]
@@ -39,12 +39,6 @@ pub enum Error {
     #[error("Numeric variable is not compatible with string format {0}.")]
     UnnamedVariableNotCompatibleWithStringFormat(Format),
 
-    #[error("String variable {variable} is not compatible with numeric format {format}.")]
-    NamedVariableNotCompatibleWithNumericFormat { variable: String, format: Format },
-
-    #[error("Numeric variable {variable} is not compatible with string format {format}.")]
-    NamedVariableNotCompatibleWithStringFormat { variable: String, format: Format },
-
     #[error("String variable {variable} with width {width} is not compatible with format {bad_spec}.  Use format {good_spec} instead.")]
     NamedStringVariableBadSpecWidth {
         variable: String,
@@ -312,32 +306,14 @@ impl Format {
 
     /// Checks whether this format is valid for a variable with the given
     /// `var_type`.
-    pub fn check_type_compatibility(
-        self,
-        variable: Option<&str>,
-        var_type: VarType,
-    ) -> Result<(), Error> {
+    pub fn check_type_compatibility(self, var_type: VarType) -> Result<(), Error> {
         let my_type = self.var_type();
         match (my_type, var_type) {
             (VarType::Numeric, VarType::String) => {
-                if let Some(variable) = variable {
-                    Err(Error::NamedVariableNotCompatibleWithNumericFormat {
-                        variable: variable.into(),
-                        format: self,
-                    })
-                } else {
-                    Err(Error::UnnamedVariableNotCompatibleWithNumericFormat(self))
-                }
+                Err(Error::UnnamedVariableNotCompatibleWithNumericFormat(self))
             }
             (VarType::String, VarType::Numeric) => {
-                if let Some(variable) = variable {
-                    Err(Error::NamedVariableNotCompatibleWithStringFormat {
-                        variable: variable.into(),
-                        format: self,
-                    })
-                } else {
-                    Err(Error::UnnamedVariableNotCompatibleWithStringFormat(self))
-                }
+                Err(Error::UnnamedVariableNotCompatibleWithStringFormat(self))
             }
             _ => Ok(()),
         }
@@ -450,14 +426,10 @@ impl Spec {
 
     /// Checks whether this format specification is valid for a variable with
     /// width `var_width`.
-    pub fn check_width_compatibility(
-        self,
-        variable: Option<&str>,
-        var_width: VarWidth,
-    ) -> Result<Self, Error> {
+    pub fn check_width_compatibility(self, var_width: VarWidth) -> Result<Self, Error> {
         // Verify that the format is right for the variable's type.
         self.format
-            .check_type_compatibility(variable, var_width.into())?;
+            .check_type_compatibility(var_width.into())?;
 
         if let VarWidth::String(w) = var_width {
             if var_width != self.var_width() {
@@ -467,20 +439,11 @@ impl Spec {
                 } else {
                     Spec { w: w * 2, ..self }
                 };
-                if let Some(variable) = variable {
-                    return Err(Error::NamedStringVariableBadSpecWidth {
-                        variable: variable.into(),
-                        width: w,
-                        bad_spec,
-                        good_spec,
-                    });
-                } else {
-                    return Err(Error::UnnamedStringVariableBadSpecWidth {
-                        width: w,
-                        bad_spec,
-                        good_spec,
-                    });
-                }
+                return Err(Error::UnnamedStringVariableBadSpecWidth {
+                    width: w,
+                    bad_spec,
+                    good_spec,
+                });
             }
         }
 
index 0553b41b119e2e8e5eb74d81f380400aef63dd25..5b0649f533ac99f56c1bbaf9101ef5404fa90142 100644 (file)
@@ -1,3 +1,5 @@
+use std::fmt::{Display, Formatter, Result as FmtResult};
+
 use encoding_rs::{EncoderResult, Encoding};
 use finl_unicode::categories::{CharacterCategories, MajorCategory};
 use thiserror::Error as ThisError;
@@ -28,9 +30,6 @@ impl IdentifierChar for char {
     }
 }
 
-#[derive(Clone, PartialEq, Eq, Debug, Hash)]
-pub struct Identifier(pub UniCase<String>);
-
 #[derive(Clone, Debug, ThisError)]
 pub enum Error {
     #[error("Identifier cannot be empty string.")]
@@ -72,6 +71,9 @@ fn is_reserved_word(s: &str) -> bool {
     false
 }
 
+#[derive(Clone, PartialEq, Eq, Debug, Hash)]
+pub struct Identifier(pub UniCase<String>);
+
 impl Identifier {
     /// Maximum length of an identifier, in bytes.  The limit applies in the
     /// encoding used by the dictionary, not in UTF-8.
@@ -118,3 +120,9 @@ impl Identifier {
         Ok(())
     }
 }
+
+impl Display for Identifier {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "{}", self.0)
+    }
+}
index 9fb92f52b07defbb4467e2942d567f1e0a7f53c2..8fa0f26bcf208b03a9303c8ed1f30cbceab1b166 100644 (file)
@@ -183,7 +183,7 @@ pub struct Header {
     /// Compression type, if any,
     pub compression: Option<Compression>,
 
-    /// 0-based variable index of the weight variable, or `None` if the file is
+    /// 1-based variable index of the weight variable, or `None` if the file is
     /// unweighted.
     pub weight_index: Option<u32>,
 
@@ -256,7 +256,7 @@ impl Header {
         };
 
         let weight_index: u32 = endian.parse(read_bytes(r)?);
-        let weight_index = (weight_index > 0).then(|| weight_index - 1);
+        let weight_index = (weight_index > 0).then_some(weight_index);
 
         let n_cases: u32 = endian.parse(read_bytes(r)?);
         let n_cases = (n_cases < i32::MAX as u32 / 2).then_some(n_cases);
@@ -500,14 +500,14 @@ impl Debug for Value {
 
 impl Value {
     fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Value, IoError> {
-        Ok(Self::from_raw(var_type, read_bytes(r)?, endian))
+        Ok(Self::from_raw(UntypedValue(read_bytes(r)?), var_type, endian))
     }
 
-    pub fn from_raw(var_type: VarType, raw: [u8; 8], endian: Endian) -> Value {
+    pub fn from_raw(raw: UntypedValue, var_type: VarType, endian: Endian) -> Value {
         match var_type {
-            VarType::String => Value::String(UnencodedStr(raw)),
+            VarType::String => Value::String(UnencodedStr(raw.0)),
             VarType::Numeric => {
-                let number: f64 = endian.parse(raw);
+                let number: f64 = endian.parse(raw.0);
                 Value::Number((number != -f64::MAX).then_some(number))
             }
         }
@@ -533,7 +533,7 @@ impl Value {
                     });
                 }
             };
-            values.push(Value::from_raw(var_type, raw, endian));
+            values.push(Value::from_raw(UntypedValue(raw), var_type, endian));
         }
         Ok(Some(values))
     }
@@ -583,7 +583,7 @@ impl Value {
                             });
                         }
                     }
-                    253 => break Value::from_raw(var_type, read_bytes(reader)?, endian),
+                    253 => break Value::from_raw(UntypedValue(read_bytes(reader)?), var_type, endian),
                     254 => match var_type {
                         VarType::String => break Value::String(UnencodedStr(*b"        ")), // XXX EBCDIC
                         VarType::Numeric => {
@@ -1023,15 +1023,15 @@ pub struct VarIndexes {
     /// Offset from the start of the file to the start of the record.
     pub offset: u64,
 
-    /// The 0-based indexes of the variable indexes.
-    pub var_indexes: Vec<u32>,
+    /// The 1-based indexes of the variable indexes.
+    pub dict_indexes: Vec<u32>,
 }
 
 impl Debug for VarIndexes {
     fn fmt(&self, f: &mut Formatter) -> FmtResult {
         write!(f, "apply to variables")?;
-        for var_index in self.var_indexes.iter() {
-            write!(f, " #{var_index}")?;
+        for dict_index in self.dict_indexes.iter() {
+            write!(f, " #{dict_index}")?;
         }
         Ok(())
     }
@@ -1051,14 +1051,14 @@ impl VarIndexes {
                 max: VarIndexes::MAX,
             });
         }
-        let mut var_indexes = Vec::with_capacity(n as usize);
+        let mut dict_indexes = Vec::with_capacity(n as usize);
         for _ in 0..n {
-            var_indexes.push(endian.parse(read_bytes(r)?));
+            dict_indexes.push(endian.parse(read_bytes(r)?));
         }
 
         Ok(VarIndexes {
             offset,
-            var_indexes,
+            dict_indexes,
         })
     }
 }