improve value parsing
authorBen Pfaff <blp@cs.stanford.edu>
Tue, 14 Oct 2025 20:57:57 +0000 (13:57 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Tue, 14 Oct 2025 20:57:57 +0000 (13:57 -0700)
rust/pspp/src/output/spv/light.rs

index 05b5747ac95198e1631dd43636dbaaefc0b5df1c..22a01e8386b8a86f137dab0ac09e8871bc7c6667 100644 (file)
@@ -7,7 +7,10 @@ use std::{
     sync::Arc,
 };
 
-use binrw::{BinRead, BinResult, Endian, Error as BinError, VecArgs, binread, io::TakeSeekExt};
+use binrw::{
+    BinRead, BinResult, Endian, Error as BinError, VecArgs, binread, error::ContextExt,
+    io::TakeSeekExt,
+};
 use chrono::DateTime;
 use displaydoc::Display;
 use encoding_rs::{Encoding, WINDOWS_1252};
@@ -267,9 +270,9 @@ struct Titles {
     user_title: Value,
     #[br(temp)]
     _3: Optional<One>,
-    #[br(parse_with(parse_optional), args(version))]
+    #[br(parse_with(parse_explicit_optional), args(version))]
     corner_text: Option<Value>,
-    #[br(parse_with(parse_optional), args(version))]
+    #[br(parse_with(parse_explicit_optional), args(version))]
     caption: Option<Value>,
 }
 
@@ -284,7 +287,7 @@ struct One;
 struct Zero;
 
 #[binrw::parser(reader, endian)]
-pub fn parse_optional<'a, T, A>(args: A, ...) -> BinResult<Option<T>>
+pub fn parse_explicit_optional<'a, T, A>(args: A, ...) -> BinResult<Option<T>>
 where
     T: BinRead<Args<'a> = A>,
 {
@@ -315,7 +318,7 @@ where
 struct Footnote {
     #[br(args(version))]
     text: Value,
-    #[br(parse_with(parse_optional))]
+    #[br(parse_with(parse_explicit_optional))]
     #[br(args(version))]
     marker: Option<Value>,
     show: i32,
@@ -634,35 +637,6 @@ impl Sizing {
     }
 }
 
-#[derive(Debug)]
-struct Value(RawValue);
-
-impl Value {
-    fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value {
-        self.0.decode(encoding, footnotes)
-    }
-}
-
-impl BinRead for Value {
-    type Args<'a> = (Version,);
-
-    fn read_options<R: Read + Seek>(
-        reader: &mut R,
-        endian: binrw::Endian,
-        (version,): (Version,),
-    ) -> BinResult<Self> {
-        let start = reader.stream_position()?;
-        for i in 0..4 {
-            let x = <u8>::read_options(reader, endian, ())?;
-            if x != 0 {
-                reader.seek(std::io::SeekFrom::Start(start + i))?;
-                break;
-            }
-        }
-        Ok(Value(RawValue::read_options(reader, endian, (version,))?))
-    }
-}
-
 #[binread]
 #[derive(Default)]
 struct U32String {
@@ -1078,78 +1052,151 @@ impl CustomCurrency {
 }
 
 #[binread]
-#[br(little)]
-//#[br(return_unexpected_error)]
-#[br(import(version: Version))]
+#[br(little, import(version: Version))]
 #[derive(Debug)]
-enum RawValue {
-    #[br(magic = 1u8)]
-    Number {
-        #[br(parse_with(parse_optional), args(version))]
-        mods: Option<ValueMods>,
-        #[br(parse_with(parse_format))]
-        format: Format,
-        x: f64,
-    },
-    #[br(magic = 2u8)]
-    VarNumber {
-        #[br(parse_with(parse_optional), args(version))]
-        mods: Option<ValueMods>,
-        #[br(parse_with(parse_format))]
-        format: Format,
-        x: f64,
-        var_name: U32String,
-        value_label: U32String,
-        #[br(parse_with(parse_show))]
-        show: Option<Show>,
-    },
-    #[br(magic = 3u8)]
-    Text {
-        local: U32String,
-        #[br(parse_with(parse_optional), args(version))]
-        mods: Option<ValueMods>,
-        id: U32String,
-        c: U32String,
-        #[br(parse_with(parse_bool))]
-        fixed: bool,
-    },
-    #[br(magic = 4u8)]
-    String {
-        #[br(parse_with(parse_optional), args(version))]
-        mods: Option<ValueMods>,
-        #[br(parse_with(parse_format))]
-        format: Format,
-        value_label: U32String,
-        var_name: U32String,
-        #[br(parse_with(parse_show))]
-        show: Option<Show>,
-        s: U32String,
-    },
-    #[br(magic = 5u8)]
-    VarName {
-        #[br(parse_with(parse_optional), args(version))]
-        mods: Option<ValueMods>,
-        var_name: U32String,
-        var_label: U32String,
-        #[br(parse_with(parse_show))]
-        show: Option<Show>,
-    },
-    #[br(magic = 6u8)]
-    FixedText {
-        local: U32String,
-        #[br(parse_with(parse_optional), args(version))]
-        mods: Option<ValueMods>,
-        id: U32String,
-        c: U32String,
-    },
-    Template {
-        #[br(dbg, parse_with(parse_optional), args(version))]
-        mods: Option<ValueMods>,
-        #[br(dbg)]
-        template: U32String,
-        #[br(parse_with(parse_vec), args(version))]
-        args: Vec<Argument>,
-    },
+struct ValueNumber {
+    #[br(parse_with(parse_explicit_optional), args(version))]
+    mods: Option<ValueMods>,
+    #[br(parse_with(parse_format))]
+    format: Format,
+    x: f64,
+}
+
+#[binread]
+#[br(little, import(version: Version))]
+#[derive(Debug)]
+struct ValueVarNumber {
+    #[br(parse_with(parse_explicit_optional), args(version))]
+    mods: Option<ValueMods>,
+    #[br(parse_with(parse_format))]
+    format: Format,
+    x: f64,
+    var_name: U32String,
+    value_label: U32String,
+    #[br(parse_with(parse_show))]
+    show: Option<Show>,
+}
+
+#[binread]
+#[br(little, import(version: Version))]
+#[derive(Debug)]
+struct ValueText {
+    local: U32String,
+    #[br(parse_with(parse_explicit_optional), args(version))]
+    mods: Option<ValueMods>,
+    id: U32String,
+    c: U32String,
+    #[br(parse_with(parse_bool))]
+    fixed: bool,
+}
+
+#[binread]
+#[br(little, import(version: Version))]
+#[derive(Debug)]
+struct ValueString {
+    #[br(parse_with(parse_explicit_optional), args(version))]
+    mods: Option<ValueMods>,
+    #[br(parse_with(parse_format))]
+    format: Format,
+    value_label: U32String,
+    var_name: U32String,
+    #[br(parse_with(parse_show))]
+    show: Option<Show>,
+    s: U32String,
+}
+
+#[binread]
+#[br(little, import(version: Version))]
+#[derive(Debug)]
+struct ValueVarName {
+    #[br(parse_with(parse_explicit_optional), args(version))]
+    mods: Option<ValueMods>,
+    var_name: U32String,
+    var_label: U32String,
+    #[br(parse_with(parse_show))]
+    show: Option<Show>,
+}
+
+#[binread]
+#[br(little, import(version: Version))]
+#[derive(Debug)]
+struct ValueFixedText {
+    local: U32String,
+    #[br(parse_with(parse_explicit_optional), args(version))]
+    mods: Option<ValueMods>,
+    id: U32String,
+    c: U32String,
+}
+
+#[binread]
+#[br(little, import(version: Version))]
+#[derive(Debug)]
+struct ValueTemplate {
+    #[br(dbg, parse_with(parse_explicit_optional), args(version))]
+    mods: Option<ValueMods>,
+    #[br(dbg)]
+    template: U32String,
+    #[br(parse_with(parse_vec), args(version))]
+    args: Vec<Argument>,
+}
+
+#[derive(Debug)]
+enum Value {
+    Number(ValueNumber),
+    VarNumber(ValueVarNumber),
+    Text(ValueText),
+    String(ValueString),
+    VarName(ValueVarName),
+    FixedText(ValueFixedText),
+    Template(ValueTemplate),
+}
+
+impl BinRead for Value {
+    type Args<'a> = (Version,);
+
+    fn read_options<R: Read + Seek>(
+        reader: &mut R,
+        endian: Endian,
+        args: Self::Args<'_>,
+    ) -> BinResult<Self> {
+        let start = reader.stream_position()?;
+        let kind = loop {
+            let x = <u8>::read_options(reader, endian, ())?;
+            if x != 0 {
+                break x;
+            }
+        };
+        match kind {
+            1 => ValueNumber::read_options(reader, endian, args).map(Self::Number),
+            2 => Ok(Self::VarNumber(ValueVarNumber::read_options(
+                reader, endian, args,
+            )?)),
+            3 => Ok(Self::Text(ValueText::read_options(reader, endian, args)?)),
+            4 => Ok(Self::String(ValueString::read_options(
+                reader, endian, args,
+            )?)),
+            5 => Ok(Self::VarName(ValueVarName::read_options(
+                reader, endian, args,
+            )?)),
+            6 => Ok(Self::FixedText(ValueFixedText::read_options(
+                reader, endian, args,
+            )?)),
+            b'1' | b'X' => {
+                reader.seek(std::io::SeekFrom::Current(-1))?;
+                Ok(Self::Template(ValueTemplate::read_options(
+                    reader, endian, args,
+                )?))
+            }
+            _ => Err(BinError::NoVariantMatch { pos: start }),
+        }
+        .inspect(|result| {
+            println!(
+                "{start:#x}..{:#x}: {result:?}",
+                reader.stream_position().unwrap()
+            )
+        })
+        .map_err(|e| e.with_message(format!("while parsing Value starting at offset {start:#x}")))
+    }
 }
 
 #[binrw::parser(reader, endian)]
@@ -1174,84 +1221,98 @@ fn parse_format() -> BinResult<Format> {
     Ok(UncheckedFormat::new(type_, w, d).fix())
 }
 
-impl RawValue {
+impl ValueNumber {
+    fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value {
+        pivot::Value::new_number_with_format((self.x != -f64::MAX).then_some(self.x), self.format)
+            .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes))
+    }
+}
+
+impl ValueVarNumber {
+    fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value {
+        pivot::Value::new_number_with_format((self.x != -f64::MAX).then_some(self.x), self.format)
+            .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes))
+            .with_value_label(self.value_label.decode_optional(encoding))
+            .with_variable_name(Some(self.var_name.decode(encoding)))
+            .with_show_value_label(self.show)
+    }
+}
+
+impl ValueText {
+    fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value {
+        pivot::Value::new_general_text(
+            self.local.decode(encoding),
+            self.c.decode(encoding),
+            self.id.decode(encoding),
+            !self.fixed,
+        )
+        .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes))
+    }
+}
+
+impl ValueString {
+    fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value {
+        pivot::Value::new(pivot::ValueInner::String(StringValue {
+            s: self.s.decode(encoding),
+            hex: self.format.type_() == Type::AHex,
+            show: self.show,
+            var_name: self.var_name.decode_optional(encoding),
+            value_label: self.value_label.decode_optional(encoding),
+        }))
+        .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes))
+    }
+}
+
+impl ValueVarName {
+    fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value {
+        pivot::Value::new(pivot::ValueInner::Variable(VariableValue {
+            show: self.show,
+            var_name: self.var_name.decode(encoding),
+            variable_label: self.var_label.decode_optional(encoding),
+        }))
+        .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes))
+    }
+}
+impl ValueFixedText {
+    fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value {
+        pivot::Value::new_general_text(
+            self.local.decode(encoding),
+            self.c.decode(encoding),
+            self.id.decode(encoding),
+            false,
+        )
+        .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes))
+    }
+}
+
+impl ValueTemplate {
+    fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value {
+        pivot::Value::new(pivot::ValueInner::Template(TemplateValue {
+            args: self
+                .args
+                .iter()
+                .map(|argument| argument.decode(encoding, footnotes))
+                .collect(),
+            localized: self.template.decode(encoding),
+            id: self
+                .mods
+                .as_ref()
+                .and_then(|mods| mods.template_id(encoding)),
+        }))
+        .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes))
+    }
+}
+
+impl Value {
     fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value {
         match self {
-            RawValue::Number { mods, format, x } => {
-                pivot::Value::new_number_with_format((*x != -f64::MAX).then_some(*x), *format)
-                    .with_styling(ValueMods::decode_optional(mods, encoding, footnotes))
-            }
-            RawValue::VarNumber {
-                mods,
-                format,
-                x,
-                var_name,
-                value_label,
-                show,
-            } => pivot::Value::new_number_with_format((*x != -f64::MAX).then_some(*x), *format)
-                .with_styling(ValueMods::decode_optional(mods, encoding, footnotes))
-                .with_value_label(value_label.decode_optional(encoding))
-                .with_variable_name(Some(var_name.decode(encoding)))
-                .with_show_value_label(*show),
-            RawValue::Text {
-                local,
-                mods,
-                id,
-                c,
-                fixed,
-            } => pivot::Value::new_general_text(
-                local.decode(encoding),
-                c.decode(encoding),
-                id.decode(encoding),
-                !*fixed,
-            )
-            .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)),
-            RawValue::String {
-                mods,
-                format,
-                value_label,
-                var_name,
-                show,
-                s,
-            } => pivot::Value::new(pivot::ValueInner::String(StringValue {
-                s: s.decode(encoding),
-                hex: format.type_() == Type::AHex,
-                show: *show,
-                var_name: var_name.decode_optional(encoding),
-                value_label: value_label.decode_optional(encoding),
-            }))
-            .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)),
-            RawValue::VarName {
-                mods,
-                var_name,
-                var_label,
-                show,
-            } => pivot::Value::new(pivot::ValueInner::Variable(VariableValue {
-                show: *show,
-                var_name: var_name.decode(encoding),
-                variable_label: var_label.decode_optional(encoding),
-            }))
-            .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)),
-            RawValue::FixedText { local, mods, id, c } => pivot::Value::new_general_text(
-                local.decode(encoding),
-                c.decode(encoding),
-                id.decode(encoding),
-                false,
-            )
-            .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)),
-            RawValue::Template {
-                mods,
-                template,
-                args,
-            } => pivot::Value::new(pivot::ValueInner::Template(TemplateValue {
-                args: args
-                    .iter()
-                    .map(|argument| argument.decode(encoding, footnotes))
-                    .collect(),
-                localized: template.decode(encoding),
-                id: mods.as_ref().and_then(|mods| mods.template_id(encoding)),
-            }))
-            .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)),
+            Value::Number(number) => number.decode(encoding, footnotes),
+            Value::VarNumber(var_number) => var_number.decode(encoding, footnotes),
+            Value::Text(text) => text.decode(encoding, footnotes),
+            Value::String(string) => string.decode(encoding, footnotes),
+            Value::VarName(var_name) => var_name.decode(encoding, footnotes),
+            Value::FixedText(fixed_text) => fixed_text.decode(encoding, footnotes),
+            Value::Template(template) => template.decode(encoding, footnotes),
         }
     }
 }
@@ -1398,7 +1459,7 @@ impl ValueMods {
 struct TemplateString {
     #[br(parse_with(parse_counted), temp)]
     _sponge: Sponge,
-    #[br(parse_with(parse_optional))]
+    #[br(parse_with(parse_explicit_optional))]
     id: Option<U32String>,
 }
 
@@ -1419,9 +1480,9 @@ impl BinRead for Sponge {
 #[br(little)]
 #[derive(Debug, Default)]
 struct StylePair {
-    #[br(parse_with(parse_optional))]
+    #[br(parse_with(parse_explicit_optional))]
     font_style: Option<FontStyle>,
-    #[br(parse_with(parse_optional))]
+    #[br(parse_with(parse_explicit_optional))]
     cell_style: Option<CellStyle>,
 }