From: Ben Pfaff Date: Tue, 14 Oct 2025 20:57:57 +0000 (-0700) Subject: improve value parsing X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a6475d08f449bca3861b6f6e09e59fc11e1dd758;p=pspp improve value parsing --- diff --git a/rust/pspp/src/output/spv/light.rs b/rust/pspp/src/output/spv/light.rs index 05b5747ac9..22a01e8386 100644 --- a/rust/pspp/src/output/spv/light.rs +++ b/rust/pspp/src/output/spv/light.rs @@ -7,7 +7,10 @@ use std::{ sync::Arc, }; -use binrw::{BinRead, BinResult, Endian, Error as BinError, VecArgs, binread, io::TakeSeekExt}; +use binrw::{ + BinRead, BinResult, Endian, Error as BinError, VecArgs, binread, error::ContextExt, + io::TakeSeekExt, +}; use chrono::DateTime; use displaydoc::Display; use encoding_rs::{Encoding, WINDOWS_1252}; @@ -267,9 +270,9 @@ struct Titles { user_title: Value, #[br(temp)] _3: Optional, - #[br(parse_with(parse_optional), args(version))] + #[br(parse_with(parse_explicit_optional), args(version))] corner_text: Option, - #[br(parse_with(parse_optional), args(version))] + #[br(parse_with(parse_explicit_optional), args(version))] caption: Option, } @@ -284,7 +287,7 @@ struct One; struct Zero; #[binrw::parser(reader, endian)] -pub fn parse_optional<'a, T, A>(args: A, ...) -> BinResult> +pub fn parse_explicit_optional<'a, T, A>(args: A, ...) -> BinResult> where T: BinRead = A>, { @@ -315,7 +318,7 @@ where struct Footnote { #[br(args(version))] text: Value, - #[br(parse_with(parse_optional))] + #[br(parse_with(parse_explicit_optional))] #[br(args(version))] marker: Option, show: i32, @@ -634,35 +637,6 @@ impl Sizing { } } -#[derive(Debug)] -struct Value(RawValue); - -impl Value { - fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { - self.0.decode(encoding, footnotes) - } -} - -impl BinRead for Value { - type Args<'a> = (Version,); - - fn read_options( - reader: &mut R, - endian: binrw::Endian, - (version,): (Version,), - ) -> BinResult { - let start = reader.stream_position()?; - for i in 0..4 { - let x = ::read_options(reader, endian, ())?; - if x != 0 { - reader.seek(std::io::SeekFrom::Start(start + i))?; - break; - } - } - Ok(Value(RawValue::read_options(reader, endian, (version,))?)) - } -} - #[binread] #[derive(Default)] struct U32String { @@ -1078,78 +1052,151 @@ impl CustomCurrency { } #[binread] -#[br(little)] -//#[br(return_unexpected_error)] -#[br(import(version: Version))] +#[br(little, import(version: Version))] #[derive(Debug)] -enum RawValue { - #[br(magic = 1u8)] - Number { - #[br(parse_with(parse_optional), args(version))] - mods: Option, - #[br(parse_with(parse_format))] - format: Format, - x: f64, - }, - #[br(magic = 2u8)] - VarNumber { - #[br(parse_with(parse_optional), args(version))] - mods: Option, - #[br(parse_with(parse_format))] - format: Format, - x: f64, - var_name: U32String, - value_label: U32String, - #[br(parse_with(parse_show))] - show: Option, - }, - #[br(magic = 3u8)] - Text { - local: U32String, - #[br(parse_with(parse_optional), args(version))] - mods: Option, - id: U32String, - c: U32String, - #[br(parse_with(parse_bool))] - fixed: bool, - }, - #[br(magic = 4u8)] - String { - #[br(parse_with(parse_optional), args(version))] - mods: Option, - #[br(parse_with(parse_format))] - format: Format, - value_label: U32String, - var_name: U32String, - #[br(parse_with(parse_show))] - show: Option, - s: U32String, - }, - #[br(magic = 5u8)] - VarName { - #[br(parse_with(parse_optional), args(version))] - mods: Option, - var_name: U32String, - var_label: U32String, - #[br(parse_with(parse_show))] - show: Option, - }, - #[br(magic = 6u8)] - FixedText { - local: U32String, - #[br(parse_with(parse_optional), args(version))] - mods: Option, - id: U32String, - c: U32String, - }, - Template { - #[br(dbg, parse_with(parse_optional), args(version))] - mods: Option, - #[br(dbg)] - template: U32String, - #[br(parse_with(parse_vec), args(version))] - args: Vec, - }, +struct ValueNumber { + #[br(parse_with(parse_explicit_optional), args(version))] + mods: Option, + #[br(parse_with(parse_format))] + format: Format, + x: f64, +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct ValueVarNumber { + #[br(parse_with(parse_explicit_optional), args(version))] + mods: Option, + #[br(parse_with(parse_format))] + format: Format, + x: f64, + var_name: U32String, + value_label: U32String, + #[br(parse_with(parse_show))] + show: Option, +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct ValueText { + local: U32String, + #[br(parse_with(parse_explicit_optional), args(version))] + mods: Option, + id: U32String, + c: U32String, + #[br(parse_with(parse_bool))] + fixed: bool, +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct ValueString { + #[br(parse_with(parse_explicit_optional), args(version))] + mods: Option, + #[br(parse_with(parse_format))] + format: Format, + value_label: U32String, + var_name: U32String, + #[br(parse_with(parse_show))] + show: Option, + s: U32String, +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct ValueVarName { + #[br(parse_with(parse_explicit_optional), args(version))] + mods: Option, + var_name: U32String, + var_label: U32String, + #[br(parse_with(parse_show))] + show: Option, +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct ValueFixedText { + local: U32String, + #[br(parse_with(parse_explicit_optional), args(version))] + mods: Option, + id: U32String, + c: U32String, +} + +#[binread] +#[br(little, import(version: Version))] +#[derive(Debug)] +struct ValueTemplate { + #[br(dbg, parse_with(parse_explicit_optional), args(version))] + mods: Option, + #[br(dbg)] + template: U32String, + #[br(parse_with(parse_vec), args(version))] + args: Vec, +} + +#[derive(Debug)] +enum Value { + Number(ValueNumber), + VarNumber(ValueVarNumber), + Text(ValueText), + String(ValueString), + VarName(ValueVarName), + FixedText(ValueFixedText), + Template(ValueTemplate), +} + +impl BinRead for Value { + type Args<'a> = (Version,); + + fn read_options( + reader: &mut R, + endian: Endian, + args: Self::Args<'_>, + ) -> BinResult { + let start = reader.stream_position()?; + let kind = loop { + let x = ::read_options(reader, endian, ())?; + if x != 0 { + break x; + } + }; + match kind { + 1 => ValueNumber::read_options(reader, endian, args).map(Self::Number), + 2 => Ok(Self::VarNumber(ValueVarNumber::read_options( + reader, endian, args, + )?)), + 3 => Ok(Self::Text(ValueText::read_options(reader, endian, args)?)), + 4 => Ok(Self::String(ValueString::read_options( + reader, endian, args, + )?)), + 5 => Ok(Self::VarName(ValueVarName::read_options( + reader, endian, args, + )?)), + 6 => Ok(Self::FixedText(ValueFixedText::read_options( + reader, endian, args, + )?)), + b'1' | b'X' => { + reader.seek(std::io::SeekFrom::Current(-1))?; + Ok(Self::Template(ValueTemplate::read_options( + reader, endian, args, + )?)) + } + _ => Err(BinError::NoVariantMatch { pos: start }), + } + .inspect(|result| { + println!( + "{start:#x}..{:#x}: {result:?}", + reader.stream_position().unwrap() + ) + }) + .map_err(|e| e.with_message(format!("while parsing Value starting at offset {start:#x}"))) + } } #[binrw::parser(reader, endian)] @@ -1174,84 +1221,98 @@ fn parse_format() -> BinResult { Ok(UncheckedFormat::new(type_, w, d).fix()) } -impl RawValue { +impl ValueNumber { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { + pivot::Value::new_number_with_format((self.x != -f64::MAX).then_some(self.x), self.format) + .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes)) + } +} + +impl ValueVarNumber { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { + pivot::Value::new_number_with_format((self.x != -f64::MAX).then_some(self.x), self.format) + .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes)) + .with_value_label(self.value_label.decode_optional(encoding)) + .with_variable_name(Some(self.var_name.decode(encoding))) + .with_show_value_label(self.show) + } +} + +impl ValueText { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { + pivot::Value::new_general_text( + self.local.decode(encoding), + self.c.decode(encoding), + self.id.decode(encoding), + !self.fixed, + ) + .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes)) + } +} + +impl ValueString { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { + pivot::Value::new(pivot::ValueInner::String(StringValue { + s: self.s.decode(encoding), + hex: self.format.type_() == Type::AHex, + show: self.show, + var_name: self.var_name.decode_optional(encoding), + value_label: self.value_label.decode_optional(encoding), + })) + .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes)) + } +} + +impl ValueVarName { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { + pivot::Value::new(pivot::ValueInner::Variable(VariableValue { + show: self.show, + var_name: self.var_name.decode(encoding), + variable_label: self.var_label.decode_optional(encoding), + })) + .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes)) + } +} +impl ValueFixedText { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { + pivot::Value::new_general_text( + self.local.decode(encoding), + self.c.decode(encoding), + self.id.decode(encoding), + false, + ) + .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes)) + } +} + +impl ValueTemplate { + fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { + pivot::Value::new(pivot::ValueInner::Template(TemplateValue { + args: self + .args + .iter() + .map(|argument| argument.decode(encoding, footnotes)) + .collect(), + localized: self.template.decode(encoding), + id: self + .mods + .as_ref() + .and_then(|mods| mods.template_id(encoding)), + })) + .with_styling(ValueMods::decode_optional(&self.mods, encoding, footnotes)) + } +} + +impl Value { fn decode(&self, encoding: &'static Encoding, footnotes: &pivot::Footnotes) -> pivot::Value { match self { - RawValue::Number { mods, format, x } => { - pivot::Value::new_number_with_format((*x != -f64::MAX).then_some(*x), *format) - .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)) - } - RawValue::VarNumber { - mods, - format, - x, - var_name, - value_label, - show, - } => pivot::Value::new_number_with_format((*x != -f64::MAX).then_some(*x), *format) - .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)) - .with_value_label(value_label.decode_optional(encoding)) - .with_variable_name(Some(var_name.decode(encoding))) - .with_show_value_label(*show), - RawValue::Text { - local, - mods, - id, - c, - fixed, - } => pivot::Value::new_general_text( - local.decode(encoding), - c.decode(encoding), - id.decode(encoding), - !*fixed, - ) - .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)), - RawValue::String { - mods, - format, - value_label, - var_name, - show, - s, - } => pivot::Value::new(pivot::ValueInner::String(StringValue { - s: s.decode(encoding), - hex: format.type_() == Type::AHex, - show: *show, - var_name: var_name.decode_optional(encoding), - value_label: value_label.decode_optional(encoding), - })) - .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)), - RawValue::VarName { - mods, - var_name, - var_label, - show, - } => pivot::Value::new(pivot::ValueInner::Variable(VariableValue { - show: *show, - var_name: var_name.decode(encoding), - variable_label: var_label.decode_optional(encoding), - })) - .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)), - RawValue::FixedText { local, mods, id, c } => pivot::Value::new_general_text( - local.decode(encoding), - c.decode(encoding), - id.decode(encoding), - false, - ) - .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)), - RawValue::Template { - mods, - template, - args, - } => pivot::Value::new(pivot::ValueInner::Template(TemplateValue { - args: args - .iter() - .map(|argument| argument.decode(encoding, footnotes)) - .collect(), - localized: template.decode(encoding), - id: mods.as_ref().and_then(|mods| mods.template_id(encoding)), - })) - .with_styling(ValueMods::decode_optional(mods, encoding, footnotes)), + Value::Number(number) => number.decode(encoding, footnotes), + Value::VarNumber(var_number) => var_number.decode(encoding, footnotes), + Value::Text(text) => text.decode(encoding, footnotes), + Value::String(string) => string.decode(encoding, footnotes), + Value::VarName(var_name) => var_name.decode(encoding, footnotes), + Value::FixedText(fixed_text) => fixed_text.decode(encoding, footnotes), + Value::Template(template) => template.decode(encoding, footnotes), } } } @@ -1398,7 +1459,7 @@ impl ValueMods { struct TemplateString { #[br(parse_with(parse_counted), temp)] _sponge: Sponge, - #[br(parse_with(parse_optional))] + #[br(parse_with(parse_explicit_optional))] id: Option, } @@ -1419,9 +1480,9 @@ impl BinRead for Sponge { #[br(little)] #[derive(Debug, Default)] struct StylePair { - #[br(parse_with(parse_optional))] + #[br(parse_with(parse_explicit_optional))] font_style: Option, - #[br(parse_with(parse_optional))] + #[br(parse_with(parse_explicit_optional))] cell_style: Option, }