From 23ecda0845eb1e109f32717b33cbecdef92cf104 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 18 Aug 2025 09:05:21 -0700 Subject: [PATCH] work --- rust/pspp/src/format/mod.rs | 14 +- rust/pspp/src/main.rs | 10 +- rust/pspp/src/output/pivot/mod.rs | 283 +++++++++++++++++++++++++----- rust/pspp/src/output/spv.rs | 6 +- rust/pspp/src/sys/raw.rs | 58 ++++-- 5 files changed, 305 insertions(+), 66 deletions(-) diff --git a/rust/pspp/src/format/mod.rs b/rust/pspp/src/format/mod.rs index 2fe6e982dd..a9e9e7b78b 100644 --- a/rust/pspp/src/format/mod.rs +++ b/rust/pspp/src/format/mod.rs @@ -25,6 +25,7 @@ use chrono::{Datelike, Local}; use enum_iterator::{all, Sequence}; use enum_map::{Enum, EnumMap}; use serde::{Deserialize, Serialize}; +use smallstr::SmallString; use thiserror::Error as ThisError; use unicode_width::UnicodeWidthStr; @@ -482,13 +483,24 @@ impl TryFrom for UncheckedFormat { } } -#[derive(Copy, Clone, PartialEq, Eq, Hash, Serialize)] +#[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct Format { type_: Type, w: Width, d: Decimals, } +impl Serialize for Format { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut s = SmallString::<[u8; 16]>::new(); + write!(&mut s, "{}", self).unwrap(); + s.serialize(serializer) + } +} + impl Format { pub const F40: Format = Format { type_: Type::F, diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index 19907f1741..f388f89c6e 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -26,7 +26,8 @@ use pspp::{ sys::{ self, raw::{ - infer_encoding, records::Compression, Decoder, EncodingReport, Magic, Reader, Record, + get_encoding_info, infer_encoding, records::Compression, Decoder, EncodingReport, + Magic, Reader, Record, }, ReadOptions, Records, }, @@ -472,9 +473,12 @@ impl Show { } } Mode::Encodings => { + let records: Vec = reader.records().collect::, _>>()?; + let (encoding, character_code) = get_encoding_info(&records); + let mut record_strings = reader.header().get_strings(); - for record in reader.records() { - record_strings.append(&mut record?.get_strings()); + for record in &records { + record_strings.append(&mut record.get_strings()); } let Some(encoding_report) = EncodingReport::new(&record_strings) else { output.warn(&"No valid encodings found."); diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index 39645987bc..bd6c4242fd 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -60,7 +60,11 @@ use enum_iterator::Sequence; use enum_map::{enum_map, Enum, EnumMap}; use look_xml::TableProperties; use quick_xml::{de::from_str, DeError}; -use serde::{de::Visitor, ser::SerializeStruct, Deserialize, Serialize}; +use serde::{ + de::Visitor, + ser::{SerializeMap, SerializeStruct}, + Deserialize, Serialize, Serializer, +}; use smallstr::SmallString; use smallvec::SmallVec; use thiserror::Error as ThisError; @@ -379,7 +383,7 @@ impl PivotTable { format, honor_small: class == Class::Other, value: number, - var_name: None, + variable: None, value_label: None, })); self.insert(data_indexes, value); @@ -492,11 +496,15 @@ pub struct Group { } impl Group { - pub fn new(name: impl Into) -> Group { + pub fn new(name: impl Into) -> Self { + Self::with_capacity(name, 0) + } + + pub fn with_capacity(name: impl Into, capacity: usize) -> Self { Self { len: 0, name: Box::new(name.into()), - children: Vec::new(), + children: Vec::with_capacity(capacity), show_label: false, } } @@ -1922,29 +1930,6 @@ impl Display for Display26Adic { } } -#[cfg(test)] -mod tests { - use super::Display26Adic; - #[test] - fn display_26adic() { - for (number, lowercase, uppercase) in [ - (0, "", ""), - (1, "a", "A"), - (2, "b", "B"), - (26, "z", "Z"), - (27, "aa", "AA"), - (28, "ab", "AB"), - (29, "ac", "AC"), - (18278, "zzz", "ZZZ"), - (18279, "aaaa", "AAAA"), - (19010, "abcd", "ABCD"), - ] { - assert_eq!(Display26Adic::new_lowercase(number).to_string(), lowercase); - assert_eq!(Display26Adic::new_uppercase(number).to_string(), uppercase); - } - } -} - /// The content of a single pivot table cell. /// /// A [Value] is also a pivot table's title, caption, footnote marker and @@ -1999,7 +1984,25 @@ impl Serialize for Value { } } +/// Wrapper for [Value] that uses [Value::serialize_bare] for serialization. +#[derive(Serialize)] +struct BareValue<'a>(#[serde(serialize_with = "Value::serialize_bare")] &'a Value); + impl Value { + fn serialize_bare(&self, serializer: S) -> Result + where + S: Serializer, + { + match &self.inner { + ValueInner::Number(number_value) => number_value.value.serialize(serializer), + ValueInner::String(string_value) => string_value.s.serialize(serializer), + ValueInner::Variable(variable_value) => variable_value.var_name.serialize(serializer), + ValueInner::Text(text_value) => text_value.localized.serialize(serializer), + ValueInner::Template(template_value) => template_value.localized.serialize(serializer), + ValueInner::Empty => ().serialize(serializer), + } + } + fn new(inner: ValueInner) -> Self { Self { inner, @@ -2012,7 +2015,7 @@ impl Value { format, honor_small: false, value: x, - var_name: None, + variable: None, value_label: None, })) } @@ -2050,7 +2053,7 @@ impl Value { }, honor_small: false, value: *number, - var_name, + variable: var_name, value_label, })), Datum::String(string) => Self::new(ValueInner::String(StringValue { @@ -2417,9 +2420,11 @@ impl Display for DisplayValue<'_> { f.write_str(local) } - ValueInner::Template(TemplateValue { args, local, .. }) => { - self.template(f, local, args) - } + ValueInner::Template(TemplateValue { + args, + localized: local, + .. + }) => self.template(f, local, args), ValueInner::Empty => Ok(()), }?; @@ -2455,24 +2460,58 @@ impl Debug for Value { } } -#[derive(Clone, Debug, Serialize)] +#[derive(Clone, Debug)] pub struct NumberValue { - pub show: Option, + /// The numerical value, or `None` if it is a missing value. + pub value: Option, pub format: Format, + pub show: Option, pub honor_small: bool, - pub value: Option, - pub var_name: Option, + pub variable: Option, pub value_label: Option, } +impl Serialize for NumberValue { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + if self.format.type_() == Type::F && self.variable.is_none() && self.value_label.is_none() { + self.value.serialize(serializer) + } else { + let mut s = serializer.serialize_map(None)?; + s.serialize_entry("value", &self.value)?; + s.serialize_entry("format", &self.format)?; + if let Some(show) = self.show { + s.serialize_entry("show", &show)?; + } + if self.honor_small { + s.serialize_entry("honor_small", &self.honor_small)?; + } + if let Some(variable) = &self.variable { + s.serialize_entry("variable", variable)?; + } + if let Some(value_label) = &self.value_label { + s.serialize_entry("value_label", value_label)?; + } + s.end() + } + } +} + #[derive(Clone, Debug, Serialize)] pub struct StringValue { - pub show: Option, - pub hex: bool, - - /// If `hex` is true, this string should already be hex digits + /// The string value. + /// + /// If `hex` is true, this should contain hex digits, not raw binary data /// (otherwise it would be impossible to encode non-UTF-8 data). pub s: String, + + /// True if `s` is hex digits. + pub hex: bool, + + pub show: Option, + pub var_name: Option, pub value_label: Option, } @@ -2535,7 +2574,7 @@ impl TextValue { #[derive(Clone, Debug, Serialize)] pub struct TemplateValue { pub args: Vec>, - pub local: String, + pub localized: String, pub id: String, } @@ -2635,3 +2674,165 @@ impl ValueInner { } } } + +pub struct MetadataEntry { + pub name: Value, + pub value: MetadataValue, +} + +pub enum MetadataValue { + Leaf(Value), + Group(Vec), +} + +impl MetadataEntry { + fn into_pivot_table(self) -> PivotTable { + let mut data = Vec::new(); + let group = match self.visit(&mut data) { + Category::Group(group) => group, + Category::Leaf(leaf) => Group::new("Metadata").with(leaf), + }; + PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data( + data.into_iter() + .enumerate() + .map(|(row, value)| ([row], value)), + ) + } + fn visit(self, data: &mut Vec) -> Category { + match self.value { + MetadataValue::Leaf(value) => { + data.push(value); + Leaf::new(self.name).into() + } + MetadataValue::Group(items) => Group::with_capacity(self.name, items.len()) + .with_multiple(items.into_iter().map(|item| item.visit(data))) + .into(), + } + } +} + +impl Serialize for MetadataValue { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + MetadataValue::Leaf(value) => value.serialize_bare(serializer), + MetadataValue::Group(items) => { + let mut map = serializer.serialize_map(Some(items.len()))?; + for item in items { + let name = item.name.display(()).to_string(); + map.serialize_entry(&name, &item.value)?; + } + map.end() + } + } + } +} +impl Serialize for MetadataEntry { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match &self.value { + MetadataValue::Leaf(value) => { + let mut map = serializer.serialize_map(Some(1))?; + let name = self.name.display(()).to_string(); + map.serialize_entry(&name, &BareValue(&value))?; + map.end() + } + MetadataValue::Group(items) => { + let mut map = serializer.serialize_map(Some(items.len()))?; + for item in items { + let name = item.name.display(()).to_string(); + map.serialize_entry(&name, &item.value)?; + } + map.end() + } + } + } +} + +#[cfg(test)] +mod tests { + use crate::output::pivot::{Display26Adic, MetadataEntry, MetadataValue, Value}; + + #[test] + fn display_26adic() { + for (number, lowercase, uppercase) in [ + (0, "", ""), + (1, "a", "A"), + (2, "b", "B"), + (26, "z", "Z"), + (27, "aa", "AA"), + (28, "ab", "AB"), + (29, "ac", "AC"), + (18278, "zzz", "ZZZ"), + (18279, "aaaa", "AAAA"), + (19010, "abcd", "ABCD"), + ] { + assert_eq!(Display26Adic::new_lowercase(number).to_string(), lowercase); + assert_eq!(Display26Adic::new_uppercase(number).to_string(), uppercase); + } + } + + #[test] + fn metadata_entry() { + let tree = MetadataEntry { + name: Value::from("Group"), + value: MetadataValue::Group(vec![ + MetadataEntry { + name: Value::from("Name 1"), + value: MetadataValue::Leaf(Value::from("Value 1")), + }, + MetadataEntry { + name: Value::from("Subgroup 1"), + value: MetadataValue::Group(vec![ + MetadataEntry { + name: Value::from("Subname 1"), + value: MetadataValue::Leaf(Value::from("Subvalue 1")), + }, + MetadataEntry { + name: Value::from("Subname 2"), + value: MetadataValue::Leaf(Value::from("Subvalue 2")), + }, + MetadataEntry { + name: Value::from("Subname 3"), + value: MetadataValue::Leaf(Value::new_integer(Some(3.0))), + }, + ]), + }, + MetadataEntry { + name: Value::from("Name 2"), + value: MetadataValue::Leaf(Value::from("Value 2")), + }, + ]), + }; + assert_eq!( + serde_json::to_string_pretty(&tree).unwrap(), + r#"{ + "Name 1": "Value 1", + "Subgroup 1": { + "Subname 1": "Subvalue 1", + "Subname 2": "Subvalue 2", + "Subname 3": 3.0 + }, + "Name 2": "Value 2" +}"# + ); + + assert_eq!( + tree.into_pivot_table().to_string(), + r#"╭────────────────────┬──────────╮ +│ Name 1 │Value 1 │ +├────────────────────┼──────────┤ +│Subgroup 1 Subname 1│Subvalue 1│ +│ Subname 2│Subvalue 2│ +│ Subname 3│ 3│ +├────────────────────┼──────────┤ +│ Name 2 │Value 2 │ +╰────────────────────┴──────────╯ +"# + ); + } +} diff --git a/rust/pspp/src/output/spv.rs b/rust/pspp/src/output/spv.rs index 21854df5ef..9df728fec1 100644 --- a/rust/pspp/src/output/spv.rs +++ b/rust/pspp/src/output/spv.rs @@ -1278,13 +1278,13 @@ impl BinWrite for Value { format: number.format, honor_small: number.honor_small, }; - if number.var_name.is_some() || number.value_label.is_some() { + if number.variable.is_some() || number.value_label.is_some() { ( 2u8, ValueMod::new(self), format, number.value.unwrap_or(f64::MIN), - SpvString::optional(&number.var_name), + SpvString::optional(&number.variable), SpvString::optional(&number.value_label), Show::as_spv(&number.show), ) @@ -1343,7 +1343,7 @@ impl BinWrite for Value { ( 0u8, ValueMod::new(self), - SpvString(&template.local), + SpvString(&template.localized), template.args.len() as u32, ) .write_options(writer, endian, args)?; diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index f6ebfd406b..75218b794f 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -543,6 +543,20 @@ pub enum Record { } impl Record { + pub fn as_encoding_record(&self) -> Option<&EncodingRecord> { + match self { + Record::Encoding(encoding_record) => Some(encoding_record), + _ => None, + } + } + + pub fn as_integer_info_record(&self) -> Option<&IntegerInfoRecord> { + match self { + Record::IntegerInfo(integer_info_record) => Some(integer_info_record), + _ => None, + } + } + pub fn as_long_string_missing_values( &self, ) -> Option<&LongStringMissingValueRecord> { @@ -806,24 +820,7 @@ pub fn infer_encoding( records: &[Record], mut warn: impl FnMut(Warning), ) -> Result<&'static Encoding, Error> { - // Get the character encoding from the first (and only) encoding record. - let encoding = records - .iter() - .filter_map(|record| match record { - Record::Encoding(record) => Some(record.0.as_str()), - _ => None, - }) - .next(); - - // Get the character code from the first (only) integer info record. - let character_code = records - .iter() - .filter_map(|record| match record { - Record::IntegerInfo(record) => Some(record.inner.character_code), - _ => None, - }) - .next(); - + let (encoding, character_code) = get_encoding_info(records); match get_encoding(encoding, character_code) { Ok(encoding) => Ok(encoding), Err(err @ EncodingError::Ebcdic) => Err(Error::new(None, err.into())), @@ -835,6 +832,31 @@ pub fn infer_encoding( } } +pub fn get_encoding_info(records: &[Record]) -> (Option<&str>, Option) { + ( + get_encoding_record(records).map(|r| r.0.as_str()), + get_integer_info_record(records).map(|r| r.inner.character_code), + ) +} + +pub fn get_encoding_record<'a, I>(iter: I) -> Option<&'a EncodingRecord> +where + I: IntoIterator, +{ + iter.into_iter() + .filter_map(|record| record.as_encoding_record()) + .next() +} + +pub fn get_integer_info_record<'a, I>(iter: I) -> Option<&'a IntegerInfoRecord> +where + I: IntoIterator, +{ + iter.into_iter() + .filter_map(|record| record.as_integer_info_record()) + .next() +} + /// An [Encoding] along with a function to report decoding errors. /// /// This is used by functions that decode raw records. -- 2.30.2