work rust
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 18 Aug 2025 16:05:21 +0000 (09:05 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 18 Aug 2025 16:05:21 +0000 (09:05 -0700)
rust/pspp/src/format/mod.rs
rust/pspp/src/main.rs
rust/pspp/src/output/pivot/mod.rs
rust/pspp/src/output/spv.rs
rust/pspp/src/sys/raw.rs

index 2fe6e982ddf1309231de4c83782a513b160c1917..a9e9e7b78ba1ccaf52ae7b5b1b3a0c8cf0b6f3e5 100644 (file)
@@ -25,6 +25,7 @@ use chrono::{Datelike, Local};
 use enum_iterator::{all, Sequence};
 use enum_map::{Enum, EnumMap};
 use serde::{Deserialize, Serialize};
+use smallstr::SmallString;
 use thiserror::Error as ThisError;
 use unicode_width::UnicodeWidthStr;
 
@@ -482,13 +483,24 @@ impl TryFrom<AbstractFormat> for UncheckedFormat {
     }
 }
 
-#[derive(Copy, Clone, PartialEq, Eq, Hash, Serialize)]
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
 pub struct Format {
     type_: Type,
     w: Width,
     d: Decimals,
 }
 
+impl Serialize for Format {
+    /// Serializes the format as the string produced by its `Display`
+    /// implementation, rather than as a structure of its fields.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        // Render the textual form into a small-string buffer first, then hand
+        // the finished text to the serializer as a plain string.
+        let mut text = SmallString::<[u8; 16]>::new();
+        write!(&mut text, "{}", self).unwrap();
+        serializer.serialize_str(text.as_str())
+    }
+}
+
 impl Format {
     pub const F40: Format = Format {
         type_: Type::F,
index 19907f17417abb3b2dd38a74243b9601563230da..f388f89c6edc940f4ccf5c02bbd9e37b880e06a3 100644 (file)
@@ -26,7 +26,8 @@ use pspp::{
     sys::{
         self,
         raw::{
-            infer_encoding, records::Compression, Decoder, EncodingReport, Magic, Reader, Record,
+            get_encoding_info, infer_encoding, records::Compression, Decoder, EncodingReport,
+            Magic, Reader, Record,
         },
         ReadOptions, Records,
     },
@@ -472,9 +473,12 @@ impl Show {
                 }
             }
             Mode::Encodings => {
+                let records: Vec<Record> = reader.records().collect::<Result<Vec<_>, _>>()?;
+                let (encoding, character_code) = get_encoding_info(&records);
+
                 let mut record_strings = reader.header().get_strings();
-                for record in reader.records() {
-                    record_strings.append(&mut record?.get_strings());
+                for record in &records {
+                    record_strings.append(&mut record.get_strings());
                 }
                 let Some(encoding_report) = EncodingReport::new(&record_strings) else {
                     output.warn(&"No valid encodings found.");
index 39645987bc1463daa46bfa2f65bc2cc171023447..bd6c4242fd04e3d3f8236bec41a32bc4c65ce32f 100644 (file)
@@ -60,7 +60,11 @@ use enum_iterator::Sequence;
 use enum_map::{enum_map, Enum, EnumMap};
 use look_xml::TableProperties;
 use quick_xml::{de::from_str, DeError};
-use serde::{de::Visitor, ser::SerializeStruct, Deserialize, Serialize};
+use serde::{
+    de::Visitor,
+    ser::{SerializeMap, SerializeStruct},
+    Deserialize, Serialize, Serializer,
+};
 use smallstr::SmallString;
 use smallvec::SmallVec;
 use thiserror::Error as ThisError;
@@ -379,7 +383,7 @@ impl PivotTable {
             format,
             honor_small: class == Class::Other,
             value: number,
-            var_name: None,
+            variable: None,
             value_label: None,
         }));
         self.insert(data_indexes, value);
@@ -492,11 +496,15 @@ pub struct Group {
 }
 
 impl Group {
-    pub fn new(name: impl Into<Value>) -> Group {
+    pub fn new(name: impl Into<Value>) -> Self {
+        Self::with_capacity(name, 0)
+    }
+
+    pub fn with_capacity(name: impl Into<Value>, capacity: usize) -> Self {
         Self {
             len: 0,
             name: Box::new(name.into()),
-            children: Vec::new(),
+            children: Vec::with_capacity(capacity),
             show_label: false,
         }
     }
@@ -1922,29 +1930,6 @@ impl Display for Display26Adic {
     }
 }
 
-#[cfg(test)]
-mod tests {
-    use super::Display26Adic;
-    #[test]
-    fn display_26adic() {
-        for (number, lowercase, uppercase) in [
-            (0, "", ""),
-            (1, "a", "A"),
-            (2, "b", "B"),
-            (26, "z", "Z"),
-            (27, "aa", "AA"),
-            (28, "ab", "AB"),
-            (29, "ac", "AC"),
-            (18278, "zzz", "ZZZ"),
-            (18279, "aaaa", "AAAA"),
-            (19010, "abcd", "ABCD"),
-        ] {
-            assert_eq!(Display26Adic::new_lowercase(number).to_string(), lowercase);
-            assert_eq!(Display26Adic::new_uppercase(number).to_string(), uppercase);
-        }
-    }
-}
-
 /// The content of a single pivot table cell.
 ///
 /// A [Value] is also a pivot table's title, caption, footnote marker and
@@ -1999,7 +1984,25 @@ impl Serialize for Value {
     }
 }
 
+/// Wrapper for [Value] that uses [Value::serialize_bare] for serialization.
+///
+/// This lets a borrowed [Value] be passed wherever a [Serialize]
+/// implementation is required while still emitting only what
+/// [Value::serialize_bare] writes for it.
+#[derive(Serialize)]
+struct BareValue<'a>(#[serde(serialize_with = "Value::serialize_bare")] &'a Value);
+
 impl Value {
+    /// Serializes only the payload of this value.
+    ///
+    /// Depending on the kind of value, that is the number, the string, the
+    /// variable name, or the localized text.  An empty value is serialized as
+    /// the unit value.
+    fn serialize_bare<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        match &self.inner {
+            ValueInner::Number(number) => number.value.serialize(serializer),
+            ValueInner::String(string) => serializer.serialize_str(&string.s),
+            ValueInner::Variable(variable) => variable.var_name.serialize(serializer),
+            ValueInner::Text(text) => text.localized.serialize(serializer),
+            ValueInner::Template(template) => serializer.serialize_str(&template.localized),
+            ValueInner::Empty => serializer.serialize_unit(),
+        }
+    }
+
     fn new(inner: ValueInner) -> Self {
         Self {
             inner,
@@ -2012,7 +2015,7 @@ impl Value {
             format,
             honor_small: false,
             value: x,
-            var_name: None,
+            variable: None,
             value_label: None,
         }))
     }
@@ -2050,7 +2053,7 @@ impl Value {
                 },
                 honor_small: false,
                 value: *number,
-                var_name,
+                variable: var_name,
                 value_label,
             })),
             Datum::String(string) => Self::new(ValueInner::String(StringValue {
@@ -2417,9 +2420,11 @@ impl Display for DisplayValue<'_> {
                 f.write_str(local)
             }
 
-            ValueInner::Template(TemplateValue { args, local, .. }) => {
-                self.template(f, local, args)
-            }
+            ValueInner::Template(TemplateValue {
+                args,
+                localized: local,
+                ..
+            }) => self.template(f, local, args),
 
             ValueInner::Empty => Ok(()),
         }?;
@@ -2455,24 +2460,58 @@ impl Debug for Value {
     }
 }
 
-#[derive(Clone, Debug, Serialize)]
+#[derive(Clone, Debug)]
 pub struct NumberValue {
-    pub show: Option<Show>,
+    /// The numerical value, or `None` if it is a missing value.
+    pub value: Option<f64>,
     pub format: Format,
+    pub show: Option<Show>,
     pub honor_small: bool,
-    pub value: Option<f64>,
-    pub var_name: Option<String>,
+    pub variable: Option<String>,
     pub value_label: Option<String>,
 }
 
+impl Serialize for NumberValue {
+    /// Serializes the number compactly.
+    ///
+    /// A value with an `F` format and neither a variable name nor a value
+    /// label is written as just the number (or as "none" when it is missing).
+    /// Anything else is written as a map that always has `value` and `format`
+    /// entries, plus `show`, `honor_small`, `variable`, and `value_label`
+    /// entries only when they are set.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        if self.format.type_() == Type::F && self.variable.is_none() && self.value_label.is_none() {
+            return self.value.serialize(serializer);
+        }
+
+        // The number of entries is known before anything is written, so pass
+        // it along: some serializers only support maps whose length is given
+        // up front.
+        let len = 2
+            + usize::from(self.show.is_some())
+            + usize::from(self.honor_small)
+            + usize::from(self.variable.is_some())
+            + usize::from(self.value_label.is_some());
+
+        let mut map = serializer.serialize_map(Some(len))?;
+        map.serialize_entry("value", &self.value)?;
+        map.serialize_entry("format", &self.format)?;
+        if let Some(show) = self.show {
+            map.serialize_entry("show", &show)?;
+        }
+        if self.honor_small {
+            map.serialize_entry("honor_small", &self.honor_small)?;
+        }
+        if let Some(variable) = &self.variable {
+            map.serialize_entry("variable", variable)?;
+        }
+        if let Some(value_label) = &self.value_label {
+            map.serialize_entry("value_label", value_label)?;
+        }
+        map.end()
+    }
+}
+
 #[derive(Clone, Debug, Serialize)]
 pub struct StringValue {
-    pub show: Option<Show>,
-    pub hex: bool,
-
-    /// If `hex` is true, this string should already be hex digits
+    /// The string value.
+    ///
+    /// If `hex` is true, this should contain hex digits, not raw binary data
     /// (otherwise it would be impossible to encode non-UTF-8 data).
     pub s: String,
+
+    /// True if `s` is hex digits.
+    pub hex: bool,
+
+    pub show: Option<Show>,
+
     pub var_name: Option<String>,
     pub value_label: Option<String>,
 }
@@ -2535,7 +2574,7 @@ impl TextValue {
 #[derive(Clone, Debug, Serialize)]
 pub struct TemplateValue {
     pub args: Vec<Vec<Value>>,
-    pub local: String,
+    pub localized: String,
     pub id: String,
 }
 
@@ -2635,3 +2674,165 @@ impl ValueInner {
         }
     }
 }
+
+/// A named node in a tree of metadata.
+///
+/// `name` labels the node and `value` holds either a single value or a list
+/// of child entries.
+#[derive(Clone, Debug)]
+pub struct MetadataEntry {
+    /// Label for this entry.
+    pub name: Value,
+
+    /// Content of this entry.
+    pub value: MetadataValue,
+}
+
+/// The content of a [MetadataEntry].
+#[derive(Clone, Debug)]
+pub enum MetadataValue {
+    /// A single value.
+    Leaf(Value),
+
+    /// A list of nested entries.
+    Group(Vec<MetadataEntry>),
+}
+
+impl MetadataEntry {
+    /// Converts this entry into a pivot table with a single dimension along
+    /// the Y axis, with one data cell per leaf in the tree.
+    ///
+    /// If the entry is itself a leaf, it is wrapped in a group named
+    /// "Metadata" so that the dimension still has a group at its root.
+    fn into_pivot_table(self) -> PivotTable {
+        let mut cells = Vec::new();
+        let root = match self.visit(&mut cells) {
+            Category::Leaf(leaf) => Group::new("Metadata").with(leaf),
+            Category::Group(group) => group,
+        };
+        // Leaves were pushed in visiting order, so the position of each cell
+        // is its row index.
+        let rows = cells
+            .into_iter()
+            .enumerate()
+            .map(|(index, cell)| ([index], cell));
+        PivotTable::new([(Axis3::Y, Dimension::new(root))]).with_data(rows)
+    }
+
+    /// Converts this entry into a category, appending the value of every leaf
+    /// encountered to `data` in the order visited.
+    fn visit(self, data: &mut Vec<Value>) -> Category {
+        let Self { name, value } = self;
+        match value {
+            MetadataValue::Leaf(cell) => {
+                data.push(cell);
+                Leaf::new(name).into()
+            }
+            MetadataValue::Group(children) => {
+                let group = Group::with_capacity(name, children.len());
+                let categories = children.into_iter().map(|child| child.visit(data));
+                group.with_multiple(categories).into()
+            }
+        }
+    }
+}
+
+impl Serialize for MetadataValue {
+    /// Serializes a leaf as its bare value and a group as a map from each
+    /// child's displayed name to that child's value.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        match self {
+            MetadataValue::Leaf(leaf) => leaf.serialize_bare(serializer),
+            MetadataValue::Group(entries) => {
+                let mut map = serializer.serialize_map(Some(entries.len()))?;
+                entries.iter().try_for_each(|entry| {
+                    let key = entry.name.display(()).to_string();
+                    map.serialize_entry(&key, &entry.value)
+                })?;
+                map.end()
+            }
+        }
+    }
+}
+impl Serialize for MetadataEntry {
+    /// Serializes the entry as a map.
+    ///
+    /// A leaf becomes a one-entry map from the entry's displayed name to its
+    /// bare value.  A group becomes a map of its children; the group's own
+    /// name is not written.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        match &self.value {
+            MetadataValue::Leaf(value) => {
+                let mut map = serializer.serialize_map(Some(1))?;
+                let name = self.name.display(()).to_string();
+                map.serialize_entry(&name, &BareValue(value))?;
+                map.end()
+            }
+            // A group is written exactly the way `MetadataValue` writes it,
+            // so delegate instead of repeating the loop here.
+            MetadataValue::Group(_) => self.value.serialize(serializer),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::output::pivot::{Display26Adic, MetadataEntry, MetadataValue, Value};
+
+    /// Checks the lowercase and uppercase renderings of [Display26Adic] for a
+    /// selection of numbers, including zero, which renders as an empty string.
+    #[test]
+    fn display_26adic() {
+        for (number, lowercase, uppercase) in [
+            (0, "", ""),
+            (1, "a", "A"),
+            (2, "b", "B"),
+            (26, "z", "Z"),
+            (27, "aa", "AA"),
+            (28, "ab", "AB"),
+            (29, "ac", "AC"),
+            (18278, "zzz", "ZZZ"),
+            (18279, "aaaa", "AAAA"),
+            (19010, "abcd", "ABCD"),
+        ] {
+            assert_eq!(Display26Adic::new_lowercase(number).to_string(), lowercase);
+            assert_eq!(Display26Adic::new_uppercase(number).to_string(), uppercase);
+        }
+    }
+
+    /// Builds a nested [MetadataEntry] tree and checks both its pretty-printed
+    /// JSON serialization and its rendering as a pivot table.
+    #[test]
+    fn metadata_entry() {
+        let tree = MetadataEntry {
+            name: Value::from("Group"),
+            value: MetadataValue::Group(vec![
+                MetadataEntry {
+                    name: Value::from("Name 1"),
+                    value: MetadataValue::Leaf(Value::from("Value 1")),
+                },
+                MetadataEntry {
+                    name: Value::from("Subgroup 1"),
+                    value: MetadataValue::Group(vec![
+                        MetadataEntry {
+                            name: Value::from("Subname 1"),
+                            value: MetadataValue::Leaf(Value::from("Subvalue 1")),
+                        },
+                        MetadataEntry {
+                            name: Value::from("Subname 2"),
+                            value: MetadataValue::Leaf(Value::from("Subvalue 2")),
+                        },
+                        MetadataEntry {
+                            name: Value::from("Subname 3"),
+                            value: MetadataValue::Leaf(Value::new_integer(Some(3.0))),
+                        },
+                    ]),
+                },
+                MetadataEntry {
+                    name: Value::from("Name 2"),
+                    value: MetadataValue::Leaf(Value::from("Value 2")),
+                },
+            ]),
+        };
+        // The name of the outermost group ("Group") does not appear in the
+        // serialized form; only its children do.
+        assert_eq!(
+            serde_json::to_string_pretty(&tree).unwrap(),
+            r#"{
+  "Name 1": "Value 1",
+  "Subgroup 1": {
+    "Subname 1": "Subvalue 1",
+    "Subname 2": "Subvalue 2",
+    "Subname 3": 3.0
+  },
+  "Name 2": "Value 2"
+}"#
+        );
+
+        // `into_pivot_table` consumes the tree, so this check comes last.
+        assert_eq!(
+            tree.into_pivot_table().to_string(),
+            r#"╭────────────────────┬──────────╮
+│           Name 1   │Value 1   │
+├────────────────────┼──────────┤
+│Subgroup 1 Subname 1│Subvalue 1│
+│           Subname 2│Subvalue 2│
+│           Subname 3│         3│
+├────────────────────┼──────────┤
+│           Name 2   │Value 2   │
+╰────────────────────┴──────────╯
+"#
+        );
+    }
+}
index 21854df5ef39364fc2f4a8fbc037a4235538d1fe..9df728fec1e5f11f95e9a6d01ec221ed7204bae8 100644 (file)
@@ -1278,13 +1278,13 @@ impl BinWrite for Value {
                     format: number.format,
                     honor_small: number.honor_small,
                 };
-                if number.var_name.is_some() || number.value_label.is_some() {
+                if number.variable.is_some() || number.value_label.is_some() {
                     (
                         2u8,
                         ValueMod::new(self),
                         format,
                         number.value.unwrap_or(f64::MIN),
-                        SpvString::optional(&number.var_name),
+                        SpvString::optional(&number.variable),
                         SpvString::optional(&number.value_label),
                         Show::as_spv(&number.show),
                     )
@@ -1343,7 +1343,7 @@ impl BinWrite for Value {
                 (
                     0u8,
                     ValueMod::new(self),
-                    SpvString(&template.local),
+                    SpvString(&template.localized),
                     template.args.len() as u32,
                 )
                     .write_options(writer, endian, args)?;
index f6ebfd406b504daadbd49743d0bf6745f6ef2eab..75218b794f2031c664f3203b7a76ab70c364731a 100644 (file)
@@ -543,6 +543,20 @@ pub enum Record {
 }
 
 impl Record {
+    /// Returns the contained [EncodingRecord] if this is a
+    /// [Record::Encoding], otherwise `None`.
+    pub fn as_encoding_record(&self) -> Option<&EncodingRecord> {
+        if let Record::Encoding(record) = self {
+            Some(record)
+        } else {
+            None
+        }
+    }
+
+    /// Returns the contained [IntegerInfoRecord] if this is a
+    /// [Record::IntegerInfo], otherwise `None`.
+    pub fn as_integer_info_record(&self) -> Option<&IntegerInfoRecord> {
+        if let Record::IntegerInfo(record) = self {
+            Some(record)
+        } else {
+            None
+        }
+    }
+
     pub fn as_long_string_missing_values(
         &self,
     ) -> Option<&LongStringMissingValueRecord<ByteString>> {
@@ -806,24 +820,7 @@ pub fn infer_encoding(
     records: &[Record],
     mut warn: impl FnMut(Warning),
 ) -> Result<&'static Encoding, Error> {
-    // Get the character encoding from the first (and only) encoding record.
-    let encoding = records
-        .iter()
-        .filter_map(|record| match record {
-            Record::Encoding(record) => Some(record.0.as_str()),
-            _ => None,
-        })
-        .next();
-
-    // Get the character code from the first (only) integer info record.
-    let character_code = records
-        .iter()
-        .filter_map(|record| match record {
-            Record::IntegerInfo(record) => Some(record.inner.character_code),
-            _ => None,
-        })
-        .next();
-
+    let (encoding, character_code) = get_encoding_info(records);
     match get_encoding(encoding, character_code) {
         Ok(encoding) => Ok(encoding),
         Err(err @ EncodingError::Ebcdic) => Err(Error::new(None, err.into())),
@@ -835,6 +832,31 @@ pub fn infer_encoding(
     }
 }
 
+/// Extracts the information in `records` that bears on character encoding.
+///
+/// Returns the encoding name from the first encoding record and the character
+/// code from the first integer info record.  Either is `None` if `records`
+/// contains no record of the corresponding kind.
+pub fn get_encoding_info(records: &[Record]) -> (Option<&str>, Option<i32>) {
+    let encoding = get_encoding_record(records).map(|record| record.0.as_str());
+    let character_code =
+        get_integer_info_record(records).map(|record| record.inner.character_code);
+    (encoding, character_code)
+}
+
+/// Returns the first [EncodingRecord] among the records yielded by `iter`, or
+/// `None` if there is none.
+///
+/// Iteration stops as soon as an encoding record is found.
+pub fn get_encoding_record<'a, I>(iter: I) -> Option<&'a EncodingRecord>
+where
+    I: IntoIterator<Item = &'a Record>,
+{
+    iter.into_iter().find_map(Record::as_encoding_record)
+}
+
+/// Returns the first [IntegerInfoRecord] among the records yielded by `iter`,
+/// or `None` if there is none.
+///
+/// Iteration stops as soon as an integer info record is found.
+pub fn get_integer_info_record<'a, I>(iter: I) -> Option<&'a IntegerInfoRecord>
+where
+    I: IntoIterator<Item = &'a Record>,
+{
+    iter.into_iter().find_map(Record::as_integer_info_record)
+}
+
 /// An [Encoding] along with a function to report decoding errors.
 ///
 /// This is used by functions that decode raw records.