get rid of datavalue
authorBen Pfaff <blp@cs.stanford.edu>
Sat, 3 Jan 2026 20:02:53 +0000 (12:02 -0800)
committerBen Pfaff <blp@cs.stanford.edu>
Sat, 3 Jan 2026 20:02:53 +0000 (12:02 -0800)
rust/pspp/src/cli/show_spv.rs
rust/pspp/src/spv/read/legacy_xml.rs

index 087baa243dcdaa074967d80eeced5fdc6c30dfed..6b4c117798f703835a95296718820baeddf7f742 100644 (file)
@@ -27,7 +27,7 @@ use pspp::{
         legacy_bin::LegacyBin,
         read::{
             ReadSeek,
-            legacy_xml::{DataValue, Visualization},
+            legacy_xml::{Visualization, datum_as_format},
             structure::OutlineItem,
         },
     },
@@ -206,14 +206,8 @@ impl ShowSpv {
                     {
                         for (value_index, data_value) in values.iter().enumerate() {
                             let value = Value::new_datum(data_value).with_value_label(
-                                (variable_name == "cellFormat").then(|| {
-                                    DataValue {
-                                        value: data_value.clone(),
-                                        index: None,
-                                    }
-                                    .as_format()
-                                    .to_string()
-                                }),
+                                (variable_name == "cellFormat")
+                                    .then(|| datum_as_format(data_value).to_string()),
                             );
                             pivot_table.insert([value_index, variable_index], value);
                         }
@@ -282,10 +276,8 @@ impl ShowSpv {
                         PivotTable::new([(Axis3::Y, index), (Axis3::X, variables)]);
                     for (series_index, series) in series.values().enumerate() {
                         for (value_index, data_value) in series.values.iter().enumerate() {
-                            pivot_table.insert(
-                                [value_index, series_index],
-                                Value::new_datum(&data_value.value),
-                            );
+                            pivot_table
+                                .insert([value_index, series_index], Value::new_datum(data_value));
                         }
                     }
                     println!("{pivot_table}");
index 9c60440867ebbdbd2c0e9180c42ea21a3cc3dab8..2610c0e0f48778c0edacfc7f064407992812dbf0 100644 (file)
@@ -44,56 +44,37 @@ use crate::{
     spv::read::light::decode_format,
 };
 
-/// One data value.
-#[derive(Clone, Debug)]
-pub struct DataValue {
-    /// Optional index.
-    pub index: Option<usize>,
-
-    /// Data value.
-    pub value: Datum<String>,
+/// Interprets this data value as a [Format], first by looking it up in
+/// `format_map` and otherwise by interpreting it as a [Format] directly.
+///
+/// This should probably be a method on some hypothetical FormatMap.
+pub fn datum_as_format(datum: &Datum<String>) -> crate::format::Format {
+    let f = match datum {
+        Datum::Number(Some(number)) => *number as i64,
+        Datum::Number(None) => 0,
+        Datum::String(s) => s.parse().unwrap_or_default(),
+    };
+    decode_format(f as u32, &mut |_| () /*XXX*/)
 }
 
-impl DataValue {
-    /// Category index, if any.  This is the numeric value of the datum, if
-    /// there is one, falling back to the index.
-    pub fn category(&self) -> Option<usize> {
-        self.index
-    }
-
-    /// Interprets this data value as a [Format], first by looking it up in
-    /// `format_map` and otherwise by interpreting it as a [Format] directly.
-    ///
-    /// This should probably be a method on some hypothetical FormatMap.
-    pub fn as_format(&self) -> crate::format::Format {
-        let f = match &self.value {
-            Datum::Number(Some(number)) => *number as i64,
-            Datum::Number(None) => 0,
-            Datum::String(s) => s.parse().unwrap_or_default(),
-        };
-        decode_format(f as u32, &mut |_| () /*XXX*/)
-    }
-
-    /// Returns this data value interpreted using `format`.
-    pub fn as_pivot_value(&self, format: crate::format::Format) -> Value {
-        if format.type_().category() == crate::format::Category::Date
-            && let Some(s) = self.value.as_string()
-            && let Ok(date_time) =
-                NaiveDateTime::parse_from_str(s.as_str(), "%Y-%m-%dT%H:%M:%S%.3f")
-        {
-            Value::new_date(date_time)
-        } else if format.type_().category() == crate::format::Category::Time
-            && let Some(s) = self.value.as_string()
-            && let Ok(time) = NaiveTime::parse_from_str(s.as_str(), "%H:%M:%S%.3f")
-        {
-            Value::new_time(time)
-        } else if !self.value.is_sysmis() {
-            Value::new_datum(&self.value)
-        } else {
-            Value::new_empty()
-        }
-        .with_format(format)
+/// Returns this data value interpreted using `format`.
+fn datum_as_pivot_value(datum: &Datum<String>, format: crate::format::Format) -> Value {
+    if format.type_().category() == crate::format::Category::Date
+        && let Some(s) = datum.as_string()
+        && let Ok(date_time) = NaiveDateTime::parse_from_str(s.as_str(), "%Y-%m-%dT%H:%M:%S%.3f")
+    {
+        Value::new_date(date_time)
+    } else if format.type_().category() == crate::format::Category::Time
+        && let Some(s) = datum.as_string()
+        && let Ok(time) = NaiveTime::parse_from_str(s.as_str(), "%H:%M:%S%.3f")
+    {
+        Value::new_time(time)
+    } else if !datum.is_sysmis() {
+        Value::new_datum(&datum)
+    } else {
+        Value::new_empty()
     }
+    .with_format(format)
 }
 
 #[derive(Debug)]
@@ -158,26 +139,26 @@ impl Map {
         map
     }
 
-    fn apply(&self, data: &mut Vec<DataValue>) {
+    fn apply(&self, data: &mut Vec<Datum<String>>) {
         for value in data {
-            if let Datum::Number(Some(number)) = value.value
-                && let Some(to) = self.0.get(&OrderedFloat(number))
+            if let Datum::Number(Some(number)) = value
+                && let Some(to) = self.0.get(&OrderedFloat(*number))
             {
-                value.value = to.clone();
+                *value = to.clone();
             }
         }
     }
 
     fn insert_labels(
         &mut self,
-        data: &[DataValue],
+        data: &[Datum<String>],
         label_series: &Series,
         format: crate::format::Format,
     ) {
         for (value, label) in data.iter().zip(label_series.values.iter()) {
-            if let Some(Some(number)) = value.value.as_number() {
-                let dest = match &label.value {
-                    Datum::Number(_) => label.value.display(format).with_stretch().to_string(),
+            if let Some(Some(number)) = value.as_number() {
+                let dest = match label {
+                    Datum::Number(_) => label.display(format).with_stretch().to_string(),
                     Datum::String(s) => s.clone(),
                 };
                 self.0.insert(OrderedFloat(number), Datum::String(dest));
@@ -433,8 +414,7 @@ impl Visualization {
         'outer: for (i, cell) in cell.values.iter().enumerate() {
             coords.clear();
             for dim in dims {
-                if let Some(coordinate) = dim.coordinate.values.get(i)
-                    && let Some(coordinate) = coordinate.category()
+                if let Some(Some(coordinate)) = dim.coordinate.categories.get(i)
                     && let Some(locator) =
                         dim.coordinate.coordinate_to_index.borrow().get(&coordinate)
                     && let Some(index) = locator.as_leaf()
@@ -449,16 +429,16 @@ impl Visualization {
             let format = if let Some(cell_formats) = &cell_formats
                 && let Some(value) = cell_formats.values.get(i)
             {
-                value.as_format()
+                datum_as_format(value)
             } else {
                 F40_2
             };
-            let mut value = cell.as_pivot_value(format);
+            let mut value = datum_as_pivot_value(cell, format);
 
             if let Some(cell_footnotes) = &cell_footnotes
                 && let Some(dv) = cell_footnotes.values.get(i)
             {
-                if let Some(s) = dv.value.as_string() {
+                if let Some(s) = dv.as_string() {
                     for part in s.split(',') {
                         if let Ok(index) = part.parse::<usize>()
                             && let Some(index) = index.checked_sub(1)
@@ -470,9 +450,8 @@ impl Visualization {
                 }
             }
             if let Some((series, mappings)) = &markers
-                && let Some(dv) = series.values.get(i)
-                && let Some(category) = dv.category()
-                && let Some(marker) = mappings.get(&category)
+                && let Some(Some(category)) = series.categories.get(i)
+                && let Some(marker) = mappings.get(category)
             {
                 value.add_subscript(*marker);
             }
@@ -774,7 +753,8 @@ impl Visualization {
 pub struct Series {
     pub name: String,
     label: Option<String>,
-    pub values: Vec<DataValue>,
+    categories: Vec<Option<usize>>,
+    pub values: Vec<Datum<String>>,
     affixes: Vec<Affix>,
     coordinate_to_index: RefCell<BTreeMap<usize, CategoryLocator>>,
     dimension_index: Cell<Option<usize>>,
@@ -789,10 +769,11 @@ impl Debug for Series {
 }
 
 impl Series {
-    fn new(name: String, values: Vec<DataValue>) -> Self {
+    fn new(name: String, categories: Vec<Option<usize>>, values: Vec<Datum<String>>) -> Self {
         Self {
             name,
             label: None,
+            categories,
             values,
             affixes: Vec::new(),
             coordinate_to_index: Default::default(),
@@ -806,8 +787,8 @@ impl Series {
         Self { affixes, ..self }
     }
 
-    fn new_name(&self, dv: &DataValue, footnotes: &pivot::Footnotes) -> Value {
-        let mut name = Value::new_datum(&dv.value);
+    fn new_name(&self, datum: &Datum<String>, footnotes: &pivot::Footnotes) -> Value {
+        let mut name = Value::new_datum(datum);
         for affix in &self.affixes {
             if let Some(index) = affix.defines_reference.checked_sub(1)
                 && let Ok(index) = usize::try_from(index)
@@ -895,21 +876,21 @@ impl SourceVariable {
             None
         };
 
-        let mut data = if let Some(source) = data.get(&self.source)
+        let (categories, mut data) = if let Some(source) = data.get(&self.source)
             && let Some(values) = source.get(&self.variable)
         {
-            values
+            let categories = values
                 .iter()
-                .map(|value| DataValue {
-                    index: value
+                .map(|value| {
+                    value
                         .as_number()
                         .flatten()
-                        .and_then(|v| (v >= 0.0 && v < usize::MAX as f64).then_some(v as usize)),
-                    value: value.clone(),
+                        .and_then(|v| (v >= 0.0 && v < usize::MAX as f64).then_some(v as usize))
                 })
-                .collect()
+                .collect();
+            (categories, values.clone())
         } else {
-            Vec::new()
+            (Vec::new(), Vec::new())
         };
         if let Some(format) = &self.format {
             Map::from_format(format).apply(&mut data);
@@ -927,7 +908,7 @@ impl SourceVariable {
         }
         series.insert(
             &self.id,
-            Series::new(self.id.clone(), data)
+            Series::new(self.id.clone(), categories, data)
                 .with_affixes(Vec::from(self.affixes()))
                 .with_label(self.label.clone()),
         );
@@ -977,12 +958,7 @@ impl DerivedVariable {
             }
 
             if let Some(n_values) = series_len(series) {
-                (0..n_values)
-                    .map(|_| DataValue {
-                        index: Some(0),
-                        value: Datum::Number(Some(0.0)),
-                    })
-                    .collect()
+                (0..n_values).map(|_| Datum::Number(Some(0.0))).collect()
             } else {
                 return false;
             }
@@ -1008,7 +984,14 @@ impl DerivedVariable {
         } else if let Some(string_format) = &self.string_format {
             Map::from_string_format(string_format).apply(&mut values);
         };
-        series.insert(&self.id, Series::new(self.id.clone(), values));
+        series.insert(
+            &self.id,
+            Series::new(
+                self.id.clone(),
+                (0..values.len()).map(|_| Some(0)).collect(),
+                values,
+            ),
+        );
         true
     }
 }
@@ -2185,9 +2168,9 @@ fn decode_dimension<'a>(
 
     // Make leaf categories.
     let mut map = BTreeMap::new();
-    for (index, value) in variables[0].values.iter().enumerate() {
-        if let Some(coordinate) = value.category() {
-            map.entry(coordinate).or_insert(index);
+    for (index, category) in variables[0].categories.iter().enumerate() {
+        if let Some(coordinate) = category {
+            map.entry(*coordinate).or_insert(index);
         }
     }
     let mut coordinate_to_index = BTreeMap::new();
@@ -2211,8 +2194,8 @@ fn decode_dimension<'a>(
         let mut start = 0;
         for end in 1..=cats.len() {
             let dv1 = &variable.values[cats[start].index];
-            if end >= cats.len() || &variable.values[cats[end].index].value != &dv1.value {
-                if !dv1.value.is_spaces() {
+            if end >= cats.len() || &variable.values[cats[end].index] != dv1 {
+                if !dv1.is_spaces() {
                     let name = variable.new_name(dv1, footnotes);
                     let next_cat = CatBuilder {
                         category: Group::new(name)
@@ -2222,8 +2205,10 @@ fn decode_dimension<'a>(
                         leaves: cats[start].leaves.start..cats[end - 1].leaves.end,
                         location: cats[start].location.parent(),
                     };
-                    coordinate_to_index
-                        .insert(dv1.category().unwrap() /*XXX?*/, next_cat.location);
+                    coordinate_to_index.insert(
+                        variable.categories[cats[start].index].unwrap(), /*XXX?*/
+                        next_cat.location,
+                    );
                     next_cats.push(next_cat);
                 } else {
                     for cat in &cats[start..end] {