work
author Ben Pfaff <blp@cs.stanford.edu>
Thu, 23 Oct 2025 15:23:46 +0000 (08:23 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Thu, 23 Oct 2025 15:23:46 +0000 (08:23 -0700)
rust/pspp/src/data.rs
rust/pspp/src/format.rs
rust/pspp/src/output/pivot.rs
rust/pspp/src/output/spv/legacy_xml.rs
rust/pspp/src/output/spv/light.rs
rust/pspp/src/pc.rs
rust/pspp/src/settings.rs
rust/pspp/src/variable.rs

index 5552b813f5b74ecedeb74d2f9104a96a4b436d3f..c2edc22f126e3343e2d3fe213d7314801882739f 100644 (file)
@@ -574,6 +574,50 @@ impl<B> Datum<B> {
         matches!(self, Self::String(_))
     }
 
+    /// Returns whether this is a number datum for which `f` returns true (false for a string).
+    pub fn is_number_and<F>(&self, f: F) -> bool
+    where
+        F: FnOnce(Option<f64>) -> bool,
+    {
+        match self {
+            Self::Number(number) => f(*number),
+            Self::String(_) => false,
+        }
+    }
+
+    /// Returns true for a string datum; for a number datum, returns what `f` returns for it.
+    pub fn is_string_or<F>(&self, f: F) -> bool
+    where
+        F: FnOnce(Option<f64>) -> bool,
+    {
+        match self {
+            Self::String(_) => true,
+            Self::Number(number) => f(*number),
+        }
+    }
+
+    /// Returns whether this is a string datum for which `f` returns true (false for a number).
+    pub fn is_string_and<F>(&self, f: F) -> bool
+    where
+        F: FnOnce(&B) -> bool,
+    {
+        match self {
+            Self::String(string) => f(string),
+            Self::Number(_) => false,
+        }
+    }
+
+    /// Returns true for a number datum; for a string datum, returns what `f` returns for it.
+    pub fn is_number_or<F>(&self, f: F) -> bool
+    where
+        F: FnOnce(&B) -> bool,
+    {
+        match self {
+            Self::Number(_) => true,
+            Self::String(string) => f(string),
+        }
+    }
+
     /// Returns the number inside this datum, or `None` if this is a string
     /// datum.
     pub fn as_number(&self) -> Option<Option<f64>> {
index 88abd6b97603bbf5721d08e4de0f57e3e9b1cbcd..744dedbdcab9852b3ff268cf676ff50059d67787 100644 (file)
@@ -503,55 +503,55 @@ impl Serialize for Format {
     }
 }
 
-impl Format {
-    pub const F40: Format = Format {
-        type_: Type::F,
-        w: 40,
-        d: 0,
-    };
-
-    pub const F40_1: Format = Format {
-        type_: Type::F,
-        w: 40,
-        d: 1,
-    };
-
-    pub const F40_2: Format = Format {
-        type_: Type::F,
-        w: 40,
-        d: 2,
-    };
-
-    pub const F40_3: Format = Format {
-        type_: Type::F,
-        w: 40,
-        d: 3,
-    };
-
-    pub const PCT40_1: Format = Format {
-        type_: Type::Pct,
-        w: 40,
-        d: 1,
-    };
-
-    pub const F8_0: Format = Format {
-        type_: Type::F,
-        w: 8,
-        d: 0,
-    };
-
-    pub const F8_2: Format = Format {
-        type_: Type::F,
-        w: 8,
-        d: 2,
-    };
-
-    pub const DATETIME40_0: Format = Format {
-        type_: Type::DateTime,
-        w: 40,
-        d: 0,
-    };
+pub const F40: Format = Format {
+    type_: Type::F,
+    w: 40,
+    d: 0,
+};
 
+pub const F40_1: Format = Format {
+    type_: Type::F,
+    w: 40,
+    d: 1,
+};
+
+pub const F40_2: Format = Format {
+    type_: Type::F,
+    w: 40,
+    d: 2,
+};
+
+pub const F40_3: Format = Format {
+    type_: Type::F,
+    w: 40,
+    d: 3,
+};
+
+pub const PCT40_1: Format = Format {
+    type_: Type::Pct,
+    w: 40,
+    d: 1,
+};
+
+pub const F8_0: Format = Format {
+    type_: Type::F,
+    w: 8,
+    d: 0,
+};
+
+pub const F8_2: Format = Format {
+    type_: Type::F,
+    w: 8,
+    d: 2,
+};
+
+pub const DATETIME40_0: Format = Format {
+    type_: Type::DateTime,
+    w: 40,
+    d: 0,
+};
+
+impl Format {
     pub fn type_(self) -> Type {
         self.type_
     }
index b68d936e340500f175f3698f54dfc55eef7bf0ba..33820af281bf7610d311d0121cd7904aad838a50 100644 (file)
@@ -75,7 +75,10 @@ use tlo::parse_tlo;
 use crate::{
     calendar::date_time_to_pspp,
     data::{ByteString, Datum, EncodedString, RawString},
-    format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat},
+    format::{
+        DATETIME40_0, Decimal, F8_2, F40, F40_2, F40_3, Format, PCT40_1,
+        Settings as FormatSettings, Type, UncheckedFormat,
+    },
     settings::{Settings, Show},
     util::ToSmallString,
     variable::{VarType, Variable},
@@ -418,12 +421,12 @@ impl PivotTable {
     pub fn insert_number(&mut self, data_indexes: &[usize], number: Option<f64>, class: Class) {
         let format = match class {
             Class::Other => Settings::global().default_format,
-            Class::Integer => Format::F40,
-            Class::Correlations => Format::F40_3,
-            Class::Significance => Format::F40_3,
-            Class::Percent => Format::PCT40_1,
-            Class::Residual => Format::F40_2,
-            Class::Count => Format::F40, // XXX
+            Class::Integer => F40,
+            Class::Correlations => F40_3,
+            Class::Significance => F40_3,
+            Class::Percent => PCT40_1,
+            Class::Residual => F40_2,
+            Class::Count => F40, // XXX
         };
         let value = Value::new(ValueInner::Number(NumberValue {
             show: None,
@@ -1610,7 +1613,7 @@ impl Default for PivotTableStyle {
             settings: FormatSettings::default(), // XXX from settings
             grouping: None,
             small: 0.0001, // XXX from settings.
-            weight_format: Format::F40,
+            weight_format: F40,
         }
     }
 }
@@ -2244,7 +2247,7 @@ impl Value {
         }
     }
     pub fn new_date_time(date_time: NaiveDateTime) -> Self {
-        Self::new_number_with_format(Some(date_time_to_pspp(date_time)), Format::DATETIME40_0)
+        Self::new_number_with_format(Some(date_time_to_pspp(date_time)), DATETIME40_0)
     }
     pub fn new_number_with_format(x: Option<f64>, format: Format) -> Self {
         Self::new(ValueInner::Number(NumberValue {
@@ -2306,10 +2309,10 @@ impl Value {
         }
     }
     pub fn new_number(x: Option<f64>) -> Self {
-        Self::new_number_with_format(x, Format::F8_2)
+        Self::new_number_with_format(x, F8_2)
     }
     pub fn new_integer(x: Option<f64>) -> Self {
-        Self::new_number_with_format(x, Format::F40)
+        Self::new_number_with_format(x, F40)
     }
     pub fn new_text(s: impl Into<String>) -> Self {
         Self::new_user_text(s)
index e7a7657bdedc2ce5c56a83fd8a63a7ab8c0b0465..d52e1325bdd7465a509b03f15e5d2fad702a01a5 100644 (file)
@@ -28,7 +28,7 @@ use serde::{Deserialize, de::Error as _};
 
 use crate::{
     data::Datum,
-    format::{Decimal::Dot, Type, UncheckedFormat},
+    format::{Decimal::Dot, F8_0, Type, UncheckedFormat},
     output::{
         pivot::{
             Area, AreaStyle, Color, HeadingRegion, HorzAlign, Look, PivotTable, RowParity, Value,
@@ -56,9 +56,14 @@ impl<'de, T> Deserialize<'de> for Ref<T> {
     }
 }
 
+#[derive(Clone, Debug, Default)]
 struct Map(HashMap<OrderedFloat<f64>, Datum<String>>);
 
 impl Map {
+    fn new() -> Self {
+        Self::default()
+    }
+
     fn remap_formats(
         &mut self,
         format: &Option<Format>,
@@ -99,7 +104,7 @@ impl Map {
             self.0.insert(OrderedFloat(relabel.from), value);
             // XXX warn on duplicate
         }
-        (format.unwrap_or(crate::format::Format::F8_0), affixes)
+        (format.unwrap_or(F8_0), affixes)
     }
 
     fn apply(&self, data: &mut Vec<DataValue>) {
@@ -113,6 +118,37 @@ impl Map {
             }
         }
     }
+
+    /// Maps each non-missing number in `data` to the label at the same index in `label_series`.
+    fn insert_labels(
+        &mut self,
+        data: &[DataValue],
+        label_series: &Series,
+        format: crate::format::Format,
+    ) {
+        self.0.extend(data.iter().zip(&label_series.values).filter_map(|(datum, label)| {
+            let number = datum.value.as_number()??;
+            let text = match &label.value {
+                Datum::Number(_) => label.value.display(format).with_stretch().to_string(),
+                Datum::String(text) => text.clone(),
+            };
+            Some((OrderedFloat(number), Datum::String(text)))
+        }));
+    }
+
+    /// Adds `value_map`'s entries to this map, skipping `from` keys that are not numbers.
+    fn remap_vmes(&mut self, value_map: &[ValueMapEntry]) {
+        for vme in value_map {
+            let to = match vme.to.trim().parse::<f64>() {
+                Ok(number) => Datum::Number(Some(number)),
+                Err(_) => Datum::String(vme.to.clone()),
+            };
+            // XXX warn about skipped keys; a malformed file must not panic the reader.
+            for from in vme.from.split(';').filter_map(|s| s.trim().parse::<f64>().ok()) {
+                self.0.insert(OrderedFloat(from), to.clone());
+            }
+        }
+    }
 }
 
 #[derive(Deserialize, Debug)]
@@ -274,114 +310,16 @@ impl Visualization {
             && (n_source > 0 || n_derived > 0)
         {
             for sv in take(&mut source_variables) {
-                let label_series = if let Some(label_variable) = &sv.label_variable {
-                    let Some(label_series) = series.get(label_variable.references.as_str()) else {
-                        source_variables.push(sv);
-                        continue;
-                    };
-                    Some(label_series)
-                } else {
-                    None
-                };
-
-                let Some(data) = data
-                    .get(&sv.source)
-                    .and_then(|source| source.get(&sv.source_name))
-                else {
-                    todo!()
-                };
-                fn remap_formats(
-                    map: &mut HashMap<OrderedFloat<f64>, Datum<String>>,
-                    format: &Option<Format>,
-                    string_format: &Option<StringFormat>,
-                ) -> (crate::format::Format, Vec<Affix>) {
-                    let (format, affixes, relabels, try_strings_as_numbers) =
-                        if let Some(format) = &format {
-                            (
-                                Some(format.decode()),
-                                format.affixes.clone(),
-                                format.relabels.as_slice(),
-                                format.try_strings_as_numbers.unwrap_or_default(),
-                            )
-                        } else if let Some(string_format) = &string_format {
-                            (
-                                None,
-                                string_format.affixes.clone(),
-                                string_format.relabels.as_slice(),
-                                false,
-                            )
-                        } else {
-                            (None, Vec::new(), [].as_slice(), false)
-                        };
-                    for relabel in relabels {
-                        let value = if try_strings_as_numbers
-                            && let Ok(to) = relabel.to.trim().parse::<f64>()
-                        {
-                            Datum::Number(Some(to))
-                        } else if let Some(format) = format
-                            && let Ok(to) = relabel.to.trim().parse::<f64>()
-                        {
-                            Datum::String(
-                                Datum::<String>::Number(Some(to))
-                                    .display(format)
-                                    .with_stretch()
-                                    .to_string(),
-                            )
-                        } else {
-                            Datum::String(relabel.to.clone())
-                        };
-                        map.insert(OrderedFloat(relabel.from), value);
-                        // XXX warn on duplicate
-                    }
-                    (format.unwrap_or(crate::format::Format::F8_0), affixes)
-                }
-                let mut mapping = HashMap::new();
-                let (format, affixes) = remap_formats(&mut mapping, &sv.format, &sv.string_format);
-                fn execute_mapping(
-                    mapping: &HashMap<OrderedFloat<f64>, Datum<String>>,
-                    data: &mut Vec<DataValue>,
-                ) {
-                    for value in data {
-                        let Datum::Number(Some(number)) = value.value else {
-                            continue;
-                        };
-                        if let Some(to) = mapping.get(&OrderedFloat(number)) {
-                            value.index = Some(number);
-                            value.value = to.clone();
-                        }
-                    }
-                }
-                let mut data = data.clone();
-                if !mapping.is_empty() {
-                    execute_mapping(&mapping, &mut data);
-                } else if let Some(label_series) = label_series {
-                    for (value, label) in data.iter().zip(label_series.values.iter()) {
-                        if let Some(Some(number)) = value.value.as_number() {
-                            let dest = match &label.value {
-                                Datum::Number(_) => {
-                                    label.value.display(format).with_stretch().to_string()
-                                }
-                                Datum::String(s) => s.clone(),
-                            };
-                            mapping.insert(OrderedFloat(number), Datum::String(dest));
-                        }
+                match sv.decode(&data, &series) {
+                    Ok(s) => {
+                        series.insert(&sv.id, s);
                     }
+                    Err(()) => source_variables.push(sv),
                 }
-                series.insert(
-                    &sv.id,
-                    Series {
-                        label: sv.label.clone(),
-                        format,
-                        remapped: false,
-                        values: data,
-                        mapping,
-                        affixes,
-                    },
-                );
             }
 
             for dv in take(&mut derived_variables) {
-                let mut data = if dv.value == "constant(0)" {
+                let mut values = if dv.value == "constant(0)" {
                     let n_values = if let Some(series) = series.values().next() {
                         series.values.len()
                     } else {
@@ -407,29 +345,27 @@ impl Visualization {
                 } else {
                     unreachable!()
                 };
-                let mut mapping = HashMap::new();
-                for vme in &dv.value_map {
-                    for from in vme.from.split(';') {
-                        let from = from.trim().parse::<f64>().unwrap(); // XXX
-                        let to = if let Ok(to) = vme.to.trim().parse::<f64>() {
-                            Datum::Number(Some(to))
-                        } else {
-                            Datum::String(vme.to.clone())
-                        };
-                        mapping.insert(OrderedFloat(from), to);
-                    }
-                }
-                if !mapping.is_empty() {
-                    for value in &mut data {
-                        let Datum::Number(Some(number)) = value.value else {
-                            continue;
-                        };
-                        if let Some(to) = mapping.get(&OrderedFloat(number)) {
-                            value.index = Some(number);
-                            value.value = to.clone();
-                        }
-                    }
+                let mut map = Map::new();
+                map.remap_vmes(&dv.value_map);
+                map.apply(&mut values);
+                map.remap_formats(&dv.format, &dv.string_format);
+                if values
+                    .iter()
+                    .all(|value| value.value.is_string_and(|s| s.is_empty()))
+                {
+                    values.clear();
                 }
+                series.insert(
+                    &dv.id,
+                    Series {
+                        label: None,
+                        format: F8_0,
+                        remapped: false,
+                        values,
+                        map,
+                        affixes: Vec::new(),
+                    },
+                );
             }
         }
 
@@ -442,7 +378,7 @@ struct Series {
     format: crate::format::Format,
     remapped: bool,
     values: Vec<DataValue>,
-    mapping: HashMap<OrderedFloat<f64>, Datum<String>>,
+    map: Map,
     affixes: Vec<Affix>,
 }
 
@@ -505,6 +441,46 @@ struct SourceVariable {
     string_format: Option<StringFormat>,
 }
 
+impl SourceVariable {
+    /// Builds this variable's series from `data`.  Returns `Err(())` if the series that labels
+    /// it is not in `series`; panics (`todo!()`) if `data` has no values for this variable.
+    fn decode(
+        &self,
+        data: &HashMap<String, HashMap<String, Vec<DataValue>>>,
+        series: &HashMap<&str, Series>,
+    ) -> Result<Series, ()> {
+        let label_series = self
+            .label_variable
+            .as_ref()
+            .map(|label_variable| series.get(label_variable.references.as_str()).ok_or(()))
+            .transpose()?;
+
+        let Some(values) = data
+            .get(&self.source)
+            .and_then(|source| source.get(&self.source_name))
+        else {
+            todo!()
+        };
+        let mut map = Map::new();
+        let (format, affixes) = map.remap_formats(&self.format, &self.string_format);
+        let mut values = values.clone();
+        // Explicit relabels win; the label series is consulted only when there are none.
+        if !map.0.is_empty() {
+            map.apply(&mut values);
+        } else if let Some(label_series) = label_series {
+            map.insert_labels(&values, label_series, format);
+        }
+        Ok(Series {
+            label: self.label.clone(),
+            format,
+            remapped: false,
+            values,
+            map,
+            affixes,
+        })
+    }
+}
+
 #[derive(Deserialize, Debug)]
 #[serde(rename_all = "camelCase")]
 struct DerivedVariable {
index fa5a119022fd64d40daca491f30bb48e85e9ba38..217e36993aa9a23aa64d980fbfefaa84663b7e59 100644 (file)
@@ -17,13 +17,10 @@ use enum_map::{EnumMap, enum_map};
 
 use crate::{
     format::{
-        CC, Decimal, Decimals, Epoch, Format, NumberStyle, Settings, Type, UncheckedFormat, Width,
+        Decimal, Decimals, Epoch, Format, NumberStyle, Settings, Type, UncheckedFormat, Width, CC, F40
     },
     output::pivot::{
-        self, AreaStyle, Axis2, Axis3, BoxBorder, Color, FootnoteMarkerPosition,
-        FootnoteMarkerType, Footnotes, Group, HeadingRegion, HorzAlign, LabelPosition, Look,
-        PivotTable, PivotTableMetadata, PivotTableStyle, PrecomputedIndex, RowColBorder, RowParity,
-        StringValue, Stroke, TemplateValue, ValueStyle, VariableValue, VertAlign, parse_bool,
+        self, parse_bool, AreaStyle, Axis2, Axis3, BoxBorder, Color, FootnoteMarkerPosition, FootnoteMarkerType, Footnotes, Group, HeadingRegion, HorzAlign, LabelPosition, Look, PivotTable, PivotTableMetadata, PivotTableStyle, PrecomputedIndex, RowColBorder, RowParity, StringValue, Stroke, TemplateValue, ValueStyle, VariableValue, VertAlign
     },
     settings::Show,
 };
@@ -180,7 +177,7 @@ impl LightTable {
                     b",.' ".contains(&grouping).then_some(grouping as char)
                 },
                 small: n3.map_or(0.0, |n3| n3.small),
-                weight_format: Format::F40,
+                weight_format: F40,
             })
             .with_metadata(PivotTableMetadata {
                 command_local: y1.map(|y1| y1.command_local.decode(encoding)),
index ce484496a7877e656b267e94c8312c0ace2b6dc7..9627f29aa6850ff72b4fdfd74bbdd85aeaa4957b 100644 (file)
@@ -211,9 +211,7 @@ impl<R> Cases<R> {
         match result {
             Ok(Some(mut raw_case)) => {
                 for datum in &mut raw_case.0 {
-                    if let Datum::Number(Some(number)) = datum
-                        && *number == self.sysmis
-                    {
+                    if datum.is_number_and(|number| number == Some(self.sysmis)) {
                         *datum = Datum::Number(None);
                     }
                 }
index aac4a4c8a83953db1dbff2ed9e0d9d9f27fcd923..d53a8c29a1206a62150847d7ffd7b786a663a92b 100644 (file)
@@ -21,7 +21,7 @@ use enum_map::EnumMap;
 use serde::Serialize;
 
 use crate::{
-    format::{Format, Settings as FormatSettings},
+    format::{Format, Settings as FormatSettings, F8_2},
     message::Severity,
     output::pivot::Look,
 };
@@ -136,7 +136,7 @@ impl Default for Settings {
             macros: MacroSettings::default(),
             max_loops: 40,
             workspace: 64 * 1024 * 1024,
-            default_format: Format::F8_2,
+            default_format: F8_2,
             testing: false,
             fuzz_bits: 6,
             scale_min: 24,
index 6054cf65caed69c5a96f6687f8469a04d989a401..fca523b9750792bd88dc8d7052fc7e448b918d57 100644 (file)
@@ -743,7 +743,7 @@ impl<'a> MissingValuesMut<'a> {
             Err(MissingValuesError::TooMany)
         } else if value.var_type() != VarType::from(self.width) {
             Err(MissingValuesError::MixedTypes)
-        } else if value == Datum::Number(None) {
+        } else if value.is_sysmis() {
             Err(MissingValuesError::SystemMissing)
         } else if value.resize(self.width.min(VarWidth::String(8))).is_err() {
             Err(MissingValuesError::TooWide)