codepage_to_unicode tests pass

author Ben Pfaff <blp@cs.stanford.edu>

Sun, 10 Aug 2025 03:38:36 +0000 (20:38 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Sun, 10 Aug 2025 03:38:36 +0000 (20:38 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 10 Aug 2025 03:38:36 +0000 (20:38 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 10 Aug 2025 03:38:36 +0000 (20:38 -0700)
diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs

index f17441070e044f68228ffadd1a48705ef02d6c3d..7b1364dfb9ee245f6c0e15dddc8793e206f6d341 100644 (file)
--- a/rust/pspp/src/data.rs
+++ b/rust/pspp/src/data.rs
@@ -44,8 +44,8 @@ use serde::{
  };
  
  use crate::{
-    variable::{VarType, VarWidth},
      format::DisplayPlain,
+    variable::{VarType, VarWidth},
  };
  
  pub trait RawString: Debug + PartialEq + Eq + PartialOrd + Ord + Hash {
@@ -93,6 +93,14 @@ pub trait RawString: Debug + PartialEq + Eq + PartialOrd + Ord + Hash {
          ByteStr(self.raw_string_bytes())
      }
  
+    fn without_trailing_spaces(&self) -> ByteStr<'_> {
+        let mut raw = self.raw_string_bytes();
+        while let Some(trimmed) = raw.strip_suffix(b" ") {
+            raw = trimmed;
+        }
+        ByteStr(raw)
+    }
+
      fn as_encoded(&self, encoding: &'static Encoding) -> WithEncoding<ByteStr<'_>>
      where
          Self: Sized,
diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs

index d548c9b4f7436c0d2904d508b0ea3efc9736fc15..d7db1173199d805120f484678a3e790ad88be80d 100644 (file)
--- a/rust/pspp/src/dictionary.rs
+++ b/rust/pspp/src/dictionary.rs
@@ -35,7 +35,7 @@ use thiserror::Error as ThisError;
  use unicase::UniCase;
  
  use crate::{
-    data::{ByteString, Datum},
+    data::{ByteString, Datum, RawString},
      identifier::{ByIdentifier, HasIdentifier, Identifier},
      output::pivot::{
          Axis3, Dimension, Display26Adic, Footnote, Footnotes, Group, PivotTable, Value,
@@ -86,7 +86,7 @@ pub struct Dictionary {
      pub attributes: Attributes,
  
      /// Multiple response sets.
-    pub mrsets: BTreeSet<ByIdentifier<DictIndexMultipleResponseSet>>,
+    mrsets: BTreeSet<ByIdentifier<DictIndexMultipleResponseSet>>,
  
      /// Variable sets.
      ///
@@ -170,9 +170,12 @@ pub struct InvalidWeightVariable;
  pub struct InvalidFilterVariable;
  
  /// Invalid dictionary index.
-#[derive(Debug, ThisError)]
-#[error("Invalid dictionary index.")]
-pub struct DictIndexError;
+#[derive(Debug, Clone, ThisError)]
+#[error("Invalid index {index} in dictionary with {n} variables.")]
+pub struct DictIndexError {
+    index: usize,
+    n: usize,
+}
  
  impl Dictionary {
      /// Creates a new, empty dictionary with the specified `encoding`.
@@ -274,6 +277,10 @@ impl Dictionary {
          MultipleResponseSets::new(self)
      }
  
+    pub fn mrsets_mut(&mut self) -> MultipleResponseSetsMut<'_> {
+        MultipleResponseSetsMut::new(self)
+    }
+
      pub fn variable_sets(&self) -> VariableSets<'_> {
          VariableSets::new(self)
      }
@@ -647,17 +654,17 @@ impl Dictionary {
          }
          self.variables = variables;
  
-        let mut vectors = HashSet::new();
          let mut index = 0;
-        for mut vector in self.vectors.drain() {
+        let mut vectors = self.vectors.drain().collect::<Vec<_>>();
+        vectors.sort();
+        for mut vector in vectors {
              vector.codepage_to_unicode();
-            while vectors.contains(&vector) {
+            while self.vectors.contains(&vector) {
                  index += 1;
                  vector.name = Identifier::new(format!("Vec{index}")).unwrap();
              }
-            vectors.insert(vector);
+            self.vectors.insert(vector);
          }
-        self.vectors = vectors;
  
          self.attributes.codepage_to_unicode();
  
@@ -667,7 +674,7 @@ impl Dictionary {
              mrset.codepage_to_unicode();
              while mrsets.contains(&mrset) {
                  index += 1;
-                mrset.name = Identifier::new(format!("Mr{index}")).unwrap();
+                mrset.name = Identifier::new(format!("MrSet{index}")).unwrap();
              }
              mrsets.insert(mrset);
          }
@@ -1105,14 +1112,13 @@ impl<'a> MappedVariables<'a> {
          dictionary: &'a Dictionary,
          dict_indexes: &'a [DictIndex],
      ) -> Result<Self, DictIndexError> {
-        if dict_indexes
-            .iter()
-            .all(|dict_index| *dict_index < dictionary.variables.len())
-        {
-            Ok(Self::new_unchecked(dictionary, dict_indexes))
-        } else {
-            Err(DictIndexError)
+        let n = dictionary.variables.len();
+        for index in dict_indexes.iter().copied() {
+            if index >= n {
+                return Err(DictIndexError { index, n });
+            }
          }
+        Ok(Self::new_unchecked(dictionary, dict_indexes))
      }
  
      pub fn len(&self) -> usize {
@@ -1128,6 +1134,10 @@ impl<'a> MappedVariables<'a> {
      pub fn iter(&self) -> MappedVariablesIter<'a> {
          MappedVariablesIter::new(self.dictionary, self.dict_indexes.iter())
      }
+
+    pub fn dict_indexes(&self) -> &[DictIndex] {
+        self.dict_indexes
+    }
  }
  
  impl<'a> Index<usize> for MappedVariables<'a> {
@@ -1363,6 +1373,53 @@ impl<'a> Iterator for VariableSetsIter<'a> {
      }
  }
  
+#[derive(Debug)]
+pub struct MultipleResponseSetsMut<'a>(&'a mut Dictionary);
+
+#[derive(ThisError, Clone, Debug)]
+pub enum MrSetError {
+    #[error("{0}")]
+    DictIndexError(#[from] DictIndexError),
+
+    /// Counted value {value} has width {width}, but it must be no wider than
+    /// {max_width}, the width of the narrowest variable in multiple response
+    /// set {mr_set}.
+    #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
+    TooWideMDGroupCountedValue {
+        /// Multiple response set name.
+        mr_set: Identifier,
+        /// Counted value.
+        value: String,
+        /// Width of counted value.
+        width: usize,
+        /// Maximum allowed width of counted value.
+        max_width: u16,
+    },
+
+    /// Multiple response set {0} contains both string and numeric variables.
+    #[error("Multiple response set {0} contains both string and numeric variables.")]
+    MixedMrSet(
+        /// Multiple response set name.
+        Identifier,
+    ),
+}
+
+impl<'a> MultipleResponseSetsMut<'a> {
+    fn new(dictionary: &'a mut Dictionary) -> Self {
+        Self(dictionary)
+    }
+
+    pub fn mrsets(&'a self) -> MultipleResponseSets<'a> {
+        MultipleResponseSets::new(self.0)
+    }
+
+    pub fn insert(&mut self, mrset: DictIndexMultipleResponseSet) -> Result<(), MrSetError> {
+        MultipleResponseSet::new(self.0, &mrset)?;
+        self.0.mrsets.insert(ByIdentifier(mrset));
+        Ok(())
+    }
+}
+
  #[derive(Clone, Debug)]
  pub struct MultipleResponseSets<'a>(&'a Dictionary);
  
@@ -1379,7 +1436,7 @@ impl<'a> MultipleResponseSets<'a> {
          self.0
              .mrsets
              .get(&name.0)
-            .map(|mrset| MultipleResponseSet::new(self.0, mrset))
+            .map(|mrset| MultipleResponseSet::new_unchecked(self.0, mrset))
      }
  
      pub fn iter(&self) -> MultipleResponseSetIter<'a> {
@@ -1417,7 +1474,7 @@ impl<'a> Iterator for MultipleResponseSetIter<'a> {
      fn next(&mut self) -> Option<Self::Item> {
          self.iter
              .next()
-            .map(|set| MultipleResponseSet::new(self.dictionary, set))
+            .map(|set| MultipleResponseSet::new_unchecked(self.dictionary, set))
      }
  }
  
@@ -1442,10 +1499,44 @@ pub struct MultipleResponseSet<'a> {
  }
  
  impl<'a> MultipleResponseSet<'a> {
-    fn new(dictionary: &'a Dictionary, mrset: &'a DictIndexMultipleResponseSet) -> Self {
+    fn new_unchecked(dictionary: &'a Dictionary, mrset: &'a DictIndexMultipleResponseSet) -> Self {
          Self { dictionary, mrset }
      }
  
+    fn new(
+        dictionary: &'a Dictionary,
+        mrset: &'a DictIndexMultipleResponseSet,
+    ) -> Result<Self, MrSetError> {
+        let variables = MappedVariables::new(dictionary, &mrset.variables)?;
+        let (min_width, _max_width) = Self::widths(&variables)
+            .ok_or_else(|| MrSetError::MixedMrSet(mrset.name.clone()))?
+            .into_inner();
+
+        if let MultipleResponseType::MultipleDichotomy { datum, labels: _ } = &mrset.mr_type {
+            match (datum, min_width) {
+                (Datum::Number(_), VarWidth::Numeric) => (),
+                (Datum::String(s), VarWidth::String(min_width)) => {
+                    if s.without_trailing_spaces().len() > min_width as usize {}
+                }
+                _ => return Err(MrSetError::MixedMrSet(mrset.name.clone())),
+            }
+        }
+        Ok(Self::new_unchecked(dictionary, mrset))
+    }
+
+    fn widths(variables: &MappedVariables<'_>) -> Option<RangeInclusive<VarWidth>> {
+        variables
+            .iter()
+            .map(|v| Some((v.width, v.width)))
+            .reduce(|a, b| {
+                let (na, wa) = a?;
+                let (nb, wb) = b?;
+                Some((VarWidth::narrower(na, nb)?, VarWidth::wider(wa, wb)?))
+            })
+            .flatten()
+            .map(|(min_width, max_width)| min_width..=max_width)
+    }
+
      pub fn name(&self) -> &Identifier {
          &self.mrset.name
      }
@@ -1455,7 +1546,7 @@ impl<'a> MultipleResponseSet<'a> {
      }
  
      pub fn width(&self) -> RangeInclusive<VarWidth> {
-        self.mrset.width.clone()
+        Self::widths(&self.variables()).unwrap()
      }
  
      pub fn mr_type(&self) -> &MultipleResponseType {
@@ -1491,9 +1582,6 @@ pub struct DictIndexMultipleResponseSet {
      /// A description for the set.
      pub label: String,
  
-    /// Range of widths among the variables.
-    pub width: RangeInclusive<VarWidth>,
-
      /// What kind of multiple response set this is.
      pub mr_type: MultipleResponseType,
  
@@ -1592,7 +1680,11 @@ mod tests {
      use smallvec::SmallVec;
  
      use crate::{
-        dictionary::{DictIndexVector, Dictionary},
+        data::Datum,
+        dictionary::{
+            CategoryLabels, DictIndexMultipleResponseSet, DictIndexVector, Dictionary,
+            MultipleResponseType,
+        },
          identifier::Identifier,
          variable::{VarWidth, Variable},
      };
@@ -1716,12 +1808,72 @@ mod tests {
              .vectors_mut()
              .insert(DictIndexVector {
                  name: Identifier::new("ààààààààààààààààààààààààààààààààx").unwrap(),
+                variables: vec![1, 0],
+            })
+            .unwrap();
+
+        dictionary
+            .mrsets_mut()
+            .insert(DictIndexMultipleResponseSet {
+                name: Identifier::new("üüüüüüüüüüüüüüüüüüüüüüüüüüüüüüüüasdf").unwrap(),
+                label: String::from("my mcgroup"),
+                mr_type: MultipleResponseType::MultipleCategory,
+                variables: vec![0, 1],
+            })
+            .unwrap();
+        dictionary
+            .mrsets_mut()
+            .insert(DictIndexMultipleResponseSet {
+                name: Identifier::new("üüüüüüüüüüüüüüüüüüüüüüüüüüüüüüüüquux").unwrap(),
+                label: String::new(),
+                mr_type: MultipleResponseType::MultipleDichotomy {
+                    datum: Datum::Number(Some(55.0)),
+                    labels: CategoryLabels::VarLabels,
+                },
                  variables: vec![0, 1],
              })
              .unwrap();
  
          dictionary.codepage_to_unicode();
          dbg!(&dictionary);
-        todo!()
+        assert_eq!(
+            &dictionary.variables[0].name,
+            "éééééééééééééééééééééééééééééééé"
+        );
+        assert_eq!(&dictionary.variables[1].name, "Var1");
+        assert_eq!(
+            dictionary
+                .vectors()
+                .get(&Identifier::new("àààààààààààààààààààààààààààààààà").unwrap())
+                .unwrap()
+                .variables()
+                .dict_indexes(),
+            &[0, 1]
+        );
+        assert_eq!(
+            dictionary
+                .vectors()
+                .get(&Identifier::new("Vec1").unwrap())
+                .unwrap()
+                .variables()
+                .dict_indexes(),
+            &[1, 0]
+        );
+        assert!(matches!(
+            dictionary
+                .mrsets()
+                .get(&Identifier::new("üüüüüüüüüüüüüüüüüüüüüüüüüüüüüüüü").unwrap())
+                .unwrap()
+                .mr_type(),
+            MultipleResponseType::MultipleCategory
+        ));
+        assert!(matches!(
+            dictionary
+                .mrsets()
+                .get(&Identifier::new("MrSet1").unwrap())
+                .unwrap()
+                .mr_type(),
+            MultipleResponseType::MultipleDichotomy { .. }
+        ));
      }
  }
diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs

index 049c1527918d8c5ecce51aa77443bf9a67a5865b..f6e3233a137c8cc3b21ba560ea61435f5076a210 100644 (file)
--- a/rust/pspp/src/sys/cooked.rs
+++ b/rust/pspp/src/sys/cooked.rs
@@ -28,11 +28,12 @@ use crate::{
      crypto::EncryptedFile,
      data::{ByteString, Case, Datum, MutRawString, RawString},
      dictionary::{
-        DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, MultipleResponseType,
+        DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, MrSetError,
+        MultipleResponseType,
      },
      format::{Error as FormatError, Format, UncheckedFormat},
      hexfloat::HexFloat,
-    identifier::{ByIdentifier, Error as IdError, Identifier},
+    identifier::{Error as IdError, Identifier},
      output::pivot::{Group, Value},
      sys::{
          raw::{
@@ -49,7 +50,7 @@ use crate::{
          },
          serialize_endian,
      },
-    variable::{InvalidRole, MissingValues, MissingValuesError, VarWidth, Variable},
+    variable::{InvalidRole, MissingValues, MissingValuesError, VarType, VarWidth, Variable},
  };
  use anyhow::{anyhow, Error as AnyError};
  use binrw::{BinRead, BinWrite, Endian};
@@ -195,12 +196,9 @@ pub enum Error {
          Identifier,
      ),
  
-    /// Multiple response set {0} contains both string and numeric variables.
-    #[error("Multiple response set {0} contains both string and numeric variables.")]
-    MixedMrSet(
-        /// Multiple response set name.
-        Identifier,
-    ),
+    /// Error adding multiple response set.
+    #[error("{0}")]
+    MrSetError(#[from] MrSetError),
  
      /// Invalid numeric format for counted value {number} in multiple response set {mr_set}.
      #[error(
@@ -213,21 +211,6 @@ pub enum Error {
          number: String,
      },
  
-    /// Counted value {value} has width {width}, but it must be no wider than
-    /// {max_width}, the width of the narrowest variable in multiple response
-    /// set {mr_set}.
-    #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
-    TooWideMDGroupCountedValue {
-        /// Multiple response set name.
-        mr_set: Identifier,
-        /// Counted value.
-        value: String,
-        /// Width of counted value.
-        width: usize,
-        /// Maximum allowed width of counted value.
-        max_width: u16,
-    },
-
      /// Ignoring long string value label for unknown variable {0}.
      #[error("Ignoring long string value label for unknown variable {0}.")]
      UnknownLongStringValueLabelVariable(
@@ -1077,7 +1060,7 @@ impl Records {
          {
              match DictIndexMultipleResponseSet::decode(&dictionary, record, &mut warn) {
                  Ok(mrset) => {
-                    dictionary.mrsets.insert(ByIdentifier::new(mrset));
+                    dictionary.mrsets_mut().insert(mrset).unwrap();
                  }
                  Err(error) => warn(error),
              }
@@ -1574,21 +1557,18 @@ impl DictIndexMultipleResponseSet {
              _ => (),
          }
  
-        let Some((Some(min_width), Some(max_width))) = variables
+        let Ok(var_type) = variables
              .iter()
-            .copied()
-            .map(|dict_index| dictionary.variables[dict_index].width)
-            .map(|w| (Some(w), Some(w)))
-            .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
+            .map(|dict_index| VarType::from(dictionary.variables[*dict_index].width))
+            .all_equal_value()
          else {
-            return Err(Error::MixedMrSet(mr_set_name));
+            return Err(MrSetError::MixedMrSet(mr_set_name).into());
          };
  
-        let mr_type = MultipleResponseType::decode(&mr_set_name, &input.mr_type, min_width)?;
+        let mr_type = MultipleResponseType::decode(&mr_set_name, &input.mr_type, var_type)?;
  
          Ok(DictIndexMultipleResponseSet {
              name: mr_set_name,
-            width: min_width..=max_width,
              label: input.label.to_string(),
              mr_type,
              variables,
@@ -1640,12 +1620,12 @@ impl MultipleResponseType {
      fn decode(
          mr_set: &Identifier,
          input: &raw::records::MultipleResponseType,
-        min_width: VarWidth,
+        var_type: VarType,
      ) -> Result<Self, Error> {
          match input {
              raw::records::MultipleResponseType::MultipleDichotomy { value, labels } => {
-                let value = match min_width {
-                    VarWidth::Numeric => {
+                let value = match var_type {
+                    VarType::Numeric => {
                          let string = String::from_utf8_lossy(&value.0);
                          let number: f64 = string.trim().parse().map_err(|_| {
                              Error::InvalidMDGroupCountedValue {
@@ -1655,21 +1635,10 @@ impl MultipleResponseType {
                          })?;
                          Datum::Number(Some(number))
                      }
-                    VarWidth::String(max_width) => {
-                        let mut value = value.0.as_slice();
-                        while value.ends_with(b" ") {
-                            value = &value[..value.len() - 1];
-                        }
-                        let width = value.len();
-                        if width > max_width as usize {
-                            return Err(Error::TooWideMDGroupCountedValue {
-                                mr_set: mr_set.clone(),
-                                value: String::from_utf8_lossy(value).into(),
-                                width,
-                                max_width,
-                            });
-                        };
-                        Datum::String(value.into())
+                    VarType::String => {
+                        let mut value = value.clone();
+                        value.trim_end();
+                        Datum::String(value)
                      }
                  };
                  Ok(MultipleResponseType::MultipleDichotomy {
diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs

index 651d4254f68d552cc8963586d666df819dc5320b..16e54e65387ad01e4ecd4deb523e9cbbb89b5d41 100644 (file)
--- a/rust/pspp/src/sys/write.rs
+++ b/rust/pspp/src/sys/write.rs
@@ -498,13 +498,13 @@ where
          let mut output = Vec::new();
          for set in self
              .dictionary
-            .mrsets
+            .mrsets()
              .iter()
-            .filter(|set| set.mr_type.supported_before_v14() == pre_v14)
+            .filter(|set| set.mr_type().supported_before_v14() == pre_v14)
          {
-            output.extend_from_slice(&self.dictionary.encoding().encode(&set.name).0[..]);
+            output.extend_from_slice(&self.dictionary.encoding().encode(set.name()).0[..]);
              output.push(b'=');
-            match &set.mr_type {
+            match set.mr_type() {
                  MultipleResponseType::MultipleDichotomy { datum, labels } => {
                      let leader = match labels {
                          CategoryLabels::VarLabels => b"D".as_slice(),
@@ -531,15 +531,15 @@ where
                  MultipleResponseType::MultipleCategory => write!(&mut output, "C ").unwrap(),
              }
  
-            let label = if set.mr_type.label_from_var_label() {
+            let label = if set.mr_type().label_from_var_label() {
                  Cow::from(&[])
              } else {
-                self.dictionary.encoding().encode(&set.label).0
+                self.dictionary.encoding().encode(set.label()).0
              };
              write!(&mut output, "{} ", label.len()).unwrap();
              output.extend_from_slice(&label[..]);
  
-            for variable in set.variables.iter().copied() {
+            for variable in set.variables().dict_indexes().iter().copied() {
                  // Only lowercase ASCII characters because other characters
                  // might expand upon lowercasing.
                  let short_name = self.short_names[variable][0].as_str().to_ascii_lowercase();
@@ -1215,7 +1215,7 @@ mod tests {
              CategoryLabels, DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary,
              MultipleResponseType,
          },
-        identifier::{ByIdentifier, Identifier},
+        identifier::Identifier,
          sys::{
              raw::{
                  records::{
@@ -1709,44 +1709,43 @@ mod tests {
              }
          }
          dictionary
-            .mrsets
-            .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+            .mrsets_mut()
+            .insert(DictIndexMultipleResponseSet {
                  name: Identifier::new("$a").unwrap(),
                  label: String::from("my mcgroup"),
-                width: VarWidth::Numeric..=VarWidth::Numeric,
                  mr_type: MultipleResponseType::MultipleCategory,
                  variables: vec![0, 1, 2],
-            }));
+            })
+            .unwrap();
          dictionary
-            .mrsets
-            .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+            .mrsets_mut()
+            .insert(DictIndexMultipleResponseSet {
                  name: Identifier::new("$b").unwrap(),
                  label: String::new(),
-                width: VarWidth::Numeric..=VarWidth::Numeric,
                  mr_type: MultipleResponseType::MultipleDichotomy {
                      datum: Datum::Number(Some(55.0)),
                      labels: CategoryLabels::VarLabels,
                  },
                  variables: vec![6, 4, 5, 3],
-            }));
+            })
+            .unwrap();
          dictionary
-            .mrsets
-            .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+            .mrsets_mut()
+            .insert(DictIndexMultipleResponseSet {
                  name: Identifier::new("$c").unwrap(),
                  label: String::from("mdgroup #2"),
-                width: VarWidth::String(3)..=VarWidth::String(3),
                  mr_type: MultipleResponseType::MultipleDichotomy {
                      datum: Datum::String("Yes".into()),
                      labels: CategoryLabels::VarLabels,
                  },
                  variables: vec![7, 8, 9],
-            }));
+            })
+            .unwrap();
          dictionary
-            .mrsets
-            .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+            .mrsets_mut()
+            .insert(DictIndexMultipleResponseSet {
                  name: Identifier::new("$d").unwrap(),
                  label: String::from("third mdgroup"),
-                width: VarWidth::Numeric..=VarWidth::Numeric,
                  mr_type: MultipleResponseType::MultipleDichotomy {
                      datum: Datum::Number(Some(34.0)),
                      labels: CategoryLabels::CountedValues {
@@ -1754,13 +1753,13 @@ mod tests {
                      },
                  },
                  variables: vec![10, 11, 12],
-            }));
+            })
+            .unwrap();
          dictionary
-            .mrsets
-            .insert(ByIdentifier::new(DictIndexMultipleResponseSet {
+            .mrsets_mut()
+            .insert(DictIndexMultipleResponseSet {
                  name: Identifier::new("$e").unwrap(),
                  label: String::new(),
-                width: VarWidth::String(6)..=VarWidth::String(6),
                  mr_type: MultipleResponseType::MultipleDichotomy {
                      datum: Datum::String("choice".into()),
                      labels: CategoryLabels::CountedValues {
@@ -1768,7 +1767,8 @@ mod tests {
                      },
                  },
                  variables: vec![13, 14, 15],
-            }));
+            })
+            .unwrap();
  
          fn get_mrsets(dictionary: &Dictionary, pre_v14: bool) -> String {
              let mut raw = Vec::new();
diff --git a/rust/pspp/src/variable.rs b/rust/pspp/src/variable.rs

index 3e09f16c2e3207f96862f09ee84f54ebb71853d7..01913dfd0fccc57975be71081aea97b4486d340b 100644 (file)
--- a/rust/pspp/src/variable.rs
+++ b/rust/pspp/src/variable.rs
@@ -78,7 +78,7 @@ impl Display for VarType {
  #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
  pub enum VarWidth {
      Numeric,
-    String(u16),
+    String(u16), // XXX change to NonZeroU16, or to 1..=32767 range type
  }
  
  impl VarWidth {
@@ -91,16 +91,10 @@ impl VarWidth {
          }
      }
  
-    fn width_predicate(
-        a: Option<VarWidth>,
-        b: Option<VarWidth>,
-        f: impl Fn(u16, u16) -> u16,
-    ) -> Option<VarWidth> {
+    fn width_predicate(a: VarWidth, b: VarWidth, f: impl Fn(u16, u16) -> u16) -> Option<VarWidth> {
          match (a, b) {
-            (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
-            (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
-                Some(VarWidth::String(f(a, b)))
-            }
+            (VarWidth::Numeric, VarWidth::Numeric) => Some(VarWidth::Numeric),
+            (VarWidth::String(a), VarWidth::String(b)) => Some(VarWidth::String(f(a, b))),
              _ => None,
          }
      }
@@ -109,13 +103,12 @@ impl VarWidth {
      /// - Numerical variable widths are equally wide.
      /// - Longer strings are wider than shorter strings.
      /// - Numerical and string types are incomparable, so result in `None`.
-    /// - Any `None` in the input yields `None` in the output.
-    pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+    pub fn wider(a: VarWidth, b: VarWidth) -> Option<VarWidth> {
          Self::width_predicate(a, b, |a, b| a.max(b))
      }
  
      /// Returns the narrower of `self` and `other` (see [`Self::wider`]).
-    pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+    pub fn narrower(a: VarWidth, b: VarWidth) -> Option<VarWidth> {
          Self::width_predicate(a, b, |a, b| a.min(b))
      }
author	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 10 Aug 2025 03:38:36 +0000 (20:38 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 10 Aug 2025 03:38:36 +0000 (20:38 -0700)
rust/pspp/src/data.rs		patch \| blob \| history
rust/pspp/src/dictionary.rs		patch \| blob \| history
rust/pspp/src/sys/cooked.rs		patch \| blob \| history
rust/pspp/src/sys/write.rs		patch \| blob \| history
rust/pspp/src/variable.rs		patch \| blob \| history