From: Ben Pfaff Date: Fri, 25 Jul 2025 01:48:51 +0000 (-0700) Subject: generalize datum, without yet getting rid of dat X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=77d28fe762f1485a38d6c11e1ff472d5bcbbb479;p=pspp generalize datum, without yet getting rid of dat --- diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index b85033c4d7..883060ef82 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -271,7 +271,7 @@ pub enum EncodedDatum { } impl EncodedDatum { - pub fn into_raw(self) -> Datum { + pub fn into_raw(self) -> Datum { match self { EncodedDatum::Number(number) => Datum::Number(number), EncodedDatum::String(encoded_string) => Datum::String(encoded_string.into()), @@ -572,7 +572,10 @@ impl Dat<'_> { /// The value of a [Variable](crate::dictionary::Variable). #[derive(Clone)] -pub enum Datum { +pub enum Datum +where + B: Borrow, +{ /// A numeric value. Number( /// A number, or `None` for the system-missing value. @@ -581,11 +584,14 @@ pub enum Datum { /// A string value. String( /// The value, in the variable's encoding. - RawString, + B, ), } -impl Debug for Datum { +impl Debug for Datum +where + B: Borrow + Debug, +{ fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { match self { Self::Number(Some(number)) => write!(f, "{number:?}"), @@ -595,7 +601,10 @@ impl Debug for Datum { } } -impl Serialize for Datum { +impl Serialize for Datum +where + B: Borrow + Serialize, +{ fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, @@ -607,28 +616,37 @@ impl Serialize for Datum { } } -impl PartialEq for Datum { +impl PartialEq for Datum +where + B: Borrow, +{ fn eq(&self, other: &Self) -> bool { match (self, other) { (Self::Number(Some(l0)), Self::Number(Some(r0))) => { OrderedFloat(*l0) == OrderedFloat(*r0) } (Self::Number(None), Self::Number(None)) => true, - (Self::String(l0), Self::String(r0)) => l0 == r0, + (Self::String(l0), Self::String(r0)) => l0.borrow() == r0.borrow(), _ => false, } } } -impl Eq for Datum {} +impl Eq for Datum where B: Borrow {} -impl PartialOrd for Datum { +impl PartialOrd for Datum +where + B: Borrow, +{ fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl Ord for Datum { +impl Ord for Datum +where + B: Borrow, +{ fn cmp(&self, other: &Self) -> Ordering { match (self, other) { (Self::Number(a), Self::Number(b)) => match (a, b) { @@ -639,21 +657,24 @@ impl Ord for Datum { }, (Self::Number(_), Self::String(_)) => Ordering::Less, (Self::String(_), Self::Number(_)) => Ordering::Greater, - (Self::String(a), Self::String(b)) => a.cmp(b), + (Self::String(a), Self::String(b)) => a.borrow().cmp(b.borrow()), } } } -impl Hash for Datum { +impl Hash for Datum +where + B: Borrow, +{ fn hash(&self, state: &mut H) { match self { Self::Number(number) => number.map(OrderedFloat).hash(state), - Self::String(string) => string.hash(state), + Self::String(string) => string.borrow().hash(state), } } } -impl Datum { +impl Datum { /// Constructs a new numerical [Datum] for the system-missing value. pub const fn sysmis() -> Self { Self::Number(None) @@ -742,7 +763,7 @@ impl Datum { /// Compares this datum and `other` for equality, ignoring trailing ASCII /// spaces in either, if they are both strings, for the purpose of /// comparison. - pub fn eq_ignore_trailing_spaces(&self, other: &Datum) -> bool { + pub fn eq_ignore_trailing_spaces(&self, other: &Datum) -> bool { match (self, other) { (Self::String(a), Self::String(b)) => a.eq_ignore_trailing_spaces(b), _ => self == other, @@ -765,25 +786,37 @@ impl Datum { } } -impl From for Datum { +impl From for Datum +where + B: Borrow, +{ fn from(number: f64) -> Self { Some(number).into() } } -impl From> for Datum { +impl From> for Datum +where + B: Borrow, +{ fn from(value: Option) -> Self { Self::Number(value) } } -impl From<&str> for Datum { +impl From<&str> for Datum +where + B: Borrow + for<'a> From<&'a [u8]>, +{ fn from(value: &str) -> Self { value.as_bytes().into() } } -impl From<&[u8]> for Datum { +impl From<&[u8]> for Datum +where + B: Borrow + for<'a> From<&'a [u8]>, +{ fn from(value: &[u8]) -> Self { Self::String(value.into()) } @@ -796,17 +829,17 @@ pub struct RawCase( /// order. /// /// [Dictionary]: crate::dictionary::Dictionary - pub Vec, + pub Vec>, ); impl RawCase { - pub fn as_encoding(&self, encoding: &'static Encoding) -> Case<&'_ [Datum]> { + pub fn as_encoding(&self, encoding: &'static Encoding) -> Case<&'_ [Datum]> { Case { encoding, data: &self.0, } } - pub fn with_encoding(self, encoding: &'static Encoding) -> Case> { + pub fn with_encoding(self, encoding: &'static Encoding) -> Case>> { Case { encoding, data: self.0, @@ -816,7 +849,7 @@ impl RawCase { pub struct Case where - B: Borrow<[Datum]>, + B: Borrow<[Datum]>, { encoding: &'static Encoding, data: B, @@ -824,14 +857,14 @@ where impl Case where - B: Borrow<[Datum]>, + B: Borrow<[Datum]>, { fn len(&self) -> usize { self.data.borrow().len() } } -impl IntoIterator for Case> { +impl IntoIterator for Case>> { type Item = EncodedDatum; type IntoIter = CaseVecIter; @@ -846,7 +879,7 @@ impl IntoIterator for Case> { pub struct CaseVecIter { encoding: &'static Encoding, - iter: std::vec::IntoIter, + iter: std::vec::IntoIter>, } impl Iterator for CaseVecIter { diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 30a31f915e..1d4f9201f2 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -40,7 +40,7 @@ use thiserror::Error as ThisError; use unicase::UniCase; use crate::{ - data::{Datum, EncodedDat, EncodedDatum}, + data::{Datum, EncodedDat, EncodedDatum, RawString}, format::{DisplayPlain, Format}, identifier::{ByIdentifier, HasIdentifier, Identifier}, output::pivot::{ @@ -1845,7 +1845,7 @@ pub enum MultipleResponseType { /// one value (the "counted value") means that the box was checked, and any /// other value means that it was not. MultipleDichotomy { - datum: Datum, + datum: Datum, labels: CategoryLabels, }, @@ -1903,7 +1903,7 @@ impl DictIndexVariableSet { } #[derive(Clone, Default, PartialEq, Eq, Serialize)] -pub struct ValueLabels(pub HashMap); +pub struct ValueLabels(pub HashMap, String>); impl ValueLabels { pub fn new() -> Self { @@ -1914,11 +1914,11 @@ impl ValueLabels { self.0.is_empty() } - pub fn get(&self, datum: &Datum) -> Option<&str> { + pub fn get(&self, datum: &Datum) -> Option<&str> { self.0.get(datum).map(|s| s.as_str()) } - pub fn insert(&mut self, datum: Datum, label: String) -> Option { + pub fn insert(&mut self, datum: Datum, label: String) -> Option { self.0.insert(datum, label) } diff --git a/rust/pspp/src/format/mod.rs b/rust/pspp/src/format/mod.rs index 75bac4038a..ac7c98f213 100644 --- a/rust/pspp/src/format/mod.rs +++ b/rust/pspp/src/format/mod.rs @@ -393,7 +393,7 @@ impl Type { } } - pub fn default_value(&self) -> Datum { + pub fn default_value(&self) -> Datum { match self.var_type() { VarType::Numeric => Datum::sysmis(), VarType::String => Datum::String(RawString::default()), @@ -621,7 +621,7 @@ impl Format { Ok(self) } - pub fn default_value(&self) -> Datum { + pub fn default_value(&self) -> Datum { match self.var_width() { VarWidth::Numeric => Datum::sysmis(), VarWidth::String(width) => Datum::String(RawString::spaces(width as usize)), diff --git a/rust/pspp/src/format/parse.rs b/rust/pspp/src/format/parse.rs index 495037938b..e423dc98cf 100644 --- a/rust/pspp/src/format/parse.rs +++ b/rust/pspp/src/format/parse.rs @@ -16,7 +16,7 @@ use crate::{ calendar::{calendar_gregorian_to_offset, DateError}, - data::{Datum, EncodedStr, EncodedString}, + data::{Datum, EncodedStr, EncodedString, RawString}, endian::{Endian, Parse}, format::{DateTemplate, Decimals, Settings, TemplateItem, Type}, settings::{EndianSettings, Settings as PsppSettings}, @@ -190,7 +190,7 @@ impl<'a> ParseValue<'a> { /// input into UTF-8, but this will screw up parsing of binary formats, /// because recoding bytes from (e.g.) windows-1252 into UTF-8, and then /// interpreting them as a binary number yields nonsense. - pub fn parse<'b, T>(&self, input: T) -> Result + pub fn parse<'b, T>(&self, input: T) -> Result, ParseError> where T: Into>, { @@ -239,7 +239,7 @@ impl<'a> ParseValue<'a> { }) } - fn parse_number(&self, input: &str, type_: Type) -> Result { + fn parse_number(&self, input: &str, type_: Type) -> Result, ParseErrorKind> { let style = self.settings.number_style(type_); let input = input.trim(); @@ -312,14 +312,14 @@ impl<'a> ParseValue<'a> { } } - fn parse_n(&self, input: &str) -> Result { + fn parse_n(&self, input: &str) -> Result, ParseErrorKind> { match input.chars().find(|c| !c.is_ascii_digit()) { None => Ok(Datum::Number(Some(input.parse().unwrap()))), Some(nondigit) => Err(ParseErrorKind::Nondigit(nondigit)), } } - fn parse_z(&self, input: &str) -> Result { + fn parse_z(&self, input: &str) -> Result, ParseErrorKind> { let input = input.trim(); if input.is_empty() || input == "." { return Ok(Datum::sysmis()); @@ -396,12 +396,12 @@ impl<'a> ParseValue<'a> { } } - fn parse_pk(&self, input: &[u8]) -> Result { + fn parse_pk(&self, input: &[u8]) -> Result, ParseErrorKind> { let number = Self::parse_bcd(input)?; Ok(Datum::Number(Some(self.apply_decimals(number as f64)))) } - fn parse_p(&self, input: &[u8]) -> Result { + fn parse_p(&self, input: &[u8]) -> Result, ParseErrorKind> { if input.is_empty() { return Ok(Datum::Number(None)); }; @@ -423,7 +423,7 @@ impl<'a> ParseValue<'a> { } } - fn parse_ib(&self, input: &[u8]) -> Result { + fn parse_ib(&self, input: &[u8]) -> Result, ParseErrorKind> { let number = self.parse_binary(input); let sign_bit = 1 << (input.len() * 8 - 1); let number = if (number & sign_bit) == 0 { @@ -434,12 +434,12 @@ impl<'a> ParseValue<'a> { Ok(Datum::Number(Some(self.apply_decimals(number as f64)))) } - fn parse_pib(&self, input: &[u8]) -> Result { + fn parse_pib(&self, input: &[u8]) -> Result, ParseErrorKind> { let number = self.parse_binary(input); Ok(Datum::Number(Some(self.apply_decimals(number as f64)))) } - fn parse_rb(&self, input: &[u8]) -> Result { + fn parse_rb(&self, input: &[u8]) -> Result, ParseErrorKind> { let mut bytes = [0; 8]; let len = input.len().min(8); bytes[..len].copy_from_slice(&input[..len]); @@ -453,7 +453,7 @@ impl<'a> ParseValue<'a> { Ok(Datum::Number(number)) } - fn parse_ahex(&self, input: &str) -> Result { + fn parse_ahex(&self, input: &str) -> Result, ParseErrorKind> { let mut result = Vec::with_capacity(input.len() / 2); let mut iter = input.chars(); while let Some(hi) = iter.next() { @@ -483,17 +483,17 @@ impl<'a> ParseValue<'a> { } } - fn parse_pibhex(&self, input: &str) -> Result { + fn parse_pibhex(&self, input: &str) -> Result, ParseErrorKind> { self.parse_hex(input) .map(|value| Datum::Number(value.map(|number| number as f64))) } - fn parse_rbhex(&self, input: &str) -> Result { + fn parse_rbhex(&self, input: &str) -> Result, ParseErrorKind> { self.parse_hex(input) .map(|value| Datum::Number(value.map(f64::from_bits))) } - fn parse_date(&self, input: &str) -> Result { + fn parse_date(&self, input: &str) -> Result, ParseErrorKind> { let mut p = StrParser(input.trim()); if p.0.is_empty() || p.0 == "." { return Ok(Datum::sysmis()); @@ -609,7 +609,7 @@ impl<'a> ParseValue<'a> { Ok(time + seconds) } - fn parse_wkday(&self, input: &str) -> Result { + fn parse_wkday(&self, input: &str) -> Result, ParseErrorKind> { let mut p = StrParser(input.trim()); if p.0.is_empty() || p.0 == "." { Ok(Datum::sysmis()) @@ -620,7 +620,7 @@ impl<'a> ParseValue<'a> { } } - fn parse_month(&self, input: &str) -> Result { + fn parse_month(&self, input: &str) -> Result, ParseErrorKind> { let mut p = StrParser(input.trim()); if p.0.is_empty() || p.0 == "." { Ok(Datum::sysmis()) diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index 8260b95488..7d9732261d 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -68,7 +68,7 @@ use thiserror::Error as ThisError; use tlo::parse_tlo; use crate::{ - data::{Datum, EncodedDat, EncodedDatum}, + data::{Datum, EncodedDat, EncodedDatum, RawString}, dictionary::{VarType, Variable}, format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat}, settings::{Settings, Show}, @@ -1866,7 +1866,7 @@ impl Value { EncodedDat::String(string) => Self::new_user_text(string.as_str()), } } - pub fn new_variable_value(variable: &Variable, value: &Datum) -> Self { + pub fn new_variable_value(variable: &Variable, value: &Datum) -> Self { let var_name = Some(variable.name.as_str().into()); let value_label = variable.value_labels.get(value).map(String::from); match value { diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 22662678e5..778bc778e2 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -1695,7 +1695,7 @@ impl Debug for Cases { } impl Iterator for Cases { - type Item = Result>, raw::Error>; + type Item = Result>>, raw::Error>; fn next(&mut self) -> Option { self.inner diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index 27ffb2f810..f330a0a5d4 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -930,7 +930,7 @@ impl RawDatum { /// Decodes a `RawDatum` into a [Datum] given that we now know the string /// width. - pub fn decode(&self, width: VarWidth) -> Datum { + pub fn decode(&self, width: VarWidth) -> Datum { match self { Self::Number(x) => Datum::Number(*x), Self::String(s) => { @@ -941,7 +941,7 @@ impl RawDatum { } } -impl Datum { +impl Datum { fn read_case( reader: &mut R, case_vars: &[CaseVar], diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index e46e86e2a0..c418c8f570 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -355,14 +355,14 @@ fn format_name(type_: u32) -> Cow<'static, str> { #[derive(Clone, Debug, Default, Serialize)] pub struct RawMissingValues { /// Individual missing values, up to 3 of them. - pub values: Vec, + pub values: Vec>, /// Optional range of missing values. pub range: Option, } impl RawMissingValues { - pub fn new(values: Vec, range: Option) -> Self { + pub fn new(values: Vec>, range: Option) -> Self { Self { values, range } } diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index e42f1c5066..785c6cb242 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -1,5 +1,5 @@ use std::{ - borrow::Cow, + borrow::{Borrow, Cow}, collections::HashMap, fmt::Write as _, fs::File, @@ -17,7 +17,7 @@ use itertools::zip_eq; use smallvec::SmallVec; use crate::{ - data::{Dat, Datum, EncodedDatum}, + data::{Dat, Datum, EncodedDatum, RawStr, RawString}, dictionary::{ Alignment, Attributes, CategoryLabels, Dictionary, Measure, MultipleResponseType, ValueLabels, VarWidth, @@ -674,7 +674,7 @@ impl BinWrite for Pad { } } -impl BinWrite for Datum { +impl BinWrite for Datum { type Args<'a> = (); fn write_options( @@ -842,7 +842,7 @@ where fn write_case_uncompressed<'c>( &mut self, - case: impl Iterator, + case: impl Iterator>, ) -> Result<(), BinError> { for (var, datum) in zip_eq(self.case_vars, case) { match var { @@ -865,7 +865,7 @@ where } fn write_case_compressed<'c>( &mut self, - case: impl Iterator, + case: impl Iterator>, ) -> Result<(), BinError> { for (var, datum) in zip_eq(self.case_vars, case) { match var { @@ -991,7 +991,7 @@ where /// Panics if [try_finish](Self::try_finish) has been called. pub fn write_case<'a>( &mut self, - case: impl IntoIterator, + case: impl IntoIterator>, ) -> Result<(), BinError> { match self.inner.as_mut().unwrap() { Either::Left(inner) => {