From: Ben Pfaff Date: Sat, 26 Jul 2025 18:22:07 +0000 (-0700) Subject: generalize encodeddatum X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=907cfeb1f3039bbdb5ce628e9db8f7e76c5ac5a2;p=pspp generalize encodeddatum --- diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index 61cc4269e0..c9b6ed6869 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -139,7 +139,7 @@ impl OwnedRawString { pub fn with_encoding(self, encoding: &'static Encoding) -> OwnedEncodedString { EncodedString { - bytes: self, + raw: self, encoding, } } @@ -193,7 +193,7 @@ where pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedString<&BorrowedRawString> { EncodedString { encoding, - bytes: self.borrowed(), + raw: self.borrowed(), } } } @@ -224,7 +224,7 @@ impl From<[u8; N]> for OwnedRawString { impl From for OwnedRawString { fn from(value: OwnedEncodedString) -> Self { - value.bytes + value.raw } } @@ -273,10 +273,13 @@ where } } +pub type OwnedEncodedDatum = EncodedDatum; +pub type BorrowedEncodedDatum<'a> = EncodedDatum>; + /// The value of a [Variable](crate::dictionary::Variable), with a string /// encoding. #[derive(Clone)] -pub enum EncodedDatum { +pub enum EncodedDatum { /// A numeric value. Number( /// A number, or `None` for the system-missing value. @@ -285,18 +288,31 @@ pub enum EncodedDatum { /// A string value. String( /// The value, in the variable's encoding. - OwnedEncodedString, + D, ), } -impl EncodedDatum { - pub fn into_raw(self) -> Datum { +impl EncodedDatum> +where + R: Borrow, +{ + pub fn into_raw(self) -> Datum { match self { EncodedDatum::Number(number) => Datum::Number(number), - EncodedDatum::String(encoded_string) => Datum::String(encoded_string.into()), + EncodedDatum::String(encoded_string) => Datum::String(encoded_string.into_raw()), } } + /// Returns the [VarWidth] corresponding to this datum. + pub fn width(&self) -> VarWidth { + match self { + Self::Number(_) => VarWidth::Numeric, + Self::String(s) => VarWidth::String(s.len().try_into().unwrap()), + } + } +} + +impl EncodedDatum { /// Constructs a new numerical [EncodedDatum] for the system-missing value. pub const fn sysmis() -> Self { Self::Number(None) @@ -313,7 +329,7 @@ impl EncodedDatum { /// Returns the string inside this datum, or `None` if this is a numeric /// datum. - pub fn as_string(&self) -> Option<&OwnedEncodedString> { + pub fn as_string(&self) -> Option<&D> { match self { Self::Number(_) => None, Self::String(s) => Some(s), @@ -322,13 +338,23 @@ impl EncodedDatum { /// Returns the string inside this datum as a mutable borrow, or `None` if /// this is a numeric datum. - pub fn as_string_mut(&mut self) -> Option<&mut OwnedEncodedString> { + pub fn as_string_mut(&mut self) -> Option<&mut D> { match self { Self::Number(_) => None, Self::String(s) => Some(s), } } + /// Returns the [VarType] corresponding to this datum. + pub fn var_type(&self) -> VarType { + match self { + Self::Number(_) => VarType::Numeric, + Self::String(_) => VarType::String, + } + } +} + +impl EncodedDatum { /// Resizes this datum to the given `width`. Returns `Ok(())` if /// successful, if and only if this datum and `width` are both string or /// both numeric and, for string widths, resizing would not drop any @@ -341,22 +367,6 @@ impl EncodedDatum { } } - /// Returns the [VarType] corresponding to this datum. - pub fn var_type(&self) -> VarType { - match self { - Self::Number(_) => VarType::Numeric, - Self::String(_) => VarType::String, - } - } - - /// Returns the [VarWidth] corresponding to this datum. - pub fn width(&self) -> VarWidth { - match self { - Self::Number(_) => VarWidth::Numeric, - Self::String(s) => VarWidth::String(s.len().try_into().unwrap()), - } - } - /// Compares this datum and `other` for equality, ignoring trailing ASCII /// spaces in either, if they are both strings, for the purpose of /// comparison. @@ -709,7 +719,7 @@ where Datum::Number(number) => EncodedDat::Number(*number), Datum::String(raw_string) => EncodedDat::String(EncodedString { encoding, - bytes: raw_string.borrow(), + raw: raw_string.borrow(), }), } } @@ -877,7 +887,7 @@ pub type BorrowedEncodedString<'a> = EncodedString<&'a BorrowedRawString>; #[derive(Copy, Clone, Debug)] pub struct EncodedString { /// The bytes of the string. - bytes: R, + raw: R, /// The string's encoding. encoding: &'static Encoding, @@ -888,14 +898,15 @@ where R: Borrow, { pub fn new(raw: R, encoding: &'static Encoding) -> Self { - Self { - bytes: raw, - encoding, - } + Self { raw, encoding } + } + + pub fn into_raw(self) -> R { + self.raw } pub fn len(&self) -> usize { - self.bytes.borrow().len() + self.raw.borrow().len() } /// Returns this string recoded in UTF-8. Invalid characters will be @@ -923,7 +934,7 @@ where /// Returns the bytes in the string, in its encoding. pub fn as_bytes(&self) -> &[u8] { - &self.bytes.borrow().0 + &self.raw.borrow().0 } /// Compares this string and `other` for equality, ignoring trailing ASCII @@ -934,8 +945,8 @@ where R2: Borrow, { self.borrowed() - .bytes - .eq_ignore_trailing_spaces(&other.borrowed().bytes) + .raw + .eq_ignore_trailing_spaces(&other.borrowed().raw) } /// Returns the string's [Encoding]. @@ -947,13 +958,13 @@ where pub fn borrowed<'a>(&'a self) -> EncodedString<&'a BorrowedRawString> { EncodedString { encoding: self.encoding, - bytes: self.bytes.borrow(), + raw: self.raw.borrow(), } } /// Returns true if this string is empty. pub fn is_empty(&self) -> bool { - self.bytes.borrow().is_empty() + self.raw.borrow().is_empty() } /// Returns a helper for displaying this string in double quotes. @@ -969,24 +980,24 @@ impl OwnedEncodedString { if !self.as_bytes()[new_len..].iter().all(|b| *b == b' ') { return Err(()); } - self.bytes.0.truncate(new_len); + self.raw.0.truncate(new_len); } Ordering::Equal => (), - Ordering::Greater => self.bytes.0.extend((self.len()..new_len).map(|_| b' ')), + Ordering::Greater => self.raw.0.extend((self.len()..new_len).map(|_| b' ')), } Ok(()) } /// Removes any trailing ASCII spaces. pub fn trim_end(&mut self) { - while self.bytes.0.pop_if(|c| *c == b' ').is_some() {} + while self.raw.0.pop_if(|c| *c == b' ').is_some() {} } } impl<'a> From> for OwnedEncodedString { fn from(value: BorrowedEncodedString<'a>) -> Self { Self { - bytes: value.bytes.into(), + raw: value.raw.into(), encoding: value.encoding, } } @@ -995,7 +1006,7 @@ impl<'a> From> for OwnedEncodedString { impl From<&str> for OwnedEncodedString { fn from(value: &str) -> Self { Self { - bytes: RawString(value.into()), + raw: RawString(value.into()), encoding: UTF_8, } } @@ -1004,7 +1015,7 @@ impl From<&str> for OwnedEncodedString { impl<'a> From<&'a str> for BorrowedEncodedString<'a> { fn from(value: &'a str) -> Self { Self { - bytes: BorrowedRawString::new(value.as_bytes()), + raw: BorrowedRawString::new(value.as_bytes()), encoding: UTF_8, } } @@ -1041,7 +1052,7 @@ where { fn eq(&self, other: &EncodedString) -> bool { // XXX should this consider the encodings? - self.borrowed().bytes.eq(other.borrowed().bytes) + self.borrowed().raw.eq(other.borrowed().raw) } }