From: Ben Pfaff Date: Fri, 25 Jul 2025 23:24:47 +0000 (-0700) Subject: wokr on types X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3e52a002dffe990fa70909152216b3bb07735005;p=pspp wokr on types --- diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index a6f1519dcf..e1c9125b4b 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -47,32 +47,84 @@ use crate::{ sys::raw::RawDatum, }; -/// An owned string in an unspecified character encoding. +/// A string in an unspecified character encoding. /// -/// A [RawString] is usually associated with a [Variable] and uses the -/// variable's character encoding. We assume that the encoding is one supported -/// by [encoding_rs] with byte units (that is, not a `UTF-16` encoding). All of +/// `RawString` is usually associated with a [Variable], in the variable's +/// character encoding. We assume that the encoding is one supported by +/// [encoding_rs] with byte units (that is, not a `UTF-16` encoding). All of /// these encodings have some basic ASCII compatibility. /// -/// A [RawString] owns its contents and can grow and shrink, like a [Vec] or -/// [String]. For a borrowed raw string, see [RawStr]. +/// `RawString` is parameterized by its content type, which is either `Vec` +/// for an owned raw string (aliased as [OwnedRawString]) or `[u8]` for a +/// borrowed raw string (aliased as [BorrowedRawString]). /// /// [Variable]: crate::dictionary::Variable -#[derive(Clone, PartialEq, Default, Eq, PartialOrd, Ord, Hash)] -pub struct RawString(pub Vec); +#[derive(Clone, Default, Hash)] +pub struct RawString(pub B) +where + B: ?Sized; + +impl PartialEq> for RawString +where + B: Borrow<[u8]> + ?Sized, + B2: Borrow<[u8]> + ?Sized, +{ + fn eq(&self, other: &RawString) -> bool { + self.0.borrow().eq(other.0.borrow()) + } +} + +impl Eq for RawString where B: Borrow<[u8]> + ?Sized {} + +impl PartialOrd> for RawString +where + B: Borrow<[u8]> + ?Sized, + B2: Borrow<[u8]> + ?Sized, +{ + fn partial_cmp(&self, other: &RawString) -> Option { + self.0.borrow().partial_cmp(other.0.borrow()) + } +} -impl RawString { +impl Ord for RawString +where + B: Borrow<[u8]> + ?Sized, +{ + fn cmp(&self, other: &Self) -> Ordering { + self.0.borrow().cmp(other.0.borrow()) + } +} + +/// A [RawString] that owns its contents. +pub type OwnedRawString = RawString>; + +/// A [RawString] that borrows its contents. +/// +/// Because `[u8]` is not [Sized], [BorrowedRawString] may itself only be used +/// borrowed. +pub type BorrowedRawString = RawString<[u8]>; + +impl Borrow for OwnedRawString { + fn borrow(&self) -> &BorrowedRawString { + &BorrowedRawString::new(self.as_bytes()) + } +} + +impl BorrowedRawString { + pub fn new(s: &[u8]) -> &Self { + // SAFETY: `RawStr` is a transparent wrapper around `[u8]`, so we can + // turn a reference to the wrapped type into a reference to the wrapper + // type. + unsafe { &*(s as *const [u8] as *const BorrowedRawString) } + } +} + +impl OwnedRawString { /// Creates a new [RawString] that consists of `n` ASCII spaces. pub fn spaces(n: usize) -> Self { Self(std::iter::repeat_n(b' ', n).collect()) } - /// Creates an [EncodedStr] with `encoding` that borrows this string's - /// contents. - pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> { - EncodedStr::new(&self.0, encoding) - } - /// Extends or shortens this [RawString] to exactly `len` bytes. If the /// string needs to be extended, does so by appending spaces. /// @@ -95,140 +147,101 @@ impl RawString { } } -impl Borrow for RawString { - fn borrow(&self) -> &RawStr { - RawStr::from_bytes(&self.0) +impl RawString +where + B: Borrow<[u8]> + ?Sized, +{ + pub fn as_bytes(&self) -> &[u8] { + self.0.borrow() + } + + pub fn len(&self) -> usize { + self.0.borrow().len() + } + + /// Returns true if this raw string can be resized to `len` bytes without + /// dropping non-space characters. + pub fn is_resizable(&self, new_len: usize) -> bool { + new_len >= self.len() || self.0.borrow()[new_len..].iter().all(|b| *b == b' ') } -} -impl Deref for RawString { - type Target = RawStr; + /// Compares this string and `other` for equality, ignoring trailing ASCII + /// spaces in either string for the purpose of comparison. (This is + /// acceptable because we assume that the encoding is ASCII-compatible.) + pub fn eq_ignore_trailing_spaces(&self, other: &RawString) -> bool + where + B2: Borrow<[u8]> + ?Sized, + { + self.0 + .borrow() + .iter() + .zip_longest(other.0.borrow()) + .all(|elem| { + let (left, right) = elem.or(&b' ', &b' '); + *left == *right + }) + } - fn deref(&self) -> &Self::Target { - self.borrow() + /// Creates an [EncodedStr] with `encoding` that borrows this string's + /// contents. + pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> { + EncodedStr::new(self.0.borrow(), encoding) } } -impl From> for RawString { +impl From> for OwnedRawString { fn from(value: Cow<'_, [u8]>) -> Self { Self(value.into_owned()) } } -impl From> for RawString { +impl From> for OwnedRawString { fn from(source: Vec) -> Self { Self(source) } } -impl From<&[u8]> for RawString { +impl From<&[u8]> for OwnedRawString { fn from(source: &[u8]) -> Self { Self(source.into()) } } -impl From for RawString { - fn from(value: EncodedString) -> Self { - Self(value.bytes) - } -} - -impl Debug for RawString { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - ::fmt(&*self, f) +impl From<[u8; N]> for OwnedRawString { + fn from(source: [u8; N]) -> Self { + Self(source.into()) } } -impl Serialize for RawString { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - self.deref().serialize(serializer) +impl From for OwnedRawString { + fn from(value: EncodedString) -> Self { + Self(value.bytes) } } -/// A borrowed string in an unspecified encoding. -/// -/// A [RawString] is usually associated with a [Variable] and uses the -/// variable's character encoding. We assume that the encoding is one supported -/// by [encoding_rs] with byte units (that is, not a `UTF-16` encoding). All of -/// these encodings have some basic ASCII compatibility. -/// -/// For an owned raw string, see [RawString]. -/// -/// [Variable]: crate::dictionary::Variable -#[repr(transparent)] -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct RawStr(pub [u8]); - -impl RawStr { - /// Creates a new [RawStr] that contains `bytes`. - pub fn from_bytes(bytes: &[u8]) -> &Self { - // SAFETY: `RawStr` is a transparent wrapper around `[u8]`, so we can - // turn a reference to the wrapped type into a reference to the wrapper - // type. - unsafe { &*(bytes as *const [u8] as *const Self) } - } - - /// Returns the raw string's contents as a borrowed byte slice. - pub fn as_bytes(&self) -> &[u8] { - &self.0 - } - - pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> { - EncodedStr { - bytes: &self.0, - encoding, - } - } - - /// Interprets the raw string's contents as the specified `encoding` and - /// returns it decoded into UTF-8, replacing any malformed sequences by - /// [REPLACEMENT_CHARACTER]. - /// - /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER - pub fn decode(&self, encoding: &'static Encoding) -> Cow<'_, str> { - encoding.decode_without_bom_handling(&self.0).0 - } - - /// Compares this string and `other` for equality, ignoring trailing ASCII - /// spaces in either string for the purpose of comparison. (This is - /// acceptable because we assume that the encoding is ASCII-compatible.) - pub fn eq_ignore_trailing_spaces(&self, other: &RawStr) -> bool { - let mut this = self.0.iter(); - let mut other = other.0.iter(); - loop { - match (this.next(), other.next()) { - (Some(a), Some(b)) if a == b => (), - (Some(_), Some(_)) => return false, - (None, None) => return true, - (Some(b' '), None) => return this.all(|c| *c == b' '), - (None, Some(b' ')) => return other.all(|c| *c == b' '), - (Some(_), None) | (None, Some(_)) => return false, - } - } - } - - /// Returns true if this raw string can be resized to `len` bytes without - /// dropping non-space characters. - pub fn is_resizable(&self, new_len: usize) -> bool { - new_len >= self.len() || self.0[new_len..].iter().all(|b| *b == b' ') - } - - /// Returns the string's length in bytes. - pub fn len(&self) -> usize { - self.0.len() +impl Debug for RawString +where + B: Borrow<[u8]> + ?Sized, +{ + // If `s` is valid UTF-8, displays it as UTF-8, otherwise as Latin-1 + // (actually bytes interpreted as Unicode code points). + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + let s = + from_utf8(&self.0.borrow()).map_or_else(|_| decode_latin1(self.0.borrow()), Cow::from); + write!(f, "{s:?}") } } -impl Serialize for RawStr { +impl Serialize for RawString +where + B: Borrow<[u8]> + ?Sized, +{ fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { - if let Ok(s) = str::from_utf8(&self.0) { - let (variant_index, variant) = if self.0.iter().all(|b| b.is_ascii()) { + if let Ok(s) = str::from_utf8(self.0.borrow()) { + let (variant_index, variant) = if self.0.borrow().iter().all(|b| b.is_ascii()) { (0, "Ascii") } else { (1, "Utf8") @@ -239,21 +252,12 @@ impl Serialize for RawStr { tuple.end() } else { let mut tuple = serializer.serialize_tuple_variant("RawString", 2, "Windows1252", 1)?; - tuple.serialize_field(&decode_latin1(&self.0))?; + tuple.serialize_field(&decode_latin1(self.0.borrow()))?; tuple.end() } } } -impl Debug for RawStr { - // If `s` is valid UTF-8, displays it as UTF-8, otherwise as Latin-1 - // (actually bytes interpreted as Unicode code points). - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - let s = from_utf8(&self.0).map_or_else(|_| decode_latin1(&self.0), Cow::from); - write!(f, "{s:?}") - } -} - /// The value of a [Variable](crate::dictionary::Variable), with a string /// encoding. #[derive(Clone)] @@ -271,7 +275,7 @@ pub enum EncodedDatum { } impl EncodedDatum { - pub fn into_raw(self) -> Datum { + pub fn into_raw(self) -> Datum { match self { EncodedDatum::Number(number) => Datum::Number(number), EncodedDatum::String(encoded_string) => Datum::String(encoded_string.into()), @@ -498,10 +502,18 @@ impl<'a> PartialEq for EncodedDat<'a> { impl<'a> Eq for EncodedDat<'a> {} -pub type OwnedDatum = Datum; -pub type BorrowedDatum<'a> = Datum<&'a RawStr>; +/// A [Datum] that owns its string data (if any). +pub type OwnedDatum = Datum; + +/// A [Datum] that borrows its string data (if any). +pub type BorrowedDatum<'a> = Datum<&'a BorrowedRawString>; /// The value of a [Variable](crate::dictionary::Variable). +/// +/// `RawString` is parameterized by its string type, which is either +/// [OwnedRawString] if it owns its string value (aliased as [OwnedDatum]) or +/// [&BorrowedRawString](BorrowedRawString) if it borrows it (aliased as +/// [BorrowedDatum]). #[derive(Clone)] pub enum Datum { /// A numeric value. @@ -546,8 +558,8 @@ where impl PartialEq> for Datum where - B: Borrow, - B2: Borrow, + B: Borrow>, + B2: Borrow>, { fn eq(&self, other: &Datum) -> bool { match (self, other) { @@ -561,12 +573,12 @@ where } } -impl Eq for Datum where B: Borrow {} +impl Eq for Datum where B: Borrow> {} impl PartialOrd> for Datum where - B: Borrow, - B2: Borrow, + B: Borrow>, + B2: Borrow>, { fn partial_cmp(&self, other: &Datum) -> Option { Some(match (self, other) { @@ -585,7 +597,7 @@ where impl Ord for Datum where - B: Borrow, + B: Borrow>, { fn cmp(&self, other: &Self) -> Ordering { self.partial_cmp(other).unwrap() @@ -594,7 +606,7 @@ where impl Hash for Datum where - B: Borrow, + B: Borrow, { fn hash(&self, state: &mut H) { match self { @@ -630,11 +642,11 @@ impl Datum { impl Datum where - B: Borrow, + B: Borrow>, { /// Returns the string inside this datum, or `None` if this is a numeric /// datum. - pub fn as_string(&self) -> Option<&RawStr> { + pub fn as_string(&self) -> Option<&BorrowedRawString> { match self { Self::Number(_) => None, Self::String(s) => Some(s.borrow()), @@ -668,7 +680,7 @@ where /// comparison. pub fn eq_ignore_trailing_spaces(&self, other: &Datum) -> bool where - B2: Borrow, + B2: Borrow>, { match (self, other) { (Self::String(a), Datum::String(b)) => a.borrow().eq_ignore_trailing_spaces(b.borrow()), @@ -688,11 +700,11 @@ where impl Datum where - B: BorrowMut, + B: BorrowMut, { /// Returns the string inside this datum as a mutable borrow, or `None` if /// this is a numeric datum. - pub fn as_string_mut(&mut self) -> Option<&mut RawString> { + pub fn as_string_mut(&mut self) -> Option<&mut OwnedRawString> { match self { Self::Number(_) => None, Self::String(s) => Some(s.borrow_mut()), @@ -724,7 +736,7 @@ where } } -impl Datum { +impl Datum { pub fn with_encoding(self, encoding: &'static Encoding) -> EncodedDatum { match self { Datum::Number(number) => EncodedDatum::Number(number), @@ -735,7 +747,7 @@ impl Datum { impl From for Datum where - B: Borrow, + B: Borrow, { fn from(number: f64) -> Self { Some(number).into() @@ -744,7 +756,7 @@ where impl From> for Datum where - B: Borrow, + B: Borrow, { fn from(value: Option) -> Self { Self::Number(value) @@ -753,7 +765,7 @@ where impl From<&str> for Datum where - B: Borrow + for<'a> From<&'a [u8]>, + B: Borrow + for<'a> From<&'a [u8]>, { fn from(value: &str) -> Self { value.as_bytes().into() @@ -762,7 +774,7 @@ where impl From<&[u8]> for Datum where - B: Borrow + for<'a> From<&'a [u8]>, + B: Borrow + for<'a> From<&'a [u8]>, { fn from(value: &[u8]) -> Self { Self::String(value.into()) @@ -770,23 +782,23 @@ where } /// A case in a data set. -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] +#[derive(Clone, Debug, Serialize)] pub struct RawCase( /// One [Datum] per variable in the corresponding [Dictionary], in the same /// order. /// /// [Dictionary]: crate::dictionary::Dictionary - pub Vec>, + pub Vec>, ); impl RawCase { - pub fn as_encoding(&self, encoding: &'static Encoding) -> Case<&'_ [Datum]> { + pub fn as_encoding(&self, encoding: &'static Encoding) -> Case<&'_ [Datum]> { Case { encoding, data: &self.0, } } - pub fn with_encoding(self, encoding: &'static Encoding) -> Case>> { + pub fn with_encoding(self, encoding: &'static Encoding) -> Case>> { Case { encoding, data: self.0, @@ -796,7 +808,7 @@ impl RawCase { pub struct Case where - B: Borrow<[Datum]>, + B: Borrow<[Datum]>, { encoding: &'static Encoding, data: B, @@ -804,14 +816,14 @@ where impl Case where - B: Borrow<[Datum]>, + B: Borrow<[Datum]>, { fn len(&self) -> usize { self.data.borrow().len() } } -impl IntoIterator for Case>> { +impl IntoIterator for Case>> { type Item = EncodedDatum; type IntoIter = CaseVecIter; @@ -826,7 +838,7 @@ impl IntoIterator for Case>> { pub struct CaseVecIter { encoding: &'static Encoding, - iter: std::vec::IntoIter>, + iter: std::vec::IntoIter>, } impl Iterator for CaseVecIter { diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 1d4f9201f2..d2d338e23a 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -40,7 +40,7 @@ use thiserror::Error as ThisError; use unicase::UniCase; use crate::{ - data::{Datum, EncodedDat, EncodedDatum, RawString}, + data::{Datum, EncodedDat, EncodedDatum, OwnedRawString, RawString}, format::{DisplayPlain, Format}, identifier::{ByIdentifier, HasIdentifier, Identifier}, output::pivot::{ @@ -1845,7 +1845,7 @@ pub enum MultipleResponseType { /// one value (the "counted value") means that the box was checked, and any /// other value means that it was not. MultipleDichotomy { - datum: Datum, + datum: Datum, labels: CategoryLabels, }, @@ -1903,7 +1903,7 @@ impl DictIndexVariableSet { } #[derive(Clone, Default, PartialEq, Eq, Serialize)] -pub struct ValueLabels(pub HashMap, String>); +pub struct ValueLabels(pub HashMap, String>); impl ValueLabels { pub fn new() -> Self { @@ -1914,11 +1914,11 @@ impl ValueLabels { self.0.is_empty() } - pub fn get(&self, datum: &Datum) -> Option<&str> { + pub fn get(&self, datum: &Datum) -> Option<&str> { self.0.get(datum).map(|s| s.as_str()) } - pub fn insert(&mut self, datum: Datum, label: String) -> Option { + pub fn insert(&mut self, datum: Datum, label: String) -> Option { self.0.insert(datum, label) } diff --git a/rust/pspp/src/format/mod.rs b/rust/pspp/src/format/mod.rs index ac7c98f213..aa92728582 100644 --- a/rust/pspp/src/format/mod.rs +++ b/rust/pspp/src/format/mod.rs @@ -30,7 +30,7 @@ use unicode_width::UnicodeWidthStr; use crate::{ data::Datum, - data::RawString, + data::OwnedRawString, dictionary::{VarType, VarWidth}, sys::raw, }; @@ -393,10 +393,10 @@ impl Type { } } - pub fn default_value(&self) -> Datum { + pub fn default_value(&self) -> Datum { match self.var_type() { VarType::Numeric => Datum::sysmis(), - VarType::String => Datum::String(RawString::default()), + VarType::String => Datum::String(OwnedRawString::default()), } } } @@ -621,10 +621,10 @@ impl Format { Ok(self) } - pub fn default_value(&self) -> Datum { + pub fn default_value(&self) -> Datum { match self.var_width() { VarWidth::Numeric => Datum::sysmis(), - VarWidth::String(width) => Datum::String(RawString::spaces(width as usize)), + VarWidth::String(width) => Datum::String(OwnedRawString::spaces(width as usize)), } } diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index 7d9732261d..c7c899dd82 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -68,7 +68,7 @@ use thiserror::Error as ThisError; use tlo::parse_tlo; use crate::{ - data::{Datum, EncodedDat, EncodedDatum, RawString}, + data::{Datum, EncodedDat, EncodedDatum, OwnedRawString}, dictionary::{VarType, Variable}, format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat}, settings::{Settings, Show}, @@ -1866,7 +1866,7 @@ impl Value { EncodedDat::String(string) => Self::new_user_text(string.as_str()), } } - pub fn new_variable_value(variable: &Variable, value: &Datum) -> Self { + pub fn new_variable_value(variable: &Variable, value: &Datum) -> Self { let var_name = Some(variable.name.as_str().into()); let value_label = variable.value_labels.get(value).map(String::from); match value { @@ -1890,7 +1890,7 @@ impl Value { Datum::String(string) => Self::new(ValueInner::String(StringValue { show: None, hex: variable.print_format.type_() == Type::AHex, - s: string.decode(variable.encoding()).into_owned(), + s: string.as_encoded(variable.encoding()).as_str().into_owned(), var_name, value_label, })), diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 778bc778e2..d256183bb2 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -26,7 +26,7 @@ use std::{ use crate::{ calendar::date_time_to_pspp, crypto::EncryptedFile, - data::{Case, Datum, EncodedDatum, RawString}, + data::{Case, Datum, EncodedDatum, OwnedRawString}, dictionary::{ DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, InvalidRole, MissingValues, MissingValuesError, MultipleResponseType, VarWidth, Variable, @@ -1256,7 +1256,7 @@ impl Records { .missing_values .into_iter() .map(|v| { - let mut value = RawString::from(v.0.as_slice()); + let mut value = OwnedRawString::from(v.0.as_slice()); value.resize(variable.width.as_string_width().unwrap()); EncodedDatum::String(value.with_encoding(encoding)) }) @@ -1695,7 +1695,7 @@ impl Debug for Cases { } impl Iterator for Cases { - type Item = Result>>, raw::Error>; + type Item = Result>>, raw::Error>; fn next(&mut self) -> Option { self.inner diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index f330a0a5d4..ee199b191b 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -20,7 +20,7 @@ //! raw details. Most readers will want to use higher-level interfaces. use crate::{ - data::{Datum, RawCase, RawStr, RawString}, + data::{BorrowedRawString, Datum, OwnedRawString, RawCase, RawString}, dictionary::{VarType, VarWidth}, endian::{Endian, Parse, ToBytes}, identifier::{Error as IdError, Identifier}, @@ -395,7 +395,7 @@ pub enum Record { /// one variable record per 8-byte segment. Variable( /// The record. - VariableRecord, + VariableRecord, ), /// Value labels for numeric and short string variables. @@ -403,7 +403,7 @@ pub enum Record { /// These appear after the variable records. ValueLabel( /// The record. - ValueLabelRecord, + ValueLabelRecord, ), /// Document record. @@ -433,13 +433,13 @@ pub enum Record { /// Multiple response variable record. MultipleResponse( /// The record. - MultipleResponseRecord, + MultipleResponseRecord, ), /// Value labels for long string variables. LongStringValueLabels( /// The record. - LongStringValueLabelRecord, + LongStringValueLabelRecord, ), /// Missing values for long string variables. @@ -448,7 +448,7 @@ pub enum Record { /// variable records. LongStringMissingValues( /// The record. - LongStringMissingValueRecord, + LongStringMissingValueRecord, ), /// Encoding record. @@ -776,12 +776,12 @@ impl<'de> Decoder<'de> { output } - fn decode<'a>(&mut self, input: &'a RawString) -> Cow<'a, str> { + fn decode<'a>(&mut self, input: &'a OwnedRawString) -> Cow<'a, str> { self.decode_slice(input.0.as_slice()) } /// Decodes `input` to an [Identifier] using our encoding. - pub fn decode_identifier(&mut self, input: &RawString) -> Result { + pub fn decode_identifier(&mut self, input: &OwnedRawString) -> Result { let decoded = &self.decode(input); self.new_identifier(decoded) } @@ -901,7 +901,7 @@ impl Debug for RawDatum { match self { RawDatum::Number(Some(number)) => write!(f, "{number:?}"), RawDatum::Number(None) => write!(f, "SYSMIS"), - RawDatum::String(s) => write!(f, "{:?}", RawStr::from_bytes(s)), + RawDatum::String(s) => write!(f, "{:?}", BorrowedRawString::new(s)), } } } @@ -913,7 +913,7 @@ impl Serialize for RawDatum { { match self { RawDatum::Number(number) => number.serialize(serializer), - RawDatum::String(s) => RawStr::from_bytes(s).serialize(serializer), + RawDatum::String(s) => BorrowedRawString::new(s).serialize(serializer), } } } @@ -930,18 +930,18 @@ impl RawDatum { /// Decodes a `RawDatum` into a [Datum] given that we now know the string /// width. - pub fn decode(&self, width: VarWidth) -> Datum { + pub fn decode(&self, width: VarWidth) -> Datum { match self { Self::Number(x) => Datum::Number(*x), Self::String(s) => { let width = width.as_string_width().unwrap(); - Datum::String(RawString::from(&s[..width])) + Datum::String(OwnedRawString::from(&s[..width])) } } } } -impl Datum { +impl Datum { fn read_case( reader: &mut R, case_vars: &[CaseVar], @@ -989,7 +989,7 @@ impl Datum { skip_bytes(reader, segment.padding_bytes)?; offset += segment.data_bytes; } - values.push(Datum::String(RawString(datum))); + values.push(Datum::String(datum.into())); } } } @@ -1076,7 +1076,7 @@ impl Datum { n_chunks += 1; } } - values.push(Datum::String(RawString(datum))); + values.push(Datum::String(datum.into())); } } } @@ -1150,7 +1150,7 @@ where reader: Option, warn: Box, - header: FileHeader, + header: FileHeader, var_types: VarTypes, state: ReaderState, @@ -1179,7 +1179,7 @@ where } /// Returns the header in this reader. - pub fn header(&self) -> &FileHeader { + pub fn header(&self) -> &FileHeader { &self.header } @@ -1410,7 +1410,7 @@ impl RawCases { fn new( reader: R, var_types: VarTypes, - header: &FileHeader, + header: &FileHeader, ztrailer_offset: Option, ) -> Self where @@ -1579,7 +1579,7 @@ impl Debug for UntypedDatum { } else { big }; - write!(f, "{number}/{:?}", RawStr::from_bytes(&self.0)) + write!(f, "{number}/{:?}", BorrowedRawString::new(&self.0)) } } @@ -1598,7 +1598,7 @@ impl From<[u8; N]> for RawStrArray { impl Debug for RawStrArray { fn fmt(&self, f: &mut Formatter) -> FmtResult { - write!(f, "{:?}", RawStr::from_bytes(&self.0)) + write!(f, "{:?}", BorrowedRawString::new(&self.0)) } } @@ -1607,7 +1607,7 @@ impl Serialize for RawStrArray { where S: serde::Serializer, { - RawStr::from_bytes(&self.0).serialize(serializer) + BorrowedRawString::new(&self.0).serialize(serializer) } } @@ -1657,7 +1657,7 @@ fn read_vec(r: &mut R, n: usize) -> Result, IoError> { Ok(vec) } -fn read_string(r: &mut R, endian: Endian) -> Result { +fn read_string(r: &mut R, endian: Endian) -> Result { let length: u32 = endian.parse(read_bytes(r)?); Ok(read_vec(r, length as usize)?.into()) } diff --git a/rust/pspp/src/sys/raw/records.rs b/rust/pspp/src/sys/raw/records.rs index c418c8f570..789571cc33 100644 --- a/rust/pspp/src/sys/raw/records.rs +++ b/rust/pspp/src/sys/raw/records.rs @@ -12,7 +12,7 @@ use std::{ }; use crate::{ - data::{Datum, RawString}, + data::{Datum, OwnedRawString, RawString}, dictionary::{ Alignment, Attributes, CategoryLabels, Measure, MissingValueRange, MissingValues, MissingValuesError, VarType, VarWidth, @@ -142,7 +142,7 @@ pub struct RawHeader { pub file_label: [u8; 64], } -impl FileHeader { +impl FileHeader { /// Reads a header record from `r`, reporting any warnings via `warn`. pub fn read(r: &mut R, warn: &mut dyn FnMut(Warning)) -> Result where @@ -208,10 +208,6 @@ impl FileHeader { )); } - let creation_date = RawString(header.creation_date.into()); - let creation_time = RawString(header.creation_time.into()); - let file_label = RawString(header.file_label.into()); - Ok(FileHeader { magic, layout_code: header.layout_code, @@ -220,10 +216,10 @@ impl FileHeader { weight_index, n_cases, bias: header.bias, - creation_date, - creation_time, - eye_catcher: RawString(header.eye_catcher.into()), - file_label, + creation_date: header.creation_date.into(), + creation_time: header.creation_time.into(), + eye_catcher: header.eye_catcher.into(), + file_label: header.file_label.into(), endian, }) } @@ -355,14 +351,14 @@ fn format_name(type_: u32) -> Cow<'static, str> { #[derive(Clone, Debug, Default, Serialize)] pub struct RawMissingValues { /// Individual missing values, up to 3 of them. - pub values: Vec>, + pub values: Vec>, /// Optional range of missing values. pub range: Option, } impl RawMissingValues { - pub fn new(values: Vec>, range: Option) -> Self { + pub fn new(values: Vec>, range: Option) -> Self { Self { values, range } } @@ -454,7 +450,7 @@ impl RawMissingValues { let width = width.min(8) as usize; let values = values .into_iter() - .map(|value| Datum::String(RawString::from(&value[..width]))) + .map(|value| Datum::String(OwnedRawString::from(&value[..width]))) .collect(); return Ok(Self::new(values, None)); } @@ -544,7 +540,7 @@ pub struct RawVariableRecord { pub name: [u8; 8], } -impl VariableRecord { +impl VariableRecord { /// Reads a variable record from `r`. pub fn read( r: &mut R, @@ -573,12 +569,12 @@ impl VariableRecord { 1 => { let len: u32 = endian.parse(read_bytes(r)?); let read_len = len.min(65535) as usize; - let label = RawString(read_vec(r, read_len)?); + let label = read_vec(r, read_len)?; let padding_bytes = len.next_multiple_of(4) - len; let _ = read_vec(r, padding_bytes as usize)?; - Some(label) + Some(label.into()) } _ => { return Err(Error::new( @@ -602,7 +598,7 @@ impl VariableRecord { Ok(Record::Variable(VariableRecord { offsets: start_offset..end_offset, width, - name: RawString(raw_record.name.into()), + name: raw_record.name.into(), print_format: raw_record.print_format, write_format: raw_record.write_format, missing_values, @@ -703,7 +699,7 @@ where pub const MAX_INDEXES: u32 = u32::MAX / 8; } -impl ValueLabelRecord { +impl ValueLabelRecord { pub(super) fn read( r: &mut R, endian: Endian, @@ -731,7 +727,7 @@ impl ValueLabelRecord { let mut label = read_vec(r, padded_len - 1)?; label.truncate(label_len); - labels.push((value, RawString(label))); + labels.push((value, label.into())); } let index_offset = r.stream_position()?; @@ -1040,7 +1036,7 @@ pub struct TextRecord { pub offsets: Range, /// The text content of the record. - pub text: RawString, + pub text: OwnedRawString, } impl TextRecord { @@ -1203,7 +1199,7 @@ pub enum MultipleResponseType { /// Multiple-dichotomy set. MultipleDichotomy { /// The value that is counted in the set. - value: RawString, + value: OwnedRawString, /// What categories are labeled. labels: CategoryLabels, @@ -1273,7 +1269,7 @@ where pub short_names: Vec, } -impl MultipleResponseSet { +impl MultipleResponseSet { /// Parses a multiple-response set from `input`. Returns the set and the /// input remaining to be parsed following the set. fn parse(input: &[u8]) -> Result<(Self, &[u8]), WarningDetails> { @@ -1370,7 +1366,7 @@ where pub sets: Vec>, } -impl MultipleResponseRecord { +impl MultipleResponseRecord { /// Parses a multiple-response set from `ext`. pub fn parse(ext: &Extension) -> Result { ext.check_size(Some(1), None, "multiple response set record")?; @@ -1395,7 +1391,7 @@ impl MultipleResponseRecord { } } -impl MultipleResponseRecord { +impl MultipleResponseRecord { /// Decodes this record using `decoder`. pub fn decode(self, decoder: &mut Decoder) -> MultipleResponseRecord { let mut sets = Vec::new(); @@ -1414,7 +1410,7 @@ impl MultipleResponseRecord { } } -fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), WarningDetails> { +fn parse_counted_string(input: &[u8]) -> Result<(OwnedRawString, &[u8]), WarningDetails> { let Some(space) = input.iter().position(|&b| b == b' ') else { return Err(MultipleResponseWarning::CountedStringMissingSpace.into()); }; @@ -1580,7 +1576,7 @@ where pub missing_values: Vec>, } -impl LongStringMissingValues { +impl LongStringMissingValues { /// Decodes these settings using `decoder`. fn decode( &self, @@ -1606,7 +1602,7 @@ where pub values: Vec>, } -impl LongStringMissingValueRecord { +impl LongStringMissingValueRecord { /// Parses this record from `ext`. pub fn parse( ext: &Extension, @@ -2363,10 +2359,10 @@ where pub width: u32, /// `(value, label)` pairs, where each value is `width` bytes. - pub labels: Vec<(RawString, S)>, + pub labels: Vec<(OwnedRawString, S)>, } -impl LongStringValueLabels { +impl LongStringValueLabels { /// Decodes a set of long string value labels using `decoder`. fn decode( &self, @@ -2404,7 +2400,7 @@ where pub labels: Vec>, } -impl LongStringValueLabelRecord { +impl LongStringValueLabelRecord { /// Parses this record from `ext` using `endian`. fn parse(ext: &Extension, endian: Endian) -> Result { ext.check_size(Some(1), None, "long string value labels record")?; diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index 0fa1edef2a..3a6b13486b 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -17,7 +17,7 @@ use itertools::zip_eq; use smallvec::SmallVec; use crate::{ - data::{Datum, EncodedDatum, RawStr, RawString}, + data::{Datum, EncodedDatum, OwnedRawString, RawString}, dictionary::{ Alignment, Attributes, CategoryLabels, Dictionary, Measure, MultipleResponseType, ValueLabels, VarWidth, @@ -674,7 +674,7 @@ impl BinWrite for Pad { } } -impl BinWrite for Datum { +impl BinWrite for Datum { type Args<'a> = (); fn write_options( @@ -842,7 +842,7 @@ where fn write_case_uncompressed<'c>( &mut self, - case: impl Iterator>, + case: impl Iterator>, ) -> Result<(), BinError> { for (var, datum) in zip_eq(self.case_vars, case) { match var { @@ -865,7 +865,7 @@ where } fn write_case_compressed<'c>( &mut self, - case: impl Iterator>, + case: impl Iterator>, ) -> Result<(), BinError> { for (var, datum) in zip_eq(self.case_vars, case) { match var { @@ -991,7 +991,7 @@ where /// Panics if [try_finish](Self::try_finish) has been called. pub fn write_case<'a>( &mut self, - case: impl IntoIterator>, + case: impl IntoIterator>, ) -> Result<(), BinError> { match self.inner.as_mut().unwrap() { Either::Left(inner) => {