From: Ben Pfaff Date: Mon, 19 May 2025 15:31:46 +0000 (-0700) Subject: Revert "start using encodedstring in variable" X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9c1963b299c36253b64b57c80b034d2b99d72dc3;p=pspp Revert "start using encodedstring in variable" This reverts commit 8c675eec48e1cd5f3dd9e7a04e3aa5f77820f89e. --- diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index d0e2080fd1..f8abe996d9 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -19,9 +19,7 @@ use unicase::UniCase; use crate::{ format::Format, identifier::{ByIdentifier, HasIdentifier, Identifier}, - sys::raw::{ - Alignment, CategoryLabels, EncodedString, Measure, MissingValues, RawString, VarType, - }, + sys::raw::{Alignment, CategoryLabels, Measure, MissingValues, RawString, VarType}, }; /// An index within [Dictionary::variables]. @@ -169,15 +167,6 @@ pub enum Value { String(S), } -impl Value { - pub fn with_encoding(&self, encoding: &'static Encoding) -> Value { - match self { - Value::Number(number) => Value::Number(*number), - Value::String(string) => Value::String(string.with_encoding(encoding)), - } - } -} - impl Debug for Value where S: Debug, @@ -653,7 +642,7 @@ pub struct Variable { /// `None`). /// /// Both kinds of missing values are excluded from most analyses. - pub missing_values: MissingValues, + pub missing_values: MissingValues, /// Output format used in most contexts. pub print_format: Format, diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index b9bbfbae2a..5d59305def 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -9,17 +9,14 @@ use crate::{ endian::Endian, format::{Error as FormatError, Format, UncheckedFormat}, identifier::{ByIdentifier, Error as IdError, Identifier}, - sys::{ - encoding::Error as EncodingError, - raw::{ - self, Cases, DecodedRecord, DocumentRecord, EncodedString, EncodingRecord, Extension, - FileAttributeRecord, FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, - LongNamesRecord, LongStringMissingValueRecord, LongStringValueLabelRecord, - MissingValues, MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord, - RawStrArray, RawWidth, ValueLabel, ValueLabelRecord, VarDisplayRecord, - VariableAttributeRecord, VariableRecord, VariableSetRecord, VeryLongStringsRecord, - ZHeader, ZTrailer, - }, + sys::encoding::Error as EncodingError, + sys::raw::{ + self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension, FileAttributeRecord, + FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, LongNamesRecord, + LongStringMissingValueRecord, LongStringValueLabelRecord, MissingValues, + MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord, RawStrArray, RawWidth, + ValueLabel, ValueLabelRecord, VarDisplayRecord, VariableAttributeRecord, VariableRecord, + VariableSetRecord, VeryLongStringsRecord, ZHeader, ZTrailer, }, }; use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; @@ -506,7 +503,7 @@ pub fn decode( variable.label = input.label.clone(); - variable.missing_values = input.missing_values.with_encoding(encoding); + variable.missing_values = input.missing_values.clone(); variable.print_format = decode_format( input.print_format, @@ -770,7 +767,7 @@ pub fn decode( value.clear(); value.extend_from_slice(v.0.as_slice()); value.resize(variable.width.as_string_width().unwrap(), b' '); - Value::String(EncodedString::new(&*value, encoding)) + Value::String(Box::from(value.as_slice())) }) .collect::>(); variable.missing_values = MissingValues { diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index ec4e3643b7..c26ae08052 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -9,7 +9,7 @@ use encoding_rs::{mem::decode_latin1, Encoding, UTF_8}; use flate2::read::ZlibDecoder; use num::Integer; use std::{ - borrow::Cow, + borrow::{Borrow, Cow}, cell::RefCell, collections::{HashMap, VecDeque}, fmt::{Debug, Display, Formatter, Result as FmtResult}, @@ -1121,7 +1121,7 @@ fn format_name(type_: u32) -> Cow<'static, str> { } #[derive(Clone, Default)] -pub struct MissingValues +pub struct MissingValues> where S: Debug, { @@ -1129,39 +1129,17 @@ where pub values: Vec>, /// Optional range of missing values. - pub range: Option, + pub range: Option>, } -#[derive(Copy, Clone)] -pub enum MissingValueRange { - In { low: f64, high: f64 }, - From { low: f64 }, - To { high: f64 }, -} - -impl MissingValueRange { - pub fn new(low: f64, high: f64) -> Self { - const LOWEST: f64 = f64::MIN.next_up(); - match (low, high) { - (f64::MIN | LOWEST, _) => Self::To { high }, - (_, f64::MAX) => Self::From { low }, - (_, _) => Self::In { low, high }, - } - } - - pub fn low(&self) -> Option { - match self { - MissingValueRange::In { low, .. } | MissingValueRange::From { low } => Some(*low), - MissingValueRange::To { .. } => None, - } - } - - pub fn high(&self) -> Option { - match self { - MissingValueRange::In { high, .. } | MissingValueRange::To { high } => Some(*high), - MissingValueRange::From { .. } => None, - } - } +#[derive(Clone)] +pub enum MissingValueRange> +where + S: Debug, +{ + In { low: Value, high: Value }, + From { low: Value }, + To { high: Value }, } impl Debug for MissingValues @@ -1180,14 +1158,10 @@ where if !self.values.is_empty() { write!(f, ", ")?; } - match range.low() { - Some(low) => write!(f, "{low:?}")?, - None => write!(f, "LOW")?, - } - write!(f, " THRU ")?; - match range.high() { - Some(high) => write!(f, "{high:?}")?, - None => write!(f, "HIGH")?, + match range { + MissingValueRange::In { low, high } => write!(f, "{low:?} THRU {high:?}")?, + MissingValueRange::From { low } => write!(f, "{low:?} THRU HI")?, + MissingValueRange::To { high } => write!(f, "LOW THRU {high:?}")?, } } @@ -1244,9 +1218,22 @@ impl MissingValues { .map(|v| Value::Number(endian.parse(v))) .collect(); - let range = range.map(|(low, high)| { - MissingValueRange::new(endian.parse(low), endian.parse(high)) - }); + const LOWEST: f64 = f64::MIN.next_up(); + let range = + range.map( + |(low, high)| match (endian.parse(low), endian.parse(high)) { + (f64::MIN | LOWEST, high) => MissingValueRange::To { + high: Value::Number(Some(high)), + }, + (low, f64::MAX) => MissingValueRange::From { + low: Value::Number(Some(low)), + }, + (low, high) => MissingValueRange::In { + low: Value::Number(Some(low)), + high: Value::Number(Some(high)), + }, + }, + ); return Ok(Self { values, range }); } Ok(VarWidth::String(_)) if range.is_some() => warn(Warning::MissingValueStringRange), @@ -1254,7 +1241,7 @@ impl MissingValues { let width = width.min(8) as usize; let values = values .into_iter() - .map(|value| Value::String(RawString::from(&value[..width]))) + .map(|value| Value::String(Box::from(&value[..width]))) .collect(); return Ok(Self { values, @@ -1265,17 +1252,6 @@ impl MissingValues { } Ok(Self::default()) } - - pub fn with_encoding(&self, encoding: &'static Encoding) -> MissingValues { - MissingValues { - values: self - .values - .iter() - .map(|value| value.with_encoding(encoding)) - .collect(), - range: self.range, - } - } } #[derive(Clone)] @@ -1467,9 +1443,6 @@ impl RawString { pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> { EncodedStr::new(&self.0, encoding) } - pub fn with_encoding(&self, encoding: &'static Encoding) -> EncodedString { - EncodedString::new(&*self.0, encoding) - } pub fn as_slice(&self) -> &[u8] { &*self.0 } @@ -1525,85 +1498,39 @@ impl Debug for RawStrArray { } } -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum EncodedString { Encoded { bytes: Vec, encoding: &'static Encoding, }, - Utf8(String), + Utf8 { + s: String, + }, } impl EncodedString { - /// Creates a new `EncodedString` from `bytes` and `encoding`. - /// - /// It's cheaper to use `EncodedString::from(string)` if the input is in a - /// `&str` or `String`. - pub fn new(bytes: impl Into>, encoding: &'static Encoding) -> Self { - let bytes: Vec = bytes.into(); - if encoding == UTF_8 { - match String::from_utf8(bytes) { - Ok(string) => Self::Utf8(string), - Err(error) => Self::Encoded { - bytes: error.into_bytes(), - encoding, - }, - } - } else { - Self::Encoded { bytes, encoding } - } - } - pub fn borrowed(&self) -> EncodedStr<'_> { match self { EncodedString::Encoded { bytes, encoding } => EncodedStr::Encoded { bytes, encoding }, - EncodedString::Utf8(s) => EncodedStr::Utf8 { s }, + EncodedString::Utf8 { s } => EncodedStr::Utf8 { s }, } } pub fn as_utf8_bytes(&self) -> Option<&[u8]> { match self { EncodedString::Encoded { bytes, encoding } if *encoding == UTF_8 => Some(&bytes), - EncodedString::Utf8(s) => Some(s.as_bytes()), + EncodedString::Utf8 { s } => Some(s.as_bytes()), _ => None, } } pub fn as_encoded(&self) -> (&[u8], &'static Encoding) { match self { EncodedString::Encoded { bytes, encoding } => (&bytes, encoding), - EncodedString::Utf8(s) => (s.as_bytes(), UTF_8), + EncodedString::Utf8 { s } => (s.as_bytes(), UTF_8), } } } -impl Debug for EncodedString { - fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { - match self { - Self::Encoded { bytes, encoding } => { - write!(f, "{:?}({})", self.borrowed().to_utf8(), encoding.name()) - } - Self::Utf8(string) => write!(f, "{string:?}"), - } - } -} - -impl From for EncodedString { - fn from(value: String) -> Self { - Self::Utf8(value) - } -} - -impl From<&'_ str> for EncodedString { - fn from(value: &'_ str) -> Self { - Self::Utf8(value.into()) - } -} - -impl Default for EncodedString { - fn default() -> Self { - Self::Utf8(String::new()) - } -} - impl<'a> From> for EncodedString { fn from(value: EncodedStr<'a>) -> Self { match value { @@ -1611,7 +1538,7 @@ impl<'a> From> for EncodedString { bytes: bytes.into(), encoding, }, - EncodedStr::Utf8 { s } => Self::Utf8(s.into()), + EncodedStr::Utf8 { s } => Self::Utf8 { s: s.into() }, } } }