Revert "start using encodedstring in variable"
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 19 May 2025 15:31:46 +0000 (08:31 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 19 May 2025 15:31:46 +0000 (08:31 -0700)
This reverts commit 8c675eec48e1cd5f3dd9e7a04e3aa5f77820f89e.

rust/pspp/src/dictionary.rs
rust/pspp/src/sys/cooked.rs
rust/pspp/src/sys/raw.rs

index d0e2080fd1fd77310dea8592eb3f074d1061fa3c..f8abe996d9b2fc8d023371dab012ae296850f266 100644 (file)
@@ -19,9 +19,7 @@ use unicase::UniCase;
 use crate::{
     format::Format,
     identifier::{ByIdentifier, HasIdentifier, Identifier},
-    sys::raw::{
-        Alignment, CategoryLabels, EncodedString, Measure, MissingValues, RawString, VarType,
-    },
+    sys::raw::{Alignment, CategoryLabels, Measure, MissingValues, RawString, VarType},
 };
 
 /// An index within [Dictionary::variables].
@@ -169,15 +167,6 @@ pub enum Value<S = RawString> {
     String(S),
 }
 
-impl Value {
-    pub fn with_encoding(&self, encoding: &'static Encoding) -> Value<EncodedString> {
-        match self {
-            Value::Number(number) => Value::Number(*number),
-            Value::String(string) => Value::String(string.with_encoding(encoding)),
-        }
-    }
-}
-
 impl<S> Debug for Value<S>
 where
     S: Debug,
@@ -653,7 +642,7 @@ pub struct Variable {
     /// `None`).
     ///
     /// Both kinds of missing values are excluded from most analyses.
-    pub missing_values: MissingValues<EncodedString>,
+    pub missing_values: MissingValues,
 
     /// Output format used in most contexts.
     pub print_format: Format,
index b9bbfbae2a402219ce5ef427f0bc77d6edda7baa..5d59305defb5c1cdc063aa72abaa117f8798159b 100644 (file)
@@ -9,17 +9,14 @@ use crate::{
     endian::Endian,
     format::{Error as FormatError, Format, UncheckedFormat},
     identifier::{ByIdentifier, Error as IdError, Identifier},
-    sys::{
-        encoding::Error as EncodingError,
-        raw::{
-            self, Cases, DecodedRecord, DocumentRecord, EncodedString, EncodingRecord, Extension,
-            FileAttributeRecord, FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName,
-            LongNamesRecord, LongStringMissingValueRecord, LongStringValueLabelRecord,
-            MissingValues, MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord,
-            RawStrArray, RawWidth, ValueLabel, ValueLabelRecord, VarDisplayRecord,
-            VariableAttributeRecord, VariableRecord, VariableSetRecord, VeryLongStringsRecord,
-            ZHeader, ZTrailer,
-        },
+    sys::encoding::Error as EncodingError,
+    sys::raw::{
+        self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension, FileAttributeRecord,
+        FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, LongNamesRecord,
+        LongStringMissingValueRecord, LongStringValueLabelRecord, MissingValues,
+        MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord, RawStrArray, RawWidth,
+        ValueLabel, ValueLabelRecord, VarDisplayRecord, VariableAttributeRecord, VariableRecord,
+        VariableSetRecord, VeryLongStringsRecord, ZHeader, ZTrailer,
     },
 };
 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
@@ -506,7 +503,7 @@ pub fn decode(
 
         variable.label = input.label.clone();
 
-        variable.missing_values = input.missing_values.with_encoding(encoding);
+        variable.missing_values = input.missing_values.clone();
 
         variable.print_format = decode_format(
             input.print_format,
@@ -770,7 +767,7 @@ pub fn decode(
                 value.clear();
                 value.extend_from_slice(v.0.as_slice());
                 value.resize(variable.width.as_string_width().unwrap(), b' ');
-                Value::String(EncodedString::new(&*value, encoding))
+                Value::String(Box::from(value.as_slice()))
             })
             .collect::<Vec<_>>();
         variable.missing_values = MissingValues {
index ec4e3643b72b5f65fa0b5330ad8521509a179b54..c26ae08052c6722d61dae5ab1c634481f8727d35 100644 (file)
@@ -9,7 +9,7 @@ use encoding_rs::{mem::decode_latin1, Encoding, UTF_8};
 use flate2::read::ZlibDecoder;
 use num::Integer;
 use std::{
-    borrow::Cow,
+    borrow::{Borrow, Cow},
     cell::RefCell,
     collections::{HashMap, VecDeque},
     fmt::{Debug, Display, Formatter, Result as FmtResult},
@@ -1121,7 +1121,7 @@ fn format_name(type_: u32) -> Cow<'static, str> {
 }
 
 #[derive(Clone, Default)]
-pub struct MissingValues<S = RawString>
+pub struct MissingValues<S = Box<[u8]>>
 where
     S: Debug,
 {
@@ -1129,39 +1129,17 @@ where
     pub values: Vec<Value<S>>,
 
     /// Optional range of missing values.
-    pub range: Option<MissingValueRange>,
+    pub range: Option<MissingValueRange<S>>,
 }
 
-#[derive(Copy, Clone)]
-pub enum MissingValueRange {
-    In { low: f64, high: f64 },
-    From { low: f64 },
-    To { high: f64 },
-}
-
-impl MissingValueRange {
-    pub fn new(low: f64, high: f64) -> Self {
-        const LOWEST: f64 = f64::MIN.next_up();
-        match (low, high) {
-            (f64::MIN | LOWEST, _) => Self::To { high },
-            (_, f64::MAX) => Self::From { low },
-            (_, _) => Self::In { low, high },
-        }
-    }
-
-    pub fn low(&self) -> Option<f64> {
-        match self {
-            MissingValueRange::In { low, .. } | MissingValueRange::From { low } => Some(*low),
-            MissingValueRange::To { .. } => None,
-        }
-    }
-
-    pub fn high(&self) -> Option<f64> {
-        match self {
-            MissingValueRange::In { high, .. } | MissingValueRange::To { high } => Some(*high),
-            MissingValueRange::From { .. } => None,
-        }
-    }
+#[derive(Clone)]
+pub enum MissingValueRange<S = Box<[u8]>>
+where
+    S: Debug,
+{
+    In { low: Value<S>, high: Value<S> },
+    From { low: Value<S> },
+    To { high: Value<S> },
 }
 
 impl<S> Debug for MissingValues<S>
@@ -1180,14 +1158,10 @@ where
             if !self.values.is_empty() {
                 write!(f, ", ")?;
             }
-            match range.low() {
-                Some(low) => write!(f, "{low:?}")?,
-                None => write!(f, "LOW")?,
-            }
-            write!(f, " THRU ")?;
-            match range.high() {
-                Some(high) => write!(f, "{high:?}")?,
-                None => write!(f, "HIGH")?,
+            match range {
+                MissingValueRange::In { low, high } => write!(f, "{low:?} THRU {high:?}")?,
+                MissingValueRange::From { low } => write!(f, "{low:?} THRU HI")?,
+                MissingValueRange::To { high } => write!(f, "LOW THRU {high:?}")?,
             }
         }
 
@@ -1244,9 +1218,22 @@ impl MissingValues {
                     .map(|v| Value::Number(endian.parse(v)))
                     .collect();
 
-                let range = range.map(|(low, high)| {
-                    MissingValueRange::new(endian.parse(low), endian.parse(high))
-                });
+                const LOWEST: f64 = f64::MIN.next_up();
+                let range =
+                    range.map(
+                        |(low, high)| match (endian.parse(low), endian.parse(high)) {
+                            (f64::MIN | LOWEST, high) => MissingValueRange::To {
+                                high: Value::Number(Some(high)),
+                            },
+                            (low, f64::MAX) => MissingValueRange::From {
+                                low: Value::Number(Some(low)),
+                            },
+                            (low, high) => MissingValueRange::In {
+                                low: Value::Number(Some(low)),
+                                high: Value::Number(Some(high)),
+                            },
+                        },
+                    );
                 return Ok(Self { values, range });
             }
             Ok(VarWidth::String(_)) if range.is_some() => warn(Warning::MissingValueStringRange),
@@ -1254,7 +1241,7 @@ impl MissingValues {
                 let width = width.min(8) as usize;
                 let values = values
                     .into_iter()
-                    .map(|value| Value::String(RawString::from(&value[..width])))
+                    .map(|value| Value::String(Box::from(&value[..width])))
                     .collect();
                 return Ok(Self {
                     values,
@@ -1265,17 +1252,6 @@ impl MissingValues {
         }
         Ok(Self::default())
     }
-
-    pub fn with_encoding(&self, encoding: &'static Encoding) -> MissingValues<EncodedString> {
-        MissingValues {
-            values: self
-                .values
-                .iter()
-                .map(|value| value.with_encoding(encoding))
-                .collect(),
-            range: self.range,
-        }
-    }
 }
 
 #[derive(Clone)]
@@ -1467,9 +1443,6 @@ impl RawString {
     pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
         EncodedStr::new(&self.0, encoding)
     }
-    pub fn with_encoding(&self, encoding: &'static Encoding) -> EncodedString {
-        EncodedString::new(&*self.0, encoding)
-    }
     pub fn as_slice(&self) -> &[u8] {
         &*self.0
     }
@@ -1525,85 +1498,39 @@ impl<const N: usize> Debug for RawStrArray<N> {
     }
 }
 
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub enum EncodedString {
     Encoded {
         bytes: Vec<u8>,
         encoding: &'static Encoding,
     },
-    Utf8(String),
+    Utf8 {
+        s: String,
+    },
 }
 
 impl EncodedString {
-    /// Creates a new `EncodedString` from `bytes` and `encoding`.
-    ///
-    /// It's cheaper to use `EncodedString::from(string)` if the input is in a
-    /// `&str` or `String`.
-    pub fn new(bytes: impl Into<Vec<u8>>, encoding: &'static Encoding) -> Self {
-        let bytes: Vec<u8> = bytes.into();
-        if encoding == UTF_8 {
-            match String::from_utf8(bytes) {
-                Ok(string) => Self::Utf8(string),
-                Err(error) => Self::Encoded {
-                    bytes: error.into_bytes(),
-                    encoding,
-                },
-            }
-        } else {
-            Self::Encoded { bytes, encoding }
-        }
-    }
-
     pub fn borrowed(&self) -> EncodedStr<'_> {
         match self {
             EncodedString::Encoded { bytes, encoding } => EncodedStr::Encoded { bytes, encoding },
-            EncodedString::Utf8(s) => EncodedStr::Utf8 { s },
+            EncodedString::Utf8 { s } => EncodedStr::Utf8 { s },
         }
     }
     pub fn as_utf8_bytes(&self) -> Option<&[u8]> {
         match self {
             EncodedString::Encoded { bytes, encoding } if *encoding == UTF_8 => Some(&bytes),
-            EncodedString::Utf8(s) => Some(s.as_bytes()),
+            EncodedString::Utf8 { s } => Some(s.as_bytes()),
             _ => None,
         }
     }
     pub fn as_encoded(&self) -> (&[u8], &'static Encoding) {
         match self {
             EncodedString::Encoded { bytes, encoding } => (&bytes, encoding),
-            EncodedString::Utf8(s) => (s.as_bytes(), UTF_8),
+            EncodedString::Utf8 { s } => (s.as_bytes(), UTF_8),
         }
     }
 }
 
-impl Debug for EncodedString {
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        match self {
-            Self::Encoded { bytes, encoding } => {
-                write!(f, "{:?}({})", self.borrowed().to_utf8(), encoding.name())
-            }
-            Self::Utf8(string) => write!(f, "{string:?}"),
-        }
-    }
-}
-
-impl From<String> for EncodedString {
-    fn from(value: String) -> Self {
-        Self::Utf8(value)
-    }
-}
-
-impl From<&'_ str> for EncodedString {
-    fn from(value: &'_ str) -> Self {
-        Self::Utf8(value.into())
-    }
-}
-
-impl Default for EncodedString {
-    fn default() -> Self {
-        Self::Utf8(String::new())
-    }
-}
-
 impl<'a> From<EncodedStr<'a>> for EncodedString {
     fn from(value: EncodedStr<'a>) -> Self {
         match value {
@@ -1611,7 +1538,7 @@ impl<'a> From<EncodedStr<'a>> for EncodedString {
                 bytes: bytes.into(),
                 encoding,
             },
-            EncodedStr::Utf8 { s } => Self::Utf8(s.into()),
+            EncodedStr::Utf8 { s } => Self::Utf8 { s: s.into() },
         }
     }
 }