Revert "work"
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 19 May 2025 15:31:49 +0000 (08:31 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 19 May 2025 15:31:49 +0000 (08:31 -0700)
This reverts commit 09d295be4e3da0997be047ba0adcd6c18ca339f0.

rust/pspp/src/sys/raw.rs

index c26ae08052c6722d61dae5ab1c634481f8727d35..ea77e3b4c2b72dd47c08f259707cebe817c02192 100644 (file)
@@ -5,11 +5,11 @@ use crate::{
     sys::encoding::{default_encoding, get_encoding, Error as EncodingError},
 };
 
-use encoding_rs::{mem::decode_latin1, Encoding, UTF_8};
+use encoding_rs::{mem::decode_latin1, Encoding};
 use flate2::read::ZlibDecoder;
 use num::Integer;
 use std::{
-    borrow::{Borrow, Cow},
+    borrow::Cow,
     cell::RefCell,
     collections::{HashMap, VecDeque},
     fmt::{Debug, Display, Formatter, Result as FmtResult},
@@ -1516,19 +1516,6 @@ impl EncodedString {
             EncodedString::Utf8 { s } => EncodedStr::Utf8 { s },
         }
     }
-    pub fn as_utf8_bytes(&self) -> Option<&[u8]> {
-        match self {
-            EncodedString::Encoded { bytes, encoding } if *encoding == UTF_8 => Some(&bytes),
-            EncodedString::Utf8 { s } => Some(s.as_bytes()),
-            _ => None,
-        }
-    }
-    pub fn as_encoded(&self) -> (&[u8], &'static Encoding) {
-        match self {
-            EncodedString::Encoded { bytes, encoding } => (&bytes, encoding),
-            EncodedString::Utf8 { s } => (s.as_bytes(), UTF_8),
-        }
-    }
 }
 
 impl<'a> From<EncodedStr<'a>> for EncodedString {
@@ -1543,24 +1530,6 @@ impl<'a> From<EncodedStr<'a>> for EncodedString {
     }
 }
 
-impl PartialEq for EncodedString {
-    fn eq(&self, other: &Self) -> bool {
-        if let Some(self_utf8) = self.as_utf8_bytes() {
-            if let Some(other_utf8) = other.as_utf8_bytes() {
-                return self_utf8 == other_utf8;
-            }
-        }
-
-        let (self_bytes, self_encoding) = self.as_encoded();
-        let (other_bytes, other_encoding) = other.as_encoded();
-        if self_encoding == other_encoding {
-            self_bytes == other_bytes
-        } else {
-            self.borrowed().to_utf8() == other.borrowed().to_utf8()
-        }
-    }
-}
-
 pub enum EncodedStr<'a> {
     Encoded {
         bytes: &'a [u8],
@@ -1589,18 +1558,19 @@ impl<'a> EncodedStr<'a> {
             EncodedStr::Utf8 { s } => s.as_bytes(),
         }
     }
-    pub fn to_utf8(&self) -> Cow<'a, str> {
+    pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
         match self {
             EncodedStr::Encoded { bytes, encoding } => {
-                encoding.decode_without_bom_handling(bytes).0
+                let utf8 = encoding.decode_without_bom_handling(bytes).0;
+                match encoding.encode(&utf8).0 {
+                    Cow::Borrowed(_) => {
+                        // Recoding into UTF-8 and then back did not change anything.
+                        Cow::from(*bytes)
+                    }
+                    Cow::Owned(owned) => Cow::Owned(owned),
+                }
             }
-            EncodedStr::Utf8 { s } => Cow::from(*s),
-        }
-    }
-    pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
-        match encoding.encode(&self.to_utf8()).0 {
-            Cow::Borrowed(_) => Cow::Borrowed(self.as_bytes()),
-            Cow::Owned(string) => Cow::Owned(string),
+            EncodedStr::Utf8 { s } => encoding.encode(s).0,
         }
     }
     pub fn is_empty(&self) -> bool {
@@ -1716,7 +1686,7 @@ impl ValueLabelRecord<RawStrArray<8>, RawString> {
             let label_len: u8 = endian.parse(read_bytes(r)?);
             let label_len = label_len as usize;
 
-            let label = read_slice(r, label_len)?;
+            let mut label = read_slice(r, label_len)?;
             let padding_len = Integer::next_multiple_of(&(label_len + 1), &8) - (label_len + 1);
             read_slice(r, padding_len)?;
             labels.push((value, RawString(label)));