From 1a55eeba199f50491e476098aad1e5545b133cb8 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 19 May 2025 08:31:49 -0700 Subject: [PATCH] Revert "work" This reverts commit 09d295be4e3da0997be047ba0adcd6c18ca339f0. --- rust/pspp/src/sys/raw.rs | 56 ++++++++++------------------------------ 1 file changed, 13 insertions(+), 43 deletions(-) diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs index c26ae08052..ea77e3b4c2 100644 --- a/rust/pspp/src/sys/raw.rs +++ b/rust/pspp/src/sys/raw.rs @@ -5,11 +5,11 @@ use crate::{ sys::encoding::{default_encoding, get_encoding, Error as EncodingError}, }; -use encoding_rs::{mem::decode_latin1, Encoding, UTF_8}; +use encoding_rs::{mem::decode_latin1, Encoding}; use flate2::read::ZlibDecoder; use num::Integer; use std::{ - borrow::{Borrow, Cow}, + borrow::Cow, cell::RefCell, collections::{HashMap, VecDeque}, fmt::{Debug, Display, Formatter, Result as FmtResult}, @@ -1516,19 +1516,6 @@ impl EncodedString { EncodedString::Utf8 { s } => EncodedStr::Utf8 { s }, } } - pub fn as_utf8_bytes(&self) -> Option<&[u8]> { - match self { - EncodedString::Encoded { bytes, encoding } if *encoding == UTF_8 => Some(&bytes), - EncodedString::Utf8 { s } => Some(s.as_bytes()), - _ => None, - } - } - pub fn as_encoded(&self) -> (&[u8], &'static Encoding) { - match self { - EncodedString::Encoded { bytes, encoding } => (&bytes, encoding), - EncodedString::Utf8 { s } => (s.as_bytes(), UTF_8), - } - } } impl<'a> From> for EncodedString { @@ -1543,24 +1530,6 @@ impl<'a> From> for EncodedString { } } -impl PartialEq for EncodedString { - fn eq(&self, other: &Self) -> bool { - if let Some(self_utf8) = self.as_utf8_bytes() { - if let Some(other_utf8) = other.as_utf8_bytes() { - return self_utf8 == other_utf8; - } - } - - let (self_bytes, self_encoding) = self.as_encoded(); - let (other_bytes, other_encoding) = other.as_encoded(); - if self_encoding == other_encoding { - self_bytes == other_bytes - } else { - self.borrowed().to_utf8() == other.borrowed().to_utf8() - } - } -} - pub enum EncodedStr<'a> { Encoded { bytes: &'a [u8], @@ -1589,18 +1558,19 @@ impl<'a> EncodedStr<'a> { EncodedStr::Utf8 { s } => s.as_bytes(), } } - pub fn to_utf8(&self) -> Cow<'a, str> { + pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> { match self { EncodedStr::Encoded { bytes, encoding } => { - encoding.decode_without_bom_handling(bytes).0 + let utf8 = encoding.decode_without_bom_handling(bytes).0; + match encoding.encode(&utf8).0 { + Cow::Borrowed(_) => { + // Recoding into UTF-8 and then back did not change anything. + Cow::from(*bytes) + } + Cow::Owned(owned) => Cow::Owned(owned), + } } - EncodedStr::Utf8 { s } => Cow::from(*s), - } - } - pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> { - match encoding.encode(&self.to_utf8()).0 { - Cow::Borrowed(_) => Cow::Borrowed(self.as_bytes()), - Cow::Owned(string) => Cow::Owned(string), + EncodedStr::Utf8 { s } => encoding.encode(s).0, } } pub fn is_empty(&self) -> bool { @@ -1716,7 +1686,7 @@ impl ValueLabelRecord, RawString> { let label_len: u8 = endian.parse(read_bytes(r)?); let label_len = label_len as usize; - let label = read_slice(r, label_len)?; + let mut label = read_slice(r, label_len)?; let padding_len = Integer::next_multiple_of(&(label_len + 1), &8) - (label_len + 1); read_slice(r, padding_len)?; labels.push((value, RawString(label))); -- 2.30.2