From: Ben Pfaff
Date: Thu, 31 Jul 2025 21:24:44 +0000 (-0700)
Subject: more experimenting
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3645ebe30165fe488518ff7f1ac992996b48fda2;p=pspp

more experimenting
---

diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs
index 294023fdeb..7c97aa4e58 100644
--- a/rust/pspp/src/data.rs
+++ b/rust/pspp/src/data.rs
@@ -104,30 +104,107 @@ impl RawStringTrait for String {
     }
 }
 
-impl RawStringTrait for Vec<u8> {
+#[derive(PartialEq, Eq, PartialOrd, Ord)]
+struct ByteStr<'a>(&'a [u8]);
+
+impl Serialize for ByteStr<'_> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        if let Ok(s) = str::from_utf8(self.0) {
+            let (variant_index, variant) = if self.0.iter().all(|b| b.is_ascii()) {
+                (0, "Ascii")
+            } else {
+                (1, "Utf8")
+            };
+            let mut tuple =
+                serializer.serialize_tuple_variant("RawString", variant_index, variant, 1)?;
+            tuple.serialize_field(s)?;
+            tuple.end()
+        } else {
+            let mut tuple = serializer.serialize_tuple_variant("RawString", 2, "Windows1252", 1)?;
+            tuple.serialize_field(&decode_latin1(self.0))?;
+            tuple.end()
+        }
+    }
+}
+
+impl Debug for ByteStr<'_> {
+    // If `s` is valid UTF-8, displays it as UTF-8, otherwise as Latin-1
+    // (actually bytes interpreted as Unicode code points).
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        let s = from_utf8(&self.0).map_or_else(|_| decode_latin1(self.0), Cow::from);
+        write!(f, "{s:?}")
+    }
+}
+
+impl RawStringTrait for ByteString {
+    fn raw_string_bytes(&self) -> &[u8] {
+        self.0.as_slice()
+    }
+}
+
+#[derive(PartialEq, Eq, PartialOrd, Ord)]
+struct ByteString(Vec<u8>);
+
+impl Serialize for ByteString {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        if let Ok(s) = str::from_utf8(&self.0) {
+            let (variant_index, variant) = if self.0.iter().all(|b| b.is_ascii()) {
+                (0, "Ascii")
+            } else {
+                (1, "Utf8")
+            };
+            let mut tuple =
+                serializer.serialize_tuple_variant("RawString", variant_index, variant, 1)?;
+            tuple.serialize_field(s)?;
+            tuple.end()
+        } else {
+            let mut tuple = serializer.serialize_tuple_variant("RawString", 2, "Windows1252", 1)?;
+            tuple.serialize_field(&decode_latin1(&self.0))?;
+            tuple.end()
+        }
+    }
+}
+
+impl Debug for ByteString {
+    // If `s` is valid UTF-8, displays it as UTF-8, otherwise as Latin-1
+    // (actually bytes interpreted as Unicode code points).
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        let s =
+            from_utf8(&self.0.borrow()).map_or_else(|_| decode_latin1(self.0.borrow()), Cow::from);
+        write!(f, "{s:?}")
+    }
+}
+
+impl RawStringTrait for ByteStr<'_> {
     fn raw_string_bytes(&self) -> &[u8] {
-        self.as_slice()
+        self.0
     }
 }
 
-impl MutRawString for Vec<u8> {
+impl MutRawString for ByteString {
     fn resize(&mut self, new_len: usize) -> Result<(), ()> {
-        match new_len.cmp(&self.len()) {
+        match new_len.cmp(&self.0.len()) {
             Ordering::Less => {
-                if !self[new_len..].iter().all(|b| *b == b' ') {
+                if !self.0[new_len..].iter().all(|b| *b == b' ') {
                     return Err(());
                 }
-                self.truncate(new_len);
+                self.0.truncate(new_len);
             }
             Ordering::Equal => (),
-            Ordering::Greater => self.extend((self.len()..new_len).map(|_| b' ')),
+            Ordering::Greater => self.0.extend((self.0.len()..new_len).map(|_| b' ')),
         }
         Ok(())
     }
 
     /// Removes any trailing ASCII spaces.
     fn trim_end(&mut self) {
-        while self.pop_if(|c| *c == b' ').is_some() {}
+        while self.0.pop_if(|c| *c == b' ').is_some() {}
     }
 }
 