cmp::Ordering,
fmt::{Debug, Display, Formatter},
hash::Hash,
- ops::Deref,
str::from_utf8,
};
use crate::{
dictionary::{VarType, VarWidth},
format::DisplayPlain,
- sys::raw::RawDatum,
};
/// A string in an unspecified character encoding.
})
}
+ /// Returns this string's contents as a reference to a [BorrowedRawString].
+ pub fn borrowed(&self) -> &BorrowedRawString {
+ RawString::new(self.0.borrow())
+ }
+
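- /// Creates an [EncodedStr] with `encoding` that borrows this string's
- /// contents.
+ /// Creates a [BorrowedEncodedString] with `encoding` that borrows this
+ /// string's contents.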
- pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
- EncodedStr::new(self.0.borrow(), encoding)
+ pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedString<&BorrowedRawString> {
+ EncodedString {
+ encoding,
+ bytes: self.borrowed(),
+ }
}
}
}
}
+/// Builds an owned raw string from a borrowed one by converting the borrowed
+/// contents into owned storage with `Into`.
+impl<'a> From<&'a BorrowedRawString> for OwnedRawString {
+ fn from(value: &'a BorrowedRawString) -> Self {
+ Self(value.0.into())
+ }
+}
+
impl<B> Debug for RawString<B>
where
B: Borrow<[u8]> + ?Sized,
pub fn eq_ignore_trailing_spaces<'b>(&self, other: EncodedDat<'b>) -> bool {
match (self, other) {
- (Self::String(a), EncodedDat::String(b)) => a.eq_ignore_trailing_spaces(b),
+ (Self::String(a), EncodedDat::String(b)) => a.eq_ignore_trailing_spaces(&b),
_ => *self == other,
}
}
}
}
+/// An [EncodedString] whose contents are held in an [OwnedRawString].
+pub type OwnedEncodedString = EncodedString<OwnedRawString>;
+/// An [EncodedString] whose contents are a `&'a` reference to a
+/// [BorrowedRawString].
+pub type BorrowedEncodedString<'a> = EncodedString<&'a BorrowedRawString>;
+
/// An owned string and its [Encoding].
///
/// The string is not guaranteed to be valid in the encoding.
-///
-/// The borrowed form of such a string is [EncodedStr].
#[derive(Copy, Clone, Debug)]
pub struct EncodedString<R = OwnedRawString> {
/// The bytes of the string.
where
R: Borrow<BorrowedRawString>,
{
+ /// Constructs a string from `raw`, whose bytes are to be interpreted in
+ /// `encoding`. Both are stored as given; nothing is checked here.
+ pub fn new(raw: R, encoding: &'static Encoding) -> Self {
+ Self {
+ bytes: raw,
+ encoding,
+ }
+ }
+
pub fn len(&self) -> usize {
self.bytes.borrow().len()
}
self.encoding.decode_without_bom_handling(self.as_bytes()).0
}
+ /// Returns this string recoded in `encoding`. Invalid characters will be
+ /// replaced by [REPLACEMENT_CHARACTER].
+ ///
+ /// The result borrows this string's own bytes only when neither decoding
+ /// them to UTF-8 nor encoding that text into `encoding` changed anything;
+ /// otherwise the recoded bytes are returned in an owned buffer.
+ ///
+ /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
+ pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
+     let utf8 = self.as_str();
+     // A borrowed result from `encode` borrows the *decoded* text in `utf8`,
+     // not `self`'s bytes, so record only whether encoding had to allocate
+     // and release the borrow of `utf8` before deciding what to return.
+     let recoded = match encoding.encode(&utf8).0 {
+         Cow::Borrowed(_) => None,
+         Cow::Owned(owned) => Some(owned),
+     };
+     match (recoded, utf8) {
+         (Some(owned), _) => Cow::Owned(owned),
+         // Decoding and encoding were both no-ops, so the original bytes
+         // already are the answer.
+         (None, Cow::Borrowed(_)) => Cow::Borrowed(self.as_bytes()),
+         // Decoding changed the bytes (e.g. a non-ASCII legacy encoding
+         // recoded to UTF-8): the decoded text is the answer, not the
+         // original bytes.
+         (None, Cow::Owned(decoded)) => Cow::Owned(decoded.into_bytes()),
+     }
+ }
+
/// Returns the bytes in the string, in its encoding.
pub fn as_bytes(&self) -> &[u8] {
&self.bytes.borrow().0
/// Compares this string and `other` for equality, ignoring trailing ASCII
/// spaces in either string for the purpose of comparison. (This is
/// acceptable because we assume that the encoding is ASCII-compatible.)
- pub fn eq_ignore_trailing_spaces<R2>(&self, other: impl Into<EncodedString<R2>>) -> bool
+ pub fn eq_ignore_trailing_spaces<R2>(&self, other: &EncodedString<R2>) -> bool
where
R2: Borrow<BorrowedRawString>,
{
self.borrowed()
.bytes
- .eq_ignore_trailing_spaces(&other.into().borrowed().bytes)
+ .eq_ignore_trailing_spaces(&other.borrowed().bytes)
}
/// Returns the string's [Encoding].
}
}
+/// Converts a borrowed encoded string into an owned one: the bytes are
+/// converted into owned storage with `Into` and the encoding is carried over
+/// unchanged.
+impl<'a> From<BorrowedEncodedString<'a>> for OwnedEncodedString {
+ fn from(value: BorrowedEncodedString<'a>) -> Self {
+ Self {
+ bytes: value.bytes.into(),
+ encoding: value.encoding,
+ }
+ }
+}
+
impl From<&str> for EncodedString {
fn from(value: &str) -> Self {
Self {
}
}
-impl<'a> From<EncodedStr<'a>> for EncodedString {
- fn from(value: EncodedStr<'a>) -> Self {
+/// Borrows the bytes of a `&str` as an encoded string tagged as [UTF_8].
+impl<'a> From<&'a str> for BorrowedEncodedString<'a> {
+ fn from(value: &'a str) -> Self {
Self {
- bytes: value.bytes.into(),
- encoding: value.encoding,
+ bytes: BorrowedRawString::new(value.as_bytes()),
+ encoding: UTF_8,
}
}
}
+/// Borrows a `String` as an encoded string by delegating to the `&str`
+/// conversion.
+impl<'a> From<&'a String> for BorrowedEncodedString<'a> {
+ fn from(value: &'a String) -> Self {
+ value.as_str().into()
+ }
+}
+
impl Serialize for EncodedString {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
}
}
-/// A borrowed string and its [Encoding].
-///
-/// The string is not guaranteed to be valid in the encoding.
-///
-/// The owned form of such a string is [EncodedString].
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct EncodedStr<'a> {
- /// The bytes of the string.
- bytes: &'a [u8],
-
- /// The string's encoding.
- encoding: &'static Encoding,
-}
-
-impl<'a> EncodedStr<'a> {
- /// Construct a new string with an arbitrary encoding.
- pub fn new(bytes: &'a [u8], encoding: &'static Encoding) -> Self {
- Self { bytes, encoding }
- }
-
- /// Returns this string recoded in UTF-8. Invalid characters will be
- /// replaced by [REPLACEMENT_CHARACTER].
- ///
- /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
- pub fn as_str(&self) -> Cow<'_, str> {
- self.encoding.decode_without_bom_handling(self.bytes).0
- }
-
- /// Returns the bytes in the string, in its encoding.
- pub fn as_bytes(&self) -> &[u8] {
- self.bytes
- }
-
- /// Returns this string recoded in `encoding`. Invalid characters will be
- /// replaced by [REPLACEMENT_CHARACTER].
- ///
- /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
- pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
- let utf8 = self.as_str();
- match encoding.encode(&utf8).0 {
- Cow::Borrowed(_) => {
- // Recoding into UTF-8 and then back did not change anything.
- Cow::from(self.bytes)
- }
- Cow::Owned(owned) => Cow::Owned(owned),
- }
- }
-
- /// Returns true if this string is empty.
- pub fn is_empty(&self) -> bool {
- self.bytes.is_empty()
- }
-
- pub fn eq_ignore_trailing_spaces<'b>(&self, other: EncodedStr<'b>) -> bool {
- self.bytes.iter().zip_longest(other.bytes).all(|elem| {
- let (left, right) = elem.or(&b' ', &b' ');
- *left == *right
- })
- }
-
- /// Returns a helper for displaying this string in double quotes.
- pub fn quoted(&self) -> impl Display {
- Quoted(self.as_str())
- }
-}
-
-impl<'a> Display for EncodedStr<'a> {
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- write!(f, "{}", self.as_str())
- }
-}
-
-impl<'a> Debug for EncodedStr<'a> {
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- write!(f, "{:?}", self.as_str())
- }
-}
-
-impl<'a> From<&'a str> for EncodedStr<'a> {
- fn from(s: &'a str) -> Self {
- Self {
- bytes: s.as_bytes(),
- encoding: UTF_8,
- }
- }
-}
-
-impl<'a> From<&'a String> for EncodedStr<'a> {
- fn from(s: &'a String) -> Self {
- Self::from(s.as_str())
- }
-}
-
-impl<'a> Serialize for EncodedStr<'a> {
- fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
- where
- S: serde::Serializer,
- {
- self.as_str().serialize(serializer)
- }
-}
-
/// Helper struct for displaying a value in double quotes.
pub struct Quoted<T>(T)
where
use crate::{
calendar::{calendar_gregorian_to_offset, DateError},
- data::{Datum, EncodedStr, EncodedString, OwnedDatum, RawString},
+ data::{BorrowedEncodedString, Datum, EncodedString, OwnedDatum},
endian::{Endian, Parse},
format::{DateTemplate, Decimals, Settings, TemplateItem, Type},
settings::{EndianSettings, Settings as PsppSettings},
/// interpreting them as a binary number yields nonsense.
pub fn parse<'b, T>(&self, input: T) -> Result<OwnedDatum, ParseError>
where
- T: Into<EncodedStr<'b>>,
+ T: Into<BorrowedEncodedString<'b>>,
{
- let input: EncodedStr = input.into();
+ let input: BorrowedEncodedString = input.into();
if input.is_empty() {
return Ok(self.type_.default_value());
}
use crate::{
calendar::{days_in_month, is_leap_year},
- data::{Datum, EncodedStr, OwnedDatum},
+ data::{BorrowedRawString, Datum, EncodedString, OwnedDatum},
endian::Endian,
format::{
parse::{ParseError, ParseErrorKind, Sign},
let parsed = Type::RB
.parser(UTF_8)
.with_endian(EndianSettings::new(Endian::Big))
- .parse(EncodedStr::new(&raw[..], UTF_8))
+ .parse(EncodedString::new(BorrowedRawString::new(&raw[..]), UTF_8))
.unwrap()
.as_number()
.unwrap()