pub fn with_encoding(self, encoding: &'static Encoding) -> EncodedString {
EncodedString {
- bytes: self,
+ bytes: self.0,
encoding,
}
}
/// Creates an [EncodedStr] with `encoding` that borrows this string's
/// contents.
- pub fn as_encoded(&self, encoding: &'static Encoding) -> BorrowedEncodedString {
- EncodedString {
- encoding,
- bytes: self.0.borrow(),
- }
+ pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
+ EncodedStr::new(self.0.borrow(), encoding)
}
}
impl From<EncodedString> for OwnedRawString {
fn from(value: EncodedString) -> Self {
- value.bytes
+ Self(value.bytes)
}
}
/// A string value.
String(
/// The value, in the variable's encoding.
- &'a BorrowedEncodedString,
+ EncodedStr<'a>,
),
}
/// Returns the string inside this datum, or `None` if this is a numeric
/// datum.
- pub fn as_string(&self) -> Option<&BorrowedEncodedString> {
+ pub fn as_string(&self) -> Option<&EncodedStr> {
match self {
Self::Number(_) => None,
Self::String(s) => Some(s),
/// Returns the string inside this datum as a mutable borrow, or `None` if
/// this is a numeric datum.
- pub fn as_string_mut(&'a mut self) -> Option<&mut BorrowedEncodedString> {
+ pub fn as_string_mut(&'a mut self) -> Option<EncodedStr<'a>> {
match self {
Self::Number(_) => None,
Self::String(s) => Some(*s),
/// A [Datum] that borrows its string data (if any).
pub type BorrowedDatum<'a> = Datum<&'a BorrowedRawString>;
-/// The value of a [Variable](crate::dictionary::Variable): either a number or a
-/// string.
+/// The value of a [Variable](crate::dictionary::Variable).
///
/// `RawString` is parameterized by its string type, which is either
/// [OwnedRawString] if it owns its string value (aliased as [OwnedDatum]) or
}
}
-pub type OwnedEncodedString = EncodedString<Vec<u8>>;
-pub type BorrowedEncodedString = EncodedString<[u8]>;
-
/// An owned string and its [Encoding].
///
/// The string is not guaranteed to be valid in the encoding.
///
/// The borrowed form of such a string is [EncodedStr].
-#[derive(Clone)]
-pub struct EncodedString<B = Vec<u8>>
-where
- B: ?Sized,
-{
- /// The string's encoding.
- encoding: &'static Encoding,
-
+#[derive(Clone, Debug)]
+pub struct EncodedString {
/// The bytes of the string.
- bytes: RawString<B>,
-}
+ bytes: Vec<u8>,
-impl<B> Debug for EncodedString<B>
-where
- B: Borrow<[u8]>,
-{
- fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
- write!(f, "{:?}", self.as_str())
- }
+ /// The string's encoding.
+ encoding: &'static Encoding,
}
-impl<B> EncodedString<B>
-where
- B: Borrow<[u8]>,
-{
+impl EncodedString {
pub fn len(&self) -> usize {
self.bytes.len()
}
///
/// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
pub fn as_str(&self) -> Cow<'_, str> {
- self.encoding.decode_without_bom_handling(self.as_bytes()).0
+ self.encoding.decode_without_bom_handling(&self.bytes).0
}
/// Returns the bytes in the string, in its encoding.
pub fn as_bytes(&self) -> &[u8] {
- self.bytes.as_bytes()
+ &self.bytes
}
/// Compares this string and `other` for equality, ignoring trailing ASCII
/// spaces in either string for the purpose of comparison. (This is
/// acceptable because we assume that the encoding is ASCII-compatible.)
- pub fn eq_ignore_trailing_spaces<'a>(
- &self,
- other: impl Into<&'a BorrowedEncodedString>,
- ) -> bool {
+ pub fn eq_ignore_trailing_spaces<'a>(&self, other: impl Into<EncodedStr<'a>>) -> bool {
self.borrowed().eq_ignore_trailing_spaces(other.into())
}
}
/// Returns a borrowed form of this string.
- pub fn borrowed(&self) -> &EncodedString<[u8]> {
- EncodedString {
- encoding: self.encoding,
- bytes: self.bytes.borrow(),
- }
+ pub fn borrowed(&self) -> EncodedStr<'_> {
+ EncodedStr::new(&self.bytes, self.encoding)
}
/// Removes any trailing ASCII spaces.
}
}
-impl<'a> From<&'a BorrowedEncodedString> for EncodedString {
- fn from(value: &'a BorrowedEncodedString) -> Self {
+impl<'a> From<&'a EncodedString> for EncodedStr<'a> {
+    /// Borrows the bytes of an owned [EncodedString], carrying its encoding
+    /// along unchanged.
+    fn from(value: &'a EncodedString) -> Self {
+        Self::new(&value.bytes, value.encoding)
+    }
+}
+
+impl<'a> From<EncodedStr<'a>> for EncodedString {
+ fn from(value: EncodedStr<'a>) -> Self {
Self {
bytes: value.bytes.into(),
encoding: value.encoding,
self.borrowed().serialize(serializer)
}
}
+
+/// A borrowed string and its [Encoding].
+///
+/// The string is not guaranteed to be valid in the encoding.
+///
+/// This type is `Copy`: it holds only a byte slice reference and a
+/// reference to a static [Encoding], so it is meant to be passed by value.
+///
+/// The derived equality compares both the bytes and the encoding; it does
+/// not decode the strings first.
+///
+/// The owned form of such a string is [EncodedString].
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct EncodedStr<'a> {
+ /// The bytes of the string.
+ bytes: &'a [u8],
+
+ /// The string's encoding.
+ encoding: &'static Encoding,
+}
+
+impl<'a> EncodedStr<'a> {
+    /// Constructs a borrowed string from `bytes`, to be interpreted in
+    /// `encoding`.  The bytes are stored as-is, without validation.
+    pub fn new(bytes: &'a [u8], encoding: &'static Encoding) -> Self {
+        Self { bytes, encoding }
+    }
+
+    /// Returns this string recoded in UTF-8.  Invalid characters will be
+    /// replaced by [REPLACEMENT_CHARACTER].
+    ///
+    /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
+    pub fn as_str(&self) -> Cow<'_, str> {
+        self.encoding.decode_without_bom_handling(self.bytes).0
+    }
+
+    /// Returns the bytes in the string, in its encoding.
+    pub fn as_bytes(&self) -> &[u8] {
+        self.bytes
+    }
+
+    /// Returns this string recoded in `encoding`.
+    ///
+    /// The string is first decoded to UTF-8, so byte sequences that are
+    /// invalid in its own encoding become [REPLACEMENT_CHARACTER] before
+    /// being encoded again.  Characters that `encoding` cannot represent
+    /// are handled however `Encoding::encode` handles unmappable input.
+    ///
+    /// The result borrows from this string only when neither step had to
+    /// change any bytes.
+    ///
+    /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
+    pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<'_, [u8]> {
+        match self.as_str() {
+            // Decoding borrowed straight from `self.bytes`, so whatever the
+            // encoder returns (borrowed or owned) is valid for our lifetime.
+            Cow::Borrowed(utf8) => encoding.encode(utf8).0,
+            // Decoding allocated a new string.  The encoder may hand back a
+            // borrow of that temporary (e.g. when the target is UTF-8), and
+            // those bytes differ from `self.bytes`, so the recoded bytes
+            // must be returned as owned data rather than the original slice.
+            Cow::Owned(utf8) => Cow::Owned(encoding.encode(&utf8).0.into_owned()),
+        }
+    }
+
+    /// Returns true if this string is empty.
+    pub fn is_empty(&self) -> bool {
+        self.bytes.is_empty()
+    }
+
+    /// Compares this string and `other` for equality, ignoring trailing ASCII
+    /// spaces in either string for the purpose of comparison.
+    ///
+    /// Only the raw bytes are compared: the shorter string is treated as if
+    /// it were padded with ASCII spaces to the length of the longer one.  The
+    /// encodings of the two strings are not consulted.
+    pub fn eq_ignore_trailing_spaces<'b>(&self, other: EncodedStr<'b>) -> bool {
+        self.bytes.iter().zip_longest(other.bytes).all(|elem| {
+            // A missing byte on either side stands in for a space.
+            let (left, right) = elem.or(&b' ', &b' ');
+            *left == *right
+        })
+    }
+
+    /// Returns a helper for displaying this string in double quotes.
+    pub fn quoted(&self) -> QuotedEncodedStr<'_> {
+        QuotedEncodedStr(self)
+    }
+}
+
+impl Display for EncodedStr<'_> {
+    /// Writes the string's contents, decoded to UTF-8, with no quoting or
+    /// escaping.
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        f.write_str(&self.as_str())
+    }
+}
+
+impl Debug for EncodedStr<'_> {
+    /// Writes the string's contents, decoded to UTF-8, in the quoted and
+    /// escaped form used by `str`'s `Debug` implementation.
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        let decoded = self.as_str();
+        f.write_fmt(format_args!("{:?}", decoded))
+    }
+}
+
+impl<'a> From<&'a str> for EncodedStr<'a> {
+    /// Views a Rust string slice as an [EncodedStr] in UTF-8.
+    fn from(s: &'a str) -> Self {
+        Self::new(s.as_bytes(), UTF_8)
+    }
+}
+
+impl<'a> From<&'a String> for EncodedStr<'a> {
+    /// Views an owned Rust string as an [EncodedStr] in UTF-8, by way of the
+    /// `&str` conversion.
+    fn from(s: &'a String) -> Self {
+        s.as_str().into()
+    }
+}
+
+impl Serialize for EncodedStr<'_> {
+    /// Serializes the string as a plain string value, after decoding it to
+    /// UTF-8.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let decoded = self.as_str();
+        serializer.serialize_str(&decoded)
+    }
+}
+
+/// Helper struct for displaying an [EncodedStr] in double quotes.
+///
+/// Obtained from [EncodedStr::quoted].
+pub struct QuotedEncodedStr<'a>(&'a EncodedStr<'a>);
+
+impl Display for QuotedEncodedStr<'_> {
+    /// Writes the wrapped string, decoded to UTF-8, in the quoted and escaped
+    /// form produced by `str`'s `Debug` implementation.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let decoded = self.0.as_str();
+        f.write_fmt(format_args!("{:?}", decoded))
+    }
+}
use crate::{
calendar::{calendar_gregorian_to_offset, DateError},
- data::{BorrowedEncodedString, Datum, EncodedString, OwnedDatum, RawString},
+ data::{Datum, EncodedStr, EncodedString, OwnedDatum, RawString},
endian::{Endian, Parse},
format::{DateTemplate, Decimals, Settings, TemplateItem, Type},
settings::{EndianSettings, Settings as PsppSettings},
/// interpreting them as a binary number yields nonsense.
pub fn parse<'b, T>(&self, input: T) -> Result<OwnedDatum, ParseError>
where
- T: Into<&'b BorrowedEncodedString>,
+ T: Into<EncodedStr<'b>>,
{
- let input: &BorrowedEncodedString = input.into();
+ let input: EncodedStr = input.into();
if input.is_empty() {
return Ok(self.type_.default_value());
}
use crate::{
calendar::{days_in_month, is_leap_year},
- data::{Datum, OwnedDatum},
+ data::{Datum, EncodedStr, OwnedDatum},
endian::Endian,
format::{
parse::{ParseError, ParseErrorKind, Sign},