pub fn with_encoding(self, encoding: &'static Encoding) -> EncodedString {
EncodedString {
- bytes: self.0,
+ bytes: self,
encoding,
}
}
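- /// Creates an [EncodedStr] with `encoding` that borrows this string's
- /// contents.
+ /// Creates a [BorrowedEncodedString] with `encoding` that borrows this
+ /// string's contents.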
- pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
- EncodedStr::new(self.0.borrow(), encoding)
+ pub fn as_encoded(&self, encoding: &'static Encoding) -> BorrowedEncodedString {
+ EncodedString {
+ encoding,
+ bytes: self.0.borrow(),
+ }
}
}
impl From<EncodedString> for OwnedRawString {
fn from(value: EncodedString) -> Self {
- Self(value.bytes)
+ value.bytes
}
}
/// A string value.
String(
/// The value, in the variable's encoding.
- EncodedStr<'a>,
+ &'a BorrowedEncodedString,
),
}
/// Returns the string inside this datum, or `None` if this is a numeric
/// datum.
- pub fn as_string(&self) -> Option<&EncodedStr> {
+ pub fn as_string(&self) -> Option<&BorrowedEncodedString> {
match self {
Self::Number(_) => None,
Self::String(s) => Some(s),
/// Returns the string inside this datum as a mutable borrow, or `None` if
/// this is a numeric datum.
- pub fn as_string_mut(&'a mut self) -> Option<EncodedStr<'a>> {
+ pub fn as_string_mut(&'a mut self) -> Option<&mut BorrowedEncodedString> {
match self {
Self::Number(_) => None,
Self::String(s) => Some(*s),
/// A [Datum] that borrows its string data (if any).
pub type BorrowedDatum<'a> = Datum<&'a BorrowedRawString>;
-/// The value of a [Variable](crate::dictionary::Variable).
+/// The value of a [Variable](crate::dictionary::Variable): either a number or a
+/// string.
///
-/// `RawString` is parameterized by its string type, which is either
+/// `Datum` is parameterized by its string type, which is either
/// [OwnedRawString] if it owns its string value (aliased as [OwnedDatum]) or
}
}
+pub type OwnedEncodedString = EncodedString<Vec<u8>>;
+pub type BorrowedEncodedString = EncodedString<[u8]>;
+
/// An owned string and its [Encoding].
///
/// The string is not guaranteed to be valid in the encoding.
///
-/// The borrowed form of such a string is [EncodedStr].
+/// The borrowed form of such a string is [BorrowedEncodedString].
-#[derive(Clone, Debug)]
-pub struct EncodedString {
- /// The bytes of the string.
- bytes: Vec<u8>,
-
+#[derive(Clone)]
+pub struct EncodedString<B = Vec<u8>>
+where
+ B: ?Sized,
+{
/// The string's encoding.
encoding: &'static Encoding,
+
+ /// The bytes of the string.
+ bytes: RawString<B>,
}
-impl EncodedString {
+impl<B> Debug for EncodedString<B>
+where
+ B: Borrow<[u8]>,
+{
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{:?}", self.as_str())
+ }
+}
+
+impl<B> EncodedString<B>
+where
+ B: Borrow<[u8]>,
+{
pub fn len(&self) -> usize {
self.bytes.len()
}
///
/// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
pub fn as_str(&self) -> Cow<'_, str> {
- self.encoding.decode_without_bom_handling(&self.bytes).0
+ self.encoding.decode_without_bom_handling(self.as_bytes()).0
}
/// Returns the bytes in the string, in its encoding.
pub fn as_bytes(&self) -> &[u8] {
- &self.bytes
+ self.bytes.as_bytes()
}
/// Compares this string and `other` for equality, ignoring trailing ASCII
/// spaces in either string for the purpose of comparison. (This is
/// acceptable because we assume that the encoding is ASCII-compatible.)
- pub fn eq_ignore_trailing_spaces<'a>(&self, other: impl Into<EncodedStr<'a>>) -> bool {
+ pub fn eq_ignore_trailing_spaces<'a>(
+ &self,
+ other: impl Into<&'a BorrowedEncodedString>,
+ ) -> bool {
self.borrowed().eq_ignore_trailing_spaces(other.into())
}
}
/// Returns a borrowed form of this string.
- pub fn borrowed(&self) -> EncodedStr<'_> {
- EncodedStr::new(&self.bytes, self.encoding)
+ pub fn borrowed(&self) -> &EncodedString<[u8]> {
+ EncodedString {
+ encoding: self.encoding,
+ bytes: self.bytes.borrow(),
+ }
}
/// Removes any trailing ASCII spaces.
}
}
-impl<'a> From<&'a EncodedString> for EncodedStr<'a> {
- fn from(value: &'a EncodedString) -> Self {
- value.borrowed()
- }
-}
-
-impl<'a> From<EncodedStr<'a>> for EncodedString {
- fn from(value: EncodedStr<'a>) -> Self {
+impl<'a> From<&'a BorrowedEncodedString> for EncodedString {
+ fn from(value: &'a BorrowedEncodedString) -> Self {
Self {
bytes: value.bytes.into(),
encoding: value.encoding,
self.borrowed().serialize(serializer)
}
}
-
-/// A borrowed string and its [Encoding].
-///
-/// The string is not guaranteed to be valid in the encoding.
-///
-/// The owned form of such a string is [EncodedString].
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct EncodedStr<'a> {
- /// The bytes of the string.
- bytes: &'a [u8],
-
- /// The string's encoding.
- encoding: &'static Encoding,
-}
-
-impl<'a> EncodedStr<'a> {
- /// Construct a new string with an arbitrary encoding.
- pub fn new(bytes: &'a [u8], encoding: &'static Encoding) -> Self {
- Self { bytes, encoding }
- }
-
- /// Returns this string recoded in UTF-8. Invalid characters will be
- /// replaced by [REPLACEMENT_CHARACTER].
- ///
- /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
- pub fn as_str(&self) -> Cow<'_, str> {
- self.encoding.decode_without_bom_handling(self.bytes).0
- }
-
- /// Returns the bytes in the string, in its encoding.
- pub fn as_bytes(&self) -> &[u8] {
- self.bytes
- }
-
- /// Returns this string recoded in `encoding`. Invalid characters will be
- /// replaced by [REPLACEMENT_CHARACTER].
- ///
- /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
- pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
- let utf8 = self.as_str();
- match encoding.encode(&utf8).0 {
- Cow::Borrowed(_) => {
- // Recoding into UTF-8 and then back did not change anything.
- Cow::from(self.bytes)
- }
- Cow::Owned(owned) => Cow::Owned(owned),
- }
- }
-
- /// Returns true if this string is empty.
- pub fn is_empty(&self) -> bool {
- self.bytes.is_empty()
- }
-
- pub fn eq_ignore_trailing_spaces<'b>(&self, other: EncodedStr<'b>) -> bool {
- self.bytes.iter().zip_longest(other.bytes).all(|elem| {
- let (left, right) = elem.or(&b' ', &b' ');
- *left == *right
- })
- }
-
- /// Returns a helper for displaying this string in double quotes.
- pub fn quoted(&self) -> QuotedEncodedStr {
- QuotedEncodedStr(self)
- }
-}
-
-impl<'a> Display for EncodedStr<'a> {
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- write!(f, "{}", self.as_str())
- }
-}
-
-impl<'a> Debug for EncodedStr<'a> {
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- write!(f, "{:?}", self.as_str())
- }
-}
-
-impl<'a> From<&'a str> for EncodedStr<'a> {
- fn from(s: &'a str) -> Self {
- Self {
- bytes: s.as_bytes(),
- encoding: UTF_8,
- }
- }
-}
-
-impl<'a> From<&'a String> for EncodedStr<'a> {
- fn from(s: &'a String) -> Self {
- Self::from(s.as_str())
- }
-}
-
-impl<'a> Serialize for EncodedStr<'a> {
- fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
- where
- S: serde::Serializer,
- {
- self.as_str().serialize(serializer)
- }
-}
-
-/// Helper struct for displaying a [QuotedEncodedStr] in double quotes.
-pub struct QuotedEncodedStr<'a>(&'a EncodedStr<'a>);
-
-impl Display for QuotedEncodedStr<'_> {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- write!(f, "{:?}", self.0.as_str())
- }
-}
use crate::{
calendar::{calendar_gregorian_to_offset, DateError},
- data::{Datum, EncodedStr, EncodedString, OwnedDatum, RawString},
+ data::{BorrowedEncodedString, Datum, EncodedString, OwnedDatum, RawString},
endian::{Endian, Parse},
format::{DateTemplate, Decimals, Settings, TemplateItem, Type},
settings::{EndianSettings, Settings as PsppSettings},
/// interpreting them as a binary number yields nonsense.
pub fn parse<'b, T>(&self, input: T) -> Result<OwnedDatum, ParseError>
where
- T: Into<EncodedStr<'b>>,
+ T: Into<&'b BorrowedEncodedString>,
{
- let input: EncodedStr = input.into();
+ let input: &BorrowedEncodedString = input.into();
if input.is_empty() {
return Ok(self.type_.default_value());
}
use crate::{
calendar::{days_in_month, is_leap_year},
- data::{Datum, EncodedStr, OwnedDatum},
+ data::{Datum, OwnedDatum},
endian::Endian,
format::{
parse::{ParseError, ParseErrorKind, Sign},