format::DisplayPlain,
};
-pub trait RawStringTrait: Debug + PartialEq + Eq + PartialOrd + Ord {
+pub trait RawStringTrait: Debug + PartialEq + Eq + PartialOrd + Ord + Hash {
fn raw_string_bytes(&self) -> &[u8];
/// Compares this string and `other` for equality, ignoring trailing ASCII
///
/// This compares the bytes of the strings, disregarding their encodings (if
/// known).
- fn eq_ignore_trailing_spaces<R>(&self, other: &impl RawStringTrait) -> bool {
+ fn eq_ignore_trailing_spaces<R>(&self, other: &R) -> bool
+ where
+ R: RawStringTrait,
+ {
self.raw_string_bytes()
.iter()
.copied()
fn len(&self) -> usize {
self.raw_string_bytes().len()
}
+
+ fn as_ref(&self) -> ByteStr<'_> {
+ ByteStr(self.raw_string_bytes())
+ }
+
+ fn as_encoded(&self, encoding: &'static Encoding) -> WithEncoding<ByteStr<'_>>
+ where
+ Self: Sized,
+ {
+ WithEncoding::new(self.as_ref(), encoding)
+ }
+
+ fn with_encoding(self, encoding: &'static Encoding) -> WithEncoding<Self>
+ where
+ Self: Sized,
+ {
+ WithEncoding::new(self, encoding)
+ }
}
pub trait MutRawString: RawStringTrait {
- fn resize(&mut self, new_len: usize) -> Result<(), ()>;
+ fn resize(&mut self, new_len: usize) -> Result<(), ResizeError>;
fn trim_end(&mut self);
}
-impl RawStringTrait for str {
+impl RawStringTrait for &'_ str {
fn raw_string_bytes(&self) -> &[u8] {
self.as_bytes()
}
}
}
-#[derive(PartialEq, Eq, PartialOrd, Ord)]
-struct ByteStr<'a>(&'a [u8]);
+impl RawStringTrait for &'_ String {
+ fn raw_string_bytes(&self) -> &[u8] {
+ self.as_bytes()
+ }
+}
+
+/// A borrowed byte string: a thin, copyable wrapper around `&[u8]`.
+///
+/// The wrapper stores only the bytes; it carries no character encoding.
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct ByteStr<'a>(pub &'a [u8]);
+
+impl RawStringTrait for ByteStr<'_> {
+ /// Returns the wrapped byte slice.
+ fn raw_string_bytes(&self) -> &[u8] {
+ self.0
+ }
+}
impl Serialize for ByteStr<'_> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
}
}
-impl RawStringTrait for ByteString {
- fn raw_string_bytes(&self) -> &[u8] {
- self.0.as_slice()
+/// A byte string that either borrows or owns its bytes: a wrapper around
+/// `Cow<'a, [u8]>`.
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct ByteCow<'a>(pub Cow<'a, [u8]>);
+
+impl ByteCow<'_> {
+ /// Converts this into an owned [ByteString]. The conversion goes through
+ /// `Cow::into_owned`, so bytes that are already owned are reused rather
+ /// than copied.
+ pub fn into_owned(self) -> ByteString {
+ ByteString(self.0.into_owned())
}
}
-#[derive(PartialEq, Eq, PartialOrd, Ord)]
-struct ByteString(Vec<u8>);
-
-impl Serialize for ByteString {
+impl Serialize for ByteCow<'_> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
- if let Ok(s) = str::from_utf8(&self.0) {
- let (variant_index, variant) = if self.0.iter().all(|b| b.is_ascii()) {
- (0, "Ascii")
- } else {
- (1, "Utf8")
- };
- let mut tuple =
- serializer.serialize_tuple_variant("RawString", variant_index, variant, 1)?;
- tuple.serialize_field(s)?;
- tuple.end()
- } else {
- let mut tuple = serializer.serialize_tuple_variant("RawString", 2, "Windows1252", 1)?;
- tuple.serialize_field(&decode_latin1(&self.0))?;
- tuple.end()
- }
+ ByteStr(&self.0).serialize(serializer)
}
}
-impl Debug for ByteString {
- // If `s` is valid UTF-8, displays it as UTF-8, otherwise as Latin-1
- // (actually bytes interpreted as Unicode code points).
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- let s =
- from_utf8(&self.0.borrow()).map_or_else(|_| decode_latin1(self.0.borrow()), Cow::from);
- write!(f, "{s:?}")
- }
-}
-
-impl RawStringTrait for ByteStr<'_> {
+impl RawStringTrait for ByteCow<'_> {
fn raw_string_bytes(&self) -> &[u8] {
- self.0
- }
-}
-
-impl MutRawString for ByteString {
- fn resize(&mut self, new_len: usize) -> Result<(), ()> {
- match new_len.cmp(&self.0.len()) {
- Ordering::Less => {
- if !self.0[new_len..].iter().all(|b| *b == b' ') {
- return Err(());
- }
- self.0.truncate(new_len);
- }
- Ordering::Equal => (),
- Ordering::Greater => self.0.extend((self.0.len()..new_len).map(|_| b' ')),
- }
- Ok(())
- }
-
- /// Removes any trailing ASCII spaces.
- fn trim_end(&mut self) {
- while self.0.pop_if(|c| *c == b' ').is_some() {}
+ &self.0
}
}
-/// A string in an unspecified character encoding.
-///
-/// `RawString` is usually associated with a [Variable], in the variable's
-/// character encoding. We assume that the encoding is one supported by
-/// [encoding_rs] with byte units (that is, not a `UTF-16` encoding). All of
-/// these encodings have some basic ASCII compatibility.
-///
-/// `RawString` is parameterized by its content type, which is either `Vec<u8>`
-/// for an owned raw string (aliased as [OwnedRawString]) or `[u8]` for a
-/// borrowed raw string (aliased as [BorrowedRawString]).
-///
-/// [Variable]: crate::dictionary::Variable
-#[derive(Clone, Default, Hash)]
-pub struct RawString<B>(pub B)
-where
- B: ?Sized;
-
-impl<B, B2> PartialEq<RawString<B2>> for RawString<B>
-where
- B: Borrow<[u8]> + ?Sized,
- B2: Borrow<[u8]> + ?Sized,
-{
- fn eq(&self, other: &RawString<B2>) -> bool {
- self.0.borrow().eq(other.0.borrow())
- }
-}
-
-impl<B> Eq for RawString<B> where B: Borrow<[u8]> + ?Sized {}
-
-impl<B, B2> PartialOrd<RawString<B2>> for RawString<B>
-where
- B: Borrow<[u8]> + ?Sized,
- B2: Borrow<[u8]> + ?Sized,
-{
- fn partial_cmp(&self, other: &RawString<B2>) -> Option<Ordering> {
- self.0.borrow().partial_cmp(other.0.borrow())
- }
-}
-
-impl<B> Ord for RawString<B>
-where
- B: Borrow<[u8]> + ?Sized,
-{
- fn cmp(&self, other: &Self) -> Ordering {
- self.0.borrow().cmp(other.0.borrow())
- }
-}
-
-/// A [RawString] that owns its contents.
-pub type OwnedRawString = RawString<Vec<u8>>;
-
-/// A [RawString] that borrows its contents.
-///
-/// Because `[u8]` is not [Sized], [BorrowedRawString] may itself only be used
-/// borrowed.
-pub type BorrowedRawString = RawString<[u8]>;
-
-impl Borrow<BorrowedRawString> for OwnedRawString {
- fn borrow(&self) -> &BorrowedRawString {
- &BorrowedRawString::new(self.as_bytes())
+impl Debug for ByteCow<'_> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ ByteStr(&self.0).fmt(f)
}
}
-impl BorrowedRawString {
- pub fn new(s: &[u8]) -> &Self {
- // SAFETY: `RawStr` is a transparent wrapper around `[u8]`, so we can
- // turn a reference to the wrapped type into a reference to the wrapper
- // type.
- unsafe { &*(s as *const [u8] as *const BorrowedRawString) }
- }
-}
+/// An owned byte string: a wrapper around `Vec<u8>`.
+///
+/// The derived `Default` value is the empty string.
+#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct ByteString(pub Vec<u8>);
-impl OwnedRawString {
- /// Creates a new [RawString] that consists of `n` ASCII spaces.
+impl ByteString {
+ /// Creates a new [ByteString] that consists of `n` ASCII spaces.
pub fn spaces(n: usize) -> Self {
Self(std::iter::repeat_n(b' ', n).collect())
}
-
- /// Extends or shortens this [RawString] to exactly `len` bytes. If the
- /// string needs to be extended, does so by appending spaces.
- ///
- /// If this shortens the string, it can cut off a multibyte character in the
- /// middle ([is_resizable](Self::is_resizable) checks for this).
- pub fn resize(&mut self, len: usize) {
- self.0.resize(len, b' ');
- }
-
- /// Removes any trailing ASCII spaces.
- pub fn trim_end(&mut self) {
- while self.0.pop_if(|c| *c == b' ').is_some() {}
- }
-
- pub fn with_encoding(self, encoding: &'static Encoding) -> OwnedEncodedString {
- EncodedString {
- raw: self,
- encoding,
- }
- }
}
-impl<B> RawString<B>
-where
- B: Borrow<[u8]> + ?Sized,
-{
- pub fn as_bytes(&self) -> &[u8] {
- self.0.borrow()
- }
-
- pub fn len(&self) -> usize {
- self.0.borrow().len()
- }
-
- pub fn is_empty(&self) -> bool {
- self.len() == 0
- }
-
- /// Returns true if this raw string can be resized to `len` bytes without
- /// dropping non-space characters.
- pub fn is_resizable(&self, new_len: usize) -> bool {
- new_len >= self.len() || self.0.borrow()[new_len..].iter().all(|b| *b == b' ')
- }
-
- /// Compares this string and `other` for equality, ignoring trailing ASCII
- /// spaces in either string for the purpose of comparison. (This is
- /// acceptable because we assume that the encoding is ASCII-compatible.)
- pub fn eq_ignore_trailing_spaces<B2>(&self, other: &RawString<B2>) -> bool
- where
- B2: Borrow<[u8]> + ?Sized,
- {
- self.0
- .borrow()
- .iter()
- .zip_longest(other.0.borrow())
- .all(|elem| {
- let (left, right) = elem.or(&b' ', &b' ');
- *left == *right
- })
- }
-
- pub fn borrowed(&self) -> &BorrowedRawString {
- RawString::new(self.0.borrow())
- }
-
- /// Creates an [EncodedStr] with `encoding` that borrows this string's
- /// contents.
- pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedString<&BorrowedRawString> {
- EncodedString {
- encoding,
- raw: self.borrowed(),
- }
- }
-}
-
-impl From<Cow<'_, [u8]>> for OwnedRawString {
- fn from(value: Cow<'_, [u8]>) -> Self {
- Self(value.into_owned())
+impl From<String> for ByteString {
+ fn from(value: String) -> Self {
+ value.into_bytes().into()
}
}
-impl From<Vec<u8>> for OwnedRawString {
- fn from(source: Vec<u8>) -> Self {
- Self(source)
- }
-}
-
-impl From<&[u8]> for OwnedRawString {
- fn from(source: &[u8]) -> Self {
- Self(source.into())
+impl From<&'_ str> for ByteString {
+ fn from(value: &str) -> Self {
+ value.as_bytes().into()
}
}
-impl<const N: usize> From<[u8; N]> for OwnedRawString {
- fn from(source: [u8; N]) -> Self {
- Self(source.into())
+impl From<Vec<u8>> for ByteString {
+ fn from(value: Vec<u8>) -> Self {
+ Self(value)
}
}
-impl From<OwnedEncodedString> for OwnedRawString {
- fn from(value: OwnedEncodedString) -> Self {
- value.raw
+impl From<&[u8]> for ByteString {
+ fn from(value: &[u8]) -> Self {
+ Self(value.into())
}
}
-impl<'a> From<&'a BorrowedRawString> for OwnedRawString {
- fn from(value: &'a BorrowedRawString) -> Self {
- Self(value.0.into())
+impl<const N: usize> From<[u8; N]> for ByteString {
+ fn from(value: [u8; N]) -> Self {
+ value.as_slice().into()
}
}
-impl<B> Debug for RawString<B>
-where
- B: Borrow<[u8]> + ?Sized,
-{
- // If `s` is valid UTF-8, displays it as UTF-8, otherwise as Latin-1
- // (actually bytes interpreted as Unicode code points).
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- let s =
- from_utf8(&self.0.borrow()).map_or_else(|_| decode_latin1(self.0.borrow()), Cow::from);
- write!(f, "{s:?}")
+impl RawStringTrait for ByteString {
+ fn raw_string_bytes(&self) -> &[u8] {
+ self.0.as_slice()
}
}
-impl<B> Serialize for RawString<B>
-where
- B: Borrow<[u8]> + ?Sized,
-{
+impl Serialize for ByteString {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
- if let Ok(s) = str::from_utf8(self.0.borrow()) {
- let (variant_index, variant) = if self.0.borrow().iter().all(|b| b.is_ascii()) {
+ if let Ok(s) = str::from_utf8(&self.0) {
+ let (variant_index, variant) = if self.0.iter().all(|b| b.is_ascii()) {
(0, "Ascii")
} else {
(1, "Utf8")
tuple.end()
} else {
let mut tuple = serializer.serialize_tuple_variant("RawString", 2, "Windows1252", 1)?;
- tuple.serialize_field(&decode_latin1(self.0.borrow()))?;
+ tuple.serialize_field(&decode_latin1(&self.0))?;
tuple.end()
}
}
}
+impl Debug for ByteString {
+ /// Writes the bytes as a quoted, escaped string: decoded as UTF-8 when
+ /// they are valid UTF-8, and otherwise as Latin-1 (that is, each byte
+ /// interpreted as a Unicode code point).
+ fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+ let text = match from_utf8(&self.0) {
+ Ok(utf8) => Cow::from(utf8),
+ Err(_) => decode_latin1(&self.0),
+ };
+ write!(f, "{text:?}")
+ }
+}
+
+impl MutRawString for ByteString {
+ /// Makes this string exactly `new_len` bytes long.
+ ///
+ /// Growing pads the string with ASCII spaces. Shrinking succeeds only if
+ /// every byte that would be dropped is an ASCII space; otherwise this
+ /// returns [ResizeError::TooWide] and leaves the string unmodified.
+ fn resize(&mut self, new_len: usize) -> Result<(), ResizeError> {
+ // `get` yields `None` when growing and an empty tail when the length
+ // is unchanged, so only a real truncation can be rejected here.
+ if let Some(dropped) = self.0.get(new_len..) {
+ if dropped.iter().any(|b| *b != b' ') {
+ return Err(ResizeError::TooWide);
+ }
+ }
+ self.0.resize(new_len, b' ');
+ Ok(())
+ }
+
+ /// Removes any trailing ASCII spaces.
+ fn trim_end(&mut self) {
+ // Keep everything up to and including the last non-space byte.
+ let kept = self
+ .0
+ .iter()
+ .rposition(|b| *b != b' ')
+ .map_or(0, |last| last + 1);
+ self.0.truncate(kept);
+ }
+}
+
mod encoded;
-pub use encoded::{BorrowedEncodedString, EncodedString, OwnedEncodedString};
+pub use encoded::{Encoded, EncodedStringTrait, WithEncoding};
/// A [Datum] that owns its string data (if any).
-pub type OwnedDatum = Datum<OwnedRawString>;
+pub type OwnedDatum = Datum<WithEncoding<ByteString>>;
/// A [Datum] that borrows its string data (if any).
-pub type BorrowedDatum<'a> = Datum<&'a BorrowedRawString>;
+pub type BorrowedDatum<'a> = Datum<WithEncoding<ByteStr<'a>>>;
/// The value of a [Variable](crate::dictionary::Variable).
///
-/// [&BorrowedRawString](BorrowedRawString) if it borrows it (aliased as
+/// [`WithEncoding<ByteStr>`](WithEncoding) if it borrows it (aliased as
/// [BorrowedDatum]).
#[derive(Clone)]
-pub enum Datum<B> {
+pub enum Datum<T> {
/// A numeric value.
Number(
/// A number, or `None` for the system-missing value.
/// A string value.
String(
/// The value, in the variable's encoding.
- B,
+ T,
),
}
-impl Datum<OwnedEncodedString> {
+impl Datum<WithEncoding<ByteString>> {
pub fn new_utf8(s: impl Into<String>) -> Self {
- Datum::String(OwnedRawString::from(s.into().into_bytes()).with_encoding(UTF_8))
+ let s: String = s.into();
+ Datum::String(ByteString::from(s).with_encoding(UTF_8))
+ }
+}
+
+impl<'a> Datum<WithEncoding<ByteCow<'a>>> {
+ /// Converts this datum into one that owns its string data.
+ ///
+ /// A number is passed through unchanged; a string is converted with
+ /// `WithEncoding::into_owned`.
+ pub fn into_owned(self) -> Datum<WithEncoding<ByteString>> {
+ match self {
+ Self::Number(number) => Datum::Number(number),
+ Self::String(string) => Datum::String(string.into_owned()),
+ }
+ }
+}
+
+impl<T> Datum<T>
+where
+ T: EncodedStringTrait,
+{
+ /// Returns a datum with the same value whose string, if any, is obtained
+ /// from [EncodedStringTrait::as_encoded_byte_str] and so borrows from
+ /// `self`. A number is copied.
+ pub fn as_borrowed(&self) -> Datum<WithEncoding<ByteStr<'_>>> {
+ match self {
+ Datum::Number(number) => Datum::Number(*number),
+ Datum::String(string) => Datum::String(string.as_encoded_byte_str()),
+ }
+ }
+ /// Returns an owned datum with the same value, with a string produced by
+ /// [EncodedStringTrait::cloned]. A number is copied.
+ pub fn cloned(&self) -> Datum<WithEncoding<ByteString>> {
+ match self {
+ Datum::Number(number) => Datum::Number(*number),
+ Datum::String(string) => Datum::String(string.cloned()),
+ }
}
}
}
}
-impl<'a, B> Display for Datum<B>
+impl<T> Display for Datum<T>
where
- B: Borrow<BorrowedEncodedString<'a>>,
+ T: Display,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::Number(None) => write!(f, "SYSMIS"),
Self::Number(Some(number)) => number.display_plain().fmt(f),
- Self::String(string) => write!(f, "{}", string.borrow()),
+ Self::String(string) => string.fmt(f),
}
}
}
}
}
-impl<B, B2> PartialEq<Datum<B2>> for Datum<B>
+impl<T, R> PartialEq<Datum<R>> for Datum<T>
where
- B: Borrow<RawString<[u8]>>,
- B2: Borrow<RawString<[u8]>>,
+ T: PartialEq<R>,
{
- fn eq(&self, other: &Datum<B2>) -> bool {
+ fn eq(&self, other: &Datum<R>) -> bool {
match (self, other) {
- (Self::Number(Some(l0)), Datum::Number(Some(r0))) => {
- OrderedFloat(*l0) == OrderedFloat(*r0)
+ (Self::Number(Some(n1)), Datum::Number(Some(n2))) => {
+ OrderedFloat(*n1) == OrderedFloat(*n2)
}
(Self::Number(None), Datum::Number(None)) => true,
- (Self::String(l0), Datum::String(r0)) => l0.borrow() == r0.borrow(),
+ (Self::String(s1), Datum::String(s2)) => s1 == s2,
_ => false,
}
}
}
-impl<B> Eq for Datum<B> where B: Borrow<RawString<[u8]>> {}
+impl<T> Eq for Datum<T> where T: Eq {}
-impl<B, B2> PartialOrd<Datum<B2>> for Datum<B>
+impl<T, R> PartialOrd<Datum<R>> for Datum<T>
where
- B: Borrow<RawString<[u8]>>,
- B2: Borrow<RawString<[u8]>>,
+ T: PartialOrd<R>,
{
- fn partial_cmp(&self, other: &Datum<B2>) -> Option<Ordering> {
- Some(match (self, other) {
- (Self::Number(a), Datum::Number(b)) => match (a, b) {
- (None, None) => Ordering::Equal,
- (None, Some(_)) => Ordering::Less,
- (Some(_), None) => Ordering::Greater,
- (Some(a), Some(b)) => a.total_cmp(b),
- },
- (Self::Number(_), Datum::String(_)) => Ordering::Less,
- (Self::String(_), Datum::Number(_)) => Ordering::Greater,
- (Self::String(a), Datum::String(b)) => a.borrow().cmp(b.borrow()),
- })
- }
-}
-
-impl<B> Ord for Datum<B>
+ fn partial_cmp(&self, other: &Datum<R>) -> Option<Ordering> {
+ match (self, other) {
+ (Self::Number(a), Datum::Number(b)) => {
+ a.map(OrderedFloat).partial_cmp(&b.map(OrderedFloat))
+ }
+ (Self::Number(_), Datum::String(_)) => Some(Ordering::Less),
+ (Self::String(_), Datum::Number(_)) => Some(Ordering::Greater),
+ (Self::String(a), Datum::String(b)) => a.partial_cmp(b),
+ }
+ }
+}
+
+impl<T> Ord for Datum<T>
where
- B: Borrow<RawString<[u8]>>,
+ T: Ord,
{
fn cmp(&self, other: &Self) -> Ordering {
self.partial_cmp(other).unwrap()
}
}
-impl<B> Hash for Datum<B>
+impl<T> Hash for Datum<T>
where
- B: Borrow<BorrowedRawString>,
+ T: Hash,
{
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
match self {
Self::Number(number) => number.map(OrderedFloat).hash(state),
- Self::String(string) => string.borrow().hash(state),
+ Self::String(string) => string.hash(state),
}
}
}
}
}
-impl<B> Datum<B>
+impl<T> Datum<T>
where
- B: Borrow<RawString<[u8]>>,
+ T: RawStringTrait,
{
/// Returns true if this datum can be resized to the given `width` without
/// loss, which is true only if this datum and `width` are both string or
pub fn is_resizable(&self, width: VarWidth) -> bool {
match (self, width) {
(Self::Number(_), VarWidth::Numeric) => true,
- (Self::String(s), VarWidth::String(new_width)) => {
- s.borrow().is_resizable(new_width as usize)
- }
+ (Self::String(s), VarWidth::String(new_width)) => s.is_resizable(new_width as usize),
_ => false,
}
}
pub fn width(&self) -> VarWidth {
match self {
Self::Number(_) => VarWidth::Numeric,
- Self::String(s) => VarWidth::String(s.borrow().len().try_into().unwrap()),
+ Self::String(s) => VarWidth::String(s.len().try_into().unwrap()),
}
}
/// Compares this datum and `other` for equality, ignoring trailing ASCII
/// spaces in either, if they are both strings, for the purpose of
/// comparison.
+ ///
+ /// Numbers are compared through `OrderedFloat`, as in the `PartialEq`
+ /// implementation for `Datum`, so that a NaN value equals itself and the
+ /// system-missing value equals only itself. A number never equals a
+ /// string.
- pub fn eq_ignore_trailing_spaces<B2>(&self, other: &Datum<B2>) -> bool
+ pub fn eq_ignore_trailing_spaces<R>(&self, other: &Datum<R>) -> bool
where
- B2: Borrow<RawString<[u8]>>,
+ R: RawStringTrait,
{
match (self, other) {
- (Self::String(a), Datum::String(b)) => a.borrow().eq_ignore_trailing_spaces(b.borrow()),
- _ => self == other,
- }
- }
-
- pub fn as_encoded<'a>(
- &'a self,
- encoding: &'static Encoding,
- ) -> Datum<BorrowedEncodedString<'a>> {
- match self {
- Datum::Number(number) => Datum::Number(*number),
- Datum::String(raw_string) => Datum::String(EncodedString {
- raw: raw_string.borrow(),
- encoding,
- }),
+ (Self::String(a), Datum::String(b)) => a.eq_ignore_trailing_spaces(b),
+ // Plain `a == b` on `Option<f64>` would make NaN unequal to itself,
+ // which disagrees with `==` and `Hash` on `Datum`.
+ (Self::Number(a), Datum::Number(b)) => {
+ a.map(OrderedFloat) == b.map(OrderedFloat)
+ }
+ _ => false,
}
}
-}
-impl Datum<OwnedRawString> {
- pub fn borrowed(&self) -> BorrowedDatum {
+ pub fn as_encoded(&self, encoding: &'static Encoding) -> Datum<WithEncoding<ByteStr<'_>>> {
match self {
Datum::Number(number) => Datum::Number(*number),
- Datum::String(string) => Datum::String(Borrow::borrow(string)),
+ Datum::String(raw_string) => Datum::String(raw_string.as_encoded(encoding)),
}
}
-}
-
-impl<'a> Datum<&'a BorrowedRawString> {
- pub fn borrowed(&self) -> BorrowedDatum {
- self.clone()
- }
-}
-impl Datum<OwnedEncodedString> {
- pub fn borrowed<'a>(&'a self) -> Datum<BorrowedEncodedString<'a>> {
+ pub fn with_encoding(self, encoding: &'static Encoding) -> Datum<WithEncoding<T>> {
match self {
- Datum::Number(number) => Datum::Number(*number),
- Datum::String(string) => Datum::String(string.borrowed()),
- }
- }
-}
-
-impl<'a> Datum<BorrowedEncodedString<'a>> {
- pub fn borrowed(&self) -> Datum<BorrowedEncodedString<'a>> {
- self.clone()
- }
-}
-
-impl<D> Datum<D>
-where
- D: BorrowString,
-{
- pub fn borrowed_string<'a>(&'a self) -> Datum<D::Borrowed<'a>> {
- match self {
- Datum::Number(number) => Datum::Number(*number),
- Datum::String(string) => Datum::String(string.borrow_string()),
+ Datum::Number(number) => Datum::Number(number),
+ Datum::String(string) => Datum::String(string.with_encoding(encoding)),
}
}
}
-pub trait BorrowString {
- type Borrowed<'a>
- where
- Self: 'a;
- fn borrow_string<'a>(&'a self) -> Self::Borrowed<'a>;
-}
-
-impl BorrowString for OwnedRawString {
- type Borrowed<'a> = &'a BorrowedRawString;
- fn borrow_string<'a>(&'a self) -> Self::Borrowed<'a> {
- BorrowedRawString::new(&self.0)
- }
-}
-
-impl BorrowString for BorrowedRawString {
- type Borrowed<'a> = &'a BorrowedRawString;
- fn borrow_string<'a>(&'a self) -> Self::Borrowed<'a> {
- self
- }
-}
-
-impl BorrowString for OwnedEncodedString {
- type Borrowed<'a> = BorrowedEncodedString<'a>;
- fn borrow_string<'a>(&'a self) -> Self::Borrowed<'a> {
- BorrowedEncodedString::new(self.raw.borrowed(), self.encoding)
- }
-}
-
-impl<'b> BorrowString for BorrowedEncodedString<'b> {
- type Borrowed<'a>
- = BorrowedEncodedString<'b>
- where
- Self: 'a;
-
- fn borrow_string<'a>(&'a self) -> Self::Borrowed<'a> {
- self.clone()
- }
-}
-
-pub trait AsEncodedString: Borrow<BorrowedRawString> {
- fn as_encoded_string<'a>(&'a self) -> BorrowedEncodedString<'a>;
-}
-
-impl AsEncodedString for OwnedEncodedString {
- fn as_encoded_string<'a>(&'a self) -> BorrowedEncodedString<'a> {
- self.borrowed()
- }
-}
-
-impl<'b> AsEncodedString for BorrowedEncodedString<'b> {
- fn as_encoded_string<'a>(&'a self) -> BorrowedEncodedString<'a> {
- self.clone()
- }
-}
-
impl<B> Datum<B>
where
- B: AsEncodedString,
+ B: EncodedStringTrait,
{
pub fn quoted<'a>(&'a self) -> QuotedDatum<'a, B> {
QuotedDatum(self)
impl<'a, B> Display for QuotedDatum<'a, B>
where
- B: AsEncodedString,
+ B: Display,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match &self.0 {
Datum::Number(None) => write!(f, "SYSMIS"),
Datum::Number(Some(number)) => number.display_plain().fmt(f),
- Datum::String(string) => write!(f, "\"{}\"", string.as_encoded_string().as_str()),
+ Datum::String(string) => write!(f, "\"{string}\""),
}
}
}
TooWide,
}
-impl<B> Datum<B>
-where
- B: BorrowMut<OwnedRawString>,
-{
+impl<T> Datum<T> {
/// Returns the string inside this datum as a mutable borrow, or `None` if
/// this is a numeric datum.
- pub fn as_string_mut(&mut self) -> Option<&mut OwnedRawString> {
+ pub fn as_string_mut(&mut self) -> Option<&mut T> {
match self {
Self::Number(_) => None,
Self::String(s) => Some(s.borrow_mut()),
}
/// Removes trailing ASCII spaces from this datum, if it is a string.
- pub fn trim_end(&mut self) {
+ pub fn trim_end(&mut self)
+ where
+ T: MutRawString,
+ {
self.as_string_mut().map(|s| s.trim_end());
}
/// Resizes this datum to the given `width`. Returns an error, without
/// modifying the datum, if [is_resizable](Self::is_resizable) would return
/// false.
- pub fn resize(&mut self, width: VarWidth) -> Result<(), ResizeError> {
+ pub fn resize(&mut self, width: VarWidth) -> Result<(), ResizeError>
+ where
+ T: MutRawString,
+ {
match (self, width) {
(Self::Number(_), VarWidth::Numeric) => Ok(()),
- (Self::String(s), VarWidth::String(new_width)) => {
- let s = s.borrow_mut();
- if s.is_resizable(new_width as usize) {
- s.resize(new_width as usize);
- Ok(())
- } else {
- Err(ResizeError::TooWide)
- }
- }
+ (Self::String(s), VarWidth::String(new_width)) => s.resize(new_width as usize),
_ => Err(ResizeError::MixedTypes),
}
}
}
-impl Datum<OwnedRawString> {
- pub fn with_encoding(self, encoding: &'static Encoding) -> Datum<OwnedEncodedString> {
- match self {
- Datum::Number(number) => Datum::Number(number),
- Datum::String(raw_string) => Datum::String(raw_string.with_encoding(encoding)),
- }
- }
-}
-
-impl<B> From<f64> for Datum<B>
-where
- B: Borrow<BorrowedRawString>,
-{
+impl<B> From<f64> for Datum<B> {
fn from(number: f64) -> Self {
Some(number).into()
}
}
-impl<B> From<Option<f64>> for Datum<B>
-where
- B: Borrow<BorrowedRawString>,
-{
+impl<B> From<Option<f64>> for Datum<B> {
fn from(value: Option<f64>) -> Self {
Self::Number(value)
}
}
-impl<B> From<&str> for Datum<B>
-where
- B: Borrow<BorrowedRawString> + for<'a> From<&'a [u8]>,
-{
- fn from(value: &str) -> Self {
- value.as_bytes().into()
+impl<'a> From<&'a str> for Datum<ByteStr<'a>> {
+ fn from(value: &'a str) -> Self {
+ Datum::String(ByteStr(value.as_bytes()))
}
}
-impl<B> From<&[u8]> for Datum<B>
-where
- B: Borrow<BorrowedRawString> + for<'a> From<&'a [u8]>,
-{
- fn from(value: &[u8]) -> Self {
- Self::String(value.into())
+impl<'a> From<&'a [u8]> for Datum<ByteStr<'a>> {
+ fn from(value: &'a [u8]) -> Self {
+ Self::String(ByteStr(value))
}
}
/// order.
///
/// [Dictionary]: crate::dictionary::Dictionary
- pub Vec<Datum<OwnedRawString>>,
+ pub Vec<Datum<ByteString>>,
);
impl RawCase {
- pub fn as_encoding(&self, encoding: &'static Encoding) -> Case<&'_ [Datum<OwnedRawString>]> {
+ pub fn as_encoding(&self, encoding: &'static Encoding) -> Case<&'_ [Datum<ByteString>]> {
Case {
encoding,
data: &self.0,
}
}
- pub fn with_encoding(self, encoding: &'static Encoding) -> Case<Vec<Datum<OwnedRawString>>> {
+ pub fn with_encoding(self, encoding: &'static Encoding) -> Case<Vec<Datum<ByteString>>> {
Case {
encoding,
data: self.0,
pub struct Case<B>
where
- B: Borrow<[Datum<OwnedRawString>]>,
+ B: Borrow<[Datum<ByteString>]>,
{
encoding: &'static Encoding,
data: B,
impl<B> Case<B>
where
- B: Borrow<[Datum<OwnedRawString>]>,
+ B: Borrow<[Datum<ByteString>]>,
{
pub fn len(&self) -> usize {
self.data.borrow().len()
impl<B> Serialize for Case<B>
where
- B: Borrow<[Datum<OwnedRawString>]>,
+ B: Borrow<[Datum<ByteString>]>,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
pub struct CaseIter<'a> {
encoding: &'static Encoding,
- iter: std::slice::Iter<'a, Datum<OwnedRawString>>,
+ iter: std::slice::Iter<'a, Datum<ByteString>>,
}
impl<'a> Iterator for CaseIter<'a> {
- type Item = Datum<BorrowedEncodedString<'a>>;
+ type Item = Datum<WithEncoding<ByteStr<'a>>>;
fn next(&mut self) -> Option<Self::Item> {
self.iter.next().map(|d| d.as_encoded(self.encoding))
impl<'a, B> IntoIterator for &'a Case<B>
where
- B: Borrow<[Datum<OwnedRawString>]>,
+ B: Borrow<[Datum<ByteString>]>,
{
- type Item = Datum<BorrowedEncodedString<'a>>;
+ type Item = Datum<WithEncoding<ByteStr<'a>>>;
type IntoIter = CaseIter<'a>;
}
}
-impl IntoIterator for Case<Vec<Datum<OwnedRawString>>> {
- type Item = Datum<OwnedEncodedString>;
+impl IntoIterator for Case<Vec<Datum<ByteString>>> {
+ type Item = Datum<WithEncoding<ByteString>>;
type IntoIter = CaseIntoIter;
pub struct CaseIntoIter {
encoding: &'static Encoding,
- iter: std::vec::IntoIter<Datum<OwnedRawString>>,
+ iter: std::vec::IntoIter<Datum<ByteString>>,
}
impl Iterator for CaseIntoIter {
- type Item = Datum<OwnedEncodedString>;
+ type Item = Datum<WithEncoding<ByteString>>;
fn next(&mut self) -> Option<Self::Item> {
self.iter
use std::{
- borrow::{Borrow, BorrowMut, Cow},
+ borrow::Cow,
cmp::Ordering,
fmt::{Debug, Display},
+ hash::Hash,
};
use encoding_rs::{Encoding, UTF_8};
use serde::Serialize;
-use crate::data::{BorrowedRawString, OwnedRawString, Quoted, RawString, RawStringTrait};
+use crate::data::{
+ ByteCow, ByteStr, ByteString, MutRawString, Quoted, RawStringTrait, ResizeError,
+};
pub trait Encoded {
fn encoding(&self) -> &'static Encoding;
}
-impl Encoded for str {
+impl Encoded for &'_ str {
fn encoding(&self) -> &'static Encoding {
UTF_8
}
}
}
+impl Encoded for &'_ String {
+ fn encoding(&self) -> &'static Encoding {
+ UTF_8
+ }
+}
+
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WithEncoding<T> {
- pub inner: T,
pub encoding: &'static Encoding,
+ pub inner: T,
}
impl<T> WithEncoding<T> {
}
}
+impl<'a> WithEncoding<ByteCow<'a>> {
+ pub fn into_owned(self) -> WithEncoding<ByteString> {
+ WithEncoding::new(self.inner.into_owned(), self.encoding)
+ }
+}
+
impl<T> PartialOrd for WithEncoding<T>
where
T: PartialOrd,
}
}
+/// Serializes the string returned by [EncodedStringTrait::as_str]; the
+/// `encoding` field itself is not written.
+impl<T> Serialize for WithEncoding<T>
+where
+ WithEncoding<T>: EncodedStringTrait,
+{
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: serde::Serializer,
+ {
+ self.as_str().serialize(serializer)
+ }
+}
+
pub trait EncodedStringTrait: Encoded + RawStringTrait + Display + Debug {
fn as_str(&self) -> Cow<'_, str>;
- fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]>;
+ fn into_string(self) -> String
+ where
+ Self: Sized,
+ {
+ self.as_str().into_owned()
+ }
+ fn to_encoding(&self, encoding: &'static Encoding) -> WithEncoding<ByteCow<'_>>;
+ fn as_encoded_byte_str(&self) -> WithEncoding<ByteStr<'_>> {
+ WithEncoding::new(ByteStr(self.raw_string_bytes()), self.encoding())
+ }
+ fn cloned(&self) -> WithEncoding<ByteString> {
+ WithEncoding::new(ByteString::from(self.raw_string_bytes()), self.encoding())
+ }
+ fn quoted(&self) -> Quoted<&Self>
+ where
+ Self: Sized,
+ {
+ Quoted(self)
+ }
}
-impl<'a> EncodedStringTrait for str {
+impl<'a> EncodedStringTrait for &'a str {
fn as_str(&self) -> Cow<'_, str> {
- Cow::from(self)
+ Cow::from(*self)
}
- fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
- encoding.encode(self).0
+ fn to_encoding(&self, encoding: &'static Encoding) -> WithEncoding<ByteCow<'_>> {
+ WithEncoding::new(ByteCow(encoding.encode(self).0), encoding)
+ }
+}
+
+impl EncodedStringTrait for String {
+ fn as_str(&self) -> Cow<'_, str> {
+ Cow::from(String::as_str(&self))
+ }
+
+ fn to_encoding(&self, encoding: &'static Encoding) -> WithEncoding<ByteCow<'_>> {
+ WithEncoding::new(ByteCow(encoding.encode(&self).0), encoding)
+ }
+}
+
+impl EncodedStringTrait for &'_ String {
+ fn as_str(&self) -> Cow<'_, str> {
+ Cow::from(String::as_str(&self))
+ }
+
+ fn to_encoding(&self, encoding: &'static Encoding) -> WithEncoding<ByteCow<'_>> {
+ WithEncoding::new(ByteCow(encoding.encode(String::as_str(&self)).0), encoding)
}
}
}
}
+/// Forwards the mutating operations to the wrapped string; the `encoding`
+/// field is left untouched.
+impl<T> MutRawString for WithEncoding<T>
+where
+ T: MutRawString,
+{
+ /// Resizes the wrapped string, returning whatever `T::resize` returns.
+ fn resize(&mut self, new_len: usize) -> Result<(), ResizeError> {
+ self.inner.resize(new_len)
+ }
+
+ /// Calls `trim_end` on the wrapped string.
+ fn trim_end(&mut self) {
+ self.inner.trim_end();
+ }
+}
+
impl<T> EncodedStringTrait for WithEncoding<T>
where
T: RawStringTrait,
/// replaced by [REPLACEMENT_CHARACTER].
///
/// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
- fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
+ fn to_encoding(&self, encoding: &'static Encoding) -> WithEncoding<ByteCow<'_>> {
let utf8 = self.as_str();
- match encoding.encode(&utf8).0 {
+ let inner = match encoding.encode(&utf8).0 {
Cow::Borrowed(_) => {
// Recoding into UTF-8 and then back did not change anything.
Cow::from(self.raw_string_bytes())
}
Cow::Owned(owned) => Cow::Owned(owned),
+ };
+ WithEncoding {
+ encoding,
+ inner: ByteCow(inner),
}
}
}
}
}
-pub type OwnedEncodedString = EncodedString<OwnedRawString>;
-pub type BorrowedEncodedString<'a> = EncodedString<&'a BorrowedRawString>;
-
-/// An owned string and its [Encoding].
-///
-/// The string is not guaranteed to be valid in the encoding.
-#[derive(Copy, Clone, Debug)]
-pub struct EncodedString<R> {
- /// The bytes of the string.
- pub raw: R,
-
- /// The string's encoding.
- pub encoding: &'static Encoding,
-}
-
-impl<R> Encoded for EncodedString<R> {
- fn encoding(&self) -> &'static Encoding {
- self.encoding
- }
-}
-
-impl<R> EncodedString<R>
-where
- R: Borrow<BorrowedRawString>,
-{
- pub fn new(raw: R, encoding: &'static Encoding) -> Self {
- Self { raw, encoding }
- }
-
- pub fn into_raw(self) -> R {
- self.raw
- }
-
- pub fn len(&self) -> usize {
- self.raw.borrow().len()
- }
-
- /// Returns this string recoded in UTF-8. Invalid characters will be
- /// replaced by [REPLACEMENT_CHARACTER].
- ///
- /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
- pub fn as_str(&self) -> Cow<'_, str> {
- self.encoding.decode_without_bom_handling(self.as_bytes()).0
- }
-
- /// Returns this string recoded in `encoding`. Invalid characters will be
- /// replaced by [REPLACEMENT_CHARACTER].
- ///
- /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
- pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
- let utf8 = self.as_str();
- match encoding.encode(&utf8).0 {
- Cow::Borrowed(_) => {
- // Recoding into UTF-8 and then back did not change anything.
- Cow::from(self.as_bytes())
- }
- Cow::Owned(owned) => Cow::Owned(owned),
- }
- }
-
- /// Returns the bytes in the string, in its encoding.
- pub fn as_bytes(&self) -> &[u8] {
- &self.raw.borrow().0
- }
-
- /// Compares this string and `other` for equality, ignoring trailing ASCII
- /// spaces in either string for the purpose of comparison. (This is
- /// acceptable because we assume that the encoding is ASCII-compatible.)
- pub fn eq_ignore_trailing_spaces<R2>(&self, other: &EncodedString<R2>) -> bool
- where
- R2: Borrow<BorrowedRawString>,
- {
- self.borrowed()
- .raw
- .eq_ignore_trailing_spaces(&other.borrowed().raw)
- }
-
- /// Returns the string's [Encoding].
- pub fn encoding(&self) -> &'static Encoding {
- self.encoding
- }
-
- /// Returns a borrowed form of this string.
- pub fn borrowed<'a>(&'a self) -> EncodedString<&'a BorrowedRawString> {
- EncodedString {
- encoding: self.encoding,
- raw: self.raw.borrow(),
- }
- }
-
- /// Returns true if this string is empty.
- pub fn is_empty(&self) -> bool {
- self.raw.borrow().is_empty()
- }
-
- /// Returns a helper for displaying this string in double quotes.
- pub fn quoted(&self) -> impl Display {
- Quoted(self.as_str())
- }
-}
-
-impl<R> Borrow<BorrowedRawString> for EncodedString<R>
-where
- R: Borrow<BorrowedRawString>,
-{
- fn borrow(&self) -> &BorrowedRawString {
- self.raw.borrow()
- }
-}
-
-impl Borrow<OwnedRawString> for OwnedEncodedString {
- fn borrow(&self) -> &OwnedRawString {
- &self.raw
- }
-}
-
-impl BorrowMut<OwnedRawString> for OwnedEncodedString {
- fn borrow_mut(&mut self) -> &mut OwnedRawString {
- &mut self.raw
- }
-}
-
-impl OwnedEncodedString {
- pub fn resize(&mut self, new_len: usize) -> Result<(), ()> {
- match new_len.cmp(&self.len()) {
- Ordering::Less => {
- if !self.as_bytes()[new_len..].iter().all(|b| *b == b' ') {
- return Err(());
- }
- self.raw.0.truncate(new_len);
- }
- Ordering::Equal => (),
- Ordering::Greater => self.raw.0.extend((self.len()..new_len).map(|_| b' ')),
- }
- Ok(())
- }
-
- /// Removes any trailing ASCII spaces.
- pub fn trim_end(&mut self) {
- while self.raw.0.pop_if(|c| *c == b' ').is_some() {}
- }
-}
-
-impl<'a> From<BorrowedEncodedString<'a>> for OwnedEncodedString {
- fn from(value: BorrowedEncodedString<'a>) -> Self {
- Self {
- raw: value.raw.into(),
- encoding: value.encoding,
- }
- }
-}
-
-impl From<&str> for OwnedEncodedString {
- fn from(value: &str) -> Self {
- Self {
- raw: RawString(value.into()),
- encoding: UTF_8,
- }
- }
-}
-
-impl<'a> From<&'a str> for BorrowedEncodedString<'a> {
- fn from(value: &'a str) -> Self {
- Self {
- raw: BorrowedRawString::new(value.as_bytes()),
- encoding: UTF_8,
- }
- }
-}
-
-impl<'a> From<&'a String> for BorrowedEncodedString<'a> {
- fn from(value: &'a String) -> Self {
- value.as_str().into()
- }
-}
-
-impl<R> Serialize for EncodedString<R>
-where
- R: Borrow<BorrowedRawString>,
-{
- fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
- where
- S: serde::Serializer,
- {
- self.as_str().serialize(serializer)
- }
-}
-
-impl<R> Display for EncodedString<R>
-where
- R: Borrow<BorrowedRawString>,
-{
- fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
- write!(f, "{}", self.as_str())
- }
-}
-
-impl<R, R2> PartialEq<EncodedString<R2>> for EncodedString<R>
+impl<T> Hash for WithEncoding<T>
where
- R: Borrow<BorrowedRawString>,
- R2: Borrow<BorrowedRawString>,
+ T: Hash,
{
- fn eq(&self, other: &EncodedString<R2>) -> bool {
- // XXX should this consider the encodings?
- self.borrowed().raw.eq(other.borrowed().raw)
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ // Only the wrapped string is fed to the hasher; the encoding is not.
+ // NOTE(review): confirm that equality on `WithEncoding` likewise
+ // ignores the encoding, so that equal values hash alike.
+ Hash::hash(&self.inner, state);
}
}
use unicase::UniCase;
use crate::{
- data::{AsEncodedString, Datum, OwnedEncodedString, OwnedRawString, ResizeError},
+ data::{ByteString, Datum, EncodedStringTrait, ResizeError, WithEncoding},
format::{DisplayPlain, Format},
identifier::{ByIdentifier, HasIdentifier, Identifier},
output::pivot::{
/// one value (the "counted value") means that the box was checked, and any
/// other value means that it was not.
MultipleDichotomy {
- datum: Datum<OwnedRawString>,
+ datum: Datum<ByteString>,
labels: CategoryLabels,
},
}
#[derive(Clone, Default, PartialEq, Eq, Serialize)]
-pub struct ValueLabels(pub HashMap<Datum<OwnedRawString>, String>);
+pub struct ValueLabels(pub HashMap<Datum<ByteString>, String>);
impl ValueLabels {
pub fn new() -> Self {
self.0.is_empty()
}
- pub fn get(&self, datum: &Datum<OwnedRawString>) -> Option<&str> {
+ pub fn get(&self, datum: &Datum<ByteString>) -> Option<&str> {
self.0.get(datum).map(|s| s.as_str())
}
- pub fn insert(&mut self, datum: Datum<OwnedRawString>, label: String) -> Option<String> {
+ pub fn insert(&mut self, datum: Datum<ByteString>, label: String) -> Option<String> {
self.0.insert(datum, label)
}
pub fn add_value(
&mut self,
- mut value: Datum<OwnedEncodedString>,
+ mut value: Datum<WithEncoding<ByteString>>,
) -> Result<(), MissingValuesError> {
if self.inner.values.len() > 2
|| (self.inner.range().is_some() && self.inner.values.len() > 1)
pub fn add_values(
&mut self,
- values: impl IntoIterator<Item = Datum<OwnedEncodedString>>,
+ values: impl IntoIterator<Item = Datum<WithEncoding<ByteString>>>,
) -> Result<(), MissingValuesError> {
let n = self.inner.values.len();
for value in values {
#[derive(Clone, Default, Serialize)]
pub struct MissingValues {
/// Individual missing values, up to 3 of them.
- values: Vec<Datum<OwnedEncodedString>>,
+ values: Vec<Datum<WithEncoding<ByteString>>>,
/// Optional range of missing values.
range: Option<MissingValueRange>,
pub fn clear(&mut self) {
*self = Self::default();
}
- pub fn values(&self) -> &[Datum<OwnedEncodedString>] {
+ pub fn values(&self) -> &[Datum<WithEncoding<ByteString>>] {
&self.values
}
}
pub fn new(
- mut values: Vec<Datum<OwnedEncodedString>>,
+ mut values: Vec<Datum<WithEncoding<ByteString>>>,
range: Option<MissingValueRange>,
) -> Result<Self, MissingValuesError> {
if values.len() > 3 {
pub fn contains<S>(&self, value: &Datum<S>) -> bool
where
- S: AsEncodedString,
+ S: EncodedStringTrait,
{
if self
.values
.iter()
- .any(|datum| datum.eq_ignore_trailing_spaces(&value))
+ .any(|datum| datum.eq_ignore_trailing_spaces(value))
{
return true;
}
use crate::{
calendar::{calendar_offset_to_gregorian, day_of_year, month_name, short_month_name},
- data::{AsEncodedString, BorrowString, Datum, QuotedDatum},
+ data::{ByteStr, Datum, EncodedStringTrait, QuotedDatum, WithEncoding},
endian::{endian_to_smallvec, ToBytes},
format::{Category, DateTemplate, Decimal, Format, NumberStyle, Settings, TemplateItem, Type},
settings::{EndianSettings, Settings as PsppSettings},
impl<'a, D> Datum<D>
where
- D: AsEncodedString + BorrowString,
+ D: EncodedStringTrait,
{
/// Returns an object that implements [Display] for printing this
/// [EncodedDatum] as `format`.
///
/// [Display]: std::fmt::Display
- pub fn display(&'a self, format: Format) -> DisplayDatum<'a, D::Borrowed<'a>>
- where
- D::Borrowed<'a>: AsEncodedString,
- {
- DisplayDatum::new(format, self.borrowed_string())
+ pub fn display(&'a self, format: Format) -> DisplayDatum<'a, WithEncoding<ByteStr<'a>>> {
+ DisplayDatum::new(format, self.as_borrowed())
}
pub fn display_plain(&self) -> QuotedDatum<'_, D> {
impl<'a, 'b, B> Display for DisplayDatum<'b, B>
where
- B: AsEncodedString,
+ B: EncodedStringTrait,
{
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
let number = match &self.datum {
Datum::Number(number) => *number,
Datum::String(string) => {
if self.format.type_() == Type::AHex {
- for byte in string.as_encoded_string().as_bytes() {
+ for byte in string.raw_string_bytes() {
write!(f, "{byte:02x}")?;
}
} else {
let quote = if self.quote_strings { "\"" } else { "" };
- let s = string.as_encoded_string();
- let s = s.as_str();
+ let s = string.as_str();
let s = if self.trim_spaces {
s.trim_end_matches(' ')
} else {
impl<'b, B> DisplayDatum<'b, B>
where
- B: AsEncodedString,
+ B: EncodedStringTrait,
{
pub fn new(format: Format, datum: Datum<B>) -> Self {
let settings = PsppSettings::global();
use smallvec::SmallVec;
use crate::{
- data::{Datum, OwnedEncodedString},
+ data::{ByteString, Datum, WithEncoding},
endian::Endian,
format::{AbstractFormat, Epoch, Format, Settings, Type, UncheckedFormat, CC},
lex::{scan::StringScanner, segment::Syntax, Punct, Token},
let format: Format = format.try_into().unwrap();
assert_eq!(tokens.get(1), Some(&Token::Punct(Punct::Colon)));
let expected = tokens[2].as_string().unwrap();
- let actual = Datum::<OwnedEncodedString>::Number(value)
+ let actual = Datum::<WithEncoding<ByteString>>::Number(value)
.display(format)
.with_settings(&settings)
.with_endian(endian)
}
fn test_with_settings(value: f64, expected: [&str; 2], settings: &Settings) {
- let value = Datum::<OwnedEncodedString>::from(value);
+ let value = Datum::<WithEncoding<ByteString>>::from(value);
for (expected, d) in expected.into_iter().zip([2, 1].into_iter()) {
assert_eq!(
&value
fn non_ascii_cc() {
fn test(settings: &Settings, value: f64, expected: &str) {
assert_eq!(
- &Datum::<OwnedEncodedString>::from(value)
+ &Datum::<WithEncoding<ByteString>>::from(value)
.display(Format::new(Type::CC(CC::A), 10, 2).unwrap())
.with_settings(settings)
.to_string(),
assert_eq!(tokens.get(1), Some(&Token::Punct(Punct::Colon)));
let expected = tokens[2].as_string().unwrap();
let mut actual = SmallVec::<[u8; 16]>::new();
- Datum::<OwnedEncodedString>::Number(value)
+ Datum::<WithEncoding<ByteString>>::Number(value)
.display(format)
.with_endian(endian)
.write(&mut actual, UTF_8)
.zip(1..)
{
let formatted = parser
- .parse(&input)
+ .parse(input)
.unwrap()
.with_encoding(UTF_8)
.display(format)
use unicode_width::UnicodeWidthStr;
use crate::{
- data::Datum,
- data::OwnedRawString,
+ data::{ByteString, Datum, },
dictionary::{VarType, VarWidth},
sys::raw,
};
}
}
- pub fn default_value(&self) -> Datum<OwnedRawString> {
+ pub fn default_value(&self) -> Datum<ByteString> {
match self.var_type() {
VarType::Numeric => Datum::sysmis(),
- VarType::String => Datum::String(OwnedRawString::default()),
+ VarType::String => Datum::String(ByteString::default()),
}
}
}
Ok(self)
}
- pub fn default_value(&self) -> Datum<OwnedRawString> {
+ pub fn default_value(&self) -> Datum<ByteString> {
match self.var_width() {
VarWidth::Numeric => Datum::sysmis(),
- VarWidth::String(width) => Datum::String(OwnedRawString::spaces(width as usize)),
+ VarWidth::String(width) => Datum::String(ByteString::spaces(width as usize)),
}
}
use crate::{
calendar::{calendar_gregorian_to_offset, DateError},
- data::{BorrowedEncodedString, Datum, OwnedDatum, OwnedEncodedString},
+ data::{ByteString, Datum, EncodedStringTrait, OwnedDatum, RawStringTrait, WithEncoding},
endian::{Endian, Parse},
format::{DateTemplate, Decimals, Settings, TemplateItem, Type},
settings::{EndianSettings, Settings as PsppSettings},
#[derive(Clone, Debug)]
pub struct ParseError {
type_: Type,
- input: OwnedEncodedString,
+ input: WithEncoding<ByteString>,
kind: ParseErrorKind,
}
write!(
f,
"{} cannot be parsed as {}: {}",
- self.input.borrowed().quoted(),
+ self.input.quoted(),
&self.type_,
&self.kind
)
/// input into UTF-8, but this will screw up parsing of binary formats,
/// because recoding bytes from (e.g.) windows-1252 into UTF-8, and then
/// interpreting them as a binary number yields nonsense.
- pub fn parse<'b, T>(&self, input: T) -> Result<OwnedDatum, ParseError>
- where
- T: Into<BorrowedEncodedString<'b>>,
- {
- let input: BorrowedEncodedString = input.into();
+ pub fn parse(&self, input: impl EncodedStringTrait) -> Result<OwnedDatum, ParseError> {
if input.is_empty() {
- return Ok(self.type_.default_value());
+ return Ok(self
+ .type_
+ .default_value()
+ .with_encoding(self.output_encoding));
}
match self.type_ {
Type::F | Type::Comma | Type::Dot | Type::Dollar | Type::Pct | Type::E => {
| Type::DTime => self.parse_date(&input.as_str()),
Type::WkDay => self.parse_wkday(&input.as_str()),
Type::Month => self.parse_month(&input.as_str()),
- Type::P => self.parse_p(input.as_bytes()),
- Type::PK => self.parse_pk(input.as_bytes()),
- Type::IB => self.parse_ib(input.as_bytes()),
- Type::PIB => self.parse_pib(input.as_bytes()),
- Type::RB => self.parse_rb(input.as_bytes()),
+ Type::P => self.parse_p(input.raw_string_bytes()),
+ Type::PK => self.parse_pk(input.raw_string_bytes()),
+ Type::IB => self.parse_ib(input.raw_string_bytes()),
+ Type::PIB => self.parse_pib(input.raw_string_bytes()),
+ Type::RB => self.parse_rb(input.raw_string_bytes()),
Type::A => Ok(Datum::String(
- input.to_encoding(self.output_encoding).into(),
+ input.to_encoding(self.output_encoding).into_owned(),
)),
Type::AHex => self.parse_ahex(&input.as_str()),
}
.map_err(|kind| ParseError {
type_: self.type_,
- input: input.into(),
+ input: input.cloned(),
kind,
})
}
};
result.push((hi * 16 + lo) as u8);
}
- Ok(Datum::String(result.into()))
+ Ok(Datum::String(
+ ByteString(result).with_encoding(self.output_encoding),
+ ))
}
fn parse_hex(&self, input: &str) -> Result<Option<u64>, ParseErrorKind> {
use crate::{
calendar::{days_in_month, is_leap_year},
- data::{BorrowedRawString, Datum, EncodedString, OwnedDatum},
+ data::{ByteStr, Datum, EncodedStringTrait, OwnedDatum, RawStringTrait},
endian::Endian,
format::{
parse::{ParseError, ParseErrorKind, Sign},
let result = type_.parser(UTF_8).parse(&input);
let error = result.clone().err();
let value = result
- .unwrap_or(type_.default_value())
- .with_encoding(UTF_8)
+ .unwrap_or(type_.default_value().with_encoding(UTF_8))
.display(Format::new(Type::F, 10, 4).unwrap())
.to_string();
if value != expected {
let parsed = Type::RB
.parser(UTF_8)
.with_endian(EndianSettings::new(Endian::Big))
- .parse(EncodedString::new(BorrowedRawString::new(&raw[..]), UTF_8))
+ .parse(ByteStr(raw.as_slice()).with_encoding(UTF_8))
.unwrap()
.as_number()
.unwrap()
.unwrap()
.as_string()
.unwrap()
- .as_encoded(UTF_8)
.as_str(),
"abcdefgh"
);
use tlo::parse_tlo;
use crate::{
- data::{AsEncodedString, Datum, OwnedEncodedString, OwnedRawString},
+ data::{ByteString, Datum, EncodedStringTrait, RawStringTrait},
dictionary::{VarType, Variable},
format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat},
settings::{Settings, Show},
}
pub fn new_datum<B>(value: &Datum<B>) -> Self
where
- B: AsEncodedString,
+ B: EncodedStringTrait,
{
match value {
Datum::Number(number) => Self::new_number(*number),
- Datum::String(string) => Self::new_user_text(string.as_encoded_string().as_str()),
+ Datum::String(string) => Self::new_user_text(string.as_str()),
}
}
- pub fn new_variable_value(variable: &Variable, value: &Datum<OwnedRawString>) -> Self {
+ pub fn new_variable_value(variable: &Variable, value: &Datum<ByteString>) -> Self {
let var_name = Some(variable.name.as_str().into());
let value_label = variable.value_labels.get(value).map(String::from);
match value {
Datum::String(string) => Self::new(ValueInner::String(StringValue {
show: None,
hex: variable.print_format.type_() == Type::AHex,
- s: string.as_encoded(variable.encoding()).as_str().into_owned(),
+ s: string
+ .as_ref()
+ .with_encoding(variable.encoding())
+ .into_string(),
var_name,
value_label,
})),
write!(
&mut buf,
"{}",
- Datum::<OwnedEncodedString>::Number(*value).display(format)
+ Datum::<&str>::Number(*value).display(format)
)
.unwrap();
write!(f, "{}", buf.trim_start_matches(' '))?;
use crate::{
calendar::date_time_to_pspp,
crypto::EncryptedFile,
- data::{Case, Datum, OwnedRawString},
+ data::{ByteString, Case, Datum, MutRawString, RawStringTrait},
dictionary::{
DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, InvalidRole, MissingValues,
MissingValuesError, MultipleResponseType, VarWidth, Variable,
variable
.missing_values_mut()
- .replace(input.missing_values.decode(encoding).unwrap());
+ .replace(input.missing_values.decode(encoding).unwrap())
+ .unwrap();
variable.print_format = decode_format(
input.print_format,
.missing_values
.into_iter()
.map(|v| {
- let mut value = OwnedRawString::from(v.0.as_slice());
- value.resize(variable.width.as_string_width().unwrap());
+ let mut value = ByteString::from(v.0.as_slice());
+ let _ = value.resize(variable.width.as_string_width().unwrap()); // XXX check error
Datum::String(value.with_encoding(encoding))
})
.collect::<Vec<_>>();
}
impl Iterator for Cases {
- type Item = Result<Case<Vec<Datum<OwnedRawString>>>, raw::Error>;
+ type Item = Result<Case<Vec<Datum<ByteString>>>, raw::Error>;
fn next(&mut self) -> Option<Self::Item> {
self.inner
//! raw details. Most readers will want to use higher-level interfaces.
use crate::{
- data::{BorrowedRawString, Datum, OwnedRawString, RawCase, },
+ data::{ByteStr, ByteString, Datum, RawCase},
dictionary::{VarType, VarWidth},
endian::{Endian, Parse, ToBytes},
identifier::{Error as IdError, Identifier},
/// one variable record per 8-byte segment.
Variable(
/// The record.
- VariableRecord<OwnedRawString>,
+ VariableRecord<ByteString>,
),
/// Value labels for numeric and short string variables.
/// These appear after the variable records.
ValueLabel(
/// The record.
- ValueLabelRecord<RawDatum, OwnedRawString>,
+ ValueLabelRecord<RawDatum, ByteString>,
),
/// Document record.
/// Multiple response variable record.
MultipleResponse(
/// The record.
- MultipleResponseRecord<OwnedRawString, OwnedRawString>,
+ MultipleResponseRecord<ByteString, ByteString>,
),
/// Value labels for long string variables.
LongStringValueLabels(
/// The record.
- LongStringValueLabelRecord<OwnedRawString, OwnedRawString>,
+ LongStringValueLabelRecord<ByteString, ByteString>,
),
/// Missing values for long string variables.
/// variable records.
LongStringMissingValues(
/// The record.
- LongStringMissingValueRecord<OwnedRawString>,
+ LongStringMissingValueRecord<ByteString>,
),
/// Encoding record.
output
}
- fn decode<'a>(&mut self, input: &'a OwnedRawString) -> Cow<'a, str> {
+ fn decode<'a>(&mut self, input: &'a ByteString) -> Cow<'a, str> {
self.decode_slice(input.0.as_slice())
}
/// Decodes `input` to an [Identifier] using our encoding.
- pub fn decode_identifier(&mut self, input: &OwnedRawString) -> Result<Identifier, IdError> {
+ pub fn decode_identifier(&mut self, input: &ByteString) -> Result<Identifier, IdError> {
let decoded = &self.decode(input);
self.new_identifier(decoded)
}
match self {
RawDatum::Number(Some(number)) => write!(f, "{number:?}"),
RawDatum::Number(None) => write!(f, "SYSMIS"),
- RawDatum::String(s) => write!(f, "{:?}", BorrowedRawString::new(s)),
+ RawDatum::String(s) => write!(f, "{:?}", ByteStr(s)),
}
}
}
{
match self {
RawDatum::Number(number) => number.serialize(serializer),
- RawDatum::String(s) => BorrowedRawString::new(s).serialize(serializer),
+ RawDatum::String(s) => ByteStr(s).serialize(serializer),
}
}
}
/// Decodes a `RawDatum` into a [Datum] given that we now know the string
/// width.
- pub fn decode(&self, width: VarWidth) -> Datum<OwnedRawString> {
+ pub fn decode(&self, width: VarWidth) -> Datum<ByteString> {
match self {
Self::Number(x) => Datum::Number(*x),
Self::String(s) => {
let width = width.as_string_width().unwrap();
- Datum::String(OwnedRawString::from(&s[..width]))
+ Datum::String(ByteString::from(&s[..width]))
}
}
}
}
-impl Datum<OwnedRawString> {
+impl Datum<ByteString> {
fn read_case<R: Read + Seek>(
reader: &mut R,
case_vars: &[CaseVar],
reader: Option<R>,
warn: Box<dyn FnMut(Warning) + 'a>,
- header: FileHeader<OwnedRawString>,
+ header: FileHeader<ByteString>,
var_types: VarTypes,
state: ReaderState,
}
/// Returns the header in this reader.
- pub fn header(&self) -> &FileHeader<OwnedRawString> {
+ pub fn header(&self) -> &FileHeader<ByteString> {
&self.header
}
fn new<R>(
reader: R,
var_types: VarTypes,
- header: &FileHeader<OwnedRawString>,
+ header: &FileHeader<ByteString>,
ztrailer_offset: Option<u64>,
) -> Self
where
} else {
big
};
- write!(f, "{number}/{:?}", BorrowedRawString::new(&self.0))
+ write!(f, "{number}/{:?}", ByteStr(&self.0))
}
}
impl<const N: usize> Debug for RawStrArray<N> {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- write!(f, "{:?}", BorrowedRawString::new(&self.0))
+ write!(f, "{:?}", ByteStr(&self.0))
}
}
where
S: serde::Serializer,
{
- BorrowedRawString::new(&self.0).serialize(serializer)
+ ByteStr(&self.0).serialize(serializer)
}
}
Ok(vec)
}
-fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<OwnedRawString, IoError> {
+fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<ByteString, IoError> {
let length: u32 = endian.parse(read_bytes(r)?);
Ok(read_vec(r, length as usize)?.into())
}
};
use crate::{
- data::{Datum, OwnedRawString, },
+ data::{ByteString, Datum},
dictionary::{
Alignment, Attributes, CategoryLabels, Measure, MissingValueRange, MissingValues,
MissingValuesError, VarType, VarWidth,
pub file_label: [u8; 64],
}
-impl FileHeader<OwnedRawString> {
+impl FileHeader<ByteString> {
/// Reads a header record from `r`, reporting any warnings via `warn`.
pub fn read<R>(r: &mut R, warn: &mut dyn FnMut(Warning)) -> Result<Self, Error>
where
#[derive(Clone, Debug, Default, Serialize)]
pub struct RawMissingValues {
/// Individual missing values, up to 3 of them.
- pub values: Vec<Datum<OwnedRawString>>,
+ pub values: Vec<Datum<ByteString>>,
/// Optional range of missing values.
pub range: Option<MissingValueRange>,
}
impl RawMissingValues {
- pub fn new(values: Vec<Datum<OwnedRawString>>, range: Option<MissingValueRange>) -> Self {
+ pub fn new(values: Vec<Datum<ByteString>>, range: Option<MissingValueRange>) -> Self {
Self { values, range }
}
let width = width.min(8) as usize;
let values = values
.into_iter()
- .map(|value| Datum::String(OwnedRawString::from(&value[..width])))
+ .map(|value| Datum::String(ByteString::from(&value[..width])))
.collect();
return Ok(Self::new(values, None));
}
pub name: [u8; 8],
}
-impl VariableRecord<OwnedRawString> {
+impl VariableRecord<ByteString> {
/// Reads a variable record from `r`.
pub fn read<R>(
r: &mut R,
pub const MAX_INDEXES: u32 = u32::MAX / 8;
}
-impl ValueLabelRecord<RawDatum, OwnedRawString> {
+impl ValueLabelRecord<RawDatum, ByteString> {
pub(super) fn read<R: Read + Seek>(
r: &mut R,
endian: Endian,
pub offsets: Range<u64>,
/// The text content of the record.
- pub text: OwnedRawString,
+ pub text: ByteString,
}
impl TextRecord {
/// Multiple-dichotomy set.
MultipleDichotomy {
/// The value that is counted in the set.
- value: OwnedRawString,
+ value: ByteString,
/// What categories are labeled.
labels: CategoryLabels,
pub short_names: Vec<I>,
}
-impl MultipleResponseSet<OwnedRawString, OwnedRawString> {
+impl MultipleResponseSet<ByteString, ByteString> {
/// Parses a multiple-response set from `input`. Returns the set and the
/// input remaining to be parsed following the set.
fn parse(input: &[u8]) -> Result<(Self, &[u8]), WarningDetails> {
pub sets: Vec<MultipleResponseSet<I, S>>,
}
-impl MultipleResponseRecord<OwnedRawString, OwnedRawString> {
+impl MultipleResponseRecord<ByteString, ByteString> {
/// Parses a multiple-response set from `ext`.
pub fn parse(ext: &Extension) -> Result<Record, WarningDetails> {
ext.check_size(Some(1), None, "multiple response set record")?;
}
}
-impl MultipleResponseRecord<OwnedRawString, OwnedRawString> {
+impl MultipleResponseRecord<ByteString, ByteString> {
/// Decodes this record using `decoder`.
pub fn decode(self, decoder: &mut Decoder) -> MultipleResponseRecord<Identifier, String> {
let mut sets = Vec::new();
}
}
-fn parse_counted_string(input: &[u8]) -> Result<(OwnedRawString, &[u8]), WarningDetails> {
+fn parse_counted_string(input: &[u8]) -> Result<(ByteString, &[u8]), WarningDetails> {
let Some(space) = input.iter().position(|&b| b == b' ') else {
return Err(MultipleResponseWarning::CountedStringMissingSpace.into());
};
pub missing_values: Vec<RawStrArray<8>>,
}
-impl LongStringMissingValues<OwnedRawString> {
+impl LongStringMissingValues<ByteString> {
/// Decodes these settings using `decoder`.
fn decode(
&self,
pub values: Vec<LongStringMissingValues<N>>,
}
-impl LongStringMissingValueRecord<OwnedRawString> {
+impl LongStringMissingValueRecord<ByteString> {
/// Parses this record from `ext`.
pub fn parse(
ext: &Extension,
pub width: u32,
/// `(value, label)` pairs, where each value is `width` bytes.
- pub labels: Vec<(OwnedRawString, S)>,
+ pub labels: Vec<(ByteString, S)>,
}
-impl LongStringValueLabels<OwnedRawString, OwnedRawString> {
+impl LongStringValueLabels<ByteString, ByteString> {
/// Decodes a set of long string value labels using `decoder`.
fn decode(
&self,
pub labels: Vec<LongStringValueLabels<N, S>>,
}
-impl LongStringValueLabelRecord<OwnedRawString, OwnedRawString> {
+impl LongStringValueLabelRecord<ByteString, ByteString> {
/// Parses this record from `ext` using `endian`.
fn parse(ext: &Extension, endian: Endian) -> Result<Record, WarningDetails> {
ext.check_size(Some(1), None, "long string value labels record")?;
use crate::{
crypto::EncryptedFile,
- data::{BorrowedDatum, Datum, OwnedDatum, RawString},
+ data::{BorrowedDatum, ByteString, Datum, OwnedDatum},
dictionary::{Dictionary, VarWidth, Variable},
endian::Endian,
identifier::Identifier,
let mut dictionary = Dictionary::new(UTF_8);
let mut s1 = Variable::new(Identifier::new("s1").unwrap(), VarWidth::String(9), UTF_8);
s1.value_labels.insert(
- OwnedDatum::String(RawString(String::from("abc ").into_bytes())),
+ Datum::String(ByteString::from("abc ")),
String::from("First value label"),
);
s1.value_labels.insert(
- OwnedDatum::String(RawString(String::from("abcdefgh ").into_bytes())),
+ Datum::String(ByteString::from("abcdefgh ")),
String::from("Second value label"),
);
s1.value_labels.insert(
- OwnedDatum::String(RawString(String::from("abcdefghi").into_bytes())),
+ Datum::String(ByteString::from("abcdefghi")),
String::from("Third value label"),
);
s1.missing_values_mut()
use std::{
- borrow::{Borrow, Cow},
+ borrow::Cow,
collections::HashMap,
fmt::Write as _,
fs::File,
use smallvec::SmallVec;
use crate::{
- data::{BorrowedRawString, Datum},
+ data::{Datum, RawStringTrait},
dictionary::{
Alignment, Attributes, CategoryLabels, Dictionary, Measure, MultipleResponseType,
ValueLabels, VarWidth,
let label = self.dictionary.encoding().encode(&label).0;
(
value.len() as u32,
- value.as_bytes(),
+ value.raw_string_bytes(),
label.len() as u32,
&label[..],
)
.write_le(&mut cursor)?;
for value in variable.missing_values().values() {
- let value = value.as_string().unwrap().as_bytes();
+ let value = value.as_string().unwrap().raw_string_bytes();
let bytes = value.get(..8).unwrap_or(value);
Padded::exact(bytes, 8, b' ').write_le(&mut cursor).unwrap();
}
impl<B> BinWrite for Datum<B>
where
- B: Borrow<BorrowedRawString>,
+ B: RawStringTrait,
{
type Args<'a> = ();
) -> binrw::BinResult<()> {
match self {
Datum::Number(number) => number.unwrap_or(f64::MIN).write_options(writer, endian, ()),
- Datum::String(raw_string) => raw_string.borrow().0.write_options(writer, endian, ()),
+ Datum::String(raw_string) => {
+ raw_string
+ .raw_string_bytes()
+ .write_options(writer, endian, ())
+ }
}
}
}
case: impl Iterator<Item = Datum<B>>,
) -> Result<(), BinError>
where
- B: Borrow<BorrowedRawString>,
+ B: RawStringTrait,
{
for (var, datum) in zip_eq(self.case_vars, case) {
match var {
.unwrap_or(f64::MIN)
.write_le(&mut self.inner)?,
CaseVar::String(encoding) => {
- let mut s = datum.as_string().unwrap().borrow().as_bytes();
+ let mut s = datum.as_string().unwrap().raw_string_bytes();
for segment in encoding {
let data;
(data, s) = s.split_at(segment.data_bytes);
case: impl Iterator<Item = Datum<B>>,
) -> Result<(), BinError>
where
- B: Borrow<BorrowedRawString>,
+ B: RawStringTrait,
{
for (var, datum) in zip_eq(self.case_vars, case) {
match var {
},
CaseVar::String(encoding) => {
- let mut s = datum.as_string().unwrap().borrow().as_bytes();
+ let mut s = datum.as_string().unwrap().raw_string_bytes();
for segment in encoding {
let data;
(data, s) = s.split_at(segment.data_bytes);
case: impl IntoIterator<Item = Datum<B>>,
) -> Result<(), BinError>
where
- B: Borrow<BorrowedRawString>,
+ B: RawStringTrait,
{
match self.inner.as_mut().unwrap() {
Either::Left(inner) => {