sys::raw::RawDatum,
};
-/// An owned string in an unspecified character encoding.
+/// A string in an unspecified character encoding.
///
-/// A [RawString] is usually associated with a [Variable] and uses the
-/// variable's character encoding. We assume that the encoding is one supported
-/// by [encoding_rs] with byte units (that is, not a `UTF-16` encoding). All of
+/// `RawString` is usually associated with a [Variable], in the variable's
+/// character encoding. We assume that the encoding is one supported by
+/// [encoding_rs] with byte units (that is, not a `UTF-16` encoding). All of
/// these encodings have some basic ASCII compatibility.
///
-/// A [RawString] owns its contents and can grow and shrink, like a [Vec] or
-/// [String]. For a borrowed raw string, see [RawStr].
+/// `RawString` is parameterized by its content type, which is either `Vec<u8>`
+/// for an owned raw string (aliased as [OwnedRawString]) or `[u8]` for a
+/// borrowed raw string (aliased as [BorrowedRawString]).
///
/// [Variable]: crate::dictionary::Variable
-#[derive(Clone, PartialEq, Default, Eq, PartialOrd, Ord, Hash)]
-pub struct RawString(pub Vec<u8>);
+// `repr(transparent)` guarantees that `RawString<B>` has the same layout as
+// `B`.  `BorrowedRawString::new` relies on this to cast `&[u8]` to
+// `&RawString<[u8]>`.
+#[repr(transparent)]
+#[derive(Clone, Default, Hash)]
+pub struct RawString<B>(pub B)
+where
+ B: ?Sized;
+
+/// Raw strings compare equal when their bytes are equal, regardless of
+/// whether either side owns or borrows its contents.
+impl<B, B2> PartialEq<RawString<B2>> for RawString<B>
+where
+ B: Borrow<[u8]> + ?Sized,
+ B2: Borrow<[u8]> + ?Sized,
+{
+ fn eq(&self, other: &RawString<B2>) -> bool {
+ let lhs: &[u8] = self.0.borrow();
+ let rhs: &[u8] = other.0.borrow();
+ lhs == rhs
+ }
+}
+
+// Equality is byte-wise comparison of the underlying `[u8]`, which is a total
+// equivalence relation, so `Eq` holds.
+impl<B> Eq for RawString<B> where B: Borrow<[u8]> + ?Sized {}
+
+/// Raw strings are ordered by lexicographic comparison of their bytes,
+/// regardless of whether either side owns or borrows its contents.
+impl<B, B2> PartialOrd<RawString<B2>> for RawString<B>
+where
+ B: Borrow<[u8]> + ?Sized,
+ B2: Borrow<[u8]> + ?Sized,
+{
+ fn partial_cmp(&self, other: &RawString<B2>) -> Option<Ordering> {
+ let lhs: &[u8] = self.0.borrow();
+ let rhs: &[u8] = other.0.borrow();
+ // Byte slices are totally ordered, so the comparison never fails.
+ Some(lhs.cmp(rhs))
+ }
+}
-impl RawString {
+/// Total order on raw strings: lexicographic comparison of their bytes.
+impl<B> Ord for RawString<B>
+where
+ B: Borrow<[u8]> + ?Sized,
+{
+ fn cmp(&self, other: &Self) -> Ordering {
+ let lhs: &[u8] = self.0.borrow();
+ let rhs: &[u8] = other.0.borrow();
+ Ord::cmp(lhs, rhs)
+ }
+}
+
+/// A [RawString] that owns its contents.
+pub type OwnedRawString = RawString<Vec<u8>>;
+
+/// A [RawString] that borrows its contents.
+///
+/// Because `[u8]` is not [Sized], [BorrowedRawString] may itself only be used
+/// borrowed.
+pub type BorrowedRawString = RawString<[u8]>;
+
+/// Lets an [OwnedRawString] be used wherever a `&BorrowedRawString` is
+/// expected, such as the `B: Borrow<RawString<[u8]>>` bounds on [Datum].
+impl Borrow<BorrowedRawString> for OwnedRawString {
+ fn borrow(&self) -> &BorrowedRawString {
+ // `new` already returns a reference, so no additional `&` is needed.
+ BorrowedRawString::new(self.as_bytes())
+ }
+}
+
+impl BorrowedRawString {
+ /// Creates a new [BorrowedRawString] that borrows `s`, without copying it.
+ pub fn new(s: &[u8]) -> &Self {
+ // SAFETY: `BorrowedRawString` is `RawString<[u8]>`, whose only field is
+ // the wrapped `[u8]`, so a reference to the wrapped type is cast to a
+ // reference to the wrapper type.
+ //
+ // NOTE(review): this cast is only guaranteed to be sound if `RawString`
+ // is declared `#[repr(transparent)]`, as the old `RawStr` was -- confirm
+ // that the attribute is present on the struct definition.
+ unsafe { &*(s as *const [u8] as *const BorrowedRawString) }
+ }
+}
+
+impl OwnedRawString {
/// Creates a new [RawString] that consists of `n` ASCII spaces.
pub fn spaces(n: usize) -> Self {
Self(std::iter::repeat_n(b' ', n).collect())
}
- /// Creates an [EncodedStr] with `encoding` that borrows this string's
- /// contents.
- pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
- EncodedStr::new(&self.0, encoding)
- }
-
/// Extends or shortens this [RawString] to exactly `len` bytes. If the
/// string needs to be extended, does so by appending spaces.
///
}
}
-impl Borrow<RawStr> for RawString {
- fn borrow(&self) -> &RawStr {
- RawStr::from_bytes(&self.0)
+impl<B> RawString<B>
+where
+ B: Borrow<[u8]> + ?Sized,
+{
+ /// Returns the raw string's contents as a borrowed byte slice.
+ pub fn as_bytes(&self) -> &[u8] {
+ self.0.borrow()
+ }
+
+ /// Returns the string's length in bytes.
+ pub fn len(&self) -> usize {
+ self.0.borrow().len()
+ }
+
+ /// Returns true if this raw string can be resized to `new_len` bytes
+ /// without dropping non-space characters: that is, if `new_len` is at
+ /// least the current length, or if every byte from offset `new_len` onward
+ /// is an ASCII space.
+ pub fn is_resizable(&self, new_len: usize) -> bool {
+ new_len >= self.len() || self.0.borrow()[new_len..].iter().all(|b| *b == b' ')
}
-}
-impl Deref for RawString {
- type Target = RawStr;
+ /// Tests whether this string and `other` are equal once trailing ASCII
+ /// spaces are disregarded on both sides. (Ignoring them is acceptable
+ /// because we assume that the encoding is ASCII-compatible.)
+ pub fn eq_ignore_trailing_spaces<B2>(&self, other: &RawString<B2>) -> bool
+ where
+ B2: Borrow<[u8]> + ?Sized,
+ {
+ // Strips the run of ASCII spaces, if any, from the end of `bytes`.
+ fn trim_trailing_spaces(bytes: &[u8]) -> &[u8] {
+ let end = bytes
+ .iter()
+ .rposition(|b| *b != b' ')
+ .map_or(0, |last| last + 1);
+ &bytes[..end]
+ }
+
+ // Two strings agree after padding the shorter one with spaces exactly
+ // when they agree after trimming trailing spaces from both.
+ trim_trailing_spaces(self.0.borrow()) == trim_trailing_spaces(other.0.borrow())
+ }
- fn deref(&self) -> &Self::Target {
- self.borrow()
+ /// Creates an [EncodedStr] with `encoding` that borrows this string's
+ /// contents.
+ pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
+ EncodedStr::new(self.0.borrow(), encoding)
}
}
-impl From<Cow<'_, [u8]>> for RawString {
+impl From<Cow<'_, [u8]>> for OwnedRawString {
fn from(value: Cow<'_, [u8]>) -> Self {
Self(value.into_owned())
}
}
-impl From<Vec<u8>> for RawString {
+impl From<Vec<u8>> for OwnedRawString {
fn from(source: Vec<u8>) -> Self {
Self(source)
}
}
-impl From<&[u8]> for RawString {
+impl From<&[u8]> for OwnedRawString {
fn from(source: &[u8]) -> Self {
Self(source.into())
}
}
-impl From<EncodedString> for RawString {
- fn from(value: EncodedString) -> Self {
- Self(value.bytes)
- }
-}
-
-impl Debug for RawString {
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- <RawStr as Debug>::fmt(&*self, f)
+impl<const N: usize> From<[u8; N]> for OwnedRawString {
+ fn from(source: [u8; N]) -> Self {
+ Self(source.into())
}
}
-impl Serialize for RawString {
- fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
- where
- S: serde::Serializer,
- {
- self.deref().serialize(serializer)
+impl From<EncodedString> for OwnedRawString {
+ fn from(value: EncodedString) -> Self {
+ Self(value.bytes)
}
}
-/// A borrowed string in an unspecified encoding.
-///
-/// A [RawString] is usually associated with a [Variable] and uses the
-/// variable's character encoding. We assume that the encoding is one supported
-/// by [encoding_rs] with byte units (that is, not a `UTF-16` encoding). All of
-/// these encodings have some basic ASCII compatibility.
-///
-/// For an owned raw string, see [RawString].
-///
-/// [Variable]: crate::dictionary::Variable
-#[repr(transparent)]
-#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct RawStr(pub [u8]);
-
-impl RawStr {
- /// Creates a new [RawStr] that contains `bytes`.
- pub fn from_bytes(bytes: &[u8]) -> &Self {
- // SAFETY: `RawStr` is a transparent wrapper around `[u8]`, so we can
- // turn a reference to the wrapped type into a reference to the wrapper
- // type.
- unsafe { &*(bytes as *const [u8] as *const Self) }
- }
-
- /// Returns the raw string's contents as a borrowed byte slice.
- pub fn as_bytes(&self) -> &[u8] {
- &self.0
- }
-
- pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
- EncodedStr {
- bytes: &self.0,
- encoding,
- }
- }
-
- /// Interprets the raw string's contents as the specified `encoding` and
- /// returns it decoded into UTF-8, replacing any malformed sequences by
- /// [REPLACEMENT_CHARACTER].
- ///
- /// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
- pub fn decode(&self, encoding: &'static Encoding) -> Cow<'_, str> {
- encoding.decode_without_bom_handling(&self.0).0
- }
-
- /// Compares this string and `other` for equality, ignoring trailing ASCII
- /// spaces in either string for the purpose of comparison. (This is
- /// acceptable because we assume that the encoding is ASCII-compatible.)
- pub fn eq_ignore_trailing_spaces(&self, other: &RawStr) -> bool {
- let mut this = self.0.iter();
- let mut other = other.0.iter();
- loop {
- match (this.next(), other.next()) {
- (Some(a), Some(b)) if a == b => (),
- (Some(_), Some(_)) => return false,
- (None, None) => return true,
- (Some(b' '), None) => return this.all(|c| *c == b' '),
- (None, Some(b' ')) => return other.all(|c| *c == b' '),
- (Some(_), None) | (None, Some(_)) => return false,
- }
- }
- }
-
- /// Returns true if this raw string can be resized to `len` bytes without
- /// dropping non-space characters.
- pub fn is_resizable(&self, new_len: usize) -> bool {
- new_len >= self.len() || self.0[new_len..].iter().all(|b| *b == b' ')
- }
-
- /// Returns the string's length in bytes.
- pub fn len(&self) -> usize {
- self.0.len()
+impl<B> Debug for RawString<B>
+where
+ B: Borrow<[u8]> + ?Sized,
+{
+ /// Formats the string in quoted, escaped form.
+ ///
+ /// If the bytes are valid UTF-8, displays them as UTF-8, otherwise as
+ /// Latin-1 (actually bytes interpreted as Unicode code points).
+ fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+ // Borrow the bytes once rather than re-borrowing (and
+ // double-referencing) them at each use.
+ let bytes: &[u8] = self.0.borrow();
+ let s = from_utf8(bytes).map_or_else(|_| decode_latin1(bytes), Cow::from);
+ write!(f, "{s:?}")
}
}
-impl Serialize for RawStr {
+impl<B> Serialize for RawString<B>
+where
+ B: Borrow<[u8]> + ?Sized,
+{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
- if let Ok(s) = str::from_utf8(&self.0) {
- let (variant_index, variant) = if self.0.iter().all(|b| b.is_ascii()) {
+ if let Ok(s) = str::from_utf8(self.0.borrow()) {
+ let (variant_index, variant) = if self.0.borrow().iter().all(|b| b.is_ascii()) {
(0, "Ascii")
} else {
(1, "Utf8")
tuple.end()
} else {
let mut tuple = serializer.serialize_tuple_variant("RawString", 2, "Windows1252", 1)?;
- tuple.serialize_field(&decode_latin1(&self.0))?;
+ tuple.serialize_field(&decode_latin1(self.0.borrow()))?;
tuple.end()
}
}
}
-impl Debug for RawStr {
- // If `s` is valid UTF-8, displays it as UTF-8, otherwise as Latin-1
- // (actually bytes interpreted as Unicode code points).
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- let s = from_utf8(&self.0).map_or_else(|_| decode_latin1(&self.0), Cow::from);
- write!(f, "{s:?}")
- }
-}
-
/// The value of a [Variable](crate::dictionary::Variable), with a string
/// encoding.
#[derive(Clone)]
}
impl EncodedDatum {
- pub fn into_raw(self) -> Datum<RawString> {
+ pub fn into_raw(self) -> Datum<OwnedRawString> {
match self {
EncodedDatum::Number(number) => Datum::Number(number),
EncodedDatum::String(encoded_string) => Datum::String(encoded_string.into()),
impl<'a> Eq for EncodedDat<'a> {}
-pub type OwnedDatum = Datum<RawString>;
-pub type BorrowedDatum<'a> = Datum<&'a RawStr>;
+/// A [Datum] that owns its string data (if any).
+pub type OwnedDatum = Datum<OwnedRawString>;
+
+/// A [Datum] that borrows its string data (if any).
+pub type BorrowedDatum<'a> = Datum<&'a BorrowedRawString>;
/// The value of a [Variable](crate::dictionary::Variable).
+///
+/// `Datum` is parameterized by its string type, which is either
+/// [OwnedRawString] if it owns its string value (aliased as [OwnedDatum]) or
+/// [&BorrowedRawString](BorrowedRawString) if it borrows it (aliased as
+/// [BorrowedDatum]).
#[derive(Clone)]
pub enum Datum<B> {
/// A numeric value.
impl<B, B2> PartialEq<Datum<B2>> for Datum<B>
where
- B: Borrow<RawStr>,
- B2: Borrow<RawStr>,
+ B: Borrow<RawString<[u8]>>,
+ B2: Borrow<RawString<[u8]>>,
{
fn eq(&self, other: &Datum<B2>) -> bool {
match (self, other) {
}
}
-impl<B> Eq for Datum<B> where B: Borrow<RawStr> {}
+impl<B> Eq for Datum<B> where B: Borrow<RawString<[u8]>> {}
impl<B, B2> PartialOrd<Datum<B2>> for Datum<B>
where
- B: Borrow<RawStr>,
- B2: Borrow<RawStr>,
+ B: Borrow<RawString<[u8]>>,
+ B2: Borrow<RawString<[u8]>>,
{
fn partial_cmp(&self, other: &Datum<B2>) -> Option<Ordering> {
Some(match (self, other) {
impl<B> Ord for Datum<B>
where
- B: Borrow<RawStr>,
+ B: Borrow<RawString<[u8]>>,
{
fn cmp(&self, other: &Self) -> Ordering {
self.partial_cmp(other).unwrap()
impl<B> Hash for Datum<B>
where
- B: Borrow<RawStr>,
+ B: Borrow<BorrowedRawString>,
{
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
match self {
impl<B> Datum<B>
where
- B: Borrow<RawStr>,
+ B: Borrow<RawString<[u8]>>,
{
/// Returns the string inside this datum, or `None` if this is a numeric
/// datum.
- pub fn as_string(&self) -> Option<&RawStr> {
+ pub fn as_string(&self) -> Option<&BorrowedRawString> {
match self {
Self::Number(_) => None,
Self::String(s) => Some(s.borrow()),
/// comparison.
pub fn eq_ignore_trailing_spaces<B2>(&self, other: &Datum<B2>) -> bool
where
- B2: Borrow<RawStr>,
+ B2: Borrow<RawString<[u8]>>,
{
match (self, other) {
(Self::String(a), Datum::String(b)) => a.borrow().eq_ignore_trailing_spaces(b.borrow()),
impl<B> Datum<B>
where
- B: BorrowMut<RawString>,
+ B: BorrowMut<OwnedRawString>,
{
/// Returns the string inside this datum as a mutable borrow, or `None` if
/// this is a numeric datum.
- pub fn as_string_mut(&mut self) -> Option<&mut RawString> {
+ pub fn as_string_mut(&mut self) -> Option<&mut OwnedRawString> {
match self {
Self::Number(_) => None,
Self::String(s) => Some(s.borrow_mut()),
}
}
-impl Datum<RawString> {
+impl Datum<OwnedRawString> {
pub fn with_encoding(self, encoding: &'static Encoding) -> EncodedDatum {
match self {
Datum::Number(number) => EncodedDatum::Number(number),
impl<B> From<f64> for Datum<B>
where
- B: Borrow<RawStr>,
+ B: Borrow<BorrowedRawString>,
{
fn from(number: f64) -> Self {
Some(number).into()
impl<B> From<Option<f64>> for Datum<B>
where
- B: Borrow<RawStr>,
+ B: Borrow<BorrowedRawString>,
{
fn from(value: Option<f64>) -> Self {
Self::Number(value)
impl<B> From<&str> for Datum<B>
where
- B: Borrow<RawStr> + for<'a> From<&'a [u8]>,
+ B: Borrow<BorrowedRawString> + for<'a> From<&'a [u8]>,
{
fn from(value: &str) -> Self {
value.as_bytes().into()
impl<B> From<&[u8]> for Datum<B>
where
- B: Borrow<RawStr> + for<'a> From<&'a [u8]>,
+ B: Borrow<BorrowedRawString> + for<'a> From<&'a [u8]>,
{
fn from(value: &[u8]) -> Self {
Self::String(value.into())
}
/// A case in a data set.
-#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
+#[derive(Clone, Debug, Serialize)]
pub struct RawCase(
/// One [Datum] per variable in the corresponding [Dictionary], in the same
/// order.
///
/// [Dictionary]: crate::dictionary::Dictionary
- pub Vec<Datum<RawString>>,
+ pub Vec<Datum<OwnedRawString>>,
);
impl RawCase {
- pub fn as_encoding(&self, encoding: &'static Encoding) -> Case<&'_ [Datum<RawString>]> {
+ pub fn as_encoding(&self, encoding: &'static Encoding) -> Case<&'_ [Datum<OwnedRawString>]> {
Case {
encoding,
data: &self.0,
}
}
- pub fn with_encoding(self, encoding: &'static Encoding) -> Case<Vec<Datum<RawString>>> {
+ pub fn with_encoding(self, encoding: &'static Encoding) -> Case<Vec<Datum<OwnedRawString>>> {
Case {
encoding,
data: self.0,
pub struct Case<B>
where
- B: Borrow<[Datum<RawString>]>,
+ B: Borrow<[Datum<OwnedRawString>]>,
{
encoding: &'static Encoding,
data: B,
impl<B> Case<B>
where
- B: Borrow<[Datum<RawString>]>,
+ B: Borrow<[Datum<OwnedRawString>]>,
{
fn len(&self) -> usize {
self.data.borrow().len()
}
}
-impl IntoIterator for Case<Vec<Datum<RawString>>> {
+impl IntoIterator for Case<Vec<Datum<OwnedRawString>>> {
type Item = EncodedDatum;
type IntoIter = CaseVecIter;
pub struct CaseVecIter {
encoding: &'static Encoding,
- iter: std::vec::IntoIter<Datum<RawString>>,
+ iter: std::vec::IntoIter<Datum<OwnedRawString>>,
}
impl Iterator for CaseVecIter {
use unicase::UniCase;
use crate::{
- data::{Datum, EncodedDat, EncodedDatum, RawString},
+ data::{Datum, EncodedDat, EncodedDatum, OwnedRawString, RawString},
format::{DisplayPlain, Format},
identifier::{ByIdentifier, HasIdentifier, Identifier},
output::pivot::{
/// one value (the "counted value") means that the box was checked, and any
/// other value means that it was not.
MultipleDichotomy {
- datum: Datum<RawString>,
+ datum: Datum<OwnedRawString>,
labels: CategoryLabels,
},
}
#[derive(Clone, Default, PartialEq, Eq, Serialize)]
-pub struct ValueLabels(pub HashMap<Datum<RawString>, String>);
+pub struct ValueLabels(pub HashMap<Datum<OwnedRawString>, String>);
impl ValueLabels {
pub fn new() -> Self {
self.0.is_empty()
}
- pub fn get(&self, datum: &Datum<RawString>) -> Option<&str> {
+ pub fn get(&self, datum: &Datum<OwnedRawString>) -> Option<&str> {
self.0.get(datum).map(|s| s.as_str())
}
- pub fn insert(&mut self, datum: Datum<RawString>, label: String) -> Option<String> {
+ pub fn insert(&mut self, datum: Datum<OwnedRawString>, label: String) -> Option<String> {
self.0.insert(datum, label)
}
use crate::{
data::Datum,
- data::RawString,
+ data::OwnedRawString,
dictionary::{VarType, VarWidth},
sys::raw,
};
}
}
- pub fn default_value(&self) -> Datum<RawString> {
+ pub fn default_value(&self) -> Datum<OwnedRawString> {
match self.var_type() {
VarType::Numeric => Datum::sysmis(),
- VarType::String => Datum::String(RawString::default()),
+ VarType::String => Datum::String(OwnedRawString::default()),
}
}
}
Ok(self)
}
- pub fn default_value(&self) -> Datum<RawString> {
+ pub fn default_value(&self) -> Datum<OwnedRawString> {
match self.var_width() {
VarWidth::Numeric => Datum::sysmis(),
- VarWidth::String(width) => Datum::String(RawString::spaces(width as usize)),
+ VarWidth::String(width) => Datum::String(OwnedRawString::spaces(width as usize)),
}
}
use tlo::parse_tlo;
use crate::{
- data::{Datum, EncodedDat, EncodedDatum, RawString},
+ data::{Datum, EncodedDat, EncodedDatum, OwnedRawString},
dictionary::{VarType, Variable},
format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat},
settings::{Settings, Show},
EncodedDat::String(string) => Self::new_user_text(string.as_str()),
}
}
- pub fn new_variable_value(variable: &Variable, value: &Datum<RawString>) -> Self {
+ pub fn new_variable_value(variable: &Variable, value: &Datum<OwnedRawString>) -> Self {
let var_name = Some(variable.name.as_str().into());
let value_label = variable.value_labels.get(value).map(String::from);
match value {
Datum::String(string) => Self::new(ValueInner::String(StringValue {
show: None,
hex: variable.print_format.type_() == Type::AHex,
- s: string.decode(variable.encoding()).into_owned(),
+ s: string.as_encoded(variable.encoding()).as_str().into_owned(),
var_name,
value_label,
})),
use crate::{
calendar::date_time_to_pspp,
crypto::EncryptedFile,
- data::{Case, Datum, EncodedDatum, RawString},
+ data::{Case, Datum, EncodedDatum, OwnedRawString},
dictionary::{
DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary, InvalidRole, MissingValues,
MissingValuesError, MultipleResponseType, VarWidth, Variable,
.missing_values
.into_iter()
.map(|v| {
- let mut value = RawString::from(v.0.as_slice());
+ let mut value = OwnedRawString::from(v.0.as_slice());
value.resize(variable.width.as_string_width().unwrap());
EncodedDatum::String(value.with_encoding(encoding))
})
}
impl Iterator for Cases {
- type Item = Result<Case<Vec<Datum<RawString>>>, raw::Error>;
+ type Item = Result<Case<Vec<Datum<OwnedRawString>>>, raw::Error>;
fn next(&mut self) -> Option<Self::Item> {
self.inner
//! raw details. Most readers will want to use higher-level interfaces.
use crate::{
- data::{Datum, RawCase, RawStr, RawString},
+ data::{BorrowedRawString, Datum, OwnedRawString, RawCase, RawString},
dictionary::{VarType, VarWidth},
endian::{Endian, Parse, ToBytes},
identifier::{Error as IdError, Identifier},
/// one variable record per 8-byte segment.
Variable(
/// The record.
- VariableRecord<RawString>,
+ VariableRecord<OwnedRawString>,
),
/// Value labels for numeric and short string variables.
/// These appear after the variable records.
ValueLabel(
/// The record.
- ValueLabelRecord<RawDatum, RawString>,
+ ValueLabelRecord<RawDatum, OwnedRawString>,
),
/// Document record.
/// Multiple response variable record.
MultipleResponse(
/// The record.
- MultipleResponseRecord<RawString, RawString>,
+ MultipleResponseRecord<OwnedRawString, OwnedRawString>,
),
/// Value labels for long string variables.
LongStringValueLabels(
/// The record.
- LongStringValueLabelRecord<RawString, RawString>,
+ LongStringValueLabelRecord<OwnedRawString, OwnedRawString>,
),
/// Missing values for long string variables.
/// variable records.
LongStringMissingValues(
/// The record.
- LongStringMissingValueRecord<RawString>,
+ LongStringMissingValueRecord<OwnedRawString>,
),
/// Encoding record.
output
}
- fn decode<'a>(&mut self, input: &'a RawString) -> Cow<'a, str> {
+ fn decode<'a>(&mut self, input: &'a OwnedRawString) -> Cow<'a, str> {
self.decode_slice(input.0.as_slice())
}
/// Decodes `input` to an [Identifier] using our encoding.
- pub fn decode_identifier(&mut self, input: &RawString) -> Result<Identifier, IdError> {
+ pub fn decode_identifier(&mut self, input: &OwnedRawString) -> Result<Identifier, IdError> {
let decoded = &self.decode(input);
self.new_identifier(decoded)
}
match self {
RawDatum::Number(Some(number)) => write!(f, "{number:?}"),
RawDatum::Number(None) => write!(f, "SYSMIS"),
- RawDatum::String(s) => write!(f, "{:?}", RawStr::from_bytes(s)),
+ RawDatum::String(s) => write!(f, "{:?}", BorrowedRawString::new(s)),
}
}
}
{
match self {
RawDatum::Number(number) => number.serialize(serializer),
- RawDatum::String(s) => RawStr::from_bytes(s).serialize(serializer),
+ RawDatum::String(s) => BorrowedRawString::new(s).serialize(serializer),
}
}
}
/// Decodes a `RawDatum` into a [Datum] given that we now know the string
/// width.
- pub fn decode(&self, width: VarWidth) -> Datum<RawString> {
+ pub fn decode(&self, width: VarWidth) -> Datum<OwnedRawString> {
match self {
Self::Number(x) => Datum::Number(*x),
Self::String(s) => {
let width = width.as_string_width().unwrap();
- Datum::String(RawString::from(&s[..width]))
+ Datum::String(OwnedRawString::from(&s[..width]))
}
}
}
}
-impl Datum<RawString> {
+impl Datum<OwnedRawString> {
fn read_case<R: Read + Seek>(
reader: &mut R,
case_vars: &[CaseVar],
skip_bytes(reader, segment.padding_bytes)?;
offset += segment.data_bytes;
}
- values.push(Datum::String(RawString(datum)));
+ values.push(Datum::String(datum.into()));
}
}
}
n_chunks += 1;
}
}
- values.push(Datum::String(RawString(datum)));
+ values.push(Datum::String(datum.into()));
}
}
}
reader: Option<R>,
warn: Box<dyn FnMut(Warning) + 'a>,
- header: FileHeader<RawString>,
+ header: FileHeader<OwnedRawString>,
var_types: VarTypes,
state: ReaderState,
}
/// Returns the header in this reader.
- pub fn header(&self) -> &FileHeader<RawString> {
+ pub fn header(&self) -> &FileHeader<OwnedRawString> {
&self.header
}
fn new<R>(
reader: R,
var_types: VarTypes,
- header: &FileHeader<RawString>,
+ header: &FileHeader<OwnedRawString>,
ztrailer_offset: Option<u64>,
) -> Self
where
} else {
big
};
- write!(f, "{number}/{:?}", RawStr::from_bytes(&self.0))
+ write!(f, "{number}/{:?}", BorrowedRawString::new(&self.0))
}
}
impl<const N: usize> Debug for RawStrArray<N> {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- write!(f, "{:?}", RawStr::from_bytes(&self.0))
+ write!(f, "{:?}", BorrowedRawString::new(&self.0))
}
}
where
S: serde::Serializer,
{
- RawStr::from_bytes(&self.0).serialize(serializer)
+ BorrowedRawString::new(&self.0).serialize(serializer)
}
}
Ok(vec)
}
-fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<RawString, IoError> {
+fn read_string<R: Read>(r: &mut R, endian: Endian) -> Result<OwnedRawString, IoError> {
let length: u32 = endian.parse(read_bytes(r)?);
Ok(read_vec(r, length as usize)?.into())
}
};
use crate::{
- data::{Datum, RawString},
+ data::{Datum, OwnedRawString, RawString},
dictionary::{
Alignment, Attributes, CategoryLabels, Measure, MissingValueRange, MissingValues,
MissingValuesError, VarType, VarWidth,
pub file_label: [u8; 64],
}
-impl FileHeader<RawString> {
+impl FileHeader<OwnedRawString> {
/// Reads a header record from `r`, reporting any warnings via `warn`.
pub fn read<R>(r: &mut R, warn: &mut dyn FnMut(Warning)) -> Result<Self, Error>
where
));
}
- let creation_date = RawString(header.creation_date.into());
- let creation_time = RawString(header.creation_time.into());
- let file_label = RawString(header.file_label.into());
-
Ok(FileHeader {
magic,
layout_code: header.layout_code,
weight_index,
n_cases,
bias: header.bias,
- creation_date,
- creation_time,
- eye_catcher: RawString(header.eye_catcher.into()),
- file_label,
+ creation_date: header.creation_date.into(),
+ creation_time: header.creation_time.into(),
+ eye_catcher: header.eye_catcher.into(),
+ file_label: header.file_label.into(),
endian,
})
}
#[derive(Clone, Debug, Default, Serialize)]
pub struct RawMissingValues {
/// Individual missing values, up to 3 of them.
- pub values: Vec<Datum<RawString>>,
+ pub values: Vec<Datum<OwnedRawString>>,
/// Optional range of missing values.
pub range: Option<MissingValueRange>,
}
impl RawMissingValues {
- pub fn new(values: Vec<Datum<RawString>>, range: Option<MissingValueRange>) -> Self {
+ pub fn new(values: Vec<Datum<OwnedRawString>>, range: Option<MissingValueRange>) -> Self {
Self { values, range }
}
let width = width.min(8) as usize;
let values = values
.into_iter()
- .map(|value| Datum::String(RawString::from(&value[..width])))
+ .map(|value| Datum::String(OwnedRawString::from(&value[..width])))
.collect();
return Ok(Self::new(values, None));
}
pub name: [u8; 8],
}
-impl VariableRecord<RawString> {
+impl VariableRecord<OwnedRawString> {
/// Reads a variable record from `r`.
pub fn read<R>(
r: &mut R,
1 => {
let len: u32 = endian.parse(read_bytes(r)?);
let read_len = len.min(65535) as usize;
- let label = RawString(read_vec(r, read_len)?);
+ let label = read_vec(r, read_len)?;
let padding_bytes = len.next_multiple_of(4) - len;
let _ = read_vec(r, padding_bytes as usize)?;
- Some(label)
+ Some(label.into())
}
_ => {
return Err(Error::new(
Ok(Record::Variable(VariableRecord {
offsets: start_offset..end_offset,
width,
- name: RawString(raw_record.name.into()),
+ name: raw_record.name.into(),
print_format: raw_record.print_format,
write_format: raw_record.write_format,
missing_values,
pub const MAX_INDEXES: u32 = u32::MAX / 8;
}
-impl ValueLabelRecord<RawDatum, RawString> {
+impl ValueLabelRecord<RawDatum, OwnedRawString> {
pub(super) fn read<R: Read + Seek>(
r: &mut R,
endian: Endian,
let mut label = read_vec(r, padded_len - 1)?;
label.truncate(label_len);
- labels.push((value, RawString(label)));
+ labels.push((value, label.into()));
}
let index_offset = r.stream_position()?;
pub offsets: Range<u64>,
/// The text content of the record.
- pub text: RawString,
+ pub text: OwnedRawString,
}
impl TextRecord {
/// Multiple-dichotomy set.
MultipleDichotomy {
/// The value that is counted in the set.
- value: RawString,
+ value: OwnedRawString,
/// What categories are labeled.
labels: CategoryLabels,
pub short_names: Vec<I>,
}
-impl MultipleResponseSet<RawString, RawString> {
+impl MultipleResponseSet<OwnedRawString, OwnedRawString> {
/// Parses a multiple-response set from `input`. Returns the set and the
/// input remaining to be parsed following the set.
fn parse(input: &[u8]) -> Result<(Self, &[u8]), WarningDetails> {
pub sets: Vec<MultipleResponseSet<I, S>>,
}
-impl MultipleResponseRecord<RawString, RawString> {
+impl MultipleResponseRecord<OwnedRawString, OwnedRawString> {
/// Parses a multiple-response set from `ext`.
pub fn parse(ext: &Extension) -> Result<Record, WarningDetails> {
ext.check_size(Some(1), None, "multiple response set record")?;
}
}
-impl MultipleResponseRecord<RawString, RawString> {
+impl MultipleResponseRecord<OwnedRawString, OwnedRawString> {
/// Decodes this record using `decoder`.
pub fn decode(self, decoder: &mut Decoder) -> MultipleResponseRecord<Identifier, String> {
let mut sets = Vec::new();
}
}
-fn parse_counted_string(input: &[u8]) -> Result<(RawString, &[u8]), WarningDetails> {
+fn parse_counted_string(input: &[u8]) -> Result<(OwnedRawString, &[u8]), WarningDetails> {
let Some(space) = input.iter().position(|&b| b == b' ') else {
return Err(MultipleResponseWarning::CountedStringMissingSpace.into());
};
pub missing_values: Vec<RawStrArray<8>>,
}
-impl LongStringMissingValues<RawString> {
+impl LongStringMissingValues<OwnedRawString> {
/// Decodes these settings using `decoder`.
fn decode(
&self,
pub values: Vec<LongStringMissingValues<N>>,
}
-impl LongStringMissingValueRecord<RawString> {
+impl LongStringMissingValueRecord<OwnedRawString> {
/// Parses this record from `ext`.
pub fn parse(
ext: &Extension,
pub width: u32,
/// `(value, label)` pairs, where each value is `width` bytes.
- pub labels: Vec<(RawString, S)>,
+ pub labels: Vec<(OwnedRawString, S)>,
}
-impl LongStringValueLabels<RawString, RawString> {
+impl LongStringValueLabels<OwnedRawString, OwnedRawString> {
/// Decodes a set of long string value labels using `decoder`.
fn decode(
&self,
pub labels: Vec<LongStringValueLabels<N, S>>,
}
-impl LongStringValueLabelRecord<RawString, RawString> {
+impl LongStringValueLabelRecord<OwnedRawString, OwnedRawString> {
/// Parses this record from `ext` using `endian`.
fn parse(ext: &Extension, endian: Endian) -> Result<Record, WarningDetails> {
ext.check_size(Some(1), None, "long string value labels record")?;
use smallvec::SmallVec;
use crate::{
- data::{Datum, EncodedDatum, RawStr, RawString},
+ data::{Datum, EncodedDatum, OwnedRawString, RawString},
dictionary::{
Alignment, Attributes, CategoryLabels, Dictionary, Measure, MultipleResponseType,
ValueLabels, VarWidth,
}
}
-impl BinWrite for Datum<RawString> {
+impl BinWrite for Datum<OwnedRawString> {
type Args<'a> = ();
fn write_options<W: Write + Seek>(
fn write_case_uncompressed<'c>(
&mut self,
- case: impl Iterator<Item = Datum<RawString>>,
+ case: impl Iterator<Item = Datum<OwnedRawString>>,
) -> Result<(), BinError> {
for (var, datum) in zip_eq(self.case_vars, case) {
match var {
}
fn write_case_compressed<'c>(
&mut self,
- case: impl Iterator<Item = Datum<RawString>>,
+ case: impl Iterator<Item = Datum<OwnedRawString>>,
) -> Result<(), BinError> {
for (var, datum) in zip_eq(self.case_vars, case) {
match var {
/// Panics if [try_finish](Self::try_finish) has been called.
pub fn write_case<'a>(
&mut self,
- case: impl IntoIterator<Item = Datum<RawString>>,
+ case: impl IntoIterator<Item = Datum<OwnedRawString>>,
) -> Result<(), BinError> {
match self.inner.as_mut().unwrap() {
Either::Left(inner) => {