};
use encoding_rs::{mem::decode_latin1, Encoding, UTF_8};
+use itertools::Itertools;
use ordered_float::OrderedFloat;
use serde::{ser::SerializeTupleVariant, Serialize};
-use crate::dictionary::{VarType, VarWidth};
+use crate::{
+ dictionary::{VarType, VarWidth},
+ format::DisplayPlain,
+};
/// An owned string in an unspecified character encoding.
///
EncodedStr::new(&self.0, encoding)
}
+    /// Returns true if this raw string can be resized to `new_len` bytes
+    /// without dropping non-space characters.
+    ///
+    /// Growing the string (or keeping its length) is always possible;
+    /// shrinking is possible only if every byte that would be cut off is an
+    /// ASCII space.
+    pub fn is_resizable(&self, new_len: usize) -> bool {
+        match self.0.get(new_len..) {
+            // The tail that truncation would discard must be all spaces.
+            Some(discarded) => discarded.iter().all(|&byte| byte == b' '),
+            // `new_len` is past the end, so nothing would be discarded.
+            None => true,
+        }
+    }
+
/// Extends or shortens this [RawString] to exactly `len` bytes. If the
/// string needs to be extended, does so by appending spaces.
///
/// If this shortens the string, it can cut off a multibyte character in the
- /// middle.
+ /// middle ([is_resizable](Self::is_resizable) checks for this).
pub fn resize(&mut self, len: usize) {
self.0.resize(len, b' ');
}
pub fn trim_end(&mut self) {
while self.0.pop_if(|c| *c == b' ').is_some() {}
}
+
+    /// Converts this raw string into an [EncodedString] by attaching
+    /// `encoding` to it. The bytes are moved as they are, not recoded.
+    pub fn with_encoding(self, encoding: &'static Encoding) -> EncodedString {
+        let Self(bytes) = self;
+        EncodedString { bytes, encoding }
+    }
}
impl Borrow<RawStr> for RawString {
&self.0
}
- /// Returns an object that implements [Display] for printing this [RawStr],
- /// given that it is encoded in `encoding`.
- pub fn display(&self, encoding: &'static Encoding) -> DisplayRawString {
- DisplayRawString(encoding.decode_without_bom_handling(&self.0).0)
+    /// Returns a borrowed view of this raw string's bytes paired with
+    /// `encoding`. The bytes are not validated or recoded.
+    pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
+        EncodedStr::new(&self.0, encoding)
+    }
/// Interprets the raw string's contents as the specified `encoding` and
}
}
-/// Helper struct for printing [RawStr] with [format!].
-///
-/// Created by [RawStr::display].
-pub struct DisplayRawString<'a>(Cow<'a, str>);
-
-impl<'a> Display for DisplayRawString<'a> {
- fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
- write!(f, "{}", &self.0)
- }
-}
-
impl Debug for RawStr {
// If `s` is valid UTF-8, displays it as UTF-8, otherwise as Latin-1
// (actually bytes interpreted as Unicode code points).
}
}
+/// The value of a [Variable](crate::dictionary::Variable), with a string
+/// encoding.
+///
+/// This is the owned form; [EncodedDat] is the borrowed form.
+#[derive(Clone)]
+pub enum EncodedDatum {
+ /// A numeric value.
+ Number(
+ /// A number, or `None` for the system-missing value.
+ Option<f64>,
+ ),
+ /// A string value.
+ String(
+ /// The value, stored together with its encoding.
+ EncodedString,
+ ),
+}
+
+impl EncodedDatum {
+    /// Constructs a new numeric [EncodedDatum] holding the system-missing
+    /// value.
+    pub const fn sysmis() -> Self {
+        Self::Number(None)
+    }
+
+    /// Returns the number inside this datum (itself `None` for the
+    /// system-missing value), or `None` if this is a string datum.
+    pub fn as_number(&self) -> Option<Option<f64>> {
+        if let Self::Number(value) = self {
+            Some(*value)
+        } else {
+            None
+        }
+    }
+
+    /// Returns the string inside this datum, or `None` if this is a numeric
+    /// datum.
+    pub fn as_string(&self) -> Option<&EncodedString> {
+        if let Self::String(text) = self {
+            Some(text)
+        } else {
+            None
+        }
+    }
+
+    /// Returns the string inside this datum as a mutable borrow, or `None` if
+    /// this is a numeric datum.
+    pub fn as_string_mut(&mut self) -> Option<&mut EncodedString> {
+        if let Self::String(text) = self {
+            Some(text)
+        } else {
+            None
+        }
+    }
+
+    /// Resizes this datum to the given `width`.
+    ///
+    /// Succeeds if and only if this datum and `width` are both numeric, or
+    /// both string and resizing the string would not drop any non-space
+    /// characters. Any other combination yields `Err(())`.
+    pub fn resize(&mut self, width: VarWidth) -> Result<(), ()> {
+        match (width, self) {
+            (VarWidth::String(new_width), Self::String(text)) => text.resize(new_width as usize),
+            (VarWidth::Numeric, Self::Number(_)) => Ok(()),
+            // Numeric/string mismatch.
+            _ => Err(()),
+        }
+    }
+
+    /// Returns the [VarType] corresponding to this datum.
+    pub fn var_type(&self) -> VarType {
+        if matches!(self, Self::Number(_)) {
+            VarType::Numeric
+        } else {
+            VarType::String
+        }
+    }
+
+    /// Returns the [VarWidth] corresponding to this datum. For a string, the
+    /// width is its length in bytes.
+    pub fn width(&self) -> VarWidth {
+        let Self::String(text) = self else {
+            return VarWidth::Numeric;
+        };
+        VarWidth::String(text.len().try_into().unwrap())
+    }
+
+    /// Compares this datum and `other` for equality. If both are strings,
+    /// trailing ASCII spaces in either are ignored for the purpose of the
+    /// comparison.
+    pub fn eq_ignore_trailing_spaces<'a>(&self, other: impl Into<EncodedDat<'a>>) -> bool {
+        let rhs: EncodedDat<'a> = other.into();
+        self.borrowed().eq_ignore_trailing_spaces(rhs)
+    }
+
+    /// Removes trailing ASCII spaces from this datum, if it is a string.
+    /// Numeric data is left alone.
+    pub fn trim_end(&mut self) {
+        if let Self::String(text) = self {
+            text.trim_end();
+        }
+    }
+
+    /// Returns the borrowed form of this datum.
+    pub fn borrowed<'a>(&'a self) -> EncodedDat<'a> {
+        match self {
+            Self::String(text) => EncodedDat::String(text.borrowed()),
+            Self::Number(value) => EncodedDat::Number(*value),
+        }
+    }
+
+    /// Returns a helper that displays this datum with string values quoted.
+    pub fn quoted(&self) -> QuotedEncodedDat<'_> {
+        QuotedEncodedDat(self.borrowed())
+    }
+}
+
+/// Displays the datum exactly as its borrowed form ([EncodedDat]) would be.
+impl Display for EncodedDatum {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let view = self.borrowed();
+        f.write_fmt(format_args!("{view}"))
+    }
+}
+
+/// Serializes as the inner value alone: the optional number for numeric data,
+/// or the encoded string for string data.
+impl Serialize for EncodedDatum {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        match self {
+            Self::String(text) => text.serialize(serializer),
+            Self::Number(value) => value.serialize(serializer),
+        }
+    }
+}
+
+/// A borrowed datum with a string encoding.
+///
+/// This is the borrowed form of [EncodedDatum]; it is cheap to copy.
+#[derive(Copy, Clone)]
+pub enum EncodedDat<'a> {
+ /// A numeric value.
+ Number(
+ /// A number, or `None` for the system-missing value.
+ Option<f64>,
+ ),
+ /// A string value.
+ String(
+ /// The borrowed value, together with its encoding.
+ EncodedStr<'a>,
+ ),
+}
+
+impl<'a> EncodedDat<'a> {
+    /// Constructs a new numeric [EncodedDat] for the system-missing value.
+    pub const fn sysmis() -> Self {
+        Self::Number(None)
+    }
+
+    /// Returns the number inside this datum (itself `None` for the
+    /// system-missing value), or `None` if this is a string datum.
+    pub fn as_number(&self) -> Option<Option<f64>> {
+        match self {
+            Self::Number(number) => Some(*number),
+            Self::String(_) => None,
+        }
+    }
+
+    /// Returns the string inside this datum, or `None` if this is a numeric
+    /// datum.
+    pub fn as_string(&self) -> Option<&EncodedStr<'a>> {
+        match self {
+            Self::Number(_) => None,
+            Self::String(s) => Some(s),
+        }
+    }
+
+    /// Returns a copy of the borrowed string inside this datum, or `None` if
+    /// this is a numeric datum.
+    ///
+    /// Despite the name, nothing mutable is handed out: [EncodedStr] is an
+    /// immutable `Copy` view of the underlying bytes. The receiver is a plain
+    /// `&mut self` rather than `&'a mut self`, because the latter would keep
+    /// the datum mutably borrowed for its entire lifetime after a single call.
+    pub fn as_string_mut(&mut self) -> Option<EncodedStr<'a>> {
+        match self {
+            Self::Number(_) => None,
+            Self::String(s) => Some(*s),
+        }
+    }
+
+    /// Compares this datum and `other` for equality. If both are strings,
+    /// trailing ASCII spaces in either are ignored; otherwise this is the
+    /// same as `==`.
+    pub fn eq_ignore_trailing_spaces<'b>(&self, other: EncodedDat<'b>) -> bool {
+        match (self, other) {
+            (Self::String(a), EncodedDat::String(b)) => a.eq_ignore_trailing_spaces(b),
+            _ => *self == other,
+        }
+    }
+
+    /// Returns a helper that displays this datum with string values quoted.
+    pub fn quoted(&self) -> QuotedEncodedDat<'a> {
+        QuotedEncodedDat(*self)
+    }
+}
+
+/// Helper struct for displaying an [EncodedDat] with string values quoted.
+///
+/// Created by [EncodedDat::quoted] or [EncodedDatum::quoted].
+pub struct QuotedEncodedDat<'a>(EncodedDat<'a>);
+
+/// Displays `SYSMIS` for the system-missing value, numbers in plain form, and
+/// strings in their quoted form.
+impl Display for QuotedEncodedDat<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let Self(datum) = self;
+        match datum {
+            EncodedDat::String(text) => f.write_fmt(format_args!("{}", text.quoted())),
+            EncodedDat::Number(Some(value)) => value.display_plain().fmt(f),
+            EncodedDat::Number(None) => f.write_str("SYSMIS"),
+        }
+    }
+}
+
+/// Displays `SYSMIS` for the system-missing value, numbers in plain form, and
+/// strings as they are (unquoted).
+impl Display for EncodedDat<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::String(text) => f.write_fmt(format_args!("{text}")),
+            Self::Number(Some(value)) => value.display_plain().fmt(f),
+            Self::Number(None) => f.write_str("SYSMIS"),
+        }
+    }
+}
+
+/// Equality that compares numbers through [OrderedFloat], so that it is a
+/// total equivalence relation. A number never equals a string.
+impl<'a> PartialEq for EncodedDat<'a> {
+    fn eq(&self, other: &Self) -> bool {
+        match (self, other) {
+            (Self::String(lhs), Self::String(rhs)) => lhs == rhs,
+            // Two system-missing values are equal; a system-missing value
+            // never equals an actual number.
+            (Self::Number(lhs), Self::Number(rhs)) => {
+                lhs.map(OrderedFloat) == rhs.map(OrderedFloat)
+            }
+            _ => false,
+        }
+    }
+}
+
+impl<'a> Eq for EncodedDat<'a> {}
+
/// The value of a [Variable](crate::dictionary::Variable).
#[derive(Clone)]
pub enum Datum {
impl Debug for Datum {
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
match self {
- Datum::Number(Some(number)) => write!(f, "{number:?}"),
- Datum::Number(None) => write!(f, "SYSMIS"),
- Datum::String(s) => write!(f, "{:?}", s),
+ Self::Number(Some(number)) => write!(f, "{number:?}"),
+ Self::Number(None) => write!(f, "SYSMIS"),
+ Self::String(s) => write!(f, "{:?}", s),
}
}
}
S: serde::Serializer,
{
match self {
- Datum::Number(number) => number.serialize(serializer),
- Datum::String(raw_string) => raw_string.serialize(serializer),
+ Self::Number(number) => number.serialize(serializer),
+ Self::String(raw_string) => raw_string.serialize(serializer),
}
}
}
impl Ord for Datum {
fn cmp(&self, other: &Self) -> Ordering {
match (self, other) {
- (Datum::Number(a), Datum::Number(b)) => match (a, b) {
+ (Self::Number(a), Self::Number(b)) => match (a, b) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some(a), Some(b)) => a.total_cmp(b),
},
- (Datum::Number(_), Datum::String(_)) => Ordering::Less,
- (Datum::String(_), Datum::Number(_)) => Ordering::Greater,
- (Datum::String(a), Datum::String(b)) => a.cmp(b),
+ (Self::Number(_), Self::String(_)) => Ordering::Less,
+ (Self::String(_), Self::Number(_)) => Ordering::Greater,
+ (Self::String(a), Self::String(b)) => a.cmp(b),
}
}
}
impl Hash for Datum {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
match self {
- Datum::Number(number) => number.map(OrderedFloat).hash(state),
- Datum::String(string) => string.hash(state),
+ Self::Number(number) => number.map(OrderedFloat).hash(state),
+ Self::String(string) => string.hash(state),
}
}
}
/// datum.
pub fn as_number(&self) -> Option<Option<f64>> {
match self {
- Datum::Number(number) => Some(*number),
- Datum::String(_) => None,
+ Self::Number(number) => Some(*number),
+ Self::String(_) => None,
}
}
/// datum.
pub fn as_string(&self) -> Option<&RawString> {
match self {
- Datum::Number(_) => None,
- Datum::String(s) => Some(s),
+ Self::Number(_) => None,
+ Self::String(s) => Some(s),
}
}
/// this is a numeric datum.
pub fn as_string_mut(&mut self) -> Option<&mut RawString> {
match self {
- Datum::Number(_) => None,
- Datum::String(s) => Some(s),
+ Self::Number(_) => None,
+ Self::String(s) => Some(s),
+ }
+ }
+
+    /// Returns a borrowed view of this datum with `encoding` attached to its
+    /// string value, if it has one. Numbers are copied unchanged.
+    pub fn as_encoded<'a>(&'a self, encoding: &'static Encoding) -> EncodedDat<'a> {
+        match self {
+            Self::String(text) => EncodedDat::String(text.as_encoded(encoding)),
+            Self::Number(value) => EncodedDat::Number(*value),
+        }
+    }
/// non-space characters.
pub fn is_resizable(&self, width: VarWidth) -> bool {
match (self, width) {
- (Datum::Number(_), VarWidth::Numeric) => true,
- (Datum::String(s), VarWidth::String(new_width)) => {
- let new_len = new_width as usize;
- new_len >= s.len() || s.0[new_len..].iter().all(|c| *c == b' ')
- }
+ (Self::Number(_), VarWidth::Numeric) => true,
+ (Self::String(s), VarWidth::String(new_width)) => s.is_resizable(new_width as usize),
_ => false,
}
}
- /// Resizes this datum to the given `width`.
- ///
- /// # Panic
- ///
- /// Panics if resizing would change the datum from numeric to string or vice
- /// versa.
- pub fn resize(&mut self, width: VarWidth) {
+ /// Resizes this datum to the given `width`. Returns an error, without
+ /// modifying the datum, if [is_resizable](Self::is_resizable) would return
+ /// false.
+ pub fn resize(&mut self, width: VarWidth) -> Result<(), ()> {
match (self, width) {
- (Datum::Number(_), VarWidth::Numeric) => (),
- (Datum::String(s), VarWidth::String(new_width)) => s.resize(new_width as usize),
- _ => unreachable!(),
+ (Self::Number(_), VarWidth::Numeric) => Ok(()),
+ (Self::String(s), VarWidth::String(new_width)) => {
+ if s.is_resizable(new_width as usize) {
+ s.resize(new_width as usize);
+ Ok(())
+ } else {
+ Err(())
+ }
+ }
+ _ => Err(()),
}
}
/// Returns the [VarWidth] corresponding to this datum.
pub fn width(&self) -> VarWidth {
match self {
- Datum::Number(_) => VarWidth::Numeric,
- Datum::String(s) => VarWidth::String(s.len().try_into().unwrap()),
+ Self::Number(_) => VarWidth::Numeric,
+ Self::String(s) => VarWidth::String(s.len().try_into().unwrap()),
}
}
Self::String(s) => s.trim_end(),
}
}
+
+    /// Converts this datum into an [EncodedDatum], attaching `encoding` to its
+    /// string value, if it has one. Numbers are carried over unchanged.
+    pub fn with_encoding(self, encoding: &'static Encoding) -> EncodedDatum {
+        match self {
+            Self::String(text) => EncodedDatum::String(text.with_encoding(encoding)),
+            Self::Number(value) => EncodedDatum::Number(value),
+        }
+    }
}
impl From<f64> for Datum {
///
/// The borrowed form of such a string is [EncodedStr].
#[derive(Clone, Debug)]
-pub enum EncodedString {
- /// A string in arbitrary encoding.
- Encoded {
- /// The bytes of the string.
- bytes: Vec<u8>,
-
- /// The string's encoding.
- ///
- /// This can be [UTF_8].
- encoding: &'static Encoding,
- },
+pub struct EncodedString {
+ /// The bytes of the string.
+ bytes: Vec<u8>,
- /// A string that is in UTF-8 and known to be valid.
- Utf8 {
- /// The string.
- s: String,
- },
+ /// The string's encoding.
+ encoding: &'static Encoding,
}
impl EncodedString {
+ /// Returns the length of this string in bytes, as stored in its encoding.
+ pub fn len(&self) -> usize {
+ self.bytes.len()
+ }
+
+ /// Returns the bytes in the string, in its encoding.
+ pub fn as_bytes(&self) -> &[u8] {
+ &self.bytes
+ }
+
+    /// Compares this string and `other` for equality, ignoring trailing ASCII
+    /// spaces in either string for the purpose of comparison. (This is
+    /// acceptable because we assume that the encoding is ASCII-compatible.)
+    pub fn eq_ignore_trailing_spaces<'a>(&self, other: impl Into<EncodedStr<'a>>) -> bool {
+        let rhs: EncodedStr<'a> = other.into();
+        self.borrowed().eq_ignore_trailing_spaces(rhs)
+    }
+
+    /// Extends or shortens this string to exactly `new_len` bytes.
+    ///
+    /// Extending appends ASCII spaces. Shortening succeeds only if every byte
+    /// that would be removed is an ASCII space; otherwise this returns
+    /// `Err(())` and leaves the string unmodified.
+    pub fn resize(&mut self, new_len: usize) -> Result<(), ()> {
+        let drops_data = self
+            .bytes
+            .get(new_len..)
+            .is_some_and(|discarded| discarded.iter().any(|&byte| byte != b' '));
+        if drops_data {
+            return Err(());
+        }
+        // Truncates, or pads with spaces, as needed.
+        self.bytes.resize(new_len, b' ');
+        Ok(())
+    }
+
/// Returns the string's [Encoding].
pub fn encoding(&self) -> &'static Encoding {
- match self {
- EncodedString::Encoded { encoding, .. } => encoding,
- EncodedString::Utf8 { .. } => UTF_8,
- }
+ self.encoding
}
/// Returns a borrowed form of this string.
pub fn borrowed(&self) -> EncodedStr<'_> {
- match self {
- EncodedString::Encoded { bytes, encoding } => EncodedStr::Encoded { bytes, encoding },
- EncodedString::Utf8 { s } => EncodedStr::Utf8 { s },
- }
+ EncodedStr::new(&self.bytes, self.encoding)
+ }
+
+    /// Removes any trailing ASCII spaces.
+    pub fn trim_end(&mut self) {
+        // Keep everything up to and including the last non-space byte.
+        let kept = self
+            .bytes
+            .iter()
+            .rposition(|&byte| byte != b' ')
+            .map_or(0, |last| last + 1);
+        self.bytes.truncate(kept);
+    }
+}
+
+/// Borrows an [EncodedString] as an [EncodedStr] with the same encoding.
+impl<'a> From<&'a EncodedString> for EncodedStr<'a> {
+    fn from(value: &'a EncodedString) -> Self {
+        EncodedStr::new(&value.bytes, value.encoding)
+    }
+}
impl<'a> From<EncodedStr<'a>> for EncodedString {
fn from(value: EncodedStr<'a>) -> Self {
- match value {
- EncodedStr::Encoded { bytes, encoding } => Self::Encoded {
- bytes: bytes.into(),
- encoding,
- },
- EncodedStr::Utf8 { s } => Self::Utf8 { s: s.into() },
+ Self {
+ bytes: value.bytes.into(),
+ encoding: value.encoding,
}
}
}
+/// Serializes exactly as the borrowed form ([EncodedStr]) does.
+impl Serialize for EncodedString {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let view = self.borrowed();
+        view.serialize(serializer)
+    }
+}
+
/// A borrowed string and its [Encoding].
///
/// The string is not guaranteed to be valid in the encoding.
///
/// The owned form of such a string is [EncodedString].
-pub enum EncodedStr<'a> {
- /// A string in an arbitrary encoding
- Encoded {
- /// The bytes of the string.
- bytes: &'a [u8],
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct EncodedStr<'a> {
+ /// The bytes of the string.
+ bytes: &'a [u8],
- /// The string's encoding.
- ///
- /// This can be [UTF_8].
- encoding: &'static Encoding,
- },
-
- /// A string in UTF-8 that is known to be valid.
- Utf8 {
- /// The string.
- s: &'a str,
- },
+ /// The string's encoding.
+ encoding: &'static Encoding,
}
impl<'a> EncodedStr<'a> {
/// Construct a new string with an arbitrary encoding.
pub fn new(bytes: &'a [u8], encoding: &'static Encoding) -> Self {
- Self::Encoded { bytes, encoding }
+ Self { bytes, encoding }
}
/// Returns this string recoded in UTF-8. Invalid characters will be
///
/// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
pub fn as_str(&self) -> Cow<'_, str> {
- match self {
- EncodedStr::Encoded { bytes, encoding } => {
- encoding.decode_without_bom_handling(bytes).0
- }
- EncodedStr::Utf8 { s } => Cow::from(*s),
- }
+ self.encoding.decode_without_bom_handling(self.bytes).0
}
/// Returns the bytes in the string, in its encoding.
pub fn as_bytes(&self) -> &[u8] {
- match self {
- EncodedStr::Encoded { bytes, .. } => bytes,
- EncodedStr::Utf8 { s } => s.as_bytes(),
- }
+ self.bytes
}
/// Returns this string recoded in `encoding`. Invalid characters will be
///
/// [REPLACEMENT_CHARACTER]: std::char::REPLACEMENT_CHARACTER
pub fn to_encoding(&self, encoding: &'static Encoding) -> Cow<[u8]> {
- match self {
- EncodedStr::Encoded { bytes, encoding } => {
- let utf8 = encoding.decode_without_bom_handling(bytes).0;
- match encoding.encode(&utf8).0 {
- Cow::Borrowed(_) => {
- // Recoding into UTF-8 and then back did not change anything.
- Cow::from(*bytes)
- }
- Cow::Owned(owned) => Cow::Owned(owned),
- }
+        // Recode via UTF-8: decode from this string's own encoding, then
+        // encode the result into the requested `encoding`.
+        let utf8 = self.as_str();
+        match encoding.encode(&utf8).0 {
+            // The original bytes may be reused only if *both* steps were
+            // no-ops.  A borrowed `encode` result alone is not enough: it
+            // merely says that the output equals the UTF-8 text, which
+            // differs from `self.bytes` whenever decoding had to convert
+            // (e.g. windows-1252 input recoded to UTF-8).
+            Cow::Borrowed(_) if matches!(utf8, Cow::Borrowed(_)) => {
+                Cow::from(self.bytes)
}
- EncodedStr::Utf8 { s } => encoding.encode(s).0,
+            // The encoded bytes borrow from the local `utf8`, so they must
+            // be copied out before returning.
+            recoded => Cow::Owned(recoded.into_owned()),
}
}
/// Returns true if this string is empty.
pub fn is_empty(&self) -> bool {
- match self {
- EncodedStr::Encoded { bytes, .. } => bytes.is_empty(),
- EncodedStr::Utf8 { s } => s.is_empty(),
- }
+ self.bytes.is_empty()
+ }
+
+ /// Compares this string and `other` for equality, ignoring trailing ASCII
+ /// spaces in either string for the purpose of comparison.
+ ///
+ /// Only the bytes are compared; the two strings' encodings are not
+ /// consulted.
+ pub fn eq_ignore_trailing_spaces<'b>(&self, other: EncodedStr<'b>) -> bool {
+ self.bytes.iter().zip_longest(other.bytes).all(|elem| {
+ // Where one string has run out, compare against a space instead.
+ let (left, right) = elem.or(&b' ', &b' ');
+ *left == *right
+ })
}
/// Returns a helper for displaying this string in double quotes.
}
}
+/// Displays the string recoded into UTF-8 (see [EncodedStr::as_str]).
+impl<'a> Display for EncodedStr<'a> {
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        let decoded = self.as_str();
+        f.write_fmt(format_args!("{decoded}"))
+    }
+}
+
+/// Debug-formats the string recoded into UTF-8, in the same quoted and
+/// escaped form that `str` uses.
+impl<'a> Debug for EncodedStr<'a> {
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        let decoded = self.as_str();
+        f.write_fmt(format_args!("{decoded:?}"))
+    }
+}
+
impl<'a> From<&'a str> for EncodedStr<'a> {
fn from(s: &'a str) -> Self {
- Self::Utf8 { s }
+ Self {
+ bytes: s.as_bytes(),
+ encoding: UTF_8,
+ }
}
}
impl<'a> From<&'a String> for EncodedStr<'a> {
fn from(s: &'a String) -> Self {
- Self::Utf8 { s: s.as_str() }
+ Self::from(s.as_str())
+ }
+}
+
+/// Serializes as a plain string: the contents recoded into UTF-8.
+impl<'a> Serialize for EncodedStr<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let decoded = self.as_str();
+        serializer.serialize_str(&decoded)
+    }
}
use unicase::UniCase;
use crate::{
- data::Datum,
+ data::{Datum, EncodedDat, EncodedDatum},
format::{DisplayPlain, Format},
identifier::{ByIdentifier, HasIdentifier, Identifier},
output::pivot::{
Some(Value::new_user_text(variable.write_format.to_string()))
}
VariableField::MissingValues if !variable.missing_values.is_empty() => {
- Some(Value::new_user_text(
- variable
- .missing_values
- .display(variable.encoding)
- .to_string(),
- ))
+ Some(Value::new_user_text(variable.missing_values.to_string()))
}
VariableField::MissingValues => None,
}
for (datum, label) in sorted_value_labels {
let mut value = Value::new_variable_value(variable, datum)
.with_show_value_label(Some(Show::Value));
- if variable.missing_values.contains(datum) {
+ if variable
+ .missing_values
+ .contains(datum.as_encoded(variable.encoding()))
+ {
value.add_footnote(&missing_footnote);
}
group.push(value);
}
pub fn resize(&mut self, width: VarWidth) {
- if self.missing_values.is_resizable(width) {
- self.missing_values.resize(width);
- } else {
- self.missing_values = MissingValues::default();
- }
+ let _ = self.missing_values.resize(width);
- if self.value_labels.is_resizable(width) {
- self.value_labels.resize(width);
- } else {
- self.value_labels = ValueLabels::default();
- }
+ self.value_labels.resize(width);
self.print_format.resize(width);
self.write_format.resize(width);
self.0 = self
.0
.drain()
- .map(|(mut datum, string)| {
- datum.resize(width);
- (datum, string)
- })
+ .filter_map(|(mut datum, string)| datum.resize(width).is_ok().then(|| (datum, string)))
.collect();
}
}
#[derive(Clone, Default, Serialize)]
pub struct MissingValues {
/// Individual missing values, up to 3 of them.
- values: Vec<Datum>,
+ values: Vec<EncodedDatum>,
/// Optional range of missing values.
range: Option<MissingValueRange>,
impl Debug for MissingValues {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- DisplayMissingValues {
- mv: self,
- encoding: None,
+ write!(f, "{}", self)
+ }
+}
+
+impl Display for MissingValues {
+ fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+ if let Some(range) = &self.range {
+ write!(f, "{range}")?;
+ if !self.values.is_empty() {
+ write!(f, "; ")?;
+ }
}
- .fmt(f)
+
+ for (i, value) in self.values.iter().enumerate() {
+ if i > 0 {
+ write!(f, "; ")?;
+ }
+ write!(f, "{}", value.quoted())?;
+ }
+
+ if self.is_empty() {
+ write!(f, "none")?;
+ }
+ Ok(())
}
}
}
impl MissingValues {
- pub fn values(&self) -> &[Datum] {
+ /// Resets this to the default state, replacing whatever it held with
+ /// `Self::default()`.
+ pub fn clear(&mut self) {
+ *self = Self::default();
+ }
+ pub fn values(&self) -> &[EncodedDatum] {
&self.values
}
}
pub fn new(
- mut values: Vec<Datum>,
+ mut values: Vec<EncodedDatum>,
range: Option<MissingValueRange>,
) -> Result<Self, MissingValuesError> {
if values.len() > 3 {
let mut var_type = None;
for value in values.iter_mut() {
value.trim_end();
- match value.width() {
- VarWidth::String(w) if w > 8 => return Err(MissingValuesError::TooWide),
- _ => (),
+ if value.width().is_long_string() {
+ return Err(MissingValuesError::TooWide);
}
if var_type.is_some_and(|t| t != value.var_type()) {
return Err(MissingValuesError::MixedTypes);
}
}
- pub fn contains(&self, value: &Datum) -> bool {
+ pub fn contains(&self, value: EncodedDat) -> bool {
if self
.values
.iter()
return true;
}
- match value {
- Datum::Number(Some(number)) => self.range.is_some_and(|range| range.contains(*number)),
- _ => false,
- }
- }
-
- pub fn is_resizable(&self, width: VarWidth) -> bool {
- self.values.iter().all(|datum| datum.is_resizable(width))
- && self.range.iter().all(|range| range.is_resizable(width))
- }
-
- pub fn resize(&mut self, width: VarWidth) {
- for datum in &mut self.values {
- datum.resize(width);
- }
- if let Some(range) = &mut self.range {
- range.resize(width);
- }
- }
-
- pub fn display(&self, encoding: &'static Encoding) -> DisplayMissingValues<'_> {
- DisplayMissingValues {
- mv: self,
- encoding: Some(encoding),
+ if let EncodedDat::Number(Some(number)) = value
+ && let Some(range) = self.range
+ {
+ range.contains(number)
+ } else {
+ false
}
}
-}
-
-pub struct DisplayMissingValues<'a> {
- mv: &'a MissingValues,
- encoding: Option<&'static Encoding>,
-}
-impl<'a> Display for DisplayMissingValues<'a> {
- fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
- if let Some(range) = &self.mv.range {
- write!(f, "{range}")?;
- if !self.mv.values.is_empty() {
- write!(f, "; ")?;
+ pub fn resize(&mut self, width: VarWidth) -> Result<(), ()> {
+ fn inner(this: &mut MissingValues, width: VarWidth) -> Result<(), ()> {
+ for datum in &mut this.values {
+ datum.resize(width)?;
}
- }
-
- for (i, value) in self.mv.values.iter().enumerate() {
- if i > 0 {
- write!(f, "; ")?;
- }
- match self.encoding {
- Some(encoding) => value.display_plain(encoding).fmt(f)?,
- None => value.fmt(f)?,
+ if let Some(range) = &mut this.range {
+ range.resize(width)?;
}
+ Ok(())
}
-
- if self.mv.is_empty() {
- write!(f, "none")?;
- }
- Ok(())
+ inner(self, width).inspect_err(|_| self.clear())
}
}
-#[derive(Copy, Clone, Serialize)]
+#[derive(Copy, Clone, Debug, Serialize)]
pub enum MissingValueRange {
In { low: f64, high: f64 },
From { low: f64 },
}
}
- pub fn is_resizable(&self, width: VarWidth) -> bool {
- width.is_numeric()
- }
-
- pub fn resize(&self, width: VarWidth) {
- assert_eq!(width, VarWidth::Numeric);
+    /// Checks whether this range is compatible with `width`.
+    ///
+    /// Nothing is modified. Returns `Ok(())` if `width` is numeric and
+    /// `Err(())` otherwise.
+    pub fn resize(&self, width: VarWidth) -> Result<(), ()> {
+        width.is_numeric().then_some(()).ok_or(())
}
}