endian::Endian,
format::{Error as FormatError, Format, UncheckedFormat},
identifier::{ByIdentifier, Error as IdError, Identifier},
- sys::encoding::Error as EncodingError,
- sys::raw::{
- self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension, FileAttributeRecord,
- FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, LongNamesRecord,
- LongStringMissingValueRecord, LongStringValueLabelRecord, MissingValues,
- MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord, RawStrArray, RawWidth,
- ValueLabel, ValueLabelRecord, VarDisplayRecord, VariableAttributeRecord, VariableRecord,
- VariableSetRecord, VeryLongStringsRecord, ZHeader, ZTrailer,
+ sys::{
+ encoding::Error as EncodingError,
+ raw::{
+ self, Cases, DecodedRecord, DocumentRecord, EncodedString, EncodingRecord, Extension,
+ FileAttributeRecord, FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName,
+ LongNamesRecord, LongStringMissingValueRecord, LongStringValueLabelRecord,
+ MissingValues, MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord,
+ RawStrArray, RawWidth, ValueLabel, ValueLabelRecord, VarDisplayRecord,
+ VariableAttributeRecord, VariableRecord, VariableSetRecord, VeryLongStringsRecord,
+ ZHeader, ZTrailer,
+ },
},
};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
variable.label = input.label.clone();
- variable.missing_values = input.missing_values.clone();
+ variable.missing_values = input.missing_values.with_encoding(encoding);
variable.print_format = decode_format(
input.print_format,
value.clear();
value.extend_from_slice(v.0.as_slice());
value.resize(variable.width.as_string_width().unwrap(), b' ');
- Value::String(Box::from(value.as_slice()))
+ Value::String(EncodedString::new(&*value, encoding))
})
.collect::<Vec<_>>();
variable.missing_values = MissingValues {
use flate2::read::ZlibDecoder;
use num::Integer;
use std::{
- borrow::{Borrow, Cow},
+ borrow::Cow,
cell::RefCell,
collections::{HashMap, VecDeque},
fmt::{Debug, Display, Formatter, Result as FmtResult},
}
#[derive(Clone, Default)]
-pub struct MissingValues<S = Box<[u8]>>
+pub struct MissingValues<S = RawString>
where
S: Debug,
{
pub values: Vec<Value<S>>,
/// Optional range of missing values.
- pub range: Option<MissingValueRange<S>>,
+ pub range: Option<MissingValueRange>,
}
-#[derive(Clone)]
-pub enum MissingValueRange<S = Box<[u8]>>
-where
- S: Debug,
-{
- In { low: Value<S>, high: Value<S> },
- From { low: Value<S> },
- To { high: Value<S> },
+/// A range of numeric missing values with a lower endpoint, an upper
+/// endpoint, or both.
+///
+/// `Debug` and `PartialEq` are derived so that ranges can be printed in
+/// diagnostics and compared directly.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum MissingValueRange {
+    /// Both endpoints are present (`low THRU high`).
+    In { low: f64, high: f64 },
+    /// Only the lower endpoint is present (`low THRU HIGH`).
+    From { low: f64 },
+    /// Only the upper endpoint is present (`LOW THRU high`).
+    To { high: f64 },
+}
+
+impl MissingValueRange {
+    /// Builds a range from a raw pair of endpoints.
+    ///
+    /// * If `low` is `f64::MIN` or the next representable value above it, the
+    ///   result is [`To`](Self::To) and `low` is dropped.
+    /// * Otherwise, if `high` is `f64::MAX`, the result is
+    ///   [`From`](Self::From) and `high` is dropped.
+    /// * Otherwise the result is [`In`](Self::In).
+    ///
+    /// The `low` test takes precedence, so `new(f64::MIN, f64::MAX)` yields
+    /// `To { high: f64::MAX }`.
+    // NOTE(review): these sentinels look like the raw-record encoding of an
+    // open end -- confirm against the file-format documentation.
+    pub fn new(low: f64, high: f64) -> Self {
+        const LOWEST: f64 = f64::MIN.next_up();
+        if low == f64::MIN || low == LOWEST {
+            Self::To { high }
+        } else if high == f64::MAX {
+            Self::From { low }
+        } else {
+            Self::In { low, high }
+        }
+    }
+
+    /// Returns the lower endpoint, or `None` for a [`To`](Self::To) range.
+    pub fn low(&self) -> Option<f64> {
+        match *self {
+            Self::In { low, .. } | Self::From { low } => Some(low),
+            Self::To { .. } => None,
+        }
+    }
+
+    /// Returns the upper endpoint, or `None` for a [`From`](Self::From) range.
+    pub fn high(&self) -> Option<f64> {
+        match *self {
+            Self::In { high, .. } | Self::To { high } => Some(high),
+            Self::From { .. } => None,
+        }
+    }
}
impl<S> Debug for MissingValues<S>
if !self.values.is_empty() {
write!(f, ", ")?;
}
- match range {
- MissingValueRange::In { low, high } => write!(f, "{low:?} THRU {high:?}")?,
- MissingValueRange::From { low } => write!(f, "{low:?} THRU HI")?,
- MissingValueRange::To { high } => write!(f, "LOW THRU {high:?}")?,
+ match range.low() {
+ Some(low) => write!(f, "{low:?}")?,
+ None => write!(f, "LOW")?,
+ }
+ write!(f, " THRU ")?;
+ match range.high() {
+ Some(high) => write!(f, "{high:?}")?,
+ None => write!(f, "HIGH")?,
}
}
.map(|v| Value::Number(endian.parse(v)))
.collect();
- const LOWEST: f64 = f64::MIN.next_up();
- let range =
- range.map(
- |(low, high)| match (endian.parse(low), endian.parse(high)) {
- (f64::MIN | LOWEST, high) => MissingValueRange::To {
- high: Value::Number(Some(high)),
- },
- (low, f64::MAX) => MissingValueRange::From {
- low: Value::Number(Some(low)),
- },
- (low, high) => MissingValueRange::In {
- low: Value::Number(Some(low)),
- high: Value::Number(Some(high)),
- },
- },
- );
+ let range = range.map(|(low, high)| {
+ MissingValueRange::new(endian.parse(low), endian.parse(high))
+ });
return Ok(Self { values, range });
}
Ok(VarWidth::String(_)) if range.is_some() => warn(Warning::MissingValueStringRange),
let width = width.min(8) as usize;
let values = values
.into_iter()
- .map(|value| Value::String(Box::from(&value[..width])))
+ .map(|value| Value::String(RawString::from(&value[..width])))
.collect();
return Ok(Self {
values,
}
Ok(Self::default())
}
+
+    /// Produces a copy of these missing values in which every individual
+    /// value has been converted with its own `with_encoding(encoding)`.
+    ///
+    /// The numeric range, if any, is copied over unchanged.
+    pub fn with_encoding(&self, encoding: &'static Encoding) -> MissingValues<EncodedString> {
+        let mut values = Vec::with_capacity(self.values.len());
+        for value in &self.values {
+            values.push(value.with_encoding(encoding));
+        }
+        MissingValues {
+            values,
+            range: self.range,
+        }
+    }
}
#[derive(Clone)]
pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
EncodedStr::new(&self.0, encoding)
}
+    /// Copies the underlying bytes into an owned `EncodedString` tagged with
+    /// `encoding`.
+    pub fn with_encoding(&self, encoding: &'static Encoding) -> EncodedString {
+        let bytes = self.as_slice();
+        EncodedString::new(bytes, encoding)
+    }
+ /// Returns the underlying bytes as a slice.
pub fn as_slice(&self) -> &[u8] {
&*self.0
}
}
}
-#[derive(Clone, Debug)]
+/// An owned string held either as raw bytes tagged with their encoding or as
+/// a Rust `String`.
+#[derive(Clone)]
pub enum EncodedString {
+ /// Raw `bytes` to be interpreted according to `encoding`.
Encoded {
bytes: Vec<u8>,
encoding: &'static Encoding,
},
- Utf8 {
- s: String,
- },
+ /// Text held as a `String`, and therefore valid UTF-8.
+ Utf8(String),
}
impl EncodedString {
+    /// Builds an `EncodedString` out of `bytes` tagged with `encoding`.
+    ///
+    /// When `encoding` is UTF-8 and `bytes` validate as UTF-8, the result is
+    /// the `Utf8` variant; in every other case the bytes are kept as given in
+    /// the `Encoded` variant.
+    ///
+    /// It's cheaper to use `EncodedString::from(string)` if the input is in a
+    /// `&str` or `String`.
+    pub fn new(bytes: impl Into<Vec<u8>>, encoding: &'static Encoding) -> Self {
+        let bytes: Vec<u8> = bytes.into();
+        if encoding != UTF_8 {
+            return Self::Encoded { bytes, encoding };
+        }
+        // On failure `from_utf8` hands the original buffer back through the
+        // error, so the bytes are reused instead of copied.
+        String::from_utf8(bytes).map_or_else(
+            |invalid| Self::Encoded {
+                bytes: invalid.into_bytes(),
+                encoding,
+            },
+            Self::Utf8,
+        )
+    }
+
+ /// Returns a borrowed `EncodedStr` view of this string: `Encoded` maps to
+ /// `EncodedStr::Encoded` and `Utf8` maps to `EncodedStr::Utf8`.
pub fn borrowed(&self) -> EncodedStr<'_> {
match self {
EncodedString::Encoded { bytes, encoding } => EncodedStr::Encoded { bytes, encoding },
- EncodedString::Utf8 { s } => EncodedStr::Utf8 { s },
+ EncodedString::Utf8(s) => EncodedStr::Utf8 { s },
}
}
pub fn as_utf8_bytes(&self) -> Option<&[u8]> {
match self {
+ // Bytes tagged as UTF-8 are returned exactly as stored. `new` keeps
+ // input that failed UTF-8 validation in this variant, so the slice is
+ // not guaranteed to be well-formed UTF-8.
EncodedString::Encoded { bytes, encoding } if *encoding == UTF_8 => Some(&bytes),
- EncodedString::Utf8 { s } => Some(s.as_bytes()),
+ EncodedString::Utf8(s) => Some(s.as_bytes()),
+ // Any other encoding has no UTF-8 byte view.
_ => None,
}
}
pub fn as_encoded(&self) -> (&[u8], &'static Encoding) {
match self {
EncodedString::Encoded { bytes, encoding } => (&bytes, encoding),
+ // A `String` is always UTF-8, so its bytes are paired with `UTF_8`.
- EncodedString::Utf8 { s } => (s.as_bytes(), UTF_8),
+ EncodedString::Utf8(s) => (s.as_bytes(), UTF_8),
}
}
}
+/// Debug output: a `Utf8` string prints as its `Debug` form; an `Encoded`
+/// string prints the `Debug` form of its `to_utf8()` conversion followed by
+/// the encoding name in parentheses.
+impl Debug for EncodedString {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        match self {
+            // Only the encoding is needed from the pattern; the text is
+            // obtained through `borrowed()`, so the bytes are not bound
+            // (binding them would trigger an `unused_variables` warning).
+            Self::Encoded { encoding, .. } => {
+                write!(f, "{:?}({})", self.borrowed().to_utf8(), encoding.name())
+            }
+            Self::Utf8(string) => write!(f, "{string:?}"),
+        }
+    }
+}
+
+impl From<String> for EncodedString {
+    /// Wraps an owned `String` in the `Utf8` variant; the string is moved,
+    /// not copied.
+    fn from(string: String) -> Self {
+        EncodedString::Utf8(string)
+    }
+}
+
+impl From<&str> for EncodedString {
+    /// Copies `text` into a new `String` held by the `Utf8` variant.
+    fn from(text: &str) -> Self {
+        EncodedString::Utf8(String::from(text))
+    }
+}
+
+impl Default for EncodedString {
+    /// The default value is the empty string in the `Utf8` variant.
+    fn default() -> Self {
+        EncodedString::Utf8(String::default())
+    }
+}
+
impl<'a> From<EncodedStr<'a>> for EncodedString {
fn from(value: EncodedStr<'a>) -> Self {
match value {
bytes: bytes.into(),
encoding,
},
- EncodedStr::Utf8 { s } => Self::Utf8 { s: s.into() },
+ EncodedStr::Utf8 { s } => Self::Utf8(s.into()),
}
}
}