};
use encoding_rs::Encoding;
+use enum_map::{Enum, EnumMap};
use indexmap::IndexSet;
use num::integer::div_ceil;
use ordered_float::OrderedFloat;
use crate::{
format::Format,
identifier::{ByIdentifier, HasIdentifier, Identifier},
+ output::pivot::{Axis3, Dimension, Group, PivotTable, Value as PivotValue},
sys::raw::{Alignment, CategoryLabels, Measure, MissingValues, RawString, VarType},
};
pub fn rename_var(&mut self, index: usize, new_name: Identifier) {
assert!(self.try_rename_var(index, new_name));
}
+
+    /// Returns a [DisplayVariables] helper that borrows this dictionary.
+    ///
+    /// Call [DisplayVariables::to_pivot_table] on the result to obtain a
+    /// table describing the dictionary's variables.
+    pub fn display_variables(&self) -> DisplayVariables {
+        let dictionary = self;
+        DisplayVariables::new(dictionary)
+    }
+}
+
+/// Builds a pivot table that describes the variables in a [Dictionary].
+pub struct DisplayVariables<'a> {
+    /// The dictionary whose variables are described.
+    dictionary: &'a Dictionary,
+
+    /// For each [VariableField], whether it becomes a column of the table.
+    fields: EnumMap<VariableField, bool>,
+}
+
+impl<'a> DisplayVariables<'a> {
+    /// Creates a helper for `dictionary` with every [VariableField] enabled.
+    fn new(dictionary: &'a Dictionary) -> Self {
+        let fields = EnumMap::from_fn(|_field: VariableField| true);
+        Self { dictionary, fields }
+    }
+
+    /// Builds a two-dimensional pivot table with one row per variable (the
+    /// "Name" dimension on the Y axis) and one column per enabled field (the
+    /// "Attributes" dimension on the X axis).
+    ///
+    /// Cells for which a variable has no value (see `get_field_value`) are
+    /// left empty.
+    pub fn to_pivot_table(&self) -> PivotTable {
+        let variables = &self.dictionary.variables;
+
+        // Row headings: one per variable, in dictionary order.
+        let mut names = Group::new("Name");
+        for variable in variables {
+            names.push(PivotValue::new_variable(variable));
+        }
+
+        // Column headings: one per enabled field.  `columns` remembers, for
+        // each enabled field, the index of its category in `attributes`.
+        let mut attributes = Group::new("Attributes");
+        let mut columns = Vec::new();
+        for (field, include) in self.fields.iter() {
+            if *include {
+                columns.push((field, attributes.len()));
+                attributes.push(field.as_str());
+            }
+        }
+
+        let mut table = PivotTable::new(vec![
+            (Axis3::Y, Dimension::new(names)),
+            (Axis3::X, Dimension::new(attributes)),
+        ]);
+
+        let encoding = self.dictionary.encoding;
+        for (var_index, variable) in variables.iter().enumerate() {
+            for &(field, field_index) in &columns {
+                let Some(value) = Self::get_field_value(var_index, variable, field, encoding)
+                else {
+                    continue;
+                };
+                table.insert(&[var_index, field_index], value);
+            }
+        }
+
+        table
+    }
+
+    /// Returns the cell value for `field` of `variable`, or `None` if the
+    /// variable has nothing to show for that field (no label, measurement
+    /// level, or role).
+    ///
+    /// `index` is the variable's 0-based position; it is shown 1-based.
+    /// `encoding` is used to decode the variable's missing values.
+    fn get_field_value(
+        index: usize,
+        variable: &Variable,
+        field: VariableField,
+        encoding: &'static Encoding,
+    ) -> Option<PivotValue> {
+        let value = match field {
+            VariableField::Position => PivotValue::new_integer(Some(index as f64 + 1.0)),
+            VariableField::Label => PivotValue::new_user_text(variable.label()?),
+            VariableField::Measure => PivotValue::new_text(variable.measure?.as_str()),
+            VariableField::Role => PivotValue::new_text(variable.role?.as_str()),
+            VariableField::Width => PivotValue::new_integer(Some(variable.display_width as f64)),
+            VariableField::Alignment => PivotValue::new_text(variable.alignment.as_str()),
+            VariableField::PrintFormat => {
+                PivotValue::new_user_text(variable.print_format.to_string())
+            }
+            VariableField::WriteFormat => {
+                PivotValue::new_user_text(variable.write_format.to_string())
+            }
+            VariableField::MissingValues => {
+                PivotValue::new_user_text(variable.missing_values.display(encoding).to_string())
+            }
+        };
+        Some(value)
+    }
+}
+
+/// A per-variable attribute that [DisplayVariables] can show as a column.
+///
+/// The variants are listed in the order in which columns are emitted.
+#[derive(Copy, Clone, Debug, Enum)]
+enum VariableField {
+    /// 1-based position of the variable within the dictionary.
+    Position,
+
+    /// Variable label, if the variable has one.
+    Label,
+
+    /// Measurement level, if the variable has one.
+    Measure,
+
+    /// Role, if the variable has one.
+    Role,
+
+    /// Display width.
+    Width,
+
+    /// Display alignment.
+    Alignment,
+
+    /// Print format.
+    PrintFormat,
+
+    /// Write format.
+    WriteFormat,
+
+    /// User-missing values.
+    MissingValues,
+}
+
+impl VariableField {
+    /// Returns the column heading used for this field.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Position => "Position",
+            Self::Label => "Label",
+            Self::Measure => "Measurement Level",
+            Self::Role => "Role",
+            Self::Width => "Width",
+            Self::Alignment => "Alignment",
+            Self::PrintFormat => "Print Format",
+            Self::WriteFormat => "Write Format",
+            Self::MissingValues => "Missing Values",
+        }
+    }
}
fn update_dict_index_vec<F>(dict_indexes: &mut Vec<DictIndex>, f: F)
_ => Err(InvalidRole::UnknownRole(integer.to_string())),
}
}
+
+    /// Returns the name of this role as shown to the user.
+    fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Input => "Input",
+            Self::Target => "Target",
+            Self::Both => "Both",
+            Self::Partition => "Partition",
+            Self::Split => "Split",
+        }
+    }
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub fn is_string(&self) -> bool {
self.width.is_string()
}
+
+    /// Returns this variable's label, or `None` if it has none.
+    pub fn label(&self) -> Option<&String> {
+        let label = &self.label;
+        label.as_ref()
+    }
}
impl HasIdentifier for Variable {
pub fn display(&self, format: Format, encoding: &'static Encoding) -> DisplayValue {
DisplayValue::new(format, self, encoding)
}
+
+    /// Returns an object that implements [Display] for printing this value
+    /// without a print format, decoding strings with `encoding`.
+    ///
+    /// Strings are quoted by default; see [DisplayValuePlain::without_quotes].
+    pub fn display_plain(&self, encoding: &'static Encoding) -> DisplayValuePlain {
+        let value = self;
+        DisplayValuePlain {
+            value,
+            encoding,
+            quote_strings: true,
+        }
+    }
+}
+
+/// Helper returned by `Value::display_plain` that implements [Display].
+pub struct DisplayValuePlain<'a> {
+    /// The value to print.
+    value: &'a Value,
+
+    /// Encoding used to decode string values.
+    encoding: &'static Encoding,
+
+    /// Whether string values are wrapped in double quotes.
+    quote_strings: bool,
+}
+
+impl DisplayValuePlain<'_> {
+    /// Returns `self` reconfigured so that string values are printed without
+    /// surrounding double quotes.
+    pub fn without_quotes(mut self) -> Self {
+        self.quote_strings = false;
+        self
+    }
+}
+
+impl Display for DisplayValuePlain<'_> {
+    /// Writes the value without reference to any print format.
+    ///
+    /// * The system-missing value is written as `SYSMIS`.
+    /// * A nonzero number whose magnitude is below 0.0005 or above 1e15 is
+    ///   written in exponent notation; every other number is written in
+    ///   plain decimal notation.
+    /// * A string is decoded with the stored encoding and, unless quoting
+    ///   was disabled, wrapped in double quotes.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        match self.value {
+            Value::Number(None) => write!(f, "SYSMIS"),
+            Value::Number(Some(number)) => {
+                let magnitude = number.abs();
+                // Exponent notation is reserved for extreme magnitudes, where
+                // plain notation would yield very long digit strings.  Zero
+                // is excluded so that it still prints as `0`.
+                let extreme = (magnitude > 0.0 && magnitude < 0.0005) || magnitude > 1e15;
+                if extreme {
+                    write!(f, "{number:.e}")
+                } else {
+                    write!(f, "{number:.}")
+                }
+            }
+            Value::String(string) => {
+                if self.quote_strings {
+                    write!(f, "\"{}\"", string.display(self.encoding))
+                } else {
+                    string.display(self.encoding).fmt(f)
+                }
+            }
+        }
+    }
}
impl Display for DisplayValue<'_, '_> {
pub fn class(&self) -> Class {
self.into()
}
+
+    /// Returns the identifier's text as a string slice.
+    pub fn as_str(&self) -> &str {
+        let text: &str = self.0.as_ref();
+        text
+    }
}
impl PartialEq<str> for Identifier {
use tlo::parse_tlo;
use crate::{
- dictionary::Value as DataValue,
+ dictionary::{Value as DataValue, Variable},
format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat},
settings::{Settings, Show},
sys::raw::VarType,
mod look_xml;
#[cfg(test)]
-mod test;
+pub mod test;
mod tlo;
/// Areas of a pivot table for styling purposes.
}
}
+/// Wraps a [Leaf] as a [Category::Leaf].
+impl From<Leaf> for Category {
+    fn from(leaf: Leaf) -> Self {
+        Category::Leaf(leaf)
+    }
+}
+
impl From<Value> for Category {
fn from(name: Value) -> Self {
- Self::Leaf(Leaf::new(name))
+ Leaf::new(name).into()
}
}
cell_index(data_indexes, self.dimensions.iter().map(|d| d.len()))
}
- fn insert(&mut self, data_indexes: &[usize], value: Value) {
+ pub fn insert(&mut self, data_indexes: &[usize], value: Value) {
self.cells.insert(self.cell_index(data_indexes), value);
}
- fn get(&self, data_indexes: &[usize]) -> Option<&Value> {
+ pub fn get(&self, data_indexes: &[usize]) -> Option<&Value> {
self.cells.get(&self.cell_index(data_indexes))
}
value_label: None,
}))
}
+    /// Creates a value that refers to `variable`, copying its name and its
+    /// label (if any).  The `show` setting is left unspecified.
+    pub fn new_variable(variable: &Variable) -> Self {
+        let var_name = variable.name.as_str().to_owned();
+        let variable_label = variable.label.clone();
+        let inner = VariableValue {
+            show: None,
+            var_name,
+            variable_label,
+        };
+        Self::new(ValueInner::Variable(inner))
+    }
pub fn new_number(x: Option<f64>) -> Self {
Self::new_number_with_format(x, Format::F8_2)
}
}
#[track_caller]
-fn assert_rendering(name: &str, pivot_table: &PivotTable, expected: &str) {
+pub fn assert_rendering(name: &str, pivot_table: &PivotTable, expected: &str) {
let actual = pivot_table.to_string();
if actual != expected {
eprintln!("Unexpected pivot table rendering:\n--- expected\n+++ actual");
endian::Endian,
format::{Error as FormatError, Format, UncheckedFormat},
identifier::{ByIdentifier, Error as IdError, Identifier},
- sys::encoding::Error as EncodingError,
- sys::raw::{
- self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension, FileAttributeRecord,
- FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, LongNamesRecord,
- LongStringMissingValueRecord, LongStringValueLabelRecord, MissingValues,
- MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord, RawStrArray, RawWidth,
- ValueLabel, ValueLabelRecord, VarDisplayRecord, VariableAttributeRecord, VariableRecord,
- VariableSetRecord, VeryLongStringsRecord, ZHeader, ZTrailer,
+ sys::{
+ encoding::Error as EncodingError,
+ raw::{
+ self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension,
+ FileAttributeRecord, FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName,
+ LongNamesRecord, LongStringMissingValueRecord, LongStringValueLabelRecord,
+ MissingValues, MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord,
+ RawStrArray, RawString, RawWidth, ValueLabel, ValueLabelRecord, VarDisplayRecord,
+ VariableAttributeRecord, VariableRecord, VariableSetRecord, VeryLongStringsRecord,
+ ZHeader, ZTrailer,
+ },
},
};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
}
}
- let mut value = Vec::new();
for record in headers
.long_string_missing_values
.drain(..)
.missing_values
.into_iter()
.map(|v| {
- value.clear();
- value.extend_from_slice(v.0.as_slice());
- value.resize(variable.width.as_string_width().unwrap(), b' ');
- Value::String(Box::from(value.as_slice()))
+ let mut value = RawString::from(v.0.as_slice());
+ value.resize(variable.width.as_string_width().unwrap());
+ Value::String(value)
})
.collect::<Vec<_>>();
variable.missing_values = MissingValues {
use flate2::read::ZlibDecoder;
use num::Integer;
use std::{
- borrow::Cow,
+ borrow::{Borrow, Cow},
cell::RefCell,
collections::{HashMap, VecDeque},
fmt::{Debug, Display, Formatter, Result as FmtResult},
io::{Error as IoError, Read, Seek, SeekFrom},
mem::take,
num::NonZeroU8,
- ops::Range,
+ ops::{Deref, Range},
rc::Rc,
str::from_utf8,
};
}
#[derive(Clone, Default)]
-pub struct MissingValues<S = Box<[u8]>>
-where
- S: Debug,
-{
+pub struct MissingValues {
/// Individual missing values, up to 3 of them.
- pub values: Vec<Value<S>>,
+ pub values: Vec<Value>,
/// Optional range of missing values.
pub range: Option<MissingValueRange>,
}
-#[derive(Copy, Clone)]
-pub enum MissingValueRange {
- In { low: f64, high: f64 },
- From { low: f64 },
- To { high: f64 },
-}
-
-impl MissingValueRange {
- pub fn new(low: f64, high: f64) -> Self {
- const LOWEST: f64 = f64::MIN.next_up();
- match (low, high) {
- (f64::MIN | LOWEST, _) => Self::To { high },
- (_, f64::MAX) => Self::From { low },
- (_, _) => Self::In { low, high },
- }
- }
-
- pub fn low(&self) -> Option<f64> {
- match self {
- MissingValueRange::In { low, .. } | MissingValueRange::From { low } => Some(*low),
- MissingValueRange::To { .. } => None,
- }
- }
-
- pub fn high(&self) -> Option<f64> {
- match self {
- MissingValueRange::In { high, .. } | MissingValueRange::To { high } => Some(*high),
- MissingValueRange::From { .. } => None,
- }
- }
-}
-
-impl<S> Debug for MissingValues<S>
-where
- S: Debug,
-{
+impl Debug for MissingValues {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- for (i, value) in self.values.iter().enumerate() {
- if i > 0 {
- write!(f, ", ")?;
- }
- write!(f, "{value:?}")?;
- }
-
- if let Some(range) = &self.range {
- if !self.values.is_empty() {
- write!(f, ", ")?;
- }
- match range {
- MissingValueRange::In { low, high } => write!(f, "{low:?} THRU {high:?}")?,
- MissingValueRange::From { low } => write!(f, "{low:?} THRU HI")?,
- MissingValueRange::To { high } => write!(f, "LOW THRU {high:?}")?,
- }
+ DisplayMissingValues {
+ mv: self,
+ encoding: None,
}
-
- if self.is_empty() {
- write!(f, "none")?;
- }
-
- Ok(())
+ .fmt(f)
}
}
-impl<S> MissingValues<S>
-where
- S: Debug,
-{
+impl MissingValues {
fn is_empty(&self) -> bool {
self.values.is_empty() && self.range.is_none()
}
-}
-impl MissingValues {
fn read<R: Read + Seek>(
r: &mut R,
offset: u64,
let width = width.min(8) as usize;
let values = values
.into_iter()
- .map(|value| Value::String(Box::from(&value[..width])))
+ .map(|value| Value::String(RawString::from(&value[..width])))
.collect();
return Ok(Self {
values,
}
Ok(Self::default())
}
+
+    /// Returns an object that implements [Display] for printing these
+    /// missing values, decoding string values with `encoding`.
+    pub fn display(&self, encoding: &'static Encoding) -> DisplayMissingValues<'_> {
+        let encoding = Some(encoding);
+        DisplayMissingValues { mv: self, encoding }
+    }
+}
+
+/// Helper that implements [Display] for a set of [MissingValues].
+pub struct DisplayMissingValues<'a> {
+    /// The missing values to print.
+    mv: &'a MissingValues,
+
+    /// Encoding used to print individual values via `display_plain`.  When
+    /// this is `None`, each value's own `fmt` implementation is used instead.
+    encoding: Option<&'static Encoding>,
+}
+
+impl<'a> Display for DisplayMissingValues<'a> {
+    /// Writes the individual values and then the range (if any), separated
+    /// by `, `.  Writes `none` if there are neither values nor a range.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        // Becomes true once anything has been written, so that the next item
+        // is preceded by a separator.
+        let mut need_separator = false;
+
+        for value in &self.mv.values {
+            if need_separator {
+                write!(f, ", ")?;
+            }
+            need_separator = true;
+
+            match self.encoding {
+                Some(encoding) => value.display_plain(encoding).fmt(f)?,
+                None => value.fmt(f)?,
+            }
+        }
+
+        if let Some(range) = &self.mv.range {
+            if need_separator {
+                write!(f, ", ")?;
+            }
+            write!(f, "{range}")?;
+        }
+
+        if self.mv.is_empty() {
+            write!(f, "none")?;
+        }
+        Ok(())
+    }
+}
+
+/// A range of numeric missing values.
+#[derive(Copy, Clone)]
+pub enum MissingValueRange {
+    /// From `low` to `high` (displayed as `low THRU high`).
+    In { low: f64, high: f64 },
+
+    /// From `low` upward, with no upper bound (displayed as `low THRU HI`).
+    From { low: f64 },
+
+    /// Up to `high`, with no lower bound (displayed as `LOW THRU high`).
+    To { high: f64 },
+}
+
+impl MissingValueRange {
+    /// Creates a range from a pair of endpoints.
+    ///
+    /// If `low` is `f64::MIN` or the next representable value above it, the
+    /// range has no lower bound.  Otherwise, if `high` is `f64::MAX`, the
+    /// range has no upper bound.  Otherwise the range has both bounds.
+    pub fn new(low: f64, high: f64) -> Self {
+        const LOWEST: f64 = f64::MIN.next_up();
+        if low == f64::MIN || low == LOWEST {
+            Self::To { high }
+        } else if high == f64::MAX {
+            Self::From { low }
+        } else {
+            Self::In { low, high }
+        }
+    }
+
+    /// Returns the lower bound, or `None` if the range has no lower bound.
+    pub fn low(&self) -> Option<f64> {
+        match *self {
+            Self::In { low, .. } | Self::From { low } => Some(low),
+            Self::To { .. } => None,
+        }
+    }
+
+    /// Returns the upper bound, or `None` if the range has no upper bound.
+    pub fn high(&self) -> Option<f64> {
+        match *self {
+            Self::In { high, .. } | Self::To { high } => Some(high),
+            Self::From { .. } => None,
+        }
+    }
+}
+
+impl Display for MissingValueRange {
+    /// Writes the range as `low THRU high`, `low THRU HI`, or
+    /// `LOW THRU high`, with each bound in `f64`'s `Debug` format.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        match *self {
+            Self::In { low, high } => write!(f, "{low:?} THRU {high:?}"),
+            Self::From { low } => write!(f, "{low:?} THRU HI"),
+            Self::To { high } => write!(f, "LOW THRU {high:?}"),
+        }
+    }
}
#[derive(Clone)]
}
}
+/// An owned string in an unspecified encoding.
+///
+/// We assume that the encoding is one supported by [encoding_rs] with byte
+/// units (that is, not a `UTF-16` encoding). All of these encodings have some
+/// basic ASCII compatibility.
+///
+/// A [RawString] owns its contents and can grow and shrink, like a [Vec] or
+/// [String]. For a borrowed raw string, see [RawStr].
#[derive(Clone, PartialEq, Default, Eq, PartialOrd, Ord, Hash)]
pub struct RawString(pub Vec<u8>);
pub fn as_encoded(&self, encoding: &'static Encoding) -> EncodedStr<'_> {
EncodedStr::new(&self.0, encoding)
}
+    /// Resizes the string to exactly `len` bytes, truncating it if it is
+    /// longer or padding it on the right with ASCII spaces if it is shorter.
+    pub fn resize(&mut self, len: usize) {
+        let Self(bytes) = self;
+        bytes.resize(len, b' ');
+    }
+}
+
+/// Lets a [RawString] be borrowed as a [RawStr] over the same bytes.
+impl Borrow<RawStr> for RawString {
+    fn borrow(&self) -> &RawStr {
+        RawStr::from_bytes(self.0.as_slice())
+    }
+}
+
+impl Deref for RawString {
+ type Target = RawStr;
+
+    /// Dereferences to a [RawStr] view of the same bytes, by way of the
+    /// `Borrow<RawStr>` implementation.
+    fn deref(&self) -> &Self::Target {
+        <Self as Borrow<RawStr>>::borrow(self)
+    }
}
impl From<Cow<'_, [u8]>> for RawString {
}
}
+/// A borrowed byte string whose encoding is not recorded in the type.
+///
+/// The encoding is assumed to be one that [encoding_rs] supports and that
+/// uses byte units (so not a `UTF-16` encoding).  Every such encoding is
+/// ASCII-compatible to a basic degree.
+///
+/// This is the borrowed counterpart of the owned [RawString].
+#[repr(transparent)]
+#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct RawStr(pub [u8]);
+
+impl RawStr {
+    /// Reinterprets `bytes` as a [RawStr] without copying.
+    pub fn from_bytes(bytes: &[u8]) -> &Self {
+        let ptr = bytes as *const [u8] as *const Self;
+        // SAFETY: `RawStr` is declared `#[repr(transparent)]` over `[u8]`,
+        // so a pointer to the wrapped slice is also a valid pointer to the
+        // wrapper, and `ptr` was derived from a live shared reference.
+        unsafe { &*ptr }
+    }
+
+    /// Returns the underlying bytes.
+    pub fn as_bytes(&self) -> &[u8] {
+        let Self(bytes) = self;
+        bytes
+    }
+
+    /// Returns an object that implements [Display] for printing this [RawStr],
+    /// decoding it as `encoding` without any byte-order-mark handling.
+    pub fn display(&self, encoding: &'static Encoding) -> DisplayRawString {
+        let (decoded, _had_errors) = encoding.decode_without_bom_handling(&self.0);
+        DisplayRawString(decoded)
+    }
+}
+
+/// Already-decoded text of a [RawStr], returned by `RawStr::display`.
+pub struct DisplayRawString<'a>(Cow<'a, str>);
+
+impl<'a> Display for DisplayRawString<'a> {
+    /// Writes the decoded text verbatim.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        f.write_str(&self.0)
+    }
+}
+
+impl Debug for RawStr {
+    /// Writes the `Debug` form of the result of `default_decode` applied to
+    /// the underlying bytes.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let decoded = default_decode(self.as_bytes());
+        write!(f, "{decoded:?}")
+    }
+}
+
#[derive(Copy, Clone)]
pub struct RawStrArray<const N: usize>(pub [u8; N]);
_ => Err(Warning::InvalidMeasurement(source)),
}
}
+
+    /// Returns the name of this measurement level as shown to the user.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Nominal => "Nominal",
+            Self::Ordinal => "Ordinal",
+            Self::Scale => "Scale",
+        }
+    }
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
VarType::String => Self::Left,
}
}
+
+    /// Returns the name of this alignment as shown to the user.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Left => "Left",
+            Self::Right => "Right",
+            Self::Center => "Center",
+        }
+    }
}
#[derive(Clone, Debug)]
use crate::{
endian::Endian,
+ output::pivot::test::assert_rendering,
sys::{
cooked::{decode, Headers},
raw::{encoding_from_headers, Decoder, Reader, Record},
assert_eq!(metadata.n_cases, Some(1));
assert_eq!(metadata.version, Some((1, 2, 3)));
println!("{metadata:#?}");
+ let pt = dictionary.display_variables().to_pivot_table();
+ assert_rendering("variable_labels_and_missing_values", &pt, "");
}