use std::{
borrow::Cow,
cmp::Ordering,
- collections::{BTreeSet, HashMap, HashSet},
+ collections::{BTreeMap, BTreeSet, HashMap, HashSet},
fmt::{Debug, Formatter, Result as FmtResult},
hash::Hash,
ops::{Bound, RangeBounds, RangeInclusive},
OutputMrsets::new(self)
}
+ /// Returns an [`OutputAttributes`] helper that borrows this dictionary.
+ ///
+ /// The helper's `to_pivot_table` method renders the attributes as a pivot
+ /// table.
+ pub fn output_attributes(&self) -> OutputAttributes {
+ OutputAttributes::new(self)
+ }
+
pub fn to_pivot_rows(&self) -> (Group, Vec<Value>) {
let mut group = Group::new("Dictionary Information");
let mut values = Vec::new();
}
}
+/// Helper that renders the attributes attached to a [`Dictionary`] and to its
+/// variables as a pivot table.
+///
+/// A new helper has `include_at` set; `without_at` clears it.
+pub struct OutputAttributes<'a> {
+ /// The dictionary whose attributes are reported.
+ dictionary: &'a Dictionary,
+
+ /// Include attributes whose names begin with `@`?
+ include_at: bool,
+}
+
+impl<'a> OutputAttributes<'a> {
+    /// Creates a helper for `dictionary`.  By default, attributes whose names
+    /// begin with `@` are included.
+    fn new(dictionary: &'a Dictionary) -> Self {
+        Self {
+            dictionary,
+            include_at: true,
+        }
+    }
+
+    /// Returns this helper reconfigured to leave out attributes whose names
+    /// begin with `@`.
+    pub fn without_at(self) -> Self {
+        Self {
+            include_at: false,
+            ..self
+        }
+    }
+
+    /// Returns true if the dictionary itself or any of its variables has at
+    /// least one attribute that passes the `include_at` filter.
+    pub fn any_attributes(&self) -> bool {
+        self.attribute_sets().next().is_some()
+    }
+
+    /// Iterates over the attribute sets that have something to report.
+    ///
+    /// The dictionary's own attributes come first, paired with `None`,
+    /// followed by each variable's attributes paired with `Some(variable)`.
+    /// Sets with no attribute passing the `include_at` filter are skipped.
+    fn attribute_sets(&self) -> impl Iterator<Item = (Option<&Variable>, &Attributes)> {
+        std::iter::once((None, &self.dictionary.attributes))
+            .chain(
+                self.dictionary
+                    .variables
+                    .iter()
+                    .map(|var| (Some(&**var), &var.attributes)),
+            )
+            .filter(|(_name, attributes)| attributes.has_any(self.include_at))
+    }
+
+    /// Builds a pivot table with one row per attribute value and a single
+    /// "Value" column, or returns `None` if there is nothing to report.
+    ///
+    /// Rows are grouped by owner: `(dataset)` for the dictionary's own
+    /// attributes, then one group per variable.  An attribute with exactly
+    /// one value is labeled with its name; otherwise each value gets its own
+    /// row labeled `name[index]`, with indexes starting at 1.
+    pub fn to_pivot_table(&self) -> Option<PivotTable> {
+        if !self.any_attributes() {
+            return None;
+        }
+
+        let mut variables = Group::new("Variable and Name").with_label_shown();
+        let mut data = Vec::new();
+        for (variable, attributes) in self.attribute_sets() {
+            let group_name = match variable {
+                Some(variable) => Value::new_variable(variable),
+                None => Value::new_text("(dataset)"),
+            };
+            let mut group = Group::new(group_name);
+            // Go through `Attributes::iter` rather than the raw map so that
+            // the `include_at` filter applies to individual attributes, not
+            // only to the decision of whether a set is shown at all.
+            for (name, values) in attributes.iter(self.include_at) {
+                if values.len() == 1 {
+                    group.push(Value::new_user_text(name.as_str()));
+                } else {
+                    for index in 1..=values.len() {
+                        group.push(Value::new_user_text(format!("{name}[{index}]")));
+                    }
+                }
+                // Row labels and data are pushed in the same order, so the
+                // n-th datum belongs to the n-th row of the table.
+                for value in values {
+                    data.push(Value::new_user_text(value));
+                }
+            }
+            variables.push(group);
+        }
+        let values = Group::new("Value").with("Value");
+        let mut pt = PivotTable::new([
+            (Axis3::X, Dimension::new(values)),
+            (Axis3::Y, Dimension::new(variables)),
+        ]);
+        for (row, datum) in data.into_iter().enumerate() {
+            pt.insert(&[0, row], datum);
+        }
+        Some(pt)
+    }
+}
+
fn update_dict_index_vec<F>(dict_indexes: &mut Vec<DictIndex>, f: F)
where
F: Fn(DictIndex) -> Option<DictIndex>,
}
+/// A set of named attributes, each mapped to a list of string values.
+///
+/// The map is a `BTreeMap`, so iterating over it visits the attributes in
+/// `Identifier` order.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
-pub struct Attributes(pub HashMap<Identifier, Vec<String>>);
+pub struct Attributes(pub BTreeMap<Identifier, Vec<String>>);
impl Attributes {
+ /// Creates an empty attribute set.
pub fn new() -> Self {
- Self(HashMap::new())
+ Self(BTreeMap::new())
}
+ /// Moves every attribute from `other` into `self`, leaving `other` empty.
+ /// For a name present in both, the values from `other` replace those in
+ /// `self`.
pub fn append(&mut self, other: &mut Self) {
- self.0.extend(other.0.drain())
+ self.0.append(&mut other.0)
}
+ /// Derives a [`Role`] from these attributes by delegating to the
+ /// `TryInto` conversion for `&Attributes`, which is defined elsewhere.
pub fn role(&self) -> Result<Option<Role>, InvalidRole> {
self.try_into()
}
+
+ /// Iterates over the attributes as `(name, values)` pairs.
+ ///
+ /// If `include_at` is false, attributes whose names begin with `@` are
+ /// skipped.
+ pub fn iter(&self, include_at: bool) -> impl Iterator<Item = (&Identifier, &[String])> {
+ self.0.iter().filter_map(move |(name, values)| {
+ if include_at || !name.0.starts_with('@') {
+ Some((name, values.as_slice()))
+ } else {
+ None
+ }
+ })
+ }
+
+ /// Returns true if [`iter`](Self::iter) would yield at least one attribute
+ /// for the given `include_at`.
+ pub fn has_any(&self, include_at: bool) -> bool {
+ self.iter(include_at).next().is_some()
+ }
}
#[derive(Clone, Debug, ThisError, PartialEq, Eq)]
impl Display for Identifier {
+ /// Writes the inner value using its own `Display` implementation.
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- write!(f, "{:?}", self.0)
+ write!(f, "{}", self.0)
}
}
use std::{
borrow::{Borrow, Cow},
cell::RefCell,
- collections::{HashMap, VecDeque},
+ collections::{BTreeMap, VecDeque},
fmt::{Debug, Display, Formatter, Result as FmtResult},
io::{Error as IoError, Read, Seek, SeekFrom},
mem::take,
mut input: &'a str,
sentinel: Option<char>,
) -> Result<(Attributes, &'a str), Warning> {
- let mut attributes = HashMap::new();
+ let mut attributes = BTreeMap::new();
let rest = loop {
match input.chars().next() {
None => break input,
test_sysfile("very_long_strings");
}
+/// Runs the shared `test_sysfile` driver on the `attributes` test case.
+#[test]
+fn attributes() {
+ test_sysfile("attributes");
+}
+
fn test_sysfile(name: &str) {
let input_filename = Path::new(env!("CARGO_MANIFEST_DIR"))
.join("src/sys/testdata")
if let Some(pt) = dictionary.output_mrsets().to_pivot_table() {
output.push(Arc::new(pt.into()));
}
+ if let Some(pt) = dictionary.output_attributes().to_pivot_table() {
+ output.push(Arc::new(pt.into()));
+ }
if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() {
output.push(Arc::new(pt.into()));
}
--- /dev/null
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+│ Version │1.2.3 │
+├──────────────────────┼────────────────────────┤
+│ Compression │None │
+│ Number of Cases│ 0│
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬────────────────────────╮
+│Label │PSPP synthetic test file│
+│Variables│ 3│
+╰─────────┴────────────────────────╯
+
+╭──────────────┬────────┬─────┬─────────────────┬──────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role │Width│Alignment│Print Format│Write Format│Missing Values│
+├──────────────┼────────┼─────┼─────────────────┼──────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│FirstVariable │ 1│ │ │Target│ 8│Right │F8.0 │F8.0 │ │
+│SécondVariable│ 2│ │ │Input │ 8│Right │F8.0 │F8.0 │ │
+│ThirdVariable │ 3│ │ │Input │ 8│Right │F8.0 │F8.0 │ │
+╰──────────────┴────────┴─────┴─────────────────┴──────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+╭───────────────────────────────┬─────────────╮
+│Variable and Name │ Value │
+├───────────────────────────────┼─────────────┤
+│(dataset) Attr1[1] │Value1 │
+│ Attr1[2] │'déclaration'│
+│ SécondAttr[1]│123 │
+│ SécondAttr[2]│456 │
+├───────────────────────────────┼─────────────┤
+│FirstVariable $@Role │1 │
+│ adèle[1] │23 │
+│ adèle[2] │34 │
+│ bert │123 │
+├───────────────────────────────┼─────────────┤
+│SécondVariable xyzzy │quux │
+├───────────────────────────────┼─────────────┤
+│ThirdVariable fizz │buzz │
+╰───────────────────────────────┴─────────────╯
--- /dev/null
+# File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; # Layout code
+3; # Nominal case size
+0; # Not compressed
+0; # Not weighted
+0; # No cases.
+100.0; # Bias.
+"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file";
+i8 0 *3;
+
+# Variables.
+2; 0; 0; 0; 0x050800 *2; s8 "FIRSTVAR";
+2; 0; 0; 0; 0x050800 *2; s8 "SECONDVA";
+2; 0; 0; 0; 0x050800 *2; s8 "THIRDVAR";
+
+# Machine integer info record.
+7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252;
+
+# Long variable names.
+7; 13; 1; COUNT (
+"FIRSTVAR=FirstVariable"; i8 9;
+"SECONDVA=S"; i8 233; "condVariable"; i8 9;
+"THIRDVAR=ThirdVariable"; i8 9
+);
+
+# Data file attributes record.
+7; 17; 1; COUNT (
+"Attr1('Value1'"; i8 10; "''d"; i8 233; "claration''"; i8 10; ")";
+"S"; i8 233; "condAttr('123'"; i8 10; "'456'"; i8 10; ")";
+);
+
+# Variable attributes record.
+7; 18; 1; COUNT (
+"FirstVariable:";
+ "ad"; i8 232; "le('23'"; i8 10; "'34'"; i8 10; ")";
+ "bert('123'"; i8 10; ")";
+ "$@Role('1'"; i8 10; ")";
+"/S"; i8 233; "condVariable:";
+ "xyzzy('quux'"; i8 10; ")";
+);
+
+# Another variable attributes record.
+# Only system files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin"
+# include multiple variable attributes records.
+7; 18; 1; COUNT ("ThirdVariable:fizz('buzz'"; i8 10; ")";);
+
+# Character encoding record.
+7; 20; 1; 12; "windows-1252";
+
+# Dictionary termination record.
+999; 0;