- One line feed (byte 0x0a). Sometimes multiple, even hundreds,
of line feeds are present.
-Example: Given appropriate variable definitions, consider the
-following MRSETS command:
-
-```
-MRSETS /MCGROUP NAME=$a LABEL='my mcgroup' VARIABLES=a b c
- /MDGROUP NAME=$b VARIABLES=g e f d VALUE=55
- /MDGROUP NAME=$c LABEL='mdgroup #2' VARIABLES=h i j VALUE='Yes'
- /MDGROUP NAME=$d LABEL='third mdgroup' CATEGORYLABELS=COUNTEDVALUES
- VARIABLES=k l m VALUE=34
- /MDGROUP NAME=$e CATEGORYLABELS=COUNTEDVALUES LABELSOURCE=VARLABEL
- VARIABLES=n o p VALUE='choice'.
-```
-
-The above would generate the following multiple response set record
-of subtype 7:
-
-```
-$a=C 10 my mcgroup a b c
-$b=D2 55 0 g e f d
-$c=D3 Yes 10 mdgroup #2 h i j
-```
-
-It would also generate the following multiple response set record
-with subtype 19:
-
-```
-$d=E 1 2 34 13 third mdgroup k l m
-$e=E 11 6 choice 0 n o p
-```
+> Example: Given appropriate variable definitions, consider the
+> following `MRSETS` command:
+>
+> ```
+> MRSETS /MCGROUP NAME=$a LABEL='my mcgroup' VARIABLES=a b c
+> /MDGROUP NAME=$b VARIABLES=g e f d VALUE=55
+> /MDGROUP NAME=$c LABEL='mdgroup #2' VARIABLES=h i j VALUE='Yes'
+> /MDGROUP NAME=$d LABEL='third mdgroup' CATEGORYLABELS=COUNTEDVALUES
+> VARIABLES=k l m VALUE=34
+> /MDGROUP NAME=$e CATEGORYLABELS=COUNTEDVALUES LABELSOURCE=VARLABEL
+> VARIABLES=n o p VALUE='choice'.
+> ```
+>
+> The above would generate the following multiple response set record
+> of subtype 7:
+>
+> ```
+> $a=C 10 my mcgroup a b c
+> $b=D2 55 0 g e f d
+> $c=D3 Yes 10 mdgroup #2 h i j
+> ```
+>
+> It would also generate the following multiple response set record
+> of subtype 19:
+>
+> ```
+> $d=E 1 2 34 13 third mdgroup k l m
+> $e=E 11 6 choice 0 n o p
+> ```
[^note]: This part of the format may not be fully understood, because
only a single example of each possibility has been examined.
The total length is `count` bytes.
-### Example
-
-A system file produced with the following VARIABLE ATTRIBUTE commands in
-effect:
-
-```
-VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=fred[1]('23') fred[2]('34').
-VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=bert('123').
-```
-
-will contain a variable attribute record with the following contents:
-
-```
-0000 07 00 00 00 12 00 00 00 01 00 00 00 22 00 00 00 |............"...|
-0010 64 75 6d 6d 79 3a 66 72 65 64 28 27 32 33 27 0a |dummy:fred('23'.|
-0020 27 33 34 27 0a 29 62 65 72 74 28 27 31 32 33 27 |'34'.)bert('123'|
-0030 0a 29 |.) |
-```
+> Example: A system file produced with the following `VARIABLE
+> ATTRIBUTE` commands in effect:
+>
+> ```
+> VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=fred[1]('23') fred[2]('34').
+> VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=bert('123').
+> ```
+>
+> will contain a variable attribute record with the following contents:
+>
+> ```
+> 0000 07 00 00 00 12 00 00 00 01 00 00 00 22 00 00 00 |............"...|
+> 0010 64 75 6d 6d 79 3a 66 72 65 64 28 27 32 33 27 0a |dummy:fred('23'.|
+> 0020 27 33 34 27 0a 29 62 65 72 74 28 27 31 32 33 27 |'34'.)bert('123'|
+> 0030 0a 29 |.) |
+> ```
### Variable Roles
- 1 through 251
A number with value `code - bias`, where `code` is the value of
- the compression code and `bias` comes from the file header. For
- example, code 105 with bias 100.0 (the normal value) indicates a
- numeric variable of value 5.
+ the compression code and `bias` comes from the file header.
+
+ > Example: Code 105 with bias 100.0 (the normal value) indicates a
+ > numeric variable of value 5.
A code of 0 (after subtracting the bias) in a string field encodes
null bytes. This is unusual, since a string field normally
use std::{
borrow::Cow,
cmp::Ordering,
- collections::{HashMap, HashSet},
+ collections::{BTreeSet, HashMap, HashSet},
fmt::{Debug, Formatter, Result as FmtResult},
hash::Hash,
ops::{Bound, RangeBounds, RangeInclusive},
pub attributes: Attributes,
/// Multiple response sets.
- pub mrsets: HashSet<ByIdentifier<MultipleResponseSet>>,
+ pub mrsets: BTreeSet<ByIdentifier<MultipleResponseSet>>,
/// Variable sets.
///
documents: Vec::new(),
vectors: HashSet::new(),
attributes: Attributes::new(),
- mrsets: HashSet::new(),
+ mrsets: BTreeSet::new(),
variable_sets: Vec::new(),
encoding,
}
.map(ByIdentifier::new)
})
.collect();
- self.mrsets = self
- .mrsets
- .drain()
+ self.mrsets = std::mem::take(&mut self.mrsets)
+ .into_iter()
.filter_map(|mrset_by_id| {
mrset_by_id
.0
OutputVariableSets::new(self)
}
+ pub fn output_mrsets(&self) -> OutputMrsets { // Returns an `OutputMrsets` helper that borrows this dictionary.
+ OutputMrsets::new(self)
+ }
+
pub fn to_pivot_rows(&self) -> (Group, Vec<Value>) {
let mut group = Group::new("Dictionary Information");
let mut values = Vec::new();
}
}
+pub struct OutputMrsets<'a> { // Borrowed view of a `Dictionary` used to render its multiple response sets as a pivot table.
+ dictionary: &'a Dictionary, // Dictionary whose `mrsets` are reported.
+}
+
+impl<'a> OutputMrsets<'a> {
+ fn new(dictionary: &'a Dictionary) -> Self { // Private constructor; only stores the borrowed dictionary.
+ Self { dictionary }
+ }
+ pub fn any_mrsets(&self) -> bool { // True when the dictionary holds at least one multiple response set.
+ !self.dictionary.mrsets.is_empty()
+ }
+ pub fn to_pivot_table(&self) -> Option<PivotTable> { // Builds the table; returns `None` when there are no sets to show.
+ if !self.any_mrsets() {
+ return None;
+ }
+
+ let attributes = Group::new("Attributes") // Attribute categories; the second index of each insert below (0..=3) appears to follow this order.
+ .with("Label")
+ .with("Encoding")
+ .with("Counted Value")
+ .with("Member Variables");
+
+ let mut mrsets = Group::new("Name").with_label_shown(); // One category per set, named after the set.
+ for mrset in &self.dictionary.mrsets {
+ mrsets.push(mrset.name.as_str());
+ }
+ let mut pt = PivotTable::new([
+ (Axis3::Y, Dimension::new(mrsets)),
+ (Axis3::X, Dimension::new(attributes)),
+ ]);
+ for (row, mrset) in self.dictionary.mrsets.iter().enumerate() { // Same collection, same iteration order as the loop that built the `mrsets` group.
+ pt.insert(&[row, 0], mrset.label.as_str());
+
+ let mr_type_name = match &mrset.mr_type {
+ MultipleResponseType::MultipleDichotomy { datum, .. } => {
+ pt.insert(&[row, 2], Value::new_datum(datum, self.dictionary.encoding)); // The counted value is inserted only for dichotomy sets.
+ "Dichotomies"
+ }
+ MultipleResponseType::MultipleCategory => "Categories", // No insert at index 2 for category sets.
+ };
+
+ pt.insert(&[row, 1], Value::new_text(mr_type_name));
+ pt.insert(
+ &[row, 3],
+ mrset
+ .variables
+ .iter()
+ .flat_map(|index| [self.dictionary.variables[*index].name.as_str(), "\n"]) // Every member name is followed by "\n", including the last one.
+ .collect::<String>(),
+ );
+ }
+ Some(pt)
+ }
+}
+
fn update_dict_index_vec<F>(dict_indexes: &mut Vec<DictIndex>, f: F)
where
F: Fn(DictIndex) -> Option<DictIndex>,
cell_index(data_indexes, self.dimensions.iter().map(|d| d.len()))
}
- pub fn insert(&mut self, data_indexes: &[usize], value: Value) {
- self.cells.insert(self.cell_index(data_indexes), value);
+ pub fn insert(&mut self, data_indexes: &[usize], value: impl Into<Value>) {
+ self.cells
+ .insert(self.cell_index(data_indexes), value.into());
}
pub fn get(&self, data_indexes: &[usize]) -> Option<&Value> {
variable_label: variable.label.clone(),
}))
}
- pub fn new_value(value: &Datum, encoding: &'static Encoding) -> Self {
+ pub fn new_datum(value: &Datum, encoding: &'static Encoding) -> Self {
match value {
Datum::Number(number) => Self::new_number(*number),
Datum::String(string) => Self::new_user_text(string.decode(encoding).into_owned()),
}
}
+impl From<String> for Value { // Converts an owned `String` into a `Value`.
+ fn from(value: String) -> Self {
+ Self::new_text(value) // Delegates to `Value::new_text`.
+ }
+}
+
impl From<&Variable> for Value {
fn from(variable: &Variable) -> Self {
Self::new_variable(variable)
return Err(Warning::MultipleResponseSyntaxError);
};
let (name, input) = input.split_at(equals);
+ let input = input.strip_prefix(b"=").unwrap();
let (mr_type, input) = MultipleResponseType::parse(input)?;
let Some(input) = input.strip_prefix(b" ") else {
return Err(Warning::MultipleResponseSyntaxError);
let mut input = &ext.data[..];
let mut sets = Vec::new();
- while !input.is_empty() {
+ loop {
+ while let Some(suffix) = input.strip_prefix(b"\n") {
+ input = suffix;
+ }
+ if input.is_empty() {
+ break;
+ }
let (set, rest) = MultipleResponseSet::parse(input)?;
sets.push(set);
input = rest;
if let Some(pt) = dictionary.output_value_labels().to_pivot_table() {
output.push(Arc::new(pt.into()));
}
+ if let Some(pt) = dictionary.output_mrsets().to_pivot_table() {
+ output.push(Arc::new(pt.into()));
+ }
if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() {
output.push(Arc::new(pt.into()));
}
+╭──────────────────────┬────────────────────────╮
+│ Created │ 01-JAN-2011 20:53:52│
+├──────────────────────┼────────────────────────┤
+│Writer Product │PSPP synthetic test file│
+│ Version │1.2.3 │
+├──────────────────────┼────────────────────────┤
+│ Compression │None │
+│ Number of Cases│ 0│
+╰──────────────────────┴────────────────────────╯
+
+╭─────────┬────────────────────────╮
+│Label │PSPP synthetic test file│
+│Variables│ 16│
+╰─────────┴────────────────────────╯
+
+╭──┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮
+│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│
+├──┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
+│あ│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│b │ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│c │ 3│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│d │ 4│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│e │ 5│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│f │ 6│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│g │ 7│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│h │ 8│ │Nominal │Input│ 4│Left │A4 │A4 │ │
+│i │ 9│ │Nominal │Input│ 4│Left │A4 │A4 │ │
+│j │ 10│ │Nominal │Input│ 4│Left │A4 │A4 │ │
+│k │ 11│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│l │ 12│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│m │ 13│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
+│n │ 14│ │Nominal │Input│ 6│Left │A6 │A6 │ │
+│o │ 15│ │Nominal │Input│ 6│Left │A6 │A6 │ │
+│p │ 16│ │Nominal │Input│ 6│Left │A6 │A6 │ │
+╰──┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+╭────┬─────────────┬───────────┬─────────────┬────────────────╮
+│Name│ Label │ Encoding │Counted Value│Member Variables│
+├────┼─────────────┼───────────┼─────────────┼────────────────┤
+│$a │my mcgroup │Categories │ │あ │
+│ │ │ │ │b │
+│ │ │ │ │c │
+│$b │ │Dichotomies│ 55.00│g │
+│ │ │ │ │e │
+│ │ │ │ │f │
+│ │ │ │ │d │
+│$c │mdgroup #2 │Dichotomies│はい │h │
+│ │ │ │ │i │
+│ │ │ │ │j │
+│$d │third mdgroup│Dichotomies│ 34.00│k │
+│ │ │ │ │l │
+│ │ │ │ │m │
+│$e │ │Dichotomies│choice │n │
+│ │ │ │ │o │
+│ │ │ │ │p │
+╰────┴─────────────┴───────────┴─────────────┴────────────────╯