self
}
+ /// Builder-style counterpart of [`Extend::extend`]: appends every item of
+ /// `children` to this group and hands the group back to the caller.
+ pub fn with_multiple<C>(mut self, children: impl IntoIterator<Item = C>) -> Self
+ where
+ C: Into<Category>,
+ {
+ Extend::extend(&mut self, children);
+ self
+ }
+
pub fn with_label_shown(self) -> Self {
self.with_show_label(true)
}
}
}
+impl<C> Extend<C> for Group
+where
+ C: Into<Category>,
+{
+ /// Appends every item yielded by `children` to this group via `push`.
+ ///
+ /// Space for the iterator's lower size-hint bound is reserved in
+ /// `self.children` before any item is pushed.
+ fn extend<T: IntoIterator<Item = C>>(&mut self, children: T) {
+ let items = children.into_iter();
+ let (lower_bound, _) = items.size_hint();
+ self.children.reserve(lower_bound);
+ items.for_each(|item| {
+ self.push(item);
+ });
+ }
+}
+
#[derive(Clone, Debug, Default)]
pub struct Footnotes(pub Vec<Arc<Footnote>>);
}
}
+impl From<&Variable> for Category {
+ /// Builds a category from `variable` by first turning it into a value with
+ /// `Value::new_variable` and then converting that value into a `Category`.
+ fn from(variable: &Variable) -> Self {
+ let value = Value::new_variable(variable);
+ value.into()
+ }
+}
+
impl From<&str> for Category {
fn from(name: &str) -> Self {
Self::Leaf(Leaf::new(Value::new_text(name)))
pub fn decode(
mut headers: Headers,
- cases: Option<Cases>,
+ mut cases: Option<Cases>,
encoding: &'static Encoding,
mut warn: impl FnMut(Error),
) -> Result<(Dictionary, Metadata, Option<Cases>), Error> {
}
}
- 'outer: for record in headers
- .very_long_strings
- .drain(..)
- .flat_map(|record| record.0.into_iter())
- {
- let Some(index) = dictionary.variables.get_index_of(&record.short_name.0) else {
- warn(dbg!(Error::TBD));
- continue;
- };
- let width = VarWidth::String(record.length);
- let n_segments = width.n_segments();
- if n_segments == 1 {
- warn(dbg!(Error::ShortVeryLongString {
- short_name: record.short_name.clone(),
- width: record.length
- }));
- continue;
- }
- if index + n_segments > dictionary.variables.len() {
- warn(dbg!(Error::VeryLongStringOverflow {
- short_name: record.short_name.clone(),
- width: record.length,
- index,
- n_segments,
- len: dictionary.variables.len()
- }));
- continue;
- }
- let mut short_names = Vec::with_capacity(n_segments);
- for i in 0..n_segments {
- let alloc_width = width.segment_alloc_width(i);
- let segment = &dictionary.variables[index + i];
- short_names.push(segment.short_names[0].clone());
- let segment_width = segment.width.as_string_width().unwrap_or(0);
- if segment_width.next_multiple_of(8) != alloc_width.next_multiple_of(8) {
- warn(Error::VeryLongStringInvalidSegmentWidth {
+ if !headers.very_long_strings.is_empty() {
+ 'outer: for record in headers
+ .very_long_strings
+ .drain(..)
+ .flat_map(|record| record.0.into_iter())
+ {
+ let Some(index) = dictionary.variables.get_index_of(&record.short_name.0) else {
+ warn(dbg!(Error::TBD));
+ continue;
+ };
+ let width = VarWidth::String(record.length);
+ let n_segments = width.n_segments();
+ if n_segments == 1 {
+ warn(dbg!(Error::ShortVeryLongString {
+ short_name: record.short_name.clone(),
+ width: record.length
+ }));
+ continue;
+ }
+ if index + n_segments > dictionary.variables.len() {
+ warn(dbg!(Error::VeryLongStringOverflow {
short_name: record.short_name.clone(),
width: record.length,
- index: i,
- actual: segment_width,
- expected: alloc_width,
- });
- continue 'outer;
+ index,
+ n_segments,
+ len: dictionary.variables.len()
+ }));
+ continue;
}
+ let mut short_names = Vec::with_capacity(n_segments);
+ for i in 0..n_segments {
+ let alloc_width = width.segment_alloc_width(i);
+ let segment = &dictionary.variables[index + i];
+ short_names.push(segment.short_names[0].clone());
+ let segment_width = segment.width.as_string_width().unwrap_or(0);
+ if segment_width.next_multiple_of(8) != alloc_width.next_multiple_of(8) {
+ warn(Error::VeryLongStringInvalidSegmentWidth {
+ short_name: record.short_name.clone(),
+ width: record.length,
+ index: i,
+ actual: segment_width,
+ expected: alloc_width,
+ });
+ continue 'outer;
+ }
+ }
+ dictionary.delete_vars(index + 1..index + n_segments);
+ let variable = dictionary.variables.get_index_mut2(index).unwrap();
+ variable.short_names = short_names;
+ variable.resize(width);
}
- dictionary.delete_vars(index + 1..index + n_segments);
- let variable = dictionary.variables.get_index_mut2(index).unwrap();
- variable.short_names = short_names;
- variable.resize(width);
+ cases = cases
+ .take()
+ .map(|cases| cases.with_widths(dictionary.variables.iter().map(|var| var.width)));
}
if headers.long_names.is_empty() {
fn bytes(&self) -> usize {
match self {
CaseVar::Numeric => 8,
- CaseVar::String { width, encoding } => encoding
+ CaseVar::String { width: _, encoding } => encoding
.iter()
.map(|segment| segment.data_bytes + segment.padding_bytes)
.sum(),
} else {
Box::new(reader)
},
+ eof: case_vars.is_empty(),
case_vars,
compression: header.compression,
bias: header.bias,
endian: header.endian,
codes: VecDeque::with_capacity(8),
- eof: false,
+ }
+ }
+
+ /// Returns this value with `case_vars` rebuilt from `widths`, one
+ /// `CaseVar::new` result per width, keeping every other field unchanged.
+ ///
+ /// `eof` stays set if it already was, and is also set when `widths` yields
+ /// no items.
+ pub fn with_widths(self, widths: impl IntoIterator<Item = VarWidth>) -> Self {
+ let case_vars: Vec<_> = widths.into_iter().map(CaseVar::new).collect();
+ let eof = self.eof || case_vars.is_empty();
+ Self {
+ eof,
+ case_vars,
+ ..self
}
}
}
use crate::{
endian::Endian,
output::{
- pivot::{test::assert_lines_eq, Axis3, Dimension, PivotTable},
+ pivot::{test::assert_lines_eq, Axis3, Dimension, Group, PivotTable, Value},
Details, Item, Text,
},
sys::{
let mut reader = Reader::new(cursor, |warning| warnings.push(warning)).unwrap();
let output = match reader.headers().collect() {
Ok(headers) => {
- drop(reader);
+ let cases = reader.cases();
let encoding =
encoding_from_headers(&headers, &mut |warning| warnings.push(warning)).unwrap();
let mut decoder = Decoder::new(encoding, |warning| warnings.push(warning));
let mut errors = Vec::new();
let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap();
- let (dictionary, metadata, _cases) =
- decode(headers, None, encoding, |e| errors.push(e)).unwrap();
+ let (dictionary, metadata, cases) =
+ decode(headers, cases, encoding, |e| errors.push(e)).unwrap();
let (group, data) = metadata.to_pivot_rows();
let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))])
.with_data(
if let Some(pt) = dictionary.output_variable_sets().to_pivot_table() {
output.push(Arc::new(pt.into()));
}
+ if let Some(cases) = cases {
+ let variables = Group::new("Variable")
+ .with_multiple(dictionary.variables.iter().map(|var| &**var));
+ let mut case_numbers = Group::new("Case").with_label_shown();
+ let mut data = Vec::new();
+ for (case_number, case) in cases.enumerate() {
+ match case {
+ Ok(case) => {
+ case_numbers.push(Value::new_integer(Some(
+ (case_numbers.len() + 1) as f64,
+ )));
+ data.push(
+ case.into_iter()
+ .map(|datum| Value::new_datum(&datum, dictionary.encoding))
+ .collect::<Vec<_>>(),
+ );
+ }
+ Err(error) => {
+ output.push(Arc::new(Item::from(Text::new_log(error.to_string()))));
+ }
+ }
+ }
+ if !data.is_empty() {
+ let mut pt = PivotTable::new([
+ (Axis3::X, Dimension::new(variables)),
+ (Axis3::Y, Dimension::new(case_numbers)),
+ ]);
+ for (row_number, row) in data.into_iter().enumerate() {
+ for (column_number, datum) in row.into_iter().enumerate() {
+ pt.insert(&[column_number, row_number], datum);
+ }
+ }
+ output.push(Arc::new(pt.into()));
+ }
+ }
Item::new(Details::Group(output))
}
Err(error) => Item::new(Details::Text(Box::new(Text::new_log(error.to_string())))),
├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+╭────┬────╮
+│Case│num1│
+├────┼────┤
+│1 │1.00│
+╰────┴────╯
├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤
│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+╭────┬────╮
+│Case│num1│
+├────┼────┤
+│1 │1.00│
+╰────┴────╯
│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
│num2│ 2│ │ │Input│ 8│Right │F8.0 │F8.0 │ │
╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+╭────┬────┬────╮
+│Case│num1│num2│
+├────┼────┼────┤
+│1 │1.00│2.00│
+╰────┴────┴────╯
# File header.
"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
-2; 2; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+2; 2; 0; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
# Numeric variables, no label or missing values.
2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
999; 0;
# Data.
-1.0;
+1.0; 2.0;
\ No newline at end of file
│séq256│ 1│ │Nominal │Input│ 32│Left │A256 │A256 │ │
│str600│ 2│ │Nominal │Input│ 32│Left │A600 │A600 │ │
╰──────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯
+
+╭────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│Case│ séq256 │ str600 │
+├────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤
+│1 │abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@a│abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyz│
+╰────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯