From 191ca1390e9902a7186055cf09e61efd43cf6d11 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 7 Jan 2024 11:41:28 -0800 Subject: [PATCH] continue implementation --- rust/src/cooked.rs | 468 +++++++++++++++++++---------------------- rust/src/dictionary.rs | 75 ++++++- rust/src/main.rs | 5 +- rust/src/raw.rs | 92 ++++++-- 4 files changed, 367 insertions(+), 273 deletions(-) diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index 3e4677e71c..56d6aa1c82 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -4,11 +4,15 @@ use std::{ }; use crate::{ + dictionary::{self, Dictionary}, encoding::{default_encoding, get_encoding, Error as EncodingError}, endian::Endian, format::{Error as FormatError, Spec, UncheckedSpec}, identifier::{Error as IdError, Identifier}, - raw::{self, ProductInfoRecord, RawDocumentLine, RawStr, RawString, VarDisplayRecord, VarType}, + raw::{ + self, LongStringMissingValueRecord, MissingValues, ProductInfoRecord, RawDocumentLine, + RawStr, RawString, VarDisplayRecord, VarType, + }, }; use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; use encoding_rs::{DecoderResult, Encoding}; @@ -166,7 +170,7 @@ pub enum Record { VariableSets(VariableSetRecord), VarDisplay(VarDisplayRecord), MultipleResponse(MultipleResponseRecord), - LongStringMissingValues(LongStringMissingValuesRecord), + LongStringMissingValues(LongStringMissingValueRecord), LongStringValueLabels(LongStringValueLabelRecord), Encoding(EncodingRecord), NumberOfCases(NumberOfCasesRecord), @@ -195,28 +199,28 @@ pub struct Variable { } pub struct Decoder { - pub compression: Option, - pub endian: Endian, + pub raw: raw::Decoder, pub encoding: &'static Encoding, pub variables: HashMap, pub var_names: HashMap, + pub dictionary: Dictionary, n_dict_indexes: usize, n_generated_names: usize, } #[derive(Default)] struct Headers<'a> { - header: Option<&'a raw::HeaderRecord>, - variables: Vec<&'a raw::VariableRecord>>, + header: Option>>, + variables: Vec, String>>, value_labels: Vec<&'a raw::ValueLabelRecord, RawString>>, - document: Option<&'a raw::DocumentRecord>, + documents: Vec>>, integer_info: Option<&'a raw::IntegerInfoRecord>, float_info: Option<&'a raw::FloatInfoRecord>, variable_sets: Vec<&'a raw::VariableSetRecord>, var_display: Option<&'a raw::VarDisplayRecord>, multiple_response: Vec<&'a raw::MultipleResponseRecord>, long_string_value_labels: Vec<&'a raw::LongStringValueLabelRecord>, - long_string_missing_values: Vec<&'a raw::LongStringMissingValueRecord>>, + long_string_missing_values: Vec>, encoding: Option<&'a raw::EncodingRecord>, number_of_cases: Option<&'a raw::NumberOfCasesRecord>, product_info: Option<&'a raw::ProductInfoRecord>, @@ -237,14 +241,16 @@ fn set_or_warn(option: &mut Option, value: T, warn: &impl Fn(Error)) { } impl<'a> Headers<'a> { - fn new(headers: &'a Vec, warn: &impl Fn(Error)) -> Headers<'a> { + fn new(headers: &'a Vec, decoder: &Decoder, warn: &impl Fn(Error)) -> Headers<'a> { let mut h = Headers::default(); for header in headers { match header { - raw::Record::Header(record) => set_or_warn(&mut h.header, record, warn), - raw::Record::Variable(record) => h.variables.push(record), + raw::Record::Header(record) => { + set_or_warn(&mut h.header, record.decode(&decoder.raw), warn) + } + raw::Record::Variable(record) => h.variables.push(record.decode(&decoder.raw)), raw::Record::ValueLabel(record) => h.value_labels.push(record), - raw::Record::Document(record) => set_or_warn(&mut h.document, record, warn), + raw::Record::Document(record) => h.documents.push(record.decode(&decoder.raw)), raw::Record::IntegerInfo(record) => set_or_warn(&mut h.integer_info, record, warn), raw::Record::FloatInfo(record) => set_or_warn(&mut h.float_info, record, warn), raw::Record::VariableSets(record) => h.variable_sets.push(record), @@ -253,9 +259,9 @@ impl<'a> Headers<'a> { raw::Record::LongStringValueLabels(record) => { h.long_string_value_labels.push(record) } - raw::Record::LongStringMissingValues(record) => { - h.long_string_missing_values.push(record) - } + raw::Record::LongStringMissingValues(record) => h + .long_string_missing_values + .push(record.decode(&decoder.raw)), raw::Record::Encoding(record) => set_or_warn(&mut h.encoding, record, warn), raw::Record::NumberOfCases(record) => { set_or_warn(&mut h.number_of_cases, record, warn) @@ -277,85 +283,83 @@ impl<'a> Headers<'a> { } } -pub fn decode( - headers: Vec, - encoding: Option<&'static Encoding>, +pub fn encoding_from_headers( + headers: &Vec, warn: &impl Fn(Error), -) -> Result, Error> { - let h = Headers::new(&headers, warn); - let Some(header) = h.header else { - return Err(Error::MissingHeaderRecord); - }; - let encoding = match encoding { - Some(encoding) => encoding, - None => { - let encoding = h.encoding.map(|record| record.0.as_str()); - let character_code = h.integer_info.map(|record| record.character_code); - match get_encoding(encoding, character_code) { - Ok(encoding) => encoding, - Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)), - Err(err) => { - warn(Error::EncodingError(err)); - // Warn that we're using the default encoding. - default_encoding() - } - } +) -> Result<&'static Encoding, Error> { + let mut encoding_record = None; + let mut integer_info_record = None; + for record in headers { + match record { + raw::Record::Encoding(record) => encoding_record = Some(record), + raw::Record::IntegerInfo(record) => integer_info_record = Some(record), + _ => (), } - }; - - //let mut dictionary = Dictionary::new(encoding); + } + let encoding = encoding_record.map(|record| record.0.as_str()); + let character_code = integer_info_record.map(|record| record.character_code); + match get_encoding(encoding, character_code) { + Ok(encoding) => Ok(encoding), + Err(err @ EncodingError::Ebcdic) => Err(Error::EncodingError(err)), + Err(err) => { + warn(Error::EncodingError(err)); + // Warn that we're using the default encoding. + Ok(default_encoding()) + } + } +} +pub fn decode( + headers: Vec, + encoding: &'static Encoding, + warn: &impl Fn(Error), +) -> Result<(Vec, Metadata), Error> { let mut decoder = Decoder { - compression: header.compression, - endian: header.endian, + raw: raw::Decoder { + encoding, + warn: Box::new(|error| println!("{error}")), + }, encoding, variables: HashMap::new(), var_names: HashMap::new(), + dictionary: Dictionary::new(encoding), n_dict_indexes: 0, n_generated_names: 0, }; + let h = Headers::new(&headers, &decoder, warn); + let Some(header) = h.header else { + return Err(Error::MissingHeaderRecord); + }; + let mut output = Vec::with_capacity(headers.len()); // Decode the records that don't use variables at all. - if let Some(header) = HeaderRecord::try_decode(&mut decoder, header, warn)? { + if let Some(header) = HeaderRecord::try_decode(&mut decoder, &header, warn)? { output.push(Record::Header(header)) } - if let Some(raw) = h.document { - if let Some(document) = DocumentRecord::try_decode(&mut decoder, raw, warn)? { - output.push(Record::Document(document)) + for document in h.documents { + for line in &document.lines { + decoder.dictionary.documents.push(line.to_string()) } } - if let Some(raw) = h.integer_info { - output.push(Record::IntegerInfo(raw.clone())); - } - if let Some(raw) = h.float_info { - output.push(Record::FloatInfo(raw.clone())); - } - if let Some(raw) = h.product_info { - output.push(Record::ProductInfo(raw.clone())); + /* + for &raw in &h.file_attributes { + let s = decoder.decode_string_cow(&raw.text.0, warn); + output.push(Record::FileAttributes(FileAttributeRecord::parse( + &decoder, &s, warn, + )?)); + } + for &raw in &h.other_extensions { + output.push(Record::OtherExtension(raw.clone())); } - if let Some(raw) = h.number_of_cases { - output.push(Record::NumberOfCases(raw.clone())) + */ + // Decode the variable records, which are the basis of almost everything + // else. + for raw in &h.variables { + parse_variable_record(&mut decoder, raw, warn)?; } /* - for &raw in &h.file_attributes { - let s = decoder.decode_string_cow(&raw.text.0, warn); - output.push(Record::FileAttributes(FileAttributeRecord::parse( - &decoder, &s, warn, - )?)); - } - for &raw in &h.other_extensions { - output.push(Record::OtherExtension(raw.clone())); - } - // Decode the variable records, which are the basis of almost everything - // else. - for &raw in &h.variables { - if let Some(variable) = VariableRecord::try_decode(&mut decoder, raw, warn)? { - output.push(Record::Variable(variable)); - } - } - // Decode value labels and weight variable. These use indexes into the // variable records, so we need to parse them before those indexes become // invalidated by very long string variables. @@ -413,8 +417,9 @@ pub fn decode( let s = decoder.decode_string_cow(&raw.text.0, warn); output.push(Record::VariableSets(VariableSetRecord::parse(&s, warn)?)); } - */ - Ok(output) + */ + let metadata = Metadata::decode(&header, h.integer_info, h.product_info, warn); + Ok((output, metadata)) } impl Decoder { @@ -532,29 +537,79 @@ fn trim_end_spaces(mut s: String) -> String { s } +/// Data file info that doesn't fit in [Dictionary]. +pub struct Metadata { + creation: NaiveDateTime, + endian: Endian, + compression: Option, + n_cases: Option, + product: String, + product_ext: Option, + version: Option<(i32, i32, i32)>, +} + +impl Metadata { + fn decode( + header: &crate::raw::HeaderRecord>, + integer_info: Option<&IntegerInfoRecord>, + product_ext: Option<&ProductInfoRecord>, + warn: impl Fn(Error), + ) -> Self { + let creation_date = NaiveDate::parse_from_str(&header.creation_date, "%e %b %Y") + .unwrap_or_else(|_| { + warn(Error::InvalidCreationDate { + creation_date: header.creation_date.to_string(), + }); + Default::default() + }); + let creation_time = NaiveTime::parse_from_str(&header.creation_time, "%H:%M:%S") + .unwrap_or_else(|_| { + warn(Error::InvalidCreationTime { + creation_time: header.creation_time.to_string(), + }); + Default::default() + }); + let creation = NaiveDateTime::new(creation_date, creation_time); + + let product = header + .eye_catcher + .trim_start_matches("@(#) SPSS DATA FILE") + .trim_end() + .to_string(); + + Self { + creation, + endian: header.endian, + compression: header.compression, + n_cases: header.n_cases.map(|n| n as u64), + product, + product_ext: product_ext.map(|pe| pe.0.clone()), + version: integer_info.map(|ii| ii.version), + } + } +} + impl TryDecode for HeaderRecord { - type Input<'a> = crate::raw::HeaderRecord; + type Input<'a> = crate::raw::HeaderRecord>; fn try_decode( - decoder: &mut Decoder, + _decoder: &mut Decoder, input: &Self::Input<'_>, warn: impl Fn(Error), ) -> Result, Error> { - let eye_catcher = trim_end_spaces(decoder.decode_string(&input.eye_catcher.0, &warn)); - let file_label = trim_end_spaces(decoder.decode_string(&input.file_label.0, &warn)); - let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn); - let creation_date = - NaiveDate::parse_from_str(&creation_date, "%e %b %Y").unwrap_or_else(|_| { + let eye_catcher = trim_end_spaces(input.eye_catcher.to_string()); + let file_label = trim_end_spaces(input.file_label.to_string()); + let creation_date = NaiveDate::parse_from_str(&input.creation_date, "%e %b %Y") + .unwrap_or_else(|_| { warn(Error::InvalidCreationDate { - creation_date: creation_date.into(), + creation_date: input.creation_date.to_string(), }); Default::default() }); - let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn); - let creation_time = - NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| { + let creation_time = NaiveTime::parse_from_str(&input.creation_time, "%H:%M:%S") + .unwrap_or_else(|_| { warn(Error::InvalidCreationTime { - creation_time: creation_time.into(), + creation_time: input.creation_time.to_string(), }); Default::default() }); @@ -621,6 +676,13 @@ impl VarWidth { pub fn narrower(a: Option, b: Option) -> Option { Self::width_predicate(a, b, |a, b| a.min(b)) } + + pub fn default_display_width(&self) -> u32 { + match self { + VarWidth::Numeric => 8, + VarWidth::String(width) => *width.min(&32) as u32, + } + } } impl From for VarType { @@ -638,39 +700,10 @@ pub struct VariableRecord { pub name: Identifier, pub print_format: Spec, pub write_format: Spec, - pub missing_values: MissingValues, + pub missing_values: MissingValues, pub label: Option, } -#[derive(Clone, Debug)] -pub struct MissingValues { - /// Individual missing values, up to 3 of them. - pub values: Vec, - - /// Optional range of missing values. - pub range: Option<(Value, Value)>, -} - -impl Decode>> for MissingValues { - fn decode( - decoder: &Decoder, - input: &raw::MissingValues>, - _warn: impl Fn(Error), - ) -> Self { - MissingValues { - values: input - .values - .iter() - .map(|value| Value::decode(value, decoder)) - .collect(), - range: input - .range - .as_ref() - .map(|(low, high)| (Value::decode(low, decoder), Value::decode(high, decoder))), - } - } -} - fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec { UncheckedSpec::try_from(raw) .and_then(Spec::try_from) @@ -682,91 +715,84 @@ fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatErro }) } -impl TryDecode for VariableRecord { - type Input<'a> = raw::VariableRecord>; - - fn try_decode( - decoder: &mut Decoder, - input: &Self::Input<'_>, - warn: impl Fn(Error), - ) -> Result, Error> { - let width = match input.width { - 0 => VarWidth::Numeric, - w @ 1..=255 => VarWidth::String(w as u16), - -1 => return Ok(None), - _ => { - return Err(Error::InvalidVariableWidth { - offsets: input.offsets.clone(), - width: input.width, - }) - } - }; - let name = trim_end_spaces(decoder.decode_string(&input.name.0, &warn)); - let name = match Identifier::new(&name, decoder.encoding) { - Ok(name) => { - if !decoder.var_names.contains_key(&name) { - name - } else { - let new_name = decoder.generate_name(); - warn(Error::DuplicateVariableName { - duplicate_name: name.clone(), - new_name: new_name.clone(), - }); - new_name - } - } - Err(id_error) => { +fn parse_variable_record( + decoder: &mut Decoder, + input: &raw::VariableRecord, String>, + warn: impl Fn(Error), +) -> Result<(), Error> { + let width = match input.width { + 0 => VarWidth::Numeric, + w @ 1..=255 => VarWidth::String(w as u16), + -1 => return Ok(()), + _ => { + return Err(Error::InvalidVariableWidth { + offsets: input.offsets.clone(), + width: input.width, + }) + } + }; + let name = trim_end_spaces(input.name.to_string()); + let name = match Identifier::new(&name, decoder.encoding) { + Ok(name) => { + if !decoder.var_names.contains_key(&name) { + name + } else { let new_name = decoder.generate_name(); - warn(Error::InvalidVariableName { - id_error, + warn(Error::DuplicateVariableName { + duplicate_name: name.clone(), new_name: new_name.clone(), }); new_name } - }; - let variable = Variable { - dict_index: decoder.n_dict_indexes, - short_name: name.clone(), - long_name: None, - width, - }; - decoder.n_dict_indexes += width.n_dict_indexes(); - assert!(decoder - .var_names - .insert(name.clone(), variable.dict_index) - .is_none()); - assert!(decoder - .variables - .insert(variable.dict_index, variable) - .is_none()); - - let print_format = decode_format(input.print_format, width, |new_spec, format_error| { - warn(Error::InvalidPrintFormat { - new_spec, - variable: name.clone(), - format_error, - }) - }); - let write_format = decode_format(input.write_format, width, |new_spec, format_error| { - warn(Error::InvalidWriteFormat { - new_spec, - variable: name.clone(), - format_error, - }) - }); - let label = input - .label - .as_ref() - .map(|label| decoder.decode_string(&label.0, &warn)); - Ok(Some(VariableRecord { - width, - name, - print_format, - write_format, - missing_values: MissingValues::decode(decoder, &input.missing_values, warn), - label, - })) + } + Err(id_error) => { + let new_name = decoder.generate_name(); + warn(Error::InvalidVariableName { + id_error, + new_name: new_name.clone(), + }); + new_name + } + }; + let variable = Variable { + dict_index: decoder.n_dict_indexes, + short_name: name.clone(), + long_name: None, + width, + }; + decoder.n_dict_indexes += width.n_dict_indexes(); + assert!(decoder + .var_names + .insert(name.clone(), variable.dict_index) + .is_none()); + assert!(decoder + .variables + .insert(variable.dict_index, variable) + .is_none()); + + let print_format = decode_format(input.print_format, width, |new_spec, format_error| { + warn(Error::InvalidPrintFormat { + new_spec, + variable: name.clone(), + format_error, + }) + }); + let write_format = decode_format(input.write_format, width, |new_spec, format_error| { + warn(Error::InvalidWriteFormat { + new_spec, + variable: name.clone(), + format_error, + }) + }); + let mut variable = dictionary::Variable::new(name, width); + variable.print_format = print_format; + variable.write_format = write_format; + variable.missing_values = input.missing_values.clone(); + if let Some(ref label) = input.label { + variable.label = Some(label.to_string()); } + decoder.dictionary.add_var(variable).unwrap(); + Ok(()) } #[derive(Clone, Debug)] @@ -1284,56 +1310,6 @@ impl TryDecode for MultipleResponseRecord { } } -#[derive(Clone, Debug)] -pub struct LongStringMissingValues { - /// Variable name. - pub var_name: Identifier, - - /// Missing values. - pub missing_values: MissingValues, -} - -impl LongStringMissingValues { - fn decode( - decoder: &Decoder, - input: &raw::LongStringMissingValues>, - warn: &impl Fn(Error), - ) -> Result { - let var_name = decoder.decode_string(&input.var_name.0, warn); - let var_name = Identifier::new(var_name.trim_end(), decoder.encoding) - .map_err(Error::InvalidLongStringValueLabelName)?; - - let missing_values = MissingValues::decode(decoder, &input.missing_values, warn); - - Ok(LongStringMissingValues { - var_name, - missing_values, - }) - } -} - -#[derive(Clone, Debug)] -pub struct LongStringMissingValuesRecord(Vec); - -impl TryDecode for LongStringMissingValuesRecord { - type Input<'a> = raw::LongStringMissingValueRecord>; - - fn try_decode( - decoder: &mut Decoder, - input: &Self::Input<'_>, - warn: impl Fn(Error), - ) -> Result, Error> { - let mut labels = Vec::with_capacity(input.0.len()); - for label in &input.0 { - match LongStringMissingValues::decode(decoder, label, &warn) { - Ok(set) => labels.push(set), - Err(error) => warn(error), - } - } - Ok(Some(LongStringMissingValuesRecord(labels))) - } -} - #[derive(Clone, Debug)] pub struct LongStringValueLabels { pub var_name: Identifier, diff --git a/rust/src/dictionary.rs b/rust/src/dictionary.rs index f9886641f7..e9eca118ab 100644 --- a/rust/src/dictionary.rs +++ b/rust/src/dictionary.rs @@ -8,10 +8,10 @@ use encoding_rs::Encoding; use indexmap::IndexSet; use crate::{ - cooked::{MissingValues, Value, VarWidth}, - format::Format, + cooked::{Value, VarWidth}, + format::Spec, identifier::{ByIdentifier, HasIdentifier, Identifier}, - raw::{CategoryLabels, Alignment, Measure}, + raw::{Alignment, CategoryLabels, Measure, MissingValues, VarType}, }; pub type DictIndex = usize; @@ -50,6 +50,14 @@ impl Dictionary { } } + pub fn add_var(&mut self, variable: Variable) -> Result<(), ()> { + if self.variables.insert(ByIdentifier::new(variable)) { + Ok(()) + } else { + Err(()) + } + } + pub fn reorder_var(&mut self, from_index: DictIndex, to_index: DictIndex) { if from_index != to_index { self.variables.move_index(from_index, to_index); @@ -187,16 +195,48 @@ pub enum Role { Split, } +impl Default for Role { + fn default() -> Self { + Self::Input + } +} + +pub enum DictClass { + Ordinary, + System, + Scratch, +} + +impl DictClass { + pub fn from_identifier(id: &Identifier) -> Self { + if id.0.starts_with('$') { + Self::System + } else if id.0.starts_with('#') { + Self::Scratch + } else { + Self::Ordinary + } + } + + pub fn must_leave(self) -> bool { + match self { + DictClass::Ordinary => false, + DictClass::System => false, + DictClass::Scratch => true, + } + } +} + #[derive(Clone, Debug)] pub struct Variable { pub name: Identifier, pub width: VarWidth, pub missing_values: MissingValues, - pub print_format: Format, - pub write_format: Format, + pub print_format: Spec, + pub write_format: Spec, pub value_labels: HashMap, pub label: Option, - pub measure: Measure, + pub measure: Option, pub role: Role, pub display_width: u32, pub alignment: Alignment, @@ -205,6 +245,29 @@ pub struct Variable { pub attributes: HashSet>, } +impl Variable { + pub fn new(name: Identifier, width: VarWidth) -> Self { + let var_type = VarType::from_width(width); + let leave = DictClass::from_identifier(&name).must_leave(); + Self { + name, + width, + missing_values: MissingValues::default(), + print_format: Spec::default_for_width(width), + write_format: Spec::default_for_width(width), + value_labels: HashMap::new(), + label: None, + measure: Measure::default_for_type(var_type), + role: Role::default(), + display_width: width.default_display_width(), + alignment: Alignment::default_for_type(var_type), + leave, + short_names: Vec::new(), + attributes: HashSet::new() + } + } +} + impl HasIdentifier for Variable { fn identifier(&self) -> &Identifier { &self.name diff --git a/rust/src/main.rs b/rust/src/main.rs index 45d0622f0d..473062183b 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -17,7 +17,7 @@ use anyhow::Result; use clap::{Parser, ValueEnum}; use encoding_rs::Encoding; -use pspp::cooked::decode; +use pspp::cooked::{decode, encoding_from_headers}; use pspp::raw::{Reader, Record, Magic}; use std::fs::File; use std::io::BufReader; @@ -111,7 +111,8 @@ fn dissect(file_name: &Path, max_cases: u64, mode: Mode, encoding: Option<&'stat } Mode::Cooked => { let headers: Vec = reader.collect::, _>>()?; - let headers = decode(headers, encoding, &|e| eprintln!("{e}"))?; + let encoding = encoding_from_headers(&headers, &|e| eprintln!("{e}"))?; + let (headers, _) = decode(headers, encoding, &|e| eprintln!("{e}"))?; for header in headers { println!("{header:?}"); } diff --git a/rust/src/raw.rs b/rust/src/raw.rs index 544481906f..986bb92a52 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -1,4 +1,5 @@ use crate::{ + cooked::VarWidth, endian::{Endian, Parse, ToBytes}, identifier::{Error as IdError, Identifier}, }; @@ -185,6 +186,9 @@ pub enum Error { #[error("Invalid multiple response set variable name. {0}")] InvalidMrSetVariableName(IdError), + #[error("Invalid variable name in long string missing values record. {0}")] + InvalidLongStringMissingValueVariableName(IdError), + #[error("Details TBD")] TBD, } @@ -398,7 +402,7 @@ impl HeaderRecord { }) } - fn decode<'a>(&'a self, decoder: &Decoder) -> HeaderRecord> { + pub fn decode<'a>(&'a self, decoder: &Decoder) -> HeaderRecord> { let eye_catcher = decoder.decode(&self.eye_catcher); let file_label = decoder.decode(&self.file_label); let creation_date = decoder.decode(&self.creation_date); @@ -421,9 +425,9 @@ impl HeaderRecord { } } -struct Decoder { - encoding: &'static Encoding, - warn: Box, +pub struct Decoder { + pub encoding: &'static Encoding, + pub warn: Box, } impl Decoder { @@ -552,14 +556,14 @@ pub enum VarType { } impl VarType { - fn from_width(width: i32) -> VarType { + pub fn from_width(width: VarWidth) -> VarType { match width { - 0 => VarType::Numeric, - _ => VarType::String, + VarWidth::Numeric => Self::Numeric, + VarWidth::String(_) => Self::String, } } - fn opposite(self) -> VarType { + pub fn opposite(self) -> VarType { match self { Self::Numeric => Self::String, Self::String => Self::Numeric, @@ -848,7 +852,11 @@ where }; match record { Record::Variable(VariableRecord { width, .. }) => { - self.var_types.push(VarType::from_width(width)); + self.var_types.push(if width == 0 { + VarType::Numeric + } else { + VarType::String + }); } Record::EndOfHeaders(_) => { self.state = if let Some(Compression::ZLib) = self.header.compression { @@ -1016,7 +1024,7 @@ fn format_name(type_: u32) -> Cow<'static, str> { } #[derive(Clone)] -pub struct MissingValues +pub struct MissingValues where S: Debug, { @@ -1063,6 +1071,18 @@ where } } +impl Default for MissingValues +where + S: Debug, +{ + fn default() -> Self { + Self { + values: Vec::new(), + range: None, + } + } +} + impl MissingValues> { fn read( r: &mut R, @@ -1079,7 +1099,11 @@ impl MissingValues> { (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }), }; - let var_type = VarType::from_width(width); + let var_type = if width == 0 { + VarType::Numeric + } else { + VarType::String + }; let mut values = Vec::new(); for _ in 0..n_values { @@ -1209,7 +1233,7 @@ impl VariableRecord> { })) } - fn decode<'a>(&'a self, decoder: &Decoder) -> VariableRecord, String> { + pub fn decode<'a>(&'a self, decoder: &Decoder) -> VariableRecord, String> { VariableRecord { offsets: self.offsets.clone(), width: self.width, @@ -1506,7 +1530,7 @@ impl DocumentRecord { } } - fn decode<'a>(&'a self, decoder: &Decoder) -> DocumentRecord> { + pub fn decode<'a>(&'a self, decoder: &Decoder) -> DocumentRecord> { DocumentRecord { offsets: self.offsets.clone(), lines: self @@ -1792,6 +1816,13 @@ pub enum Measure { } impl Measure { + pub fn default_for_type(var_type: VarType) -> Option { + match var_type { + VarType::Numeric => None, + VarType::String => Some(Self::Nominal), + } + } + fn try_decode(source: u32) -> Result, Error> { match source { 0 => Ok(None), @@ -1820,6 +1851,13 @@ impl Alignment { _ => Err(Error::InvalidAlignment(source)), } } + + pub fn default_for_type(var_type: VarType) -> Self { + match var_type { + VarType::Numeric => Self::Right, + VarType::String => Self::Left, + } + } } #[derive(Clone, Debug)] @@ -1892,11 +1930,14 @@ where } impl LongStringMissingValues> { - fn decode<'a>(&self, decoder: &Decoder) -> LongStringMissingValues { - LongStringMissingValues { - var_name: decoder.decode(&self.var_name).to_string(), + fn decode<'a>( + &self, + decoder: &Decoder, + ) -> Result, IdError> { + Ok(LongStringMissingValues { + var_name: decoder.decode_identifier(&self.var_name)?, missing_values: self.missing_values.decode(decoder), - } + }) } } @@ -1959,8 +2000,21 @@ impl ExtensionRecord for LongStringMissingValueRecord> { } impl LongStringMissingValueRecord> { - fn decode<'a>(&self, decoder: &Decoder) -> LongStringMissingValueRecord { - LongStringMissingValueRecord(self.0.iter().map(|mv| mv.decode(decoder)).collect()) + pub fn decode<'a>( + &self, + decoder: &Decoder, + ) -> LongStringMissingValueRecord { + let mut mvs = Vec::with_capacity(self.0.len()); + for mv in self.0.iter() { + if let Some(mv) = mv + .decode(decoder) + .map_err(|err| Error::InvalidLongStringMissingValueVariableName(err)) + .warn_on_error(&decoder.warn) + { + mvs.push(mv); + } + } + LongStringMissingValueRecord(mvs) } } -- 2.30.2