1 use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat, ops::Range};
4 encoding::{default_encoding, get_encoding, Error as EncodingError},
6 format::{Error as FormatError, Spec, UncheckedSpec},
7 identifier::{Error as IdError, Identifier},
8 raw::{self, UnencodedStr, VarType},
10 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
11 use encoding_rs::{DecoderResult, Encoding};
12 use num::integer::div_ceil;
13 use ordered_float::OrderedFloat;
14 use thiserror::Error as ThisError;
16 pub use crate::raw::{CategoryLabels, Compression};
18 #[derive(ThisError, Debug)]
20 // XXX this is really an internal error and maybe we should change the
21 // interfaces to make it impossible
22 #[error("Missing header record")]
26 EncodingError(EncodingError),
28 #[error("Using default encoding {0}.")]
29 UsingDefaultEncoding(String),
31 #[error("Variable record from offset {:x} to {:x} specifies width {width} not in valid range [-1,255).", offsets.start, offsets.end)]
32 InvalidVariableWidth { offsets: Range<u64>, width: i32 },
34 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
35 InvalidLongMissingValueFormat,
37 #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format. Using 01 Jan 1970.")]
38 InvalidCreationDate { creation_date: String },
40 #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format. Using midnight.")]
41 InvalidCreationTime { creation_time: String },
43 #[error("{id_error} Renaming variable to {new_name}.")]
50 "Substituting {new_spec} for invalid print format on variable {variable}. {format_error}"
55 format_error: FormatError,
59 "Substituting {new_spec} for invalid write format on variable {variable}. {format_error}"
64 format_error: FormatError,
67 #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
68 DuplicateVariableName {
69 duplicate_name: Identifier,
73 #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
74 InvalidDictIndex { dict_index: usize, max_index: usize },
76 #[error("Dictionary index {0} refers to a long string continuation.")]
77 DictIndexIsContinuation(usize),
79 #[error("Variables associated with value label are not all of identical type. Variable {numeric_var} is numeric, but variable {string_var} is string.")]
80 ValueLabelsDifferentTypes {
81 numeric_var: Identifier,
82 string_var: Identifier,
86 "Value labels may not be added to long string variable {0} using record types 3 or 4."
88 InvalidLongStringValueLabel(Identifier),
90 #[error("Invalid multiple response set name. {0}")]
91 InvalidMrSetName(IdError),
93 #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
94 UnknownMrSetVariable {
96 short_name: Identifier,
99 #[error("Multiple response set {0} has no variables.")]
100 EmptyMrSet(Identifier),
102 #[error("Multiple response set {0} has only one variable.")]
103 OneVarMrSet(Identifier),
105 #[error("Multiple response set {0} contains both string and numeric variables.")]
106 MixedMrSet(Identifier),
109 "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
111 InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
113 #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
114 TooWideMDGroupCountedValue {
121 #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
122 InvalidLongValueLabelWidth {
129 #[error("Invalid attribute name. {0}")]
130 InvalidAttributeName(IdError),
132 #[error("Invalid short name in long variable name record. {0}")]
133 InvalidShortName(IdError),
135 #[error("Invalid name in long variable name record. {0}")]
136 InvalidLongName(IdError),
138 #[error("Invalid variable name in very long string record. {0}")]
139 InvalidLongStringName(IdError),
141 #[error("Invalid variable name in long string value label record. {0}")]
142 InvalidLongStringValueLabelName(IdError),
144 #[error("Invalid variable name in attribute record. {0}")]
145 InvalidAttributeVariableName(IdError),
147 // XXX This is risky because `text` might be arbitrarily long.
148 #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
149 MalformedString { encoding: String, text: String },
151 #[error("Invalid variable measurement level value {0}")]
152 InvalidMeasurement(u32),
154 #[error("Invalid variable display alignment value {0}")]
155 InvalidAlignment(u32),
157 #[error("Details TBD")]
161 #[derive(Clone, Debug)]
163 Header(HeaderRecord),
164 Variable(VariableRecord),
165 ValueLabel(ValueLabelRecord),
166 Document(DocumentRecord),
167 IntegerInfo(IntegerInfoRecord),
168 FloatInfo(FloatInfoRecord),
169 VariableSets(VariableSetRecord),
170 VarDisplay(VarDisplayRecord),
171 MultipleResponse(MultipleResponseRecord),
172 LongStringMissingValues(LongStringMissingValuesRecord),
173 LongStringValueLabels(LongStringValueLabelRecord),
174 Encoding(EncodingRecord),
175 NumberOfCases(NumberOfCasesRecord),
176 ProductInfo(ProductInfoRecord),
177 LongNames(LongNameRecord),
178 VeryLongStrings(VeryLongStringRecord),
179 FileAttributes(FileAttributeRecord),
180 VariableAttributes(VariableAttributeRecord),
181 OtherExtension(Extension),
184 //ZTrailer(ZTrailer),
188 pub use crate::raw::EncodingRecord;
189 pub use crate::raw::Extension;
190 pub use crate::raw::FloatInfoRecord;
191 pub use crate::raw::IntegerInfoRecord;
192 pub use crate::raw::NumberOfCasesRecord;
/// Index of a variable within the dictionary.  `Variable::dict_index` and the
/// keys of the decoder's `variables` map use this type; `get_var_by_index`
/// takes a 1-based index and subtracts 1 before looking up that map.
194 type DictIndex = usize;
196 pub struct Variable {
197 pub dict_index: DictIndex,
198 pub short_name: Identifier,
199 pub long_name: Option<Identifier>,
204 pub compression: Option<Compression>,
206 pub encoding: &'static Encoding,
207 pub variables: HashMap<DictIndex, Variable>,
208 pub var_names: HashMap<Identifier, DictIndex>,
209 n_dict_indexes: usize,
210 n_generated_names: usize,
214 headers: Vec<raw::Record>,
215 encoding: Option<&'static Encoding>,
216 warn: &impl Fn(Error),
217 ) -> Result<Vec<Record>, Error> {
218 let Some(header_record) = headers.iter().find_map(|rec| {
219 if let raw::Record::Header(header) = rec {
225 return Err(Error::MissingHeaderRecord);
227 let encoding = match encoding {
228 Some(encoding) => encoding,
230 let encoding = headers.iter().find_map(|rec| {
231 if let raw::Record::Encoding(ref e) = rec {
237 let character_code = headers.iter().find_map(|rec| {
238 if let raw::Record::IntegerInfo(ref r) = rec {
239 Some(r.character_code)
244 match get_encoding(encoding, character_code) {
245 Ok(encoding) => encoding,
246 Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)),
248 warn(Error::EncodingError(err));
249 // Warn that we're using the default encoding.
256 let mut decoder = Decoder {
257 compression: header_record.compression,
258 endian: header_record.endian,
260 variables: HashMap::new(),
261 var_names: HashMap::new(),
263 n_generated_names: 0,
266 let mut output = Vec::with_capacity(headers.len());
267 for header in &headers {
269 raw::Record::Header(ref input) => {
270 if let Some(header) = HeaderRecord::try_decode(&mut decoder, input, warn)? {
271 output.push(Record::Header(header))
274 raw::Record::Variable(ref input) => {
275 if let Some(variable) = VariableRecord::try_decode(&mut decoder, input, warn)? {
276 output.push(Record::Variable(variable));
279 raw::Record::ValueLabel(ref input) => {
280 if let Some(value_label) = ValueLabelRecord::try_decode(&mut decoder, input, warn)?
282 output.push(Record::ValueLabel(value_label));
285 raw::Record::Document(ref input) => {
286 if let Some(document) = DocumentRecord::try_decode(&mut decoder, input, warn)? {
287 output.push(Record::Document(document))
290 raw::Record::IntegerInfo(ref input) => output.push(Record::IntegerInfo(input.clone())),
291 raw::Record::FloatInfo(ref input) => output.push(Record::FloatInfo(input.clone())),
292 raw::Record::VariableSets(ref input) => {
293 let s = decoder.decode_string_cow(&input.text.0, warn);
294 output.push(Record::VariableSets(VariableSetRecord::parse(&s, warn)?));
296 raw::Record::VarDisplay(ref input) => {
297 if let Some(vdr) = VarDisplayRecord::try_decode(&mut decoder, input, warn)? {
298 output.push(Record::VarDisplay(vdr))
301 raw::Record::MultipleResponse(ref input) => {
302 if let Some(mrr) = MultipleResponseRecord::try_decode(&mut decoder, input, warn)? {
303 output.push(Record::MultipleResponse(mrr))
306 raw::Record::LongStringMissingValues(ref input) => {
307 if let Some(mrr) = LongStringMissingValuesRecord::try_decode(&mut decoder, input, warn)? {
308 output.push(Record::LongStringMissingValues(mrr))
311 raw::Record::LongStringValueLabels(ref input) => {
313 LongStringValueLabelRecord::try_decode(&mut decoder, input, warn)?
315 output.push(Record::LongStringValueLabels(mrr))
318 raw::Record::Encoding(ref input) => output.push(Record::Encoding(input.clone())),
319 raw::Record::NumberOfCases(ref input) => {
320 output.push(Record::NumberOfCases(input.clone()))
322 raw::Record::ProductInfo(ref input) => {
323 let s = decoder.decode_string_cow(&input.text.0, warn);
324 output.push(Record::ProductInfo(ProductInfoRecord::parse(&s, warn)?));
326 raw::Record::LongNames(ref input) => {
327 let s = decoder.decode_string_cow(&input.text.0, warn);
328 output.push(Record::LongNames(LongNameRecord::parse(
334 raw::Record::VeryLongStrings(ref input) => {
335 let s = decoder.decode_string_cow(&input.text.0, warn);
336 output.push(Record::VeryLongStrings(VeryLongStringRecord::parse(
340 raw::Record::FileAttributes(ref input) => {
341 let s = decoder.decode_string_cow(&input.text.0, warn);
342 output.push(Record::FileAttributes(FileAttributeRecord::parse(
346 raw::Record::VariableAttributes(ref input) => {
347 let s = decoder.decode_string_cow(&input.text.0, warn);
348 output.push(Record::VariableAttributes(VariableAttributeRecord::parse(
352 raw::Record::OtherExtension(ref input) => {
353 output.push(Record::OtherExtension(input.clone()))
355 raw::Record::EndOfHeaders(_) => (),
356 raw::Record::ZHeader(_) => (),
357 raw::Record::ZTrailer(_) => (),
358 raw::Record::Case(_) => (),
/// Generates a replacement variable name.  A candidate of the form
/// `VAR{n:03}` is built from the incremented `n_generated_names` counter and
/// checked against `var_names`, so a generated name does not collide with a
/// name that is already registered.
365 fn generate_name(&mut self) -> Identifier {
367 self.n_generated_names += 1;
368 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
370 if !self.var_names.contains_key(&name) {
// Guard against the counter reaching its maximum value.
373 assert!(self.n_generated_names < usize::MAX);
/// Decodes `input` from `self.encoding` (without BOM handling) and returns
/// the text as a `Cow` tied to the lifetime of `input`.
///
/// A `MalformedString` warning carrying the encoding name and the decoded
/// text is passed to `warn` (presumably only when the decoder reports
/// malformed input -- TODO confirm).
376 fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
377 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
379 warn(Error::MalformedString {
380 encoding: self.encoding.name().into(),
381 text: output.clone().into(),
/// Like `decode_string_cow`, but converts the result into an owned `String`.
386 fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String {
387 self.decode_string_cow(input, warn).into()
/// Decodes `input` with `decode_string_cow` (forwarding `warn`) and builds an
/// `Identifier` from the decoded text under `self.encoding`.
///
/// Returns the `IdError` from `Identifier::new` if the text is not accepted
/// as an identifier.
389 pub fn decode_identifier(
392 warn: &impl Fn(Error),
393 ) -> Result<Identifier, IdError> {
394 let s = self.decode_string_cow(input, warn);
395 Identifier::new(&s, self.encoding)
/// Looks up a variable by its 1-based dictionary index.
///
/// # Errors
///
/// - `InvalidDictIndex` if `dict_index` is 0 or greater than
///   `self.n_dict_indexes`.
/// - `DictIndexIsContinuation` if the index is in range but `variables` has
///   no entry for it.
397 fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
398 let max_index = self.n_dict_indexes;
399 if dict_index == 0 || dict_index > max_index {
400 return Err(Error::InvalidDictIndex {
// `variables` is keyed by 0-based index, hence the `- 1`.
405 let Some(variable) = self.variables.get(&(dict_index - 1)) else {
406 return Err(Error::DictIndexIsContinuation(dict_index));
411 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
412 /// re-encoding the result back into `self.encoding` will have exactly the
413 /// same length in bytes.
415 /// XXX warn about errors?
416 fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
417 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
418 // This is the common case. Usually there will be no errors.
421 // Unusual case. Don't bother to optimize it much.
422 let mut decoder = self.encoding.new_decoder_without_bom_handling();
423 let mut output = String::with_capacity(
425 .max_utf8_buffer_length_without_replacement(input.len())
428 let mut rest = input;
429 while !rest.is_empty() {
430 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
431 (DecoderResult::InputEmpty, _) => break,
432 (DecoderResult::OutputFull, _) => unreachable!(),
433 (DecoderResult::Malformed(a, b), consumed) => {
434 let skipped = a as usize + b as usize;
435 output.extend(repeat('?').take(skipped));
436 rest = &rest[consumed..];
440 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
446 pub trait TryDecode: Sized {
449 decoder: &mut Decoder,
451 warn: impl Fn(Error),
452 ) -> Result<Option<Self>, Error>;
455 pub trait Decode<Input>: Sized {
456 fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self;
459 impl<const N: usize> Decode<UnencodedStr<N>> for String {
460 fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
461 decoder.decode_string(&input.0, &warn)
465 #[derive(Clone, Debug)]
466 pub struct HeaderRecord {
467 pub eye_catcher: String,
468 pub weight_index: Option<usize>,
469 pub n_cases: Option<u64>,
470 pub creation: NaiveDateTime,
471 pub file_label: String,
/// Removes trailing space characters (`' '` only, not other whitespace) from
/// `s` by truncating it in place to the length of the trimmed text.
474 fn trim_end_spaces(mut s: String) -> String {
475 s.truncate(s.trim_end_matches(' ').len());
/// Decodes a raw header record.  Text fields are decoded with the decoder's
/// encoding, and the creation date and time are parsed with `chrono`.  A date
/// or time that fails to parse is reported through `warn`
/// (`InvalidCreationDate` / `InvalidCreationTime`) rather than returned as an
/// error.
479 impl TryDecode for HeaderRecord {
480 type Input = crate::raw::HeaderRecord;
483 decoder: &mut Decoder,
485 warn: impl Fn(Error),
486 ) -> Result<Option<Self>, Error> {
// Eye catcher and file label are decoded and stripped of trailing spaces.
487 let eye_catcher = trim_end_spaces(decoder.decode_string(&input.eye_catcher.0, &warn));
488 let file_label = trim_end_spaces(decoder.decode_string(&input.file_label.0, &warn));
489 let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
// NOTE(review): `%Y` parses a full (four-digit) year, but the
// `InvalidCreationDate` message describes a two-digit "DD MMM YY" date.
// `%y` may be what is intended here -- confirm against the file format.
491 NaiveDate::parse_from_str(&creation_date, "%e %b %Y").unwrap_or_else(|_| {
492 warn(Error::InvalidCreationDate {
493 creation_date: creation_date.into(),
497 let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
// Time of day is expected as "HH:MM:SS".
499 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
500 warn(Error::InvalidCreationTime {
501 creation_time: creation_time.into(),
505 Ok(Some(HeaderRecord {
507 weight_index: input.weight_index.map(|n| n as usize),
508 n_cases: input.n_cases.map(|n| n as u64),
509 creation: NaiveDateTime::new(creation_date, creation_time),
515 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
521 impl PartialOrd for VarWidth {
522 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
523 match (self, other) {
524 (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
525 (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
/// Upper bound on string widths.  Very long string lengths and long string
/// value label widths greater than this value are rejected.
532 const MAX_STRING: u16 = 32767;
534 fn n_dict_indexes(self) -> usize {
536 VarWidth::Numeric => 1,
537 VarWidth::String(w) => div_ceil(w as usize, 8),
544 f: impl Fn(u16, u16) -> u16,
545 ) -> Option<VarWidth> {
547 (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
548 (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
549 Some(VarWidth::String(f(a, b)))
555 /// Returns the wider of `a` and `b`:
556 /// - Numerical variable widths are equally wide.
557 /// - Longer strings are wider than shorter strings.
558 /// - Numerical and string types are incomparable, so result in `None`.
559 /// - Any `None` in the input yields `None` in the output.
560 pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
561 Self::width_predicate(a, b, |a, b| a.max(b))
564 /// Returns the narrower of `a` and `b` (see [`Self::wider`]).
565 pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
566 Self::width_predicate(a, b, |a, b| a.min(b))
570 impl From<VarWidth> for VarType {
571 fn from(source: VarWidth) -> Self {
573 VarWidth::Numeric => VarType::Numeric,
574 VarWidth::String(_) => VarType::String,
579 #[derive(Clone, Debug)]
580 pub struct VariableRecord {
582 pub name: Identifier,
583 pub print_format: Spec,
584 pub write_format: Spec,
585 pub missing_values: MissingValues,
586 pub label: Option<String>,
589 #[derive(Clone, Debug)]
590 pub struct MissingValues {
591 /// Individual missing values, up to 3 of them.
592 pub values: Vec<Value>,
594 /// Optional range of missing values.
595 pub range: Option<(Value, Value)>,
598 impl Decode<raw::MissingValues> for MissingValues {
599 fn decode(decoder: &Decoder, input: &raw::MissingValues, _warn: impl Fn(Error)) -> Self {
604 .map(|value| Value::decode(value, decoder))
609 .map(|(low, high)| (Value::decode(low, decoder), Value::decode(high, decoder))),
614 fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
615 UncheckedSpec::try_from(raw)
616 .and_then(Spec::try_from)
617 .and_then(|x| x.check_width_compatibility(width))
618 .unwrap_or_else(|error| {
619 let new_format = Spec::default_for_width(width);
620 warn(new_format, error);
625 impl TryDecode for VariableRecord {
626 type Input = raw::VariableRecord;
629 decoder: &mut Decoder,
630 input: &crate::raw::VariableRecord,
631 warn: impl Fn(Error),
632 ) -> Result<Option<VariableRecord>, Error> {
633 let width = match input.width {
634 0 => VarWidth::Numeric,
635 w @ 1..=255 => VarWidth::String(w as u16),
636 -1 => return Ok(None),
638 return Err(Error::InvalidVariableWidth {
639 offsets: input.offsets.clone(),
644 let name = trim_end_spaces(decoder.decode_string(&input.name.0, &warn));
645 let name = match Identifier::new(&name, decoder.encoding) {
647 if !decoder.var_names.contains_key(&name) {
650 let new_name = decoder.generate_name();
651 warn(Error::DuplicateVariableName {
652 duplicate_name: name.clone(),
653 new_name: new_name.clone(),
659 let new_name = decoder.generate_name();
660 warn(Error::InvalidVariableName {
662 new_name: new_name.clone(),
667 let variable = Variable {
668 dict_index: decoder.n_dict_indexes,
669 short_name: name.clone(),
673 decoder.n_dict_indexes += width.n_dict_indexes();
676 .insert(name.clone(), variable.dict_index)
680 .insert(variable.dict_index, variable)
683 let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
684 warn(Error::InvalidPrintFormat {
686 variable: name.clone(),
690 let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
691 warn(Error::InvalidWriteFormat {
693 variable: name.clone(),
700 .map(|label| decoder.decode_string(&label.0, &warn));
701 Ok(Some(VariableRecord {
706 missing_values: MissingValues::decode(decoder, &input.missing_values, warn),
/// A decoded document record, held as a list of strings (presumably one per
/// document line).  The `TryDecode` impl decodes each string with the
/// decoder's encoding and trims trailing spaces.
712 #[derive(Clone, Debug)]
713 pub struct DocumentRecord(Vec<String>);
715 impl TryDecode for DocumentRecord {
716 type Input = crate::raw::DocumentRecord;
719 decoder: &mut Decoder,
721 warn: impl Fn(Error),
722 ) -> Result<Option<Self>, Error> {
723 Ok(Some(DocumentRecord(
727 .map(|s| trim_end_spaces(decoder.decode_string(&s.0, &warn)))
737 const NAME: &'static str;
738 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
741 #[derive(Clone, Debug)]
742 pub struct VariableSet {
744 pub vars: Vec<String>,
748 fn parse(input: &str) -> Result<Self, Error> {
749 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
750 let vars = input.split_ascii_whitespace().map(String::from).collect();
758 trait WarnOnError<T> {
759 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
761 impl<T> WarnOnError<T> for Result<T, Error> {
762 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
764 Ok(result) => Some(result),
773 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
775 Number(Option<OrderedFloat<f64>>),
780 pub fn decode(raw: &raw::Value, decoder: &Decoder) -> Self {
782 raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
783 raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
788 #[derive(Clone, Debug)]
789 pub struct ValueLabel {
794 #[derive(Clone, Debug)]
795 pub struct ValueLabelRecord {
796 pub var_type: VarType,
797 pub labels: Vec<ValueLabel>,
798 pub variables: Vec<Identifier>,
801 impl TryDecode for ValueLabelRecord {
802 type Input = crate::raw::ValueLabelRecord;
804 decoder: &mut Decoder,
806 warn: impl Fn(Error),
807 ) -> Result<Option<ValueLabelRecord>, Error> {
808 let variables: Vec<&Variable> = input
811 .filter_map(|&dict_index| {
813 .get_var_by_index(dict_index as usize)
814 .warn_on_error(&warn)
816 .filter(|&variable| match variable.width {
817 VarWidth::String(width) if width > 8 => {
818 warn(Error::InvalidLongStringValueLabel(
819 variable.short_name.clone(),
826 let mut i = variables.iter();
827 let Some(&first_var) = i.next() else {
830 let var_type: VarType = first_var.width.into();
832 let this_type: VarType = variable.width.into();
833 if var_type != this_type {
834 let (numeric_var, string_var) = match var_type {
835 VarType::Numeric => (first_var, variable),
836 VarType::String => (variable, first_var),
838 warn(Error::ValueLabelsDifferentTypes {
839 numeric_var: numeric_var.short_name.clone(),
840 string_var: string_var.short_name.clone(),
848 .map(|(value, label)| {
849 let label = decoder.decode_string(&label.0, &warn);
850 let value = Value::decode(
851 &raw::Value::from_raw(value, var_type, decoder.endian),
854 ValueLabel { value, label }
857 let variables = variables
859 .map(|&variable| variable.short_name.clone())
861 Ok(Some(ValueLabelRecord {
/// The variable sets parsed from a "variable set" text record.  Each input
/// line is parsed as one `VariableSet`; a line that fails to parse is handed
/// to `warn_on_error` instead of aborting the whole record.
869 #[derive(Clone, Debug)]
870 pub struct VariableSetRecord(Vec<VariableSet>);
872 impl TextRecord for VariableSetRecord {
873 const NAME: &'static str = "variable set";
874 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
875 let mut sets = Vec::new();
876 for line in input.lines() {
877 if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
881 Ok(VariableSetRecord(sets))
/// The text of an "extra product info" record, stored as given to `parse`.
885 #[derive(Clone, Debug)]
886 pub struct ProductInfoRecord(pub String);
888 impl TextRecord for ProductInfoRecord {
889 const NAME: &'static str = "extra product info";
890 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
891 Ok(ProductInfoRecord(input.into()))
895 #[derive(Clone, Debug)]
896 pub struct LongName {
897 pub short_name: Identifier,
898 pub long_name: Identifier,
902 fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
904 Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidShortName)?;
906 Identifier::new(long_name, decoder.encoding).map_err(Error::InvalidLongName)?;
/// Short-name/long-name pairs parsed from a long variable name record.  The
/// text is split on tabs into `short=long` entries; an entry whose names are
/// not valid identifiers is passed to `warn_on_error` and left out.
914 #[derive(Clone, Debug)]
915 pub struct LongNameRecord(Vec<LongName>);
917 impl LongNameRecord {
918 pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
919 let mut names = Vec::new();
920 for pair in input.split('\t').filter(|s| !s.is_empty()) {
921 if let Some((short_name, long_name)) = pair.split_once('=') {
922 if let Some(long_name) =
923 LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
925 names.push(long_name);
931 Ok(LongNameRecord(names))
935 #[derive(Clone, Debug)]
936 pub struct VeryLongString {
937 pub short_name: Identifier,
941 impl VeryLongString {
942 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
943 let Some((short_name, length)) = input.split_once('=') else {
944 return Err(Error::TBD);
947 Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidLongStringName)?;
948 let length: u16 = length.parse().map_err(|_| Error::TBD)?;
949 if length > VarWidth::MAX_STRING {
950 return Err(Error::TBD);
952 Ok(VeryLongString { short_name, length })
/// Entries parsed from a very long string record.  Each `VeryLongString`
/// comes from a `short_name=length` item; an item that fails to parse is
/// passed to `warn_on_error` and left out.
956 #[derive(Clone, Debug)]
957 pub struct VeryLongStringRecord(Vec<VeryLongString>);
959 impl VeryLongStringRecord {
960 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
961 let mut very_long_strings = Vec::new();
964 .map(|s| s.trim_end_matches('\t'))
965 .filter(|s| !s.is_empty())
967 if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
968 very_long_strings.push(vls)
971 Ok(VeryLongStringRecord(very_long_strings))
975 #[derive(Clone, Debug)]
976 pub struct Attribute {
977 pub name: Identifier,
978 pub values: Vec<String>,
985 warn: &impl Fn(Error),
986 ) -> Result<(Option<Attribute>, &'a str), Error> {
987 let Some((name, mut input)) = input.split_once('(') else {
988 return Err(Error::TBD);
990 let mut values = Vec::new();
992 let Some((value, rest)) = input.split_once('\n') else {
993 return Err(Error::TBD);
995 if let Some(stripped) = value
997 .and_then(|value| value.strip_suffix('\''))
999 values.push(stripped.into());
1002 values.push(value.into());
1004 if let Some(rest) = rest.strip_prefix(')') {
1005 let attribute = Identifier::new(name, decoder.encoding)
1006 .map_err(Error::InvalidAttributeName)
1007 .warn_on_error(warn)
1008 .map(|name| Attribute { name, values });
1009 return Ok((attribute, rest));
/// A list of attributes in the order they were parsed.
/// `AttributeSet::parse` reads attributes until the input ends or the
/// optional sentinel character is reached; an attribute whose name is not a
/// valid identifier is warned about and omitted.
1016 #[derive(Clone, Debug)]
1017 pub struct AttributeSet(pub Vec<Attribute>);
1023 sentinel: Option<char>,
1024 warn: &impl Fn(Error),
1025 ) -> Result<(AttributeSet, &'a str), Error> {
1026 let mut attributes = Vec::new();
1028 match input.chars().next() {
1029 None => break input,
1030 c if c == sentinel => break &input[1..],
1032 let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
1033 if let Some(attribute) = attribute {
1034 attributes.push(attribute);
1040 Ok((AttributeSet(attributes), rest))
/// The attribute set parsed from a file attributes record (parsed with
/// `AttributeSet::parse` and no sentinel character).
1044 #[derive(Clone, Debug)]
1045 pub struct FileAttributeRecord(AttributeSet);
1047 impl FileAttributeRecord {
1048 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1049 let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
1050 if !rest.is_empty() {
1053 Ok(FileAttributeRecord(set))
1057 #[derive(Clone, Debug)]
1058 pub struct VarAttributeSet {
1059 pub long_var_name: Identifier,
1060 pub attributes: AttributeSet,
1063 impl VarAttributeSet {
1067 warn: &impl Fn(Error),
1068 ) -> Result<(Option<VarAttributeSet>, &'a str), Error> {
1069 let Some((long_var_name, rest)) = input.split_once(':') else {
1070 return Err(Error::TBD);
1072 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
1073 let var_attribute = Identifier::new(long_var_name, decoder.encoding)
1074 .map_err(Error::InvalidAttributeVariableName)
1075 .warn_on_error(warn)
1076 .map(|name| VarAttributeSet {
1077 long_var_name: name,
1080 Ok((var_attribute, rest))
/// Attribute sets for individual variables.  `parse` applies
/// `VarAttributeSet::parse` while input remains; each set is the variable
/// name (the text before `:`) followed by its attributes, with `/` as the
/// sentinel that ends the set.
1084 #[derive(Clone, Debug)]
1085 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
1087 impl VariableAttributeRecord {
1088 pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
1089 let mut var_attribute_sets = Vec::new();
1090 while !input.is_empty() {
1091 let Some((var_attribute, rest)) =
1092 VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
1096 if let Some(var_attribute) = var_attribute {
1097 var_attribute_sets.push(var_attribute);
1101 Ok(VariableAttributeRecord(var_attribute_sets))
1105 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1113 fn try_decode(source: u32) -> Result<Option<Measure>, Error> {
1116 1 => Ok(Some(Measure::Nominal)),
1117 2 => Ok(Some(Measure::Ordinal)),
1118 3 => Ok(Some(Measure::Scale)),
1119 _ => Err(Error::InvalidMeasurement(source)),
1124 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
1125 pub enum Alignment {
1132 fn try_decode(source: u32) -> Result<Option<Alignment>, Error> {
1135 1 => Ok(Some(Alignment::Left)),
1136 2 => Ok(Some(Alignment::Right)),
1137 3 => Ok(Some(Alignment::Center)),
1138 _ => Err(Error::InvalidAlignment(source)),
1143 #[derive(Clone, Debug)]
1144 pub struct VarDisplay {
1145 pub measure: Option<Measure>,
1146 pub width: Option<u32>,
1147 pub alignment: Option<Alignment>,
/// Decoded variable display settings.  The raw record must hold either 3 or
/// 2 values per variable in `decoder.variables` (measure, optional width,
/// alignment); any other length makes decoding fail.
1150 #[derive(Clone, Debug)]
1151 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
1153 impl TryDecode for VarDisplayRecord {
1154 type Input = raw::VarDisplayRecord;
1156 decoder: &mut Decoder,
1157 input: &Self::Input,
1158 warn: impl Fn(Error),
1159 ) -> Result<Option<Self>, Error> {
1160 let n_vars = decoder.variables.len();
1161 let n_per_var = if input.0.len() == 3 * n_vars {
1163 } else if input.0.len() == 2 * n_vars {
1166 return Err(Error::TBD);
1169 let var_displays = input
1173 let (measure, width, alignment) = match n_per_var == 3 {
1174 true => (chunk[0], Some(chunk[1]), chunk[2]),
1175 false => (chunk[0], None, chunk[1]),
1177 let measure = Measure::try_decode(measure).warn_on_error(&warn).flatten();
1178 let alignment = Alignment::try_decode(alignment)
1179 .warn_on_error(&warn)
1188 Ok(Some(VarDisplayRecord(var_displays)))
1192 #[derive(Clone, Debug)]
1193 pub enum MultipleResponseType {
1196 labels: CategoryLabels,
1201 impl MultipleResponseType {
1204 mr_set: &Identifier,
1205 input: &raw::MultipleResponseType,
1206 min_width: VarWidth,
1207 warn: &impl Fn(Error),
1208 ) -> Result<Self, Error> {
1209 let mr_type = match input {
1210 raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
1211 let value = decoder.decode_string_cow(&value.0, warn);
1212 let value = match min_width {
1213 VarWidth::Numeric => {
1214 let number: f64 = value.trim().parse().map_err(|_| {
1215 Error::InvalidMDGroupCountedValue {
1216 mr_set: mr_set.clone(),
1217 number: value.into(),
1220 Value::Number(Some(number.into()))
1222 VarWidth::String(max_width) => {
1223 let value = value.trim_end_matches(' ');
1224 let width = value.len();
1225 if width > max_width as usize {
1226 return Err(Error::TooWideMDGroupCountedValue {
1227 mr_set: mr_set.clone(),
1228 value: value.into(),
1233 Value::String(value.into())
1236 MultipleResponseType::MultipleDichotomy {
1241 raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
1247 #[derive(Clone, Debug)]
1248 pub struct MultipleResponseSet {
1249 pub name: Identifier,
1250 pub min_width: VarWidth,
1251 pub max_width: VarWidth,
1253 pub mr_type: MultipleResponseType,
1254 pub dict_indexes: Vec<DictIndex>,
1257 impl MultipleResponseSet {
1260 input: &raw::MultipleResponseSet,
1261 warn: &impl Fn(Error),
1262 ) -> Result<Self, Error> {
1263 let mr_set_name = decoder
1264 .decode_identifier(&input.name.0, warn)
1265 .map_err(Error::InvalidMrSetName)?;
1267 let label = decoder.decode_string(&input.label.0, warn);
1269 let mut dict_indexes = Vec::with_capacity(input.short_names.len());
1270 for short_name in input.short_names.iter() {
1271 let short_name = match decoder.decode_identifier(&short_name.0, warn) {
1274 warn(Error::InvalidMrSetName(error));
1278 let Some(&dict_index) = decoder.var_names.get(&short_name) else {
1279 warn(Error::UnknownMrSetVariable {
1280 mr_set: mr_set_name.clone(),
1281 short_name: short_name.clone(),
1285 dict_indexes.push(dict_index);
1288 match dict_indexes.len() {
1289 0 => return Err(Error::EmptyMrSet(mr_set_name)),
1290 1 => return Err(Error::OneVarMrSet(mr_set_name)),
1294 let Some((Some(min_width), Some(max_width))) = dict_indexes
1296 .map(|dict_index| decoder.variables[dict_index].width)
1297 .map(|w| (Some(w), Some(w)))
1298 .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
1300 return Err(Error::MixedMrSet(mr_set_name));
1304 MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
1306 Ok(MultipleResponseSet {
/// The multiple response sets that decoded successfully.  A set that fails
/// to decode is reported through `warn` and omitted.
1317 #[derive(Clone, Debug)]
1318 pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
1320 impl TryDecode for MultipleResponseRecord {
1321 type Input = raw::MultipleResponseRecord;
1324 decoder: &mut Decoder,
1325 input: &Self::Input,
1326 warn: impl Fn(Error),
1327 ) -> Result<Option<Self>, Error> {
1328 let mut sets = Vec::with_capacity(input.0.len());
1329 for set in &input.0 {
1330 match MultipleResponseSet::decode(decoder, set, &warn) {
1331 Ok(set) => sets.push(set),
1332 Err(error) => warn(error),
1335 Ok(Some(MultipleResponseRecord(sets)))
1339 #[derive(Clone, Debug)]
1340 pub struct LongStringMissingValues {
1342 pub var_name: Identifier,
1345 pub missing_values: MissingValues,
1348 impl LongStringMissingValues {
1351 input: &raw::LongStringMissingValues,
1352 warn: &impl Fn(Error),
1353 ) -> Result<Self, Error> {
1354 let var_name = decoder.decode_string(&input.var_name.0, warn);
1355 let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
1356 .map_err(Error::InvalidLongStringValueLabelName)?;
1358 let missing_values = MissingValues::decode(decoder, &input.missing_values, warn);
1360 Ok(LongStringMissingValues {
/// The long string missing value entries that decoded successfully.  An
/// entry that fails to decode is reported through `warn` and omitted.
1367 #[derive(Clone, Debug)]
1368 pub struct LongStringMissingValuesRecord(Vec<LongStringMissingValues>);
1370 impl TryDecode for LongStringMissingValuesRecord {
1371 type Input = raw::LongStringMissingValueSet;
1374 decoder: &mut Decoder,
1375 input: &Self::Input,
1376 warn: impl Fn(Error),
1377 ) -> Result<Option<Self>, Error> {
1378 let mut labels = Vec::with_capacity(input.0.len());
1379 for label in &input.0 {
1380 match LongStringMissingValues::decode(decoder, label, &warn) {
1381 Ok(set) => labels.push(set),
1382 Err(error) => warn(error),
1385 Ok(Some(LongStringMissingValuesRecord(labels)))
1389 #[derive(Clone, Debug)]
1390 pub struct LongStringValueLabels {
1391 pub var_name: Identifier,
1392 pub width: VarWidth,
1393 pub labels: Vec<ValueLabel>,
1396 impl LongStringValueLabels {
1399 input: &raw::LongStringValueLabels,
1400 warn: &impl Fn(Error),
1401 ) -> Result<Self, Error> {
1402 let var_name = decoder.decode_string(&input.var_name.0, warn);
1403 let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
1404 .map_err(Error::InvalidLongStringValueLabelName)?;
1407 let max_width = VarWidth::MAX_STRING;
1408 if input.width < 9 || input.width > max_width as u32 {
1409 return Err(Error::InvalidLongValueLabelWidth {
1416 let width = input.width as u16;
1418 let mut labels = Vec::with_capacity(input.labels.len());
1419 for (value, label) in input.labels.iter() {
1420 let value = Value::String(decoder.decode_exact_length(&value.0).into());
1421 let label = decoder.decode_string(&label.0, warn);
1422 labels.push(ValueLabel { value, label });
1425 Ok(LongStringValueLabels {
1427 width: VarWidth::String(width),
/// The long string value label sets that decoded successfully.  A set that
/// fails to decode is reported through `warn` and omitted.
1433 #[derive(Clone, Debug)]
1434 pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
1436 impl TryDecode for LongStringValueLabelRecord {
1437 type Input = raw::LongStringValueLabelRecord;
1440 decoder: &mut Decoder,
1441 input: &Self::Input,
1442 warn: impl Fn(Error),
1443 ) -> Result<Option<Self>, Error> {
1444 let mut labels = Vec::with_capacity(input.0.len());
1445 for label in &input.0 {
1446 match LongStringValueLabels::decode(decoder, label, &warn) {
1447 Ok(set) => labels.push(set),
1448 Err(error) => warn(error),
1451 Ok(Some(LongStringValueLabelRecord(labels)))
1457 use encoding_rs::WINDOWS_1252;
1461 let mut s = String::new();
1462 s.push(char::REPLACEMENT_CHARACTER);
1463 let encoded = WINDOWS_1252.encode(&s).0;
1464 let decoded = WINDOWS_1252.decode(&encoded[..]).0;
1465 println!("{:?}", decoded);
1470 let charset: Vec<u8> = (0..=255).collect();
1471 println!("{}", charset.len());
1472 let decoded = WINDOWS_1252.decode(&charset[..]).0;
1473 println!("{}", decoded.len());
1474 let encoded = WINDOWS_1252.encode(&decoded[..]).0;
1475 println!("{}", encoded.len());
1476 assert_eq!(&charset[..], &encoded[..]);