1 use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
4 encoding::{get_encoding, Error as EncodingError, default_encoding},
6 format::{Error as FormatError, Spec, UncheckedSpec},
7 identifier::{Error as IdError, Identifier},
8 raw::{self, MissingValues, UnencodedStr, VarType},
10 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
11 use encoding_rs::{DecoderResult, Encoding};
12 use num::integer::div_ceil;
13 use ordered_float::OrderedFloat;
14 use thiserror::Error as ThisError;
16 pub use crate::raw::{CategoryLabels, Compression};
18 #[derive(ThisError, Debug)]
20 // XXX this is really an internal error and maybe we should change the
21 // interfaces to make it impossible
22 #[error("Missing header record")]
26 EncodingError(EncodingError),
28 #[error("Using default encoding {0}.")]
29 UsingDefaultEncoding(String),
31 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
32 InvalidVariableWidth { offset: u64, width: i32 },
34 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
35 InvalidLongMissingValueFormat,
37 #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format. Using 01 Jan 1970.")]
38 InvalidCreationDate { creation_date: String },
40 #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format. Using midnight.")]
41 InvalidCreationTime { creation_time: String },
43 #[error("{id_error} Renaming variable to {new_name}.")]
50 "Substituting {new_spec} for invalid print format on variable {variable}. {format_error}"
55 format_error: FormatError,
59 "Substituting {new_spec} for invalid write format on variable {variable}. {format_error}"
64 format_error: FormatError,
67 #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
68 DuplicateVariableName {
69 duplicate_name: Identifier,
73 #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
74 InvalidDictIndex { dict_index: usize, max_index: usize },
76 #[error("Dictionary index {0} refers to a long string continuation.")]
77 DictIndexIsContinuation(usize),
79 #[error("Variables associated with value label are not all of identical type. Variable {numeric_var} is numeric, but variable {string_var} is string.")]
80 ValueLabelsDifferentTypes {
81 numeric_var: Identifier,
82 string_var: Identifier,
86 "Value labels may not be added to long string variable {0} using record types 3 or 4."
88 InvalidLongStringValueLabel(Identifier),
90 #[error("Invalid multiple response set name. {0}")]
91 InvalidMrSetName(IdError),
93 #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
94 UnknownMrSetVariable {
96 short_name: Identifier,
99 #[error("Multiple response set {0} has no variables.")]
100 EmptyMrSet(Identifier),
102 #[error("Multiple response set {0} has only one variable.")]
103 OneVarMrSet(Identifier),
105 #[error("Multiple response set {0} contains both string and numeric variables.")]
106 MixedMrSet(Identifier),
109 "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
111 InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
113 #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
114 TooWideMDGroupCountedValue {
121 #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
122 InvalidLongValueLabelWidth {
129 #[error("Invalid attribute name. {0}")]
130 InvalidAttributeName(IdError),
132 #[error("Invalid short name in long variable name record. {0}")]
133 InvalidShortName(IdError),
135 #[error("Invalid name in long variable name record. {0}")]
136 InvalidLongName(IdError),
138 #[error("Invalid variable name in very long string record. {0}")]
139 InvalidLongStringName(IdError),
141 #[error("Invalid variable name in long string value label record. {0}")]
142 InvalidLongStringValueLabelName(IdError),
144 #[error("Invalid variable name in attribute record. {0}")]
145 InvalidAttributeVariableName(IdError),
147 #[error("Details TBD")]
151 #[derive(Clone, Debug)]
153 Header(HeaderRecord),
154 Variable(VariableRecord),
155 ValueLabel(ValueLabelRecord),
156 Document(DocumentRecord),
157 IntegerInfo(IntegerInfoRecord),
158 FloatInfo(FloatInfoRecord),
159 VariableSets(VariableSetRecord),
160 VarDisplay(VarDisplayRecord),
161 MultipleResponse(MultipleResponseRecord),
162 LongStringValueLabels(LongStringValueLabelRecord),
163 Encoding(EncodingRecord),
164 NumberOfCases(NumberOfCasesRecord),
165 ProductInfo(ProductInfoRecord),
166 LongNames(LongNameRecord),
167 VeryLongStrings(VeryLongStringRecord),
168 FileAttributes(FileAttributeRecord),
169 VariableAttributes(VariableAttributeRecord),
170 OtherExtension(Extension),
173 //ZTrailer(ZTrailer),
177 pub use crate::raw::EncodingRecord;
178 pub use crate::raw::Extension;
179 pub use crate::raw::FloatInfoRecord;
180 pub use crate::raw::IntegerInfoRecord;
181 pub use crate::raw::NumberOfCasesRecord;
/// Integer type used for dictionary indexes, e.g. `Variable::dict_index` and
/// the keys of the `variables` map.
183 type DictIndex = usize;
185 pub struct Variable {
186 pub dict_index: DictIndex,
187 pub short_name: Identifier,
188 pub long_name: Option<Identifier>,
193 pub compression: Option<Compression>,
195 pub encoding: &'static Encoding,
196 pub variables: HashMap<DictIndex, Variable>,
197 pub var_names: HashMap<Identifier, DictIndex>,
198 n_dict_indexes: usize,
199 n_generated_names: usize,
/// Decodes a list of `raw::Record`s into cooked [`Record`]s.
///
/// The steps visible here are: find the header record, determine the
/// character encoding from the encoding record and the integer-info
/// `character_code`, and construct the `Decoder` state.
///
/// # Errors
/// Returns `Error::MissingHeaderRecord` if `headers` contains no header
/// record, and `Error::EncodingError` if the encoding resolves to EBCDIC.
/// Other encoding errors are passed to `warn` instead of being returned.
///
/// NOTE(review): the type parameter `T` is not used by any parameter or by
/// the return type in this signature, so it cannot be inferred at call
/// sites — confirm whether it is intentional.
202 pub fn decode<T>(headers: Vec<raw::Record>, warn: &impl Fn(Error)) -> Result<Vec<Record>, Error> {
// The header record is mandatory: without it decoding stops with an error.
203 let Some(header_record) = headers.iter().find_map(|rec| {
204 if let raw::Record::Header(header) = rec {
210 return Err(Error::MissingHeaderRecord);
// Both the encoding record and the integer-info character code feed
// `get_encoding` below.
212 let encoding = headers.iter().find_map(|rec| {
213 if let raw::Record::Encoding(ref e) = rec {
219 let character_code = headers.iter().find_map(|rec| {
220 if let raw::Record::IntegerInfo(ref r) = rec {
221 Some(r.character_code)
226 let encoding = match get_encoding(encoding, character_code) {
227 Ok(encoding) => encoding,
// EBCDIC is the one encoding error treated as fatal.
228 Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)),
230 warn(Error::EncodingError(err));
231 // Warn that we're using the default encoding.
// Decoder state starts with empty variable tables and no generated names.
236 let decoder = Decoder {
237 compression: header_record.compression,
238 endian: header_record.endian,
240 variables: HashMap::new(),
241 var_names: HashMap::new(),
243 n_generated_names: 0,
250 fn generate_name(&mut self) -> Identifier {
252 self.n_generated_names += 1;
253 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
255 if !self.var_names.contains_key(&name) {
258 assert!(self.n_generated_names < usize::MAX);
261 fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
262 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
/// Decodes `input` with [`Self::decode_string_cow`] and converts the result
/// into an owned `String`. `warn` is passed through unchanged.
268 fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String {
269 self.decode_string_cow(input, warn).into()
271 pub fn decode_identifier(
274 warn: &impl Fn(Error),
275 ) -> Result<Identifier, IdError> {
276 let s = self.decode_string_cow(input, warn);
277 Identifier::new(&s, self.encoding)
/// Looks up the variable stored at `dict_index` in `self.variables`.
///
/// # Errors
/// - `Error::InvalidDictIndex` if `dict_index` is 0 or greater than
///   `self.n_dict_indexes - 1`.
/// - `Error::DictIndexIsContinuation` if the index is in range but has no
///   entry in `self.variables`.
279 fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
// NOTE(review): this subtraction underflows (panic with overflow checks,
// wrap-around without) if `n_dict_indexes` is 0 — confirm the counter's
// initial value, which is not visible here.
280 let max_index = self.n_dict_indexes - 1;
// `dict_index` is already a `usize`, so the `as usize` cast is a no-op.
281 if dict_index == 0 || dict_index as usize > max_index {
282 return Err(Error::InvalidDictIndex {
// An in-range index without a map entry is reported as a continuation.
287 let Some(variable) = self.variables.get(&dict_index) else {
288 return Err(Error::DictIndexIsContinuation(dict_index));
293 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
294 /// re-encoding the result back into `self.encoding` will have exactly the
295 /// same length in bytes.
297 /// XXX warn about errors?
298 fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
299 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
300 // This is the common case. Usually there will be no errors.
303 // Unusual case. Don't bother to optimize it much.
304 let mut decoder = self.encoding.new_decoder_without_bom_handling();
305 let mut output = String::with_capacity(
307 .max_utf8_buffer_length_without_replacement(input.len())
310 let mut rest = input;
311 while !rest.is_empty() {
312 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
313 (DecoderResult::InputEmpty, _) => break,
314 (DecoderResult::OutputFull, _) => unreachable!(),
315 (DecoderResult::Malformed(a, b), consumed) => {
316 let skipped = a as usize + b as usize;
317 output.extend(repeat('?').take(skipped));
318 rest = &rest[consumed..];
322 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
328 pub trait TryDecode: Sized {
333 warn: impl Fn(Error),
334 ) -> Result<Self, Error>;
/// Conversion of a raw `Input` into `Self` with the help of a [`Decoder`].
///
/// Unlike [`TryDecode`], `decode` returns `Self` directly rather than a
/// `Result`. `warn` is a callback that accepts [`Error`] values.
337 pub trait Decode<Input>: Sized {
338 fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self;
/// Decodes an `UnencodedStr<N>` into a `String` by passing its inner bytes
/// (`input.0`) to `Decoder::decode_string`.
341 impl<const N: usize> Decode<UnencodedStr<N>> for String {
342 fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
343 decoder.decode_string(&input.0, &warn)
/// Decoded form of a `crate::raw::HeaderRecord`, produced by the `TryDecode`
/// implementation for this type.
347 #[derive(Clone, Debug)]
348 pub struct HeaderRecord {
/// Text decoded from the raw record's `eye_catcher` bytes.
349 pub eye_catcher: String,
/// The raw record's `weight_index` converted to `usize`, if present.
350 pub weight_index: Option<usize>,
/// The raw record's `n_cases` converted to `u64`, if present.
351 pub n_cases: Option<u64>,
/// Creation timestamp, combined from the parsed creation date and time.
352 pub creation: NaiveDateTime,
/// Text decoded from the raw record's `file_label` bytes.
353 pub file_label: String,
/// Builds a [`HeaderRecord`] from a `crate::raw::HeaderRecord`.
///
/// Text fields are decoded with the decoder's encoding. A creation date or
/// time that cannot be parsed is reported through `warn` (as
/// `Error::InvalidCreationDate` / `Error::InvalidCreationTime`) rather than
/// failing the whole record.
356 impl TryDecode for HeaderRecord {
357 type Input = crate::raw::HeaderRecord;
362 warn: impl Fn(Error),
363 ) -> Result<Self, Error> {
364 let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
365 let file_label = decoder.decode_string(&input.file_label.0, &warn);
366 let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
// NOTE(review): chrono documents `%v` as `%e-%b-%Y` (e.g. ` 8-Jul-2001`),
// whereas the `InvalidCreationDate` message describes the expected input as
// "DD MMM YY" — confirm which format is intended.
367 let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
368 warn(Error::InvalidCreationDate {
369 creation_date: creation_date.into(),
373 let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
375 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
376 warn(Error::InvalidCreationTime {
377 creation_time: creation_time.into(),
// NOTE(review): `as` casts do not range-check; the source integer types are
// declared in `crate::raw` and are not visible here — confirm they cannot be
// negative.
383 weight_index: input.weight_index.map(|n| n as usize),
384 n_cases: input.n_cases.map(|n| n as u64),
385 creation: NaiveDateTime::new(creation_date, creation_time),
391 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
397 impl PartialOrd for VarWidth {
398 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
399 match (self, other) {
400 (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
401 (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
408 const MAX_STRING: u16 = 32767;
410 fn n_dict_indexes(self) -> usize {
412 VarWidth::Numeric => 1,
413 VarWidth::String(w) => div_ceil(w as usize, 8),
420 f: impl Fn(u16, u16) -> u16,
421 ) -> Option<VarWidth> {
423 (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
424 (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
425 Some(VarWidth::String(f(a, b)))
431 /// Returns the wider of `a` and `b`:
432 /// - Numerical variable widths are equally wide.
433 /// - Longer strings are wider than shorter strings.
434 /// - Numerical and string types are incomparable, so result in `None`.
435 /// - Any `None` in the input yields `None` in the output.
436 pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
// For two string widths, `width_predicate` applies `max` to the lengths.
437 Self::width_predicate(a, b, |a, b| a.max(b))
440 /// Returns the narrower of `a` and `b` (see [`Self::wider`]).
441 pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
// For two string widths, `width_predicate` applies `min` to the lengths.
442 Self::width_predicate(a, b, |a, b| a.min(b))
446 impl From<VarWidth> for VarType {
447 fn from(source: VarWidth) -> Self {
449 VarWidth::Numeric => VarType::Numeric,
450 VarWidth::String(_) => VarType::String,
455 #[derive(Clone, Debug)]
456 pub struct VariableRecord {
458 pub name: Identifier,
459 pub print_format: Spec,
460 pub write_format: Spec,
461 pub missing_values: MissingValues,
462 pub label: Option<String>,
465 fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
466 UncheckedSpec::try_from(raw)
467 .and_then(Spec::try_from)
468 .and_then(|x| x.check_width_compatibility(width))
469 .unwrap_or_else(|error| {
470 let new_format = Spec::default_for_width(width);
471 warn(new_format, error);
476 impl VariableRecord {
478 decoder: &mut Decoder,
479 input: &crate::raw::VariableRecord,
480 warn: impl Fn(Error),
481 ) -> Result<Option<VariableRecord>, Error> {
482 let width = match input.width {
483 0 => VarWidth::Numeric,
484 w @ 1..=255 => VarWidth::String(w as u16),
485 -1 => return Ok(None),
487 return Err(Error::InvalidVariableWidth {
488 offset: input.offset,
493 let name = match decoder.decode_identifier(&input.name.0, &warn) {
495 if !decoder.var_names.contains_key(&name) {
498 let new_name = decoder.generate_name();
499 warn(Error::DuplicateVariableName {
500 duplicate_name: name.clone(),
501 new_name: new_name.clone(),
507 let new_name = decoder.generate_name();
508 warn(Error::InvalidVariableName {
510 new_name: new_name.clone(),
515 let variable = Variable {
516 dict_index: decoder.n_dict_indexes,
517 short_name: name.clone(),
521 decoder.n_dict_indexes += width.n_dict_indexes();
524 .insert(name.clone(), variable.dict_index)
528 .insert(variable.dict_index, variable)
531 let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
532 warn(Error::InvalidPrintFormat {
534 variable: name.clone(),
538 let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
539 warn(Error::InvalidWriteFormat {
541 variable: name.clone(),
548 .map(|label| decoder.decode_string(&label.0, &warn));
549 Ok(Some(VariableRecord {
554 missing_values: input.missing_values.clone(),
/// Strings produced by decoding a `crate::raw::DocumentRecord` (see the
/// `TryDecode` implementation for this type). The inner vector is not `pub`.
560 #[derive(Clone, Debug)]
561 pub struct DocumentRecord(Vec<String>);
563 impl TryDecode for DocumentRecord {
564 type Input = crate::raw::DocumentRecord;
569 warn: impl Fn(Error),
570 ) -> Result<Self, Error> {
575 .map(|s| decoder.decode_string(&s.0, &warn))
585 const NAME: &'static str;
586 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
589 #[derive(Clone, Debug)]
590 pub struct VariableSet {
592 pub vars: Vec<String>,
596 fn parse(input: &str) -> Result<Self, Error> {
597 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
598 let vars = input.split_ascii_whitespace().map(String::from).collect();
/// Helper trait for converting a fallible value into an `Option<T>` while
/// giving the implementation access to a `warn` callback.
///
/// The implementation for `Result<T, Error>` maps `Ok(v)` to `Some(v)`; its
/// error arm is not visible here but, going by the method name, presumably
/// passes the error to `warn` and yields `None` — confirm.
606 trait WarnOnError<T> {
607 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
609 impl<T> WarnOnError<T> for Result<T, Error> {
610 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
612 Ok(result) => Some(result),
621 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
623 Number(Option<OrderedFloat<f64>>),
628 pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
630 raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
631 raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
636 #[derive(Clone, Debug)]
637 pub struct ValueLabel {
/// Decoded value labels together with the variables they are attached to,
/// as built by `ValueLabelRecord::decode`.
642 #[derive(Clone, Debug)]
643 pub struct ValueLabelRecord {
/// Type shared by the labelled variables (taken from the first variable).
644 pub var_type: VarType,
/// The decoded value/label pairs.
645 pub labels: Vec<ValueLabel>,
/// Short names of the variables the labels apply to.
646 pub variables: Vec<Identifier>,
649 impl ValueLabelRecord {
651 decoder: &mut Decoder,
652 raw_value_label: &crate::raw::ValueLabelRecord,
653 dict_indexes: &crate::raw::VarIndexRecord,
654 warn: impl Fn(Error),
655 ) -> Result<Option<ValueLabelRecord>, Error> {
656 let variables: Vec<&Variable> = dict_indexes
659 .filter_map(|&dict_index| {
661 .get_var_by_index(dict_index as usize)
662 .warn_on_error(&warn)
664 .filter(|&variable| match variable.width {
665 VarWidth::String(width) if width > 8 => {
666 warn(Error::InvalidLongStringValueLabel(
667 variable.short_name.clone(),
674 let mut i = variables.iter();
675 let Some(&first_var) = i.next() else {
678 let var_type: VarType = first_var.width.into();
680 let this_type: VarType = variable.width.into();
681 if var_type != this_type {
682 let (numeric_var, string_var) = match var_type {
683 VarType::Numeric => (first_var, variable),
684 VarType::String => (variable, first_var),
686 warn(Error::ValueLabelsDifferentTypes {
687 numeric_var: numeric_var.short_name.clone(),
688 string_var: string_var.short_name.clone(),
693 let labels = raw_value_label
696 .map(|(value, label)| {
697 let label = decoder.decode_string(&label.0, &warn);
698 let value = Value::decode(
699 raw::Value::from_raw(*value, var_type, decoder.endian),
702 ValueLabel { value, label }
705 let variables = variables
707 .map(|&variable| variable.short_name.clone())
709 Ok(Some(ValueLabelRecord {
/// The [`VariableSet`]s parsed from a "variable set" text record. The
/// `TextRecord::parse` implementation handles the input one line at a time
/// and routes per-line errors through `warn`. The inner vector is not `pub`.
717 #[derive(Clone, Debug)]
718 pub struct VariableSetRecord(Vec<VariableSet>);
720 impl TextRecord for VariableSetRecord {
721 const NAME: &'static str = "variable set";
722 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
723 let mut sets = Vec::new();
724 for line in input.lines() {
725 if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
729 Ok(VariableSetRecord(sets))
/// Text of an "extra product info" record, stored as given to
/// `TextRecord::parse`.
733 #[derive(Clone, Debug)]
734 pub struct ProductInfoRecord(pub String);
/// `TextRecord` implementation that wraps the whole input text unchanged.
/// Parsing never fails and never warns (`_warn` is unused).
736 impl TextRecord for ProductInfoRecord {
737 const NAME: &'static str = "extra product info";
738 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
739 Ok(ProductInfoRecord(input.into()))
/// Association between a variable's short name and its long name, built by
/// `LongName::new` from the two halves of a `short=long` pair.
743 #[derive(Clone, Debug)]
744 pub struct LongName {
/// The short name (text before the `=`), validated as an [`Identifier`].
745 pub short_name: Identifier,
/// The long name (text after the `=`), validated as an [`Identifier`].
746 pub long_name: Identifier,
750 fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
751 let short_name = Identifier::new(short_name, decoder.encoding)
752 .map_err(|e| Error::InvalidShortName(e))?;
754 Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?;
/// The [`LongName`] entries parsed from a long variable name record.
/// `LongNameRecord::parse` splits its input on tabs, skips empty pieces, and
/// expects each remaining piece to be a `short=long` pair. The inner vector
/// is not `pub`.
762 #[derive(Clone, Debug)]
763 pub struct LongNameRecord(Vec<LongName>);
765 impl LongNameRecord {
766 pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
767 let mut names = Vec::new();
768 for pair in input.split('\t').filter(|s| !s.is_empty()) {
769 if let Some((short_name, long_name)) = pair.split_once('=') {
770 if let Some(long_name) =
771 LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
773 names.push(long_name);
779 Ok(LongNameRecord(names))
783 #[derive(Clone, Debug)]
784 pub struct VeryLongString {
785 pub short_name: Identifier,
789 impl VeryLongString {
790 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
791 let Some((short_name, length)) = input.split_once('=') else {
792 return Err(Error::TBD);
794 let short_name = Identifier::new(short_name, decoder.encoding)
795 .map_err(|e| Error::InvalidLongStringName(e))?;
796 let length: u16 = length.parse().map_err(|_| Error::TBD)?;
797 if length > VarWidth::MAX_STRING {
798 return Err(Error::TBD);
801 short_name: short_name.into(),
/// The [`VeryLongString`] entries collected by `VeryLongStringRecord::parse`.
/// Entries that fail to parse are routed through `warn` and left out. The
/// inner vector is not `pub`.
807 #[derive(Clone, Debug)]
808 pub struct VeryLongStringRecord(Vec<VeryLongString>);
810 impl VeryLongStringRecord {
811 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
812 let mut very_long_strings = Vec::new();
815 .map(|s| s.trim_end_matches('\t'))
816 .filter(|s| !s.is_empty())
818 if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
819 very_long_strings.push(vls)
822 Ok(VeryLongStringRecord(very_long_strings))
/// A named attribute with its values, as produced by `Attribute::parse`.
826 #[derive(Clone, Debug)]
827 pub struct Attribute {
/// Attribute name: the text before the opening `(`, validated as an
/// [`Identifier`].
828 pub name: Identifier,
/// The attribute's values, in input order; each value is terminated by a
/// newline in the input.
829 pub values: Vec<String>,
836 warn: &impl Fn(Error),
837 ) -> Result<(Option<Attribute>, &'a str), Error> {
838 let Some((name, mut input)) = input.split_once('(') else {
839 return Err(Error::TBD);
841 let mut values = Vec::new();
843 let Some((value, rest)) = input.split_once('\n') else {
844 return Err(Error::TBD);
846 if let Some(stripped) = value
848 .and_then(|value| value.strip_suffix('\''))
850 values.push(stripped.into());
853 values.push(value.into());
855 if let Some(rest) = rest.strip_prefix(')') {
856 let attribute = Identifier::new(name, decoder.encoding)
857 .map_err(|e| Error::InvalidAttributeName(e))
859 .map(|name| Attribute { name, values });
860 return Ok((attribute, rest));
/// A sequence of [`Attribute`]s collected by `AttributeSet::parse`, which
/// reads attributes until it meets the optional sentinel character.
867 #[derive(Clone, Debug)]
868 pub struct AttributeSet(pub Vec<Attribute>);
874 sentinel: Option<char>,
875 warn: &impl Fn(Error),
876 ) -> Result<(AttributeSet, &'a str), Error> {
877 let mut attributes = Vec::new();
879 match input.chars().next() {
881 c if c == sentinel => break &input[1..],
883 let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
884 if let Some(attribute) = attribute {
885 attributes.push(attribute);
891 Ok((AttributeSet(attributes), rest))
/// The [`AttributeSet`] parsed from a file attribute record. It is parsed
/// without a sentinel character (`None`). The inner set is not `pub`.
895 #[derive(Clone, Debug)]
896 pub struct FileAttributeRecord(AttributeSet);
898 impl FileAttributeRecord {
899 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
900 let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
901 if !rest.is_empty() {
904 Ok(FileAttributeRecord(set))
/// The attributes attached to one variable, as parsed by
/// `VarAttributeSet::parse`.
908 #[derive(Clone, Debug)]
909 pub struct VarAttributeSet {
/// Variable name: the text before the `:`, validated as an [`Identifier`].
910 pub long_var_name: Identifier,
/// The attributes that follow the `:`, parsed with `/` as the sentinel.
911 pub attributes: AttributeSet,
914 impl VarAttributeSet {
918 warn: &impl Fn(Error),
919 ) -> Result<(Option<VarAttributeSet>, &'a str), Error> {
920 let Some((long_var_name, rest)) = input.split_once(':') else {
921 return Err(Error::TBD);
923 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
924 let var_attribute = Identifier::new(long_var_name, decoder.encoding)
925 .map_err(|e| Error::InvalidAttributeVariableName(e))
927 .map(|name| VarAttributeSet {
931 Ok((var_attribute, rest))
/// The [`VarAttributeSet`]s collected by `VariableAttributeRecord::parse`,
/// which keeps parsing sets while input remains. The inner vector is not
/// `pub`.
935 #[derive(Clone, Debug)]
936 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
938 impl VariableAttributeRecord {
939 pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
940 let mut var_attribute_sets = Vec::new();
941 while !input.is_empty() {
942 let Some((var_attribute, rest)) =
943 VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
947 if let Some(var_attribute) = var_attribute {
948 var_attribute_sets.push(var_attribute);
952 Ok(VariableAttributeRecord(var_attribute_sets))
956 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
963 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
970 #[derive(Clone, Debug)]
971 pub struct VarDisplay {
972 pub measure: Option<Measure>,
974 pub align: Option<Alignment>,
/// A list of [`VarDisplay`] entries.
///
/// NOTE(review): the code that builds this record is not visible here, so
/// the relationship between entries and variables is unconfirmed.
977 #[derive(Clone, Debug)]
978 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
980 #[derive(Clone, Debug)]
981 pub enum MultipleResponseType {
984 labels: CategoryLabels,
989 impl MultipleResponseType {
993 input: &raw::MultipleResponseType,
995 warn: &impl Fn(Error),
996 ) -> Result<Self, Error> {
997 let mr_type = match input {
998 raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
999 let value = decoder.decode_string_cow(&value.0, warn);
1000 let value = match min_width {
1001 VarWidth::Numeric => {
1002 let number: f64 = value.trim().parse().map_err(|_| {
1003 Error::InvalidMDGroupCountedValue {
1004 mr_set: mr_set.clone(),
1005 number: value.into(),
1008 Value::Number(Some(number.into()))
1010 VarWidth::String(max_width) => {
1011 let value = value.trim_end_matches(' ');
1012 let width = value.len();
1013 if width > max_width as usize {
1014 return Err(Error::TooWideMDGroupCountedValue {
1015 mr_set: mr_set.clone(),
1016 value: value.into(),
1021 Value::String(value.into())
1024 MultipleResponseType::MultipleDichotomy {
1029 raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
1035 #[derive(Clone, Debug)]
1036 pub struct MultipleResponseSet {
1037 pub name: Identifier,
1038 pub min_width: VarWidth,
1039 pub max_width: VarWidth,
1041 pub mr_type: MultipleResponseType,
1042 pub dict_indexes: Vec<DictIndex>,
1045 impl MultipleResponseSet {
1048 input: &raw::MultipleResponseSet,
1049 warn: &impl Fn(Error),
1050 ) -> Result<Self, Error> {
1051 let mr_set_name = decoder
1052 .decode_identifier(&input.name.0, warn)
1053 .map_err(|error| Error::InvalidMrSetName(error))?;
1055 let label = decoder.decode_string(&input.label.0, warn);
1057 let mut dict_indexes = Vec::with_capacity(input.short_names.len());
1058 for short_name in input.short_names.iter() {
1059 let short_name = match decoder.decode_identifier(&short_name.0, warn) {
1062 warn(Error::InvalidMrSetName(error));
1066 let Some(&dict_index) = decoder.var_names.get(&short_name) else {
1067 warn(Error::UnknownMrSetVariable {
1068 mr_set: mr_set_name.clone(),
1069 short_name: short_name.clone(),
1073 dict_indexes.push(dict_index);
1076 match dict_indexes.len() {
1077 0 => return Err(Error::EmptyMrSet(mr_set_name)),
1078 1 => return Err(Error::OneVarMrSet(mr_set_name)),
1082 let Some((Some(min_width), Some(max_width))) = dict_indexes
1084 .map(|dict_index| decoder.variables[dict_index].width)
1085 .map(|w| (Some(w), Some(w)))
1086 .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
1088 return Err(Error::MixedMrSet(mr_set_name));
1092 MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
1094 Ok(MultipleResponseSet {
/// The [`MultipleResponseSet`]s decoded from a
/// `raw::MultipleResponseRecord`. Sets that fail to decode are passed to
/// `warn` and left out.
1105 #[derive(Clone, Debug)]
1106 pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
1108 impl TryDecode for MultipleResponseRecord {
1109 type Input = raw::MultipleResponseRecord;
1113 input: &Self::Input,
1114 warn: impl Fn(Error),
1115 ) -> Result<Self, Error> {
1116 let mut sets = Vec::with_capacity(input.0.len());
1117 for set in &input.0 {
1118 match MultipleResponseSet::decode(decoder, set, &warn) {
1119 Ok(set) => sets.push(set),
1120 Err(error) => warn(error),
1123 Ok(MultipleResponseRecord(sets))
/// Value labels for one long string variable, as decoded by
/// `LongStringValueLabels::decode`.
1127 #[derive(Clone, Debug)]
1128 pub struct LongStringValueLabels {
/// Name of the labelled variable, decoded from the raw `var_name`.
1129 pub var_name: Identifier,
/// String width of the variable; `decode` rejects raw widths below 9 or
/// above `VarWidth::MAX_STRING`.
1130 pub width: VarWidth,
/// The decoded value/label pairs.
1131 pub labels: Vec<ValueLabel>,
1134 impl LongStringValueLabels {
1137 input: &raw::LongStringValueLabels,
1138 warn: &impl Fn(Error),
1139 ) -> Result<Self, Error> {
1140 let var_name = decoder
1141 .decode_identifier(&input.var_name.0, warn)
1142 .map_err(|e| Error::InvalidLongStringValueLabelName(e))?;
1145 let max_width = VarWidth::MAX_STRING;
1146 if input.width < 9 || input.width > max_width as u32 {
1147 return Err(Error::InvalidLongValueLabelWidth {
1148 name: var_name.into(),
1154 let width = input.width as u16;
1156 let mut labels = Vec::with_capacity(input.labels.len());
1157 for (value, label) in input.labels.iter() {
1158 let value = Value::String(decoder.decode_exact_length(&value.0).into());
1159 let label = decoder.decode_string(&label.0, warn);
1160 labels.push(ValueLabel { value, label });
1163 Ok(LongStringValueLabels {
1165 width: VarWidth::String(width),
/// The [`LongStringValueLabels`] decoded from a
/// `raw::LongStringValueLabelRecord`. Entries that fail to decode are passed
/// to `warn` and left out.
1171 #[derive(Clone, Debug)]
1172 pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
1174 impl TryDecode for LongStringValueLabelRecord {
1175 type Input = raw::LongStringValueLabelRecord;
1179 input: &Self::Input,
1180 warn: impl Fn(Error),
1181 ) -> Result<Self, Error> {
1182 let mut labels = Vec::with_capacity(input.0.len());
1183 for label in &input.0 {
1184 match LongStringValueLabels::decode(decoder, label, &warn) {
1185 Ok(set) => labels.push(set),
1186 Err(error) => warn(error),
1189 Ok(LongStringValueLabelRecord(labels))
1195 use encoding_rs::WINDOWS_1252;
1199 let mut s = String::new();
1200 s.push(char::REPLACEMENT_CHARACTER);
1201 let encoded = WINDOWS_1252.encode(&s).0;
1202 let decoded = WINDOWS_1252.decode(&encoded[..]).0;
1203 println!("{:?}", decoded);
1208 let charset: Vec<u8> = (0..=255).collect();
1209 println!("{}", charset.len());
1210 let decoded = WINDOWS_1252.decode(&charset[..]).0;
1211 println!("{}", decoded.len());
1212 let encoded = WINDOWS_1252.encode(&decoded[..]).0;
1213 println!("{}", encoded.len());
1214 assert_eq!(&charset[..], &encoded[..]);