1 use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
4 encoding::{get_encoding, Error as EncodingError},
6 format::{Error as FormatError, Spec, UncheckedSpec},
7 identifier::{Error as IdError, Identifier},
8 raw::{self, MissingValues, UnencodedStr, VarType},
10 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
11 use encoding_rs::{DecoderResult, Encoding};
12 use num::integer::div_ceil;
13 use ordered_float::OrderedFloat;
14 use thiserror::Error as ThisError;
16 pub use crate::raw::{CategoryLabels, Compression};
18 #[derive(ThisError, Debug)]
20 // XXX this is really an internal error and maybe we should change the
21 // interfaces to make it impossible
22 #[error("Missing header record")]
26 EncodingError(EncodingError),
28 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
29 InvalidVariableWidth { offset: u64, width: i32 },
31 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
32 InvalidLongMissingValueFormat,
34 #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format. Using 01 Jan 1970.")]
35 InvalidCreationDate { creation_date: String },
37 #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format. Using midnight.")]
38 InvalidCreationTime { creation_time: String },
40 #[error("{id_error} Renaming variable to {new_name}.")]
47 "Substituting {new_spec} for invalid print format on variable {variable}. {format_error}"
52 format_error: FormatError,
56 "Substituting {new_spec} for invalid write format on variable {variable}. {format_error}"
61 format_error: FormatError,
64 #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
65 DuplicateVariableName {
66 duplicate_name: Identifier,
70 #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
71 InvalidDictIndex { dict_index: usize, max_index: usize },
73 #[error("Dictionary index {0} refers to a long string continuation.")]
74 DictIndexIsContinuation(usize),
76 #[error("Variables associated with value label are not all of identical type. Variable {numeric_var} is numeric, but variable {string_var} is string.")]
77 ValueLabelsDifferentTypes {
78 numeric_var: Identifier,
79 string_var: Identifier,
83 "Value labels may not be added to long string variable {0} using record types 3 or 4."
85 InvalidLongStringValueLabel(Identifier),
87 #[error("Invalid multiple response set name. {0}")]
88 InvalidMrSetName(IdError),
90 #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
91 UnknownMrSetVariable {
93 short_name: Identifier,
96 #[error("Multiple response set {0} has no variables.")]
97 EmptyMrSet(Identifier),
99 #[error("Multiple response set {0} has only one variable.")]
100 OneVarMrSet(Identifier),
102 #[error("Multiple response set {0} contains both string and numeric variables.")]
103 MixedMrSet(Identifier),
106 "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
108 InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
110 #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
111 TooWideMDGroupCountedValue {
118 #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
119 InvalidLongValueLabelWidth {
126 #[error("Invalid attribute name. {0}")]
127 InvalidAttributeName(IdError),
129 #[error("Invalid short name in long variable name record. {0}")]
130 InvalidShortName(IdError),
132 #[error("Invalid name in long variable name record. {0}")]
133 InvalidLongName(IdError),
135 #[error("Invalid variable name in very long string record. {0}")]
136 InvalidLongStringName(IdError),
138 #[error("Invalid variable name in long string value label record. {0}")]
139 InvalidLongStringValueLabelName(IdError),
141 #[error("Invalid variable name in attribute record. {0}")]
142 InvalidAttributeVariableName(IdError),
144 #[error("Details TBD")]
148 #[derive(Clone, Debug)]
150 Header(HeaderRecord),
151 Variable(VariableRecord),
152 ValueLabel(ValueLabelRecord),
153 Document(DocumentRecord),
154 IntegerInfo(IntegerInfoRecord),
155 FloatInfo(FloatInfoRecord),
156 VariableSets(VariableSetRecord),
157 VarDisplay(VarDisplayRecord),
158 MultipleResponse(MultipleResponseRecord),
159 LongStringValueLabels(LongStringValueLabelRecord),
160 Encoding(EncodingRecord),
161 NumberOfCases(NumberOfCasesRecord),
162 ProductInfo(ProductInfoRecord),
163 LongNames(LongNameRecord),
164 VeryLongStrings(VeryLongStringRecord),
165 FileAttributes(FileAttributeRecord),
166 VariableAttributes(VariableAttributeRecord),
167 OtherExtension(Extension),
170 //ZTrailer(ZTrailer),
174 pub use crate::raw::EncodingRecord;
175 pub use crate::raw::Extension;
176 pub use crate::raw::FloatInfoRecord;
177 pub use crate::raw::IntegerInfoRecord;
178 pub use crate::raw::NumberOfCasesRecord;
180 type DictIndex = usize;
182 pub struct Variable {
183 pub dict_index: DictIndex,
184 pub short_name: Identifier,
185 pub long_name: Option<Identifier>,
190 pub compression: Option<Compression>,
192 pub encoding: &'static Encoding,
193 pub variables: HashMap<DictIndex, Variable>,
194 pub var_names: HashMap<Identifier, DictIndex>,
195 n_dict_indexes: usize,
196 n_generated_names: usize,
199 pub fn decode<T>(headers: Vec<raw::Record>, warn: &impl Fn(Error)) -> Result<Vec<Record>, Error> {
200 let Some(header_record) = headers.iter().find_map(|rec| {
201 if let raw::Record::Header(header) = rec {
207 return Err(Error::MissingHeaderRecord);
209 let encoding = headers.iter().find_map(|rec| {
210 if let raw::Record::Encoding(ref e) = rec {
216 let character_code = headers.iter().find_map(|rec| {
217 if let raw::Record::IntegerInfo(ref r) = rec {
218 Some(r.character_code)
223 let encoding = match get_encoding(encoding, character_code) {
224 Ok(encoding) => encoding,
225 Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)),
227 warn(Error::EncodingError(err));
228 // Warn that we're using the default encoding.
233 let decoder = Decoder {
234 compression: header_record.compression,
235 endian: header_record.endian,
237 variables: HashMap::new(),
238 var_names: HashMap::new(),
240 n_generated_names: 0,
247 fn generate_name(&mut self) -> Identifier {
249 self.n_generated_names += 1;
250 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
252 if !self.var_names.contains_key(&name) {
255 assert!(self.n_generated_names < usize::MAX);
258 fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
259 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
265 fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String {
266 self.decode_string_cow(input, warn).into()
268 pub fn decode_identifier(
271 warn: &impl Fn(Error),
272 ) -> Result<Identifier, IdError> {
273 let s = self.decode_string_cow(input, warn);
274 Identifier::new(&s, self.encoding)
276 fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
277 let max_index = self.n_dict_indexes - 1;
278 if dict_index == 0 || dict_index as usize > max_index {
279 return Err(Error::InvalidDictIndex {
284 let Some(variable) = self.variables.get(&dict_index) else {
285 return Err(Error::DictIndexIsContinuation(dict_index));
290 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
291 /// re-encoding the result back into `self.encoding` will have exactly the
292 /// same length in bytes.
294 /// XXX warn about errors?
295 fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
296 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
297 // This is the common case. Usually there will be no errors.
300 // Unusual case. Don't bother to optimize it much.
301 let mut decoder = self.encoding.new_decoder_without_bom_handling();
302 let mut output = String::with_capacity(
304 .max_utf8_buffer_length_without_replacement(input.len())
307 let mut rest = input;
308 while !rest.is_empty() {
309 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
310 (DecoderResult::InputEmpty, _) => break,
311 (DecoderResult::OutputFull, _) => unreachable!(),
312 (DecoderResult::Malformed(a, b), consumed) => {
313 let skipped = a as usize + b as usize;
314 output.extend(repeat('?').take(skipped));
315 rest = &rest[consumed..];
319 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
325 pub trait TryDecode: Sized {
330 warn: impl Fn(Error),
331 ) -> Result<Self, Error>;
334 pub trait Decode<Input>: Sized {
335 fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self;
338 impl<const N: usize> Decode<UnencodedStr<N>> for String {
339 fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
340 decoder.decode_string(&input.0, &warn)
344 #[derive(Clone, Debug)]
345 pub struct HeaderRecord {
346 pub eye_catcher: String,
347 pub weight_index: Option<usize>,
348 pub n_cases: Option<u64>,
349 pub creation: NaiveDateTime,
350 pub file_label: String,
353 impl TryDecode for HeaderRecord {
354 type Input = crate::raw::HeaderRecord;
359 warn: impl Fn(Error),
360 ) -> Result<Self, Error> {
361 let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
362 let file_label = decoder.decode_string(&input.file_label.0, &warn);
363 let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
364 let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
365 warn(Error::InvalidCreationDate {
366 creation_date: creation_date.into(),
370 let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
372 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
373 warn(Error::InvalidCreationTime {
374 creation_time: creation_time.into(),
380 weight_index: input.weight_index.map(|n| n as usize),
381 n_cases: input.n_cases.map(|n| n as u64),
382 creation: NaiveDateTime::new(creation_date, creation_time),
388 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
394 impl PartialOrd for VarWidth {
395 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
396 match (self, other) {
397 (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
398 (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
405 const MAX_STRING: u16 = 32767;
407 fn n_dict_indexes(self) -> usize {
409 VarWidth::Numeric => 1,
410 VarWidth::String(w) => div_ceil(w as usize, 8),
417 f: impl Fn(u16, u16) -> u16,
418 ) -> Option<VarWidth> {
420 (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
421 (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
422 Some(VarWidth::String(f(a, b)))
428 /// Returns the wider of `self` and `other`:
429 /// - Numerical variable widths are equally wide.
430 /// - Longer strings are wider than shorter strings.
431 /// - Numerical and string types are incomparable, so result in `None`.
432 /// - Any `None` in the input yields `None` in the output.
433 pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
434 Self::width_predicate(a, b, |a, b| a.max(b))
437 /// Returns the narrower of `self` and `other` (see [`Self::wider`]).
438 pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
439 Self::width_predicate(a, b, |a, b| a.min(b))
443 impl From<VarWidth> for VarType {
444 fn from(source: VarWidth) -> Self {
446 VarWidth::Numeric => VarType::Numeric,
447 VarWidth::String(_) => VarType::String,
452 #[derive(Clone, Debug)]
453 pub struct VariableRecord {
455 pub name: Identifier,
456 pub print_format: Spec,
457 pub write_format: Spec,
458 pub missing_values: MissingValues,
459 pub label: Option<String>,
462 fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
463 UncheckedSpec::try_from(raw)
464 .and_then(Spec::try_from)
465 .and_then(|x| x.check_width_compatibility(width))
466 .unwrap_or_else(|error| {
467 let new_format = Spec::default_for_width(width);
468 warn(new_format, error);
473 impl VariableRecord {
475 decoder: &mut Decoder,
476 input: &crate::raw::VariableRecord,
477 warn: impl Fn(Error),
478 ) -> Result<Option<VariableRecord>, Error> {
479 let width = match input.width {
480 0 => VarWidth::Numeric,
481 w @ 1..=255 => VarWidth::String(w as u16),
482 -1 => return Ok(None),
484 return Err(Error::InvalidVariableWidth {
485 offset: input.offset,
490 let name = match decoder.decode_identifier(&input.name.0, &warn) {
492 if !decoder.var_names.contains_key(&name) {
495 let new_name = decoder.generate_name();
496 warn(Error::DuplicateVariableName {
497 duplicate_name: name.clone(),
498 new_name: new_name.clone(),
504 let new_name = decoder.generate_name();
505 warn(Error::InvalidVariableName {
507 new_name: new_name.clone(),
512 let variable = Variable {
513 dict_index: decoder.n_dict_indexes,
514 short_name: name.clone(),
518 decoder.n_dict_indexes += width.n_dict_indexes();
521 .insert(name.clone(), variable.dict_index)
525 .insert(variable.dict_index, variable)
528 let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
529 warn(Error::InvalidPrintFormat {
531 variable: name.clone(),
535 let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
536 warn(Error::InvalidWriteFormat {
538 variable: name.clone(),
545 .map(|label| decoder.decode_string(&label.0, &warn));
546 Ok(Some(VariableRecord {
551 missing_values: input.missing_values.clone(),
557 #[derive(Clone, Debug)]
558 pub struct DocumentRecord(Vec<String>);
560 impl TryDecode for DocumentRecord {
561 type Input = crate::raw::DocumentRecord;
566 warn: impl Fn(Error),
567 ) -> Result<Self, Error> {
572 .map(|s| decoder.decode_string(&s.0, &warn))
582 const NAME: &'static str;
583 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
586 #[derive(Clone, Debug)]
587 pub struct VariableSet {
589 pub vars: Vec<String>,
593 fn parse(input: &str) -> Result<Self, Error> {
594 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
595 let vars = input.split_ascii_whitespace().map(String::from).collect();
603 trait WarnOnError<T> {
604 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
606 impl<T> WarnOnError<T> for Result<T, Error> {
607 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
609 Ok(result) => Some(result),
618 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
620 Number(Option<OrderedFloat<f64>>),
625 pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
627 raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
628 raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
633 #[derive(Clone, Debug)]
634 pub struct ValueLabel {
639 #[derive(Clone, Debug)]
640 pub struct ValueLabelRecord {
641 pub var_type: VarType,
642 pub labels: Vec<ValueLabel>,
643 pub variables: Vec<Identifier>,
646 impl ValueLabelRecord {
648 decoder: &mut Decoder,
649 raw_value_label: &crate::raw::ValueLabelRecord,
650 dict_indexes: &crate::raw::VarIndexRecord,
651 warn: impl Fn(Error),
652 ) -> Result<Option<ValueLabelRecord>, Error> {
653 let variables: Vec<&Variable> = dict_indexes
656 .filter_map(|&dict_index| {
658 .get_var_by_index(dict_index as usize)
659 .warn_on_error(&warn)
661 .filter(|&variable| match variable.width {
662 VarWidth::String(width) if width > 8 => {
663 warn(Error::InvalidLongStringValueLabel(
664 variable.short_name.clone(),
671 let mut i = variables.iter();
672 let Some(&first_var) = i.next() else {
675 let var_type: VarType = first_var.width.into();
677 let this_type: VarType = variable.width.into();
678 if var_type != this_type {
679 let (numeric_var, string_var) = match var_type {
680 VarType::Numeric => (first_var, variable),
681 VarType::String => (variable, first_var),
683 warn(Error::ValueLabelsDifferentTypes {
684 numeric_var: numeric_var.short_name.clone(),
685 string_var: string_var.short_name.clone(),
690 let labels = raw_value_label
693 .map(|(value, label)| {
694 let label = decoder.decode_string(&label.0, &warn);
695 let value = Value::decode(
696 raw::Value::from_raw(*value, var_type, decoder.endian),
699 ValueLabel { value, label }
702 let variables = variables
704 .map(|&variable| variable.short_name.clone())
706 Ok(Some(ValueLabelRecord {
714 #[derive(Clone, Debug)]
715 pub struct VariableSetRecord(Vec<VariableSet>);
717 impl TextRecord for VariableSetRecord {
718 const NAME: &'static str = "variable set";
719 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
720 let mut sets = Vec::new();
721 for line in input.lines() {
722 if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
726 Ok(VariableSetRecord(sets))
730 #[derive(Clone, Debug)]
731 pub struct ProductInfoRecord(pub String);
733 impl TextRecord for ProductInfoRecord {
734 const NAME: &'static str = "extra product info";
735 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
736 Ok(ProductInfoRecord(input.into()))
740 #[derive(Clone, Debug)]
741 pub struct LongName {
742 pub short_name: Identifier,
743 pub long_name: Identifier,
747 fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
748 let short_name = Identifier::new(short_name, decoder.encoding)
749 .map_err(|e| Error::InvalidShortName(e))?;
751 Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?;
759 #[derive(Clone, Debug)]
760 pub struct LongNameRecord(Vec<LongName>);
762 impl LongNameRecord {
763 pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
764 let mut names = Vec::new();
765 for pair in input.split('\t').filter(|s| !s.is_empty()) {
766 if let Some((short_name, long_name)) = pair.split_once('=') {
767 if let Some(long_name) =
768 LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
770 names.push(long_name);
776 Ok(LongNameRecord(names))
780 #[derive(Clone, Debug)]
781 pub struct VeryLongString {
782 pub short_name: Identifier,
786 impl VeryLongString {
787 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
788 let Some((short_name, length)) = input.split_once('=') else {
789 return Err(Error::TBD);
791 let short_name = Identifier::new(short_name, decoder.encoding)
792 .map_err(|e| Error::InvalidLongStringName(e))?;
793 let length: u16 = length.parse().map_err(|_| Error::TBD)?;
794 if length > VarWidth::MAX_STRING {
795 return Err(Error::TBD);
798 short_name: short_name.into(),
804 #[derive(Clone, Debug)]
805 pub struct VeryLongStringRecord(Vec<VeryLongString>);
807 impl VeryLongStringRecord {
808 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
809 let mut very_long_strings = Vec::new();
812 .map(|s| s.trim_end_matches('\t'))
813 .filter(|s| !s.is_empty())
815 if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
816 very_long_strings.push(vls)
819 Ok(VeryLongStringRecord(very_long_strings))
823 #[derive(Clone, Debug)]
824 pub struct Attribute {
825 pub name: Identifier,
826 pub values: Vec<String>,
833 warn: &impl Fn(Error),
834 ) -> Result<(Option<Attribute>, &'a str), Error> {
835 let Some((name, mut input)) = input.split_once('(') else {
836 return Err(Error::TBD);
838 let mut values = Vec::new();
840 let Some((value, rest)) = input.split_once('\n') else {
841 return Err(Error::TBD);
843 if let Some(stripped) = value
845 .and_then(|value| value.strip_suffix('\''))
847 values.push(stripped.into());
850 values.push(value.into());
852 if let Some(rest) = rest.strip_prefix(')') {
853 let attribute = Identifier::new(name, decoder.encoding)
854 .map_err(|e| Error::InvalidAttributeName(e))
856 .map(|name| Attribute { name, values });
857 return Ok((attribute, rest));
864 #[derive(Clone, Debug)]
865 pub struct AttributeSet(pub Vec<Attribute>);
871 sentinel: Option<char>,
872 warn: &impl Fn(Error),
873 ) -> Result<(AttributeSet, &'a str), Error> {
874 let mut attributes = Vec::new();
876 match input.chars().next() {
878 c if c == sentinel => break &input[1..],
880 let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
881 if let Some(attribute) = attribute {
882 attributes.push(attribute);
888 Ok((AttributeSet(attributes), rest))
892 #[derive(Clone, Debug)]
893 pub struct FileAttributeRecord(AttributeSet);
895 impl FileAttributeRecord {
896 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
897 let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
898 if !rest.is_empty() {
901 Ok(FileAttributeRecord(set))
905 #[derive(Clone, Debug)]
906 pub struct VarAttributeSet {
907 pub long_var_name: Identifier,
908 pub attributes: AttributeSet,
911 impl VarAttributeSet {
915 warn: &impl Fn(Error),
916 ) -> Result<(Option<VarAttributeSet>, &'a str), Error> {
917 let Some((long_var_name, rest)) = input.split_once(':') else {
918 return Err(Error::TBD);
920 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
921 let var_attribute = Identifier::new(long_var_name, decoder.encoding)
922 .map_err(|e| Error::InvalidAttributeVariableName(e))
924 .map(|name| VarAttributeSet {
928 Ok((var_attribute, rest))
932 #[derive(Clone, Debug)]
933 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
935 impl VariableAttributeRecord {
936 pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
937 let mut var_attribute_sets = Vec::new();
938 while !input.is_empty() {
939 let Some((var_attribute, rest)) =
940 VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
944 if let Some(var_attribute) = var_attribute {
945 var_attribute_sets.push(var_attribute);
949 Ok(VariableAttributeRecord(var_attribute_sets))
953 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
960 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
967 #[derive(Clone, Debug)]
968 pub struct VarDisplay {
969 pub measure: Option<Measure>,
971 pub align: Option<Alignment>,
974 #[derive(Clone, Debug)]
975 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
977 #[derive(Clone, Debug)]
978 pub enum MultipleResponseType {
981 labels: CategoryLabels,
986 impl MultipleResponseType {
990 input: &raw::MultipleResponseType,
992 warn: &impl Fn(Error),
993 ) -> Result<Self, Error> {
994 let mr_type = match input {
995 raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
996 let value = decoder.decode_string_cow(&value.0, warn);
997 let value = match min_width {
998 VarWidth::Numeric => {
999 let number: f64 = value.trim().parse().map_err(|_| {
1000 Error::InvalidMDGroupCountedValue {
1001 mr_set: mr_set.clone(),
1002 number: value.into(),
1005 Value::Number(Some(number.into()))
1007 VarWidth::String(max_width) => {
1008 let value = value.trim_end_matches(' ');
1009 let width = value.len();
1010 if width > max_width as usize {
1011 return Err(Error::TooWideMDGroupCountedValue {
1012 mr_set: mr_set.clone(),
1013 value: value.into(),
1018 Value::String(value.into())
1021 MultipleResponseType::MultipleDichotomy {
1026 raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
1032 #[derive(Clone, Debug)]
1033 pub struct MultipleResponseSet {
1034 pub name: Identifier,
1035 pub min_width: VarWidth,
1036 pub max_width: VarWidth,
1038 pub mr_type: MultipleResponseType,
1039 pub dict_indexes: Vec<DictIndex>,
1042 impl MultipleResponseSet {
1045 input: &raw::MultipleResponseSet,
1046 warn: &impl Fn(Error),
1047 ) -> Result<Self, Error> {
1048 let mr_set_name = decoder
1049 .decode_identifier(&input.name.0, warn)
1050 .map_err(|error| Error::InvalidMrSetName(error))?;
1052 let label = decoder.decode_string(&input.label.0, warn);
1054 let mut dict_indexes = Vec::with_capacity(input.short_names.len());
1055 for short_name in input.short_names.iter() {
1056 let short_name = match decoder.decode_identifier(&short_name.0, warn) {
1059 warn(Error::InvalidMrSetName(error));
1063 let Some(&dict_index) = decoder.var_names.get(&short_name) else {
1064 warn(Error::UnknownMrSetVariable {
1065 mr_set: mr_set_name.clone(),
1066 short_name: short_name.clone(),
1070 dict_indexes.push(dict_index);
1073 match dict_indexes.len() {
1074 0 => return Err(Error::EmptyMrSet(mr_set_name)),
1075 1 => return Err(Error::OneVarMrSet(mr_set_name)),
1079 let Some((Some(min_width), Some(max_width))) = dict_indexes
1081 .map(|dict_index| decoder.variables[dict_index].width)
1082 .map(|w| (Some(w), Some(w)))
1083 .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
1085 return Err(Error::MixedMrSet(mr_set_name));
1089 MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
1091 Ok(MultipleResponseSet {
1102 #[derive(Clone, Debug)]
1103 pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
1105 impl TryDecode for MultipleResponseRecord {
1106 type Input = raw::MultipleResponseRecord;
1110 input: &Self::Input,
1111 warn: impl Fn(Error),
1112 ) -> Result<Self, Error> {
1113 let mut sets = Vec::with_capacity(input.0.len());
1114 for set in &input.0 {
1115 match MultipleResponseSet::decode(decoder, set, &warn) {
1116 Ok(set) => sets.push(set),
1117 Err(error) => warn(error),
1120 Ok(MultipleResponseRecord(sets))
1124 #[derive(Clone, Debug)]
1125 pub struct LongStringValueLabels {
1126 pub var_name: Identifier,
1127 pub width: VarWidth,
1128 pub labels: Vec<ValueLabel>,
1131 impl LongStringValueLabels {
1134 input: &raw::LongStringValueLabels,
1135 warn: &impl Fn(Error),
1136 ) -> Result<Self, Error> {
1137 let var_name = decoder
1138 .decode_identifier(&input.var_name.0, warn)
1139 .map_err(|e| Error::InvalidLongStringValueLabelName(e))?;
1142 let max_width = VarWidth::MAX_STRING;
1143 if input.width < 9 || input.width > max_width as u32 {
1144 return Err(Error::InvalidLongValueLabelWidth {
1145 name: var_name.into(),
1151 let width = input.width as u16;
1153 let mut labels = Vec::with_capacity(input.labels.len());
1154 for (value, label) in input.labels.iter() {
1155 let value = Value::String(decoder.decode_exact_length(&value.0).into());
1156 let label = decoder.decode_string(&label.0, warn);
1157 labels.push(ValueLabel { value, label });
1160 Ok(LongStringValueLabels {
1162 width: VarWidth::String(width),
1168 #[derive(Clone, Debug)]
1169 pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
1171 impl TryDecode for LongStringValueLabelRecord {
1172 type Input = raw::LongStringValueLabelRecord;
1176 input: &Self::Input,
1177 warn: impl Fn(Error),
1178 ) -> Result<Self, Error> {
1179 let mut labels = Vec::with_capacity(input.0.len());
1180 for label in &input.0 {
1181 match LongStringValueLabels::decode(decoder, label, &warn) {
1182 Ok(set) => labels.push(set),
1183 Err(error) => warn(error),
1186 Ok(LongStringValueLabelRecord(labels))
1192 use encoding_rs::WINDOWS_1252;
1196 let mut s = String::new();
1197 s.push(char::REPLACEMENT_CHARACTER);
1198 let encoded = WINDOWS_1252.encode(&s).0;
1199 let decoded = WINDOWS_1252.decode(&encoded[..]).0;
1200 println!("{:?}", decoded);
1205 let charset: Vec<u8> = (0..=255).collect();
1206 println!("{}", charset.len());
1207 let decoded = WINDOWS_1252.decode(&charset[..]).0;
1208 println!("{}", decoded.len());
1209 let encoded = WINDOWS_1252.encode(&decoded[..]).0;
1210 println!("{}", encoded.len());
1211 assert_eq!(&charset[..], &encoded[..]);