1 use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
5 format::{Error as FormatError, Spec, UncheckedSpec},
6 identifier::{Error as IdError, Identifier},
7 raw::{self, MissingValues, UnencodedStr, VarType},
9 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
10 use encoding_rs::{DecoderResult, Encoding};
11 use num::integer::div_ceil;
12 use ordered_float::OrderedFloat;
13 use thiserror::Error as ThisError;
15 pub use crate::raw::{CategoryLabels, Compression};
17 #[derive(ThisError, Debug)]
19 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
20 InvalidVariableWidth { offset: u64, width: i32 },
22 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
23 InvalidLongMissingValueFormat,
25 #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format. Using 01 Jan 1970.")]
26 InvalidCreationDate { creation_date: String },
28 #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format. Using midnight.")]
29 InvalidCreationTime { creation_time: String },
31 #[error("{id_error} Renaming variable to {new_name}.")]
38 "Substituting {new_spec} for invalid print format on variable {variable}. {format_error}"
43 format_error: FormatError,
47 "Substituting {new_spec} for invalid write format on variable {variable}. {format_error}"
52 format_error: FormatError,
55 #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
56 DuplicateVariableName {
57 duplicate_name: Identifier,
61 #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
62 InvalidDictIndex { dict_index: usize, max_index: usize },
64 #[error("Dictionary index {0} refers to a long string continuation.")]
65 DictIndexIsContinuation(usize),
67 #[error("Variables associated with value label are not all of identical type. Variable {numeric_var} is numeric, but variable {string_var} is string.")]
68 ValueLabelsDifferentTypes {
69 numeric_var: Identifier,
70 string_var: Identifier,
74 "Value labels may not be added to long string variable {0} using record types 3 or 4."
76 InvalidLongStringValueLabel(Identifier),
78 #[error("Invalid multiple response set name. {0}")]
79 InvalidMrSetName(IdError),
81 #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
82 UnknownMrSetVariable {
84 short_name: Identifier,
87 #[error("Multiple response set {0} has no variables.")]
88 EmptyMrSet(Identifier),
90 #[error("Multiple response set {0} has only one variable.")]
91 OneVarMrSet(Identifier),
93 #[error("Multiple response set {0} contains both string and numeric variables.")]
94 MixedMrSet(Identifier),
97 "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
99 InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
101 #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
102 TooWideMDGroupCountedValue {
109 #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
110 InvalidLongValueLabelWidth {
117 #[error("Invalid attribute name. {0}")]
118 InvalidAttributeName(IdError),
120 #[error("Invalid short name in long variable name record. {0}")]
121 InvalidShortName(IdError),
123 #[error("Invalid name in long variable name record. {0}")]
124 InvalidLongName(IdError),
126 #[error("Invalid variable name in very long string record. {0}")]
127 InvalidLongStringName(IdError),
129 #[error("Invalid variable name in long string value label record. {0}")]
130 InvalidLongStringValueLabelName(IdError),
132 #[error("Invalid variable name in attribute record. {0}")]
133 InvalidAttributeVariableName(IdError),
135 #[error("Details TBD")]
139 #[derive(Clone, Debug)]
141 Header(HeaderRecord),
142 Variable(VariableRecord),
143 ValueLabel(ValueLabelRecord),
144 Document(DocumentRecord),
145 IntegerInfo(IntegerInfoRecord),
146 FloatInfo(FloatInfoRecord),
147 VariableSets(VariableSetRecord),
148 VarDisplay(VarDisplayRecord),
149 MultipleResponse(MultipleResponseRecord),
150 LongStringValueLabels(LongStringValueLabelRecord),
151 Encoding(EncodingRecord),
152 NumberOfCases(NumberOfCasesRecord),
153 ProductInfo(ProductInfoRecord),
154 LongNames(LongNameRecord),
155 VeryLongStrings(VeryLongStringRecord),
156 FileAttributes(FileAttributeRecord),
157 VariableAttributes(VariableAttributeRecord),
158 //OtherExtension(Extension),
161 //ZTrailer(ZTrailer),
165 pub use crate::raw::EncodingRecord;
166 pub use crate::raw::FloatInfoRecord;
167 pub use crate::raw::IntegerInfoRecord;
168 pub use crate::raw::NumberOfCasesRecord;
170 type DictIndex = usize;
172 pub struct Variable {
173 pub dict_index: DictIndex,
174 pub short_name: Identifier,
175 pub long_name: Option<Identifier>,
180 pub compression: Option<Compression>,
182 pub encoding: &'static Encoding,
183 pub variables: HashMap<DictIndex, Variable>,
184 pub var_names: HashMap<Identifier, DictIndex>,
185 n_dict_indexes: usize,
186 n_generated_names: usize,
190 fn generate_name(&mut self) -> Identifier {
192 self.n_generated_names += 1;
193 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
195 if !self.var_names.contains_key(&name) {
198 assert!(self.n_generated_names < usize::MAX);
201 fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
202 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
208 fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String {
209 self.decode_string_cow(input, warn).into()
211 pub fn decode_identifier(
214 warn: &impl Fn(Error),
215 ) -> Result<Identifier, IdError> {
216 let s = self.decode_string_cow(input, warn);
217 Identifier::new(&s, self.encoding)
// Looks up the variable stored under `dict_index` in `self.variables`.
//
// Fails with `Error::InvalidDictIndex` when `dict_index` is 0 or greater than
// `max_index`, and with `Error::DictIndexIsContinuation` when the index is in
// range but has no entry in `self.variables`.
219 fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
// NOTE(review): this subtraction underflows if `n_dict_indexes` is ever 0;
// presumably the counter starts at 1 -- confirm where `Decoder` is built.
220 let max_index = self.n_dict_indexes - 1;
// `dict_index` is already a `usize`, so the `as usize` cast below is a no-op.
221 if dict_index == 0 || dict_index as usize > max_index {
222 return Err(Error::InvalidDictIndex {
// In range but absent from the map: reported as a long string continuation.
227 let Some(variable) = self.variables.get(&dict_index) else {
228 return Err(Error::DictIndexIsContinuation(dict_index));
233 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
234 /// re-encoding the result back into `self.encoding` will have exactly the
235 /// same length in bytes.
237 /// XXX warn about errors?
238 fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
239 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
240 // This is the common case. Usually there will be no errors.
243 // Unusual case. Don't bother to optimize it much.
244 let mut decoder = self.encoding.new_decoder_without_bom_handling();
245 let mut output = String::with_capacity(
247 .max_utf8_buffer_length_without_replacement(input.len())
250 let mut rest = input;
251 while !rest.is_empty() {
252 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
253 (DecoderResult::InputEmpty, _) => break,
254 (DecoderResult::OutputFull, _) => unreachable!(),
255 (DecoderResult::Malformed(a, b), consumed) => {
256 let skipped = a as usize + b as usize;
257 output.extend(repeat('?').take(skipped));
258 rest = &rest[consumed..];
262 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
268 pub trait TryDecode: Sized {
273 warn: impl Fn(Error),
274 ) -> Result<Self, Error>;
277 pub trait Decode<Input>: Sized {
278 fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self;
281 impl<const N: usize> Decode<UnencodedStr<N>> for String {
282 fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
283 decoder.decode_string(&input.0, &warn)
287 #[derive(Clone, Debug)]
288 pub struct HeaderRecord {
289 pub eye_catcher: String,
290 pub weight_index: Option<usize>,
291 pub n_cases: Option<u64>,
292 pub creation: NaiveDateTime,
293 pub file_label: String,
// Decodes a raw header record: the text fields are decoded with `decoder`,
// and the creation date and time strings are parsed into a `NaiveDateTime`.
// A date or time that fails to parse is reported through `warn` instead of
// aborting; the substituted fallback value is not visible in this excerpt.
296 impl TryDecode for HeaderRecord {
297 type Input = crate::raw::HeaderRecord;
302 warn: impl Fn(Error),
303 ) -> Result<Self, Error> {
304 let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
305 let file_label = decoder.decode_string(&input.file_label.0, &warn);
306 let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
// NOTE(review): chrono documents `%v` as `%e-%b-%Y` (e.g. " 8-Jul-2001"),
// but the `InvalidCreationDate` message names "DD MMM YY" as the expected
// layout. If that message is right, this looks like it should be
// "%e %b %y" -- confirm against real files.
307 let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
308 warn(Error::InvalidCreationDate {
309 creation_date: creation_date.into(),
313 let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
315 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
316 warn(Error::InvalidCreationTime {
317 creation_time: creation_time.into(),
323 weight_index: input.weight_index.map(|n| n as usize),
324 n_cases: input.n_cases.map(|n| n as u64),
// The parsed date and time are combined into a single timestamp.
325 creation: NaiveDateTime::new(creation_date, creation_time),
331 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
337 impl PartialOrd for VarWidth {
338 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
339 match (self, other) {
340 (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
341 (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
348 const MAX_STRING: u16 = 32767;
350 fn n_dict_indexes(self) -> usize {
352 VarWidth::Numeric => 1,
353 VarWidth::String(w) => div_ceil(w as usize, 8),
360 f: impl Fn(u16, u16) -> u16,
361 ) -> Option<VarWidth> {
363 (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
364 (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
365 Some(VarWidth::String(f(a, b)))
371 /// Returns the wider of `self` and `other`:
372 /// - Numerical variable widths are equally wide.
373 /// - Longer strings are wider than shorter strings.
374 /// - Numerical and string types are incomparable, so result in `None`.
375 /// - Any `None` in the input yields `None` in the output.
376 pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
377 Self::width_predicate(a, b, |a, b| a.max(b))
380 /// Returns the narrower of `self` and `other` (see [`Self::wider`]).
381 pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
382 Self::width_predicate(a, b, |a, b| a.min(b))
386 impl From<VarWidth> for VarType {
387 fn from(source: VarWidth) -> Self {
389 VarWidth::Numeric => VarType::Numeric,
390 VarWidth::String(_) => VarType::String,
395 #[derive(Clone, Debug)]
396 pub struct VariableRecord {
398 pub name: Identifier,
399 pub print_format: Spec,
400 pub write_format: Spec,
401 pub missing_values: MissingValues,
402 pub label: Option<String>,
405 fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
406 UncheckedSpec::try_from(raw)
407 .and_then(Spec::try_from)
408 .and_then(|x| x.check_width_compatibility(width))
409 .unwrap_or_else(|error| {
410 let new_format = Spec::default_for_width(width);
411 warn(new_format, error);
416 impl VariableRecord {
418 decoder: &mut Decoder,
419 input: &crate::raw::VariableRecord,
420 warn: impl Fn(Error),
421 ) -> Result<Option<VariableRecord>, Error> {
422 let width = match input.width {
423 0 => VarWidth::Numeric,
424 w @ 1..=255 => VarWidth::String(w as u16),
425 -1 => return Ok(None),
427 return Err(Error::InvalidVariableWidth {
428 offset: input.offset,
433 let name = match decoder.decode_identifier(&input.name.0, &warn) {
435 if !decoder.var_names.contains_key(&name) {
438 let new_name = decoder.generate_name();
439 warn(Error::DuplicateVariableName {
440 duplicate_name: name.clone(),
441 new_name: new_name.clone(),
447 let new_name = decoder.generate_name();
448 warn(Error::InvalidVariableName {
450 new_name: new_name.clone(),
455 let variable = Variable {
456 dict_index: decoder.n_dict_indexes,
457 short_name: name.clone(),
461 decoder.n_dict_indexes += width.n_dict_indexes();
464 .insert(name.clone(), variable.dict_index)
468 .insert(variable.dict_index, variable)
471 let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
472 warn(Error::InvalidPrintFormat {
474 variable: name.clone(),
478 let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
479 warn(Error::InvalidWriteFormat {
481 variable: name.clone(),
488 .map(|label| decoder.decode_string(&label.0, &warn));
489 Ok(Some(VariableRecord {
494 missing_values: input.missing_values.clone(),
500 #[derive(Clone, Debug)]
501 pub struct DocumentRecord(Vec<String>);
503 impl TryDecode for DocumentRecord {
504 type Input = crate::raw::DocumentRecord;
509 warn: impl Fn(Error),
510 ) -> Result<Self, Error> {
515 .map(|s| decoder.decode_string(&s.0, &warn))
525 const NAME: &'static str;
526 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
529 #[derive(Clone, Debug)]
530 pub struct VariableSet {
532 pub vars: Vec<String>,
536 fn parse(input: &str) -> Result<Self, Error> {
537 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
538 let vars = input.split_ascii_whitespace().map(String::from).collect();
546 trait WarnOnError<T> {
547 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
549 impl<T> WarnOnError<T> for Result<T, Error> {
550 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
552 Ok(result) => Some(result),
561 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
563 Number(Option<OrderedFloat<f64>>),
568 pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
570 raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
571 raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
576 #[derive(Clone, Debug)]
577 pub struct ValueLabel {
582 #[derive(Clone, Debug)]
583 pub struct ValueLabelRecord {
584 pub var_type: VarType,
585 pub labels: Vec<ValueLabel>,
586 pub variables: Vec<Identifier>,
589 impl ValueLabelRecord {
591 decoder: &mut Decoder,
592 raw_value_label: &crate::raw::ValueLabelRecord,
593 dict_indexes: &crate::raw::VarIndexRecord,
594 warn: impl Fn(Error),
595 ) -> Result<Option<ValueLabelRecord>, Error> {
596 let variables: Vec<&Variable> = dict_indexes
599 .filter_map(|&dict_index| {
601 .get_var_by_index(dict_index as usize)
602 .warn_on_error(&warn)
604 .filter(|&variable| match variable.width {
605 VarWidth::String(width) if width > 8 => {
606 warn(Error::InvalidLongStringValueLabel(
607 variable.short_name.clone(),
614 let mut i = variables.iter();
615 let Some(&first_var) = i.next() else {
618 let var_type: VarType = first_var.width.into();
620 let this_type: VarType = variable.width.into();
621 if var_type != this_type {
622 let (numeric_var, string_var) = match var_type {
623 VarType::Numeric => (first_var, variable),
624 VarType::String => (variable, first_var),
626 warn(Error::ValueLabelsDifferentTypes {
627 numeric_var: numeric_var.short_name.clone(),
628 string_var: string_var.short_name.clone(),
633 let labels = raw_value_label
636 .map(|(value, label)| {
637 let label = decoder.decode_string(&label.0, &warn);
638 let value = Value::decode(
639 raw::Value::from_raw(*value, var_type, decoder.endian),
642 ValueLabel { value, label }
645 let variables = variables
647 .map(|&variable| variable.short_name.clone())
649 Ok(Some(ValueLabelRecord {
657 #[derive(Clone, Debug)]
658 pub struct VariableSetRecord(Vec<VariableSet>);
660 impl TextRecord for VariableSetRecord {
661 const NAME: &'static str = "variable set";
662 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
663 let mut sets = Vec::new();
664 for line in input.lines() {
665 if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
669 Ok(VariableSetRecord(sets))
673 #[derive(Clone, Debug)]
674 pub struct ProductInfoRecord(pub String);
676 impl TextRecord for ProductInfoRecord {
677 const NAME: &'static str = "extra product info";
678 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
679 Ok(ProductInfoRecord(input.into()))
683 #[derive(Clone, Debug)]
684 pub struct LongName {
685 pub short_name: Identifier,
686 pub long_name: Identifier,
690 fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
691 let short_name = Identifier::new(short_name, decoder.encoding)
692 .map_err(|e| Error::InvalidShortName(e))?;
694 Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?;
702 #[derive(Clone, Debug)]
703 pub struct LongNameRecord(Vec<LongName>);
705 impl LongNameRecord {
706 pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
707 let mut names = Vec::new();
708 for pair in input.split('\t').filter(|s| !s.is_empty()) {
709 if let Some((short_name, long_name)) = pair.split_once('=') {
710 if let Some(long_name) =
711 LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
713 names.push(long_name);
719 Ok(LongNameRecord(names))
723 #[derive(Clone, Debug)]
724 pub struct VeryLongString {
725 pub short_name: Identifier,
729 impl VeryLongString {
730 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
731 let Some((short_name, length)) = input.split_once('=') else {
732 return Err(Error::TBD);
734 let short_name = Identifier::new(short_name, decoder.encoding)
735 .map_err(|e| Error::InvalidLongStringName(e))?;
736 let length: u16 = length.parse().map_err(|_| Error::TBD)?;
737 if length > VarWidth::MAX_STRING {
738 return Err(Error::TBD);
741 short_name: short_name.into(),
747 #[derive(Clone, Debug)]
748 pub struct VeryLongStringRecord(Vec<VeryLongString>);
750 impl VeryLongStringRecord {
751 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
752 let mut very_long_strings = Vec::new();
755 .map(|s| s.trim_end_matches('\t'))
756 .filter(|s| !s.is_empty())
758 if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
759 very_long_strings.push(vls)
762 Ok(VeryLongStringRecord(very_long_strings))
766 #[derive(Clone, Debug)]
767 pub struct Attribute {
768 pub name: Identifier,
769 pub values: Vec<String>,
776 warn: &impl Fn(Error),
777 ) -> Result<(Option<Attribute>, &'a str), Error> {
778 let Some((name, mut input)) = input.split_once('(') else {
779 return Err(Error::TBD);
781 let mut values = Vec::new();
783 let Some((value, rest)) = input.split_once('\n') else {
784 return Err(Error::TBD);
786 if let Some(stripped) = value
788 .and_then(|value| value.strip_suffix('\''))
790 values.push(stripped.into());
793 values.push(value.into());
795 if let Some(rest) = rest.strip_prefix(')') {
796 let attribute = Identifier::new(name, decoder.encoding)
797 .map_err(|e| Error::InvalidAttributeName(e))
799 .map(|name| Attribute { name, values });
800 return Ok((attribute, rest));
807 #[derive(Clone, Debug)]
808 pub struct AttributeSet(pub Vec<Attribute>);
814 sentinel: Option<char>,
815 warn: &impl Fn(Error),
816 ) -> Result<(AttributeSet, &'a str), Error> {
817 let mut attributes = Vec::new();
819 match input.chars().next() {
821 c if c == sentinel => break &input[1..],
823 let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
824 if let Some(attribute) = attribute {
825 attributes.push(attribute);
831 Ok((AttributeSet(attributes), rest))
835 #[derive(Clone, Debug)]
836 pub struct FileAttributeRecord(AttributeSet);
838 impl FileAttributeRecord {
839 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
840 let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
841 if !rest.is_empty() {
844 Ok(FileAttributeRecord(set))
848 #[derive(Clone, Debug)]
849 pub struct VarAttributeSet {
850 pub long_var_name: Identifier,
851 pub attributes: AttributeSet,
854 impl VarAttributeSet {
858 warn: &impl Fn(Error),
859 ) -> Result<(Option<VarAttributeSet>, &'a str), Error> {
860 let Some((long_var_name, rest)) = input.split_once(':') else {
861 return Err(Error::TBD);
863 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
864 let var_attribute = Identifier::new(long_var_name, decoder.encoding)
865 .map_err(|e| Error::InvalidAttributeVariableName(e))
867 .map(|name| VarAttributeSet {
871 Ok((var_attribute, rest))
875 #[derive(Clone, Debug)]
876 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
878 impl VariableAttributeRecord {
879 pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
880 let mut var_attribute_sets = Vec::new();
881 while !input.is_empty() {
882 let Some((var_attribute, rest)) =
883 VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
887 if let Some(var_attribute) = var_attribute {
888 var_attribute_sets.push(var_attribute);
892 Ok(VariableAttributeRecord(var_attribute_sets))
896 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
903 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
910 #[derive(Clone, Debug)]
911 pub struct VarDisplay {
912 pub measure: Option<Measure>,
914 pub align: Option<Alignment>,
917 #[derive(Clone, Debug)]
918 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
920 #[derive(Clone, Debug)]
921 pub enum MultipleResponseType {
924 labels: CategoryLabels,
929 impl MultipleResponseType {
933 input: &raw::MultipleResponseType,
935 warn: &impl Fn(Error),
936 ) -> Result<Self, Error> {
937 let mr_type = match input {
938 raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
939 let value = decoder.decode_string_cow(&value.0, warn);
940 let value = match min_width {
941 VarWidth::Numeric => {
942 let number: f64 = value.trim().parse().map_err(|_| {
943 Error::InvalidMDGroupCountedValue {
944 mr_set: mr_set.clone(),
945 number: value.into(),
948 Value::Number(Some(number.into()))
950 VarWidth::String(max_width) => {
951 let value = value.trim_end_matches(' ');
952 let width = value.len();
953 if width > max_width as usize {
954 return Err(Error::TooWideMDGroupCountedValue {
955 mr_set: mr_set.clone(),
961 Value::String(value.into())
964 MultipleResponseType::MultipleDichotomy {
969 raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
975 #[derive(Clone, Debug)]
976 pub struct MultipleResponseSet {
977 pub name: Identifier,
978 pub min_width: VarWidth,
979 pub max_width: VarWidth,
981 pub mr_type: MultipleResponseType,
982 pub dict_indexes: Vec<DictIndex>,
985 impl MultipleResponseSet {
988 input: &raw::MultipleResponseSet,
989 warn: &impl Fn(Error),
990 ) -> Result<Self, Error> {
991 let mr_set_name = decoder
992 .decode_identifier(&input.name.0, warn)
993 .map_err(|error| Error::InvalidMrSetName(error))?;
995 let label = decoder.decode_string(&input.label.0, warn);
997 let mut dict_indexes = Vec::with_capacity(input.short_names.len());
998 for short_name in input.short_names.iter() {
999 let short_name = match decoder.decode_identifier(&short_name.0, warn) {
1002 warn(Error::InvalidMrSetName(error));
1006 let Some(&dict_index) = decoder.var_names.get(&short_name) else {
1007 warn(Error::UnknownMrSetVariable {
1008 mr_set: mr_set_name.clone(),
1009 short_name: short_name.clone(),
1013 dict_indexes.push(dict_index);
1016 match dict_indexes.len() {
1017 0 => return Err(Error::EmptyMrSet(mr_set_name)),
1018 1 => return Err(Error::OneVarMrSet(mr_set_name)),
1022 let Some((Some(min_width), Some(max_width))) = dict_indexes
1024 .map(|dict_index| decoder.variables[dict_index].width)
1025 .map(|w| (Some(w), Some(w)))
1026 .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
1028 return Err(Error::MixedMrSet(mr_set_name));
1032 MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
1034 Ok(MultipleResponseSet {
1045 #[derive(Clone, Debug)]
1046 pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
1048 impl TryDecode for MultipleResponseRecord {
1049 type Input = raw::MultipleResponseRecord;
1053 input: &Self::Input,
1054 warn: impl Fn(Error),
1055 ) -> Result<Self, Error> {
1056 let mut sets = Vec::with_capacity(input.0.len());
1057 for set in &input.0 {
1058 match MultipleResponseSet::decode(decoder, set, &warn) {
1059 Ok(set) => sets.push(set),
1060 Err(error) => warn(error),
1063 Ok(MultipleResponseRecord(sets))
1067 #[derive(Clone, Debug)]
1068 pub struct LongStringValueLabels {
1069 pub var_name: Identifier,
1070 pub width: VarWidth,
1071 pub labels: Vec<ValueLabel>,
1074 impl LongStringValueLabels {
1077 input: &raw::LongStringValueLabels,
1078 warn: &impl Fn(Error),
1079 ) -> Result<Self, Error> {
1080 let var_name = decoder
1081 .decode_identifier(&input.var_name.0, warn)
1082 .map_err(|e| Error::InvalidLongStringValueLabelName(e))?;
1085 let max_width = VarWidth::MAX_STRING;
1086 if input.width < 9 || input.width > max_width as u32 {
1087 return Err(Error::InvalidLongValueLabelWidth {
1088 name: var_name.into(),
1094 let width = input.width as u16;
1096 let mut labels = Vec::with_capacity(input.labels.len());
1097 for (value, label) in input.labels.iter() {
1098 let value = Value::String(decoder.decode_exact_length(&value.0).into());
1099 let label = decoder.decode_string(&label.0, warn);
1100 labels.push(ValueLabel { value, label });
1103 Ok(LongStringValueLabels {
1105 width: VarWidth::String(width),
1111 #[derive(Clone, Debug)]
1112 pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
1114 impl TryDecode for LongStringValueLabelRecord {
1115 type Input = raw::LongStringValueLabelRecord;
1119 input: &Self::Input,
1120 warn: impl Fn(Error),
1121 ) -> Result<Self, Error> {
1122 let mut labels = Vec::with_capacity(input.0.len());
1123 for label in &input.0 {
1124 match LongStringValueLabels::decode(decoder, label, &warn) {
1125 Ok(set) => labels.push(set),
1126 Err(error) => warn(error),
1129 Ok(LongStringValueLabelRecord(labels))
1135 use encoding_rs::WINDOWS_1252;
1139 let mut s = String::new();
1140 s.push(char::REPLACEMENT_CHARACTER);
1141 let encoded = WINDOWS_1252.encode(&s).0;
1142 let decoded = WINDOWS_1252.decode(&encoded[..]).0;
1143 println!("{:?}", decoded);
1148 let charset: Vec<u8> = (0..=255).collect();
1149 println!("{}", charset.len());
1150 let decoded = WINDOWS_1252.decode(&charset[..]).0;
1151 println!("{}", decoded.len());
1152 let encoded = WINDOWS_1252.encode(&decoded[..]).0;
1153 println!("{}", encoded.len());
1154 assert_eq!(&charset[..], &encoded[..]);