1 use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
5 format::{Error as FormatError, Spec, UncheckedSpec},
6 identifier::{Error as IdError, Identifier},
7 raw::{self, MissingValues, UnencodedStr, VarType},
9 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
10 use encoding_rs::{DecoderResult, Encoding};
11 use num::integer::div_ceil;
12 use ordered_float::OrderedFloat;
13 use thiserror::Error as ThisError;
15 pub use crate::raw::{CategoryLabels, Compression};
17 #[derive(ThisError, Debug)]
19 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
20 InvalidVariableWidth { offset: u64, width: i32 },
22 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
23 InvalidLongMissingValueFormat,
25 #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format. Using 01 Jan 1970.")]
26 InvalidCreationDate { creation_date: String },
28 #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format. Using midnight.")]
29 InvalidCreationTime { creation_time: String },
31 #[error("{id_error} Renaming variable to {new_name}.")]
38 "Substituting {new_spec} for invalid print format on variable {variable}. {format_error}"
43 format_error: FormatError,
47 "Substituting {new_spec} for invalid write format on variable {variable}. {format_error}"
52 format_error: FormatError,
55 #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
56 DuplicateVariableName {
57 duplicate_name: Identifier,
/// Returned by `get_var_by_index` when the index is 0 or exceeds the largest valid index.
61 #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
62 InvalidDictIndex { dict_index: usize, max_index: usize },
/// Returned by `get_var_by_index` when no variable is stored under an in-range index.
64 #[error("Dictionary index {0} refers to a long string continuation.")]
65 DictIndexIsContinuation(usize),
67 #[error("Variables associated with value label are not all of identical type. Variable {numeric_var} is numeric, but variable {string_var} is string.")]
68 ValueLabelsDifferentTypes {
69 numeric_var: Identifier,
70 string_var: Identifier,
74 "Value labels may not be added to long string variable {0} using record types 3 or 4."
76 InvalidLongStringValueLabel(Identifier),
78 #[error("Invalid multiple response set name. {0}")]
79 InvalidMrSetName(IdError),
81 #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
82 UnknownMrSetVariable {
84 short_name: Identifier,
87 #[error("Multiple response set {0} has no variables.")]
88 EmptyMrSet(Identifier),
90 #[error("Multiple response set {0} has only one variable.")]
91 OneVarMrSet(Identifier),
93 #[error("Multiple response set {0} contains both string and numeric variables.")]
94 MixedMrSet(Identifier),
97 "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
99 InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
101 #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
102 TooWideMDGroupCountedValue {
109 #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
110 InvalidLongValueLabelWidth {
117 #[error("Invalid attribute name. {0}")]
118 InvalidAttributeName(IdError),
120 #[error("Invalid short name in long variable name record. {0}")]
121 InvalidShortName(IdError),
123 #[error("Invalid name in long variable name record. {0}")]
124 InvalidLongName(IdError),
126 #[error("Invalid variable name in very long string record. {0}")]
127 InvalidLongStringName(IdError),
129 #[error("Invalid variable name in long string value label record. {0}")]
130 InvalidLongStringValueLabelName(IdError),
132 #[error("Invalid variable name in attribute record. {0}")]
133 InvalidAttributeVariableName(IdError),
135 #[error("Details TBD")]
/// One decoded record. Each variant wraps the decoded form of a single record kind;
/// several payload types are re-exported unchanged from `crate::raw`.
139 #[derive(Clone, Debug)]
141 Header(HeaderRecord),
142 Variable(VariableRecord),
143 ValueLabel(ValueLabelRecord),
144 Document(DocumentRecord),
145 IntegerInfo(IntegerInfoRecord),
146 FloatInfo(FloatInfoRecord),
147 VariableSets(VariableSetRecord),
148 VarDisplay(VarDisplayRecord),
149 MultipleResponse(MultipleResponseRecord),
150 LongStringValueLabels(LongStringValueLabelRecord),
151 Encoding(EncodingRecord),
152 NumberOfCases(NumberOfCasesRecord),
153 ProductInfo(ProductInfoRecord),
154 LongNames(LongNameRecord),
155 VeryLongStrings(VeryLongStringRecord),
156 FileAttributes(FileAttributeRecord),
157 VariableAttributes(VariableAttributeRecord),
158 OtherExtension(Extension),
161 //ZTrailer(ZTrailer),
165 pub use crate::raw::EncodingRecord;
166 pub use crate::raw::Extension;
167 pub use crate::raw::FloatInfoRecord;
168 pub use crate::raw::IntegerInfoRecord;
169 pub use crate::raw::NumberOfCasesRecord;
/// Index of a variable within the dictionary; the key type of the decoder's `variables` map.
171 type DictIndex = usize;
/// A variable as tracked by the decoder while variable records are processed.
173 pub struct Variable {
/// Dictionary index assigned when the variable record was decoded.
174 pub dict_index: DictIndex,
/// Name taken from the variable record, or a generated replacement when that name was
/// invalid or a duplicate.
175 pub short_name: Identifier,
/// Long name, if one is known.
// NOTE(review): presumably filled in from the long variable names record — confirm.
176 pub long_name: Option<Identifier>,
/// Compression setting, when one is present.
181 pub compression: Option<Compression>,
/// Character encoding used to decode strings and to validate identifiers.
183 pub encoding: &'static Encoding,
/// Variables decoded so far, keyed by dictionary index.
184 pub variables: HashMap<DictIndex, Variable>,
/// Maps each variable's short name to its dictionary index.
185 pub var_names: HashMap<Identifier, DictIndex>,
/// Dictionary index that the next variable will receive; advanced by the number of
/// indexes each decoded variable occupies.
186 n_dict_indexes: usize,
/// Counter behind the `VARnnn` names produced by `generate_name`.
187 n_generated_names: usize,
/// Decodes a list of raw records into their decoded counterparts.
///
/// The visible part of the body scans `headers` for an encoding record and for the
/// `character_code` of an integer-info record.
// NOTE(review): the type parameter `T` appears in neither the parameter list nor the
// return type of this signature — confirm that it is actually needed.
190 pub fn decode<T>(headers: Vec<raw::Record>) -> Vec<Record> {
191 let encoding = headers.iter().find_map(|rec| {
192 if let raw::Record::Encoding(ref e) = rec {
198 let character_code = headers.iter().find_map(|rec| {
199 if let raw::Record::IntegerInfo(ref r) = rec {
200 Some(r.character_code)
/// Produces a replacement variable name of the form `VARnnn` (zero-padded to at least
/// three digits) that is not yet present in `var_names`.
211 fn generate_name(&mut self) -> Identifier {
// Bump the counter first so that generated names start at VAR001.
213 self.n_generated_names += 1;
214 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
// Only a name that no existing variable uses is acceptable.
216 if !self.var_names.contains_key(&name) {
// Guard against the counter wrapping around.
219 assert!(self.n_generated_names < usize::MAX);
/// Decodes `input` from `self.encoding` (without BOM handling), borrowing from `input`
/// when the decoder allows it.
// NOTE(review): `malformed` presumably drives a call to `warn` in the lines that follow —
// confirm.
222 fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
223 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
/// Same as `decode_string_cow`, but always returns an owned `String`.
229 fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String {
230 self.decode_string_cow(input, warn).into()
/// Decodes `input` as a string and validates it as an identifier in `self.encoding`.
///
/// Returns the `IdError` from `Identifier::new` when the decoded text is not a valid
/// identifier.
232 pub fn decode_identifier(
235 warn: &impl Fn(Error),
236 ) -> Result<Identifier, IdError> {
237 let s = self.decode_string_cow(input, warn);
238 Identifier::new(&s, self.encoding)
/// Looks up the variable stored under `dict_index`.
///
/// Fails with `InvalidDictIndex` when the index is 0 or greater than the largest index
/// assigned so far, and with `DictIndexIsContinuation` when the index is in range but no
/// variable is stored under it.
240 fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
// NOTE(review): this subtraction underflows if `n_dict_indexes` can be 0 here — confirm
// the counter's initial value.
241 let max_index = self.n_dict_indexes - 1;
// `dict_index` is already `usize`; the cast below is a no-op.
242 if dict_index == 0 || dict_index as usize > max_index {
243 return Err(Error::InvalidDictIndex {
248 let Some(variable) = self.variables.get(&dict_index) else {
249 return Err(Error::DictIndexIsContinuation(dict_index));
254 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
255 /// re-encoding the result back into `self.encoding` will have exactly the
256 /// same length in bytes.
258 /// XXX warn about errors?
259 fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
260 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
261 // This is the common case. Usually there will be no errors.
264 // Unusual case. Don't bother to optimize it much.
265 let mut decoder = self.encoding.new_decoder_without_bom_handling();
266 let mut output = String::with_capacity(
268 .max_utf8_buffer_length_without_replacement(input.len())
// Decode piecewise, resuming after each malformed sequence until the input is used up.
271 let mut rest = input;
272 while !rest.is_empty() {
273 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
274 (DecoderResult::InputEmpty, _) => break,
// `output` was given capacity up front, so the decoder is not expected to run out of room.
275 (DecoderResult::OutputFull, _) => unreachable!(),
276 (DecoderResult::Malformed(a, b), consumed) => {
// Stand in one '?' for every byte the decoder reported as skipped.
277 let skipped = a as usize + b as usize;
278 output.extend(repeat('?').take(skipped));
279 rest = &rest[consumed..];
// Check the documented guarantee: re-encoding yields the original byte length.
283 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
/// Fallible decoding of a record. Implementations in this file name their raw input through
/// an associated `Input` type and report non-fatal problems through `warn`.
289 pub trait TryDecode: Sized {
294 warn: impl Fn(Error),
295 ) -> Result<Self, Error>;
/// Infallible decoding of `Input` into `Self`; problems can only be reported via `warn`.
298 pub trait Decode<Input>: Sized {
299 fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self;
/// Decodes a fixed-size raw string of `N` bytes into an owned `String` using the decoder's
/// encoding.
302 impl<const N: usize> Decode<UnencodedStr<N>> for String {
303 fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
304 decoder.decode_string(&input.0, &warn)
/// Decoded form of the raw header record.
308 #[derive(Clone, Debug)]
309 pub struct HeaderRecord {
/// Decoded text of the raw header's `eye_catcher` field.
310 pub eye_catcher: String,
/// The raw header's `weight_index`, widened to `usize`, when present.
311 pub weight_index: Option<usize>,
/// The raw header's `n_cases`, converted to `u64`, when present.
312 pub n_cases: Option<u64>,
/// Creation timestamp assembled from the header's separate date and time strings.
313 pub creation: NaiveDateTime,
/// Decoded text of the raw header's `file_label` field.
314 pub file_label: String,
/// Decodes the raw header. Unparseable creation dates and times are reported through `warn`
/// instead of failing the whole record.
317 impl TryDecode for HeaderRecord {
318 type Input = crate::raw::HeaderRecord;
323 warn: impl Fn(Error),
324 ) -> Result<Self, Error> {
325 let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
326 let file_label = decoder.decode_string(&input.file_label.0, &warn);
327 let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
// NOTE(review): chrono documents `%v` as `%e-%b-%Y` (e.g. " 8-Jul-2001"), which does not
// match the "DD MMM YY" layout named in the InvalidCreationDate message — confirm which
// format is intended.
328 let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
329 warn(Error::InvalidCreationDate {
330 creation_date: creation_date.into(),
334 let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
336 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
337 warn(Error::InvalidCreationTime {
338 creation_time: creation_time.into(),
344 weight_index: input.weight_index.map(|n| n as usize),
345 n_cases: input.n_cases.map(|n| n as u64),
// Combine the separately parsed date and time into a single timestamp.
346 creation: NaiveDateTime::new(creation_date, creation_time),
352 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
/// Partial ordering of variable widths: two numeric widths are equal and two string widths
/// compare by their length.
358 impl PartialOrd for VarWidth {
359 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
360 match (self, other) {
361 (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
362 (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
/// Upper bound used in this file when validating string widths.
369 const MAX_STRING: u16 = 32767;
/// Number of dictionary indexes a variable of this width occupies: 1 for a numeric variable,
/// and the string width divided by 8, rounded up, for a string variable.
371 fn n_dict_indexes(self) -> usize {
373 VarWidth::Numeric => 1,
374 VarWidth::String(w) => div_ceil(w as usize, 8),
381 f: impl Fn(u16, u16) -> u16,
382 ) -> Option<VarWidth> {
384 (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
385 (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
386 Some(VarWidth::String(f(a, b)))
392 /// Returns the wider of `a` and `b`:
393 /// - Numerical variable widths are equally wide.
394 /// - Longer strings are wider than shorter strings.
395 /// - Numerical and string types are incomparable, so result in `None`.
396 /// - Any `None` in the input yields `None` in the output.
397 pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
398 Self::width_predicate(a, b, |a, b| a.max(b))
401 /// Returns the narrower of `a` and `b` (see [`Self::wider`]).
402 pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
403 Self::width_predicate(a, b, |a, b| a.min(b))
/// Collapses a width into its type, discarding the length of string widths.
407 impl From<VarWidth> for VarType {
408 fn from(source: VarWidth) -> Self {
410 VarWidth::Numeric => VarType::Numeric,
411 VarWidth::String(_) => VarType::String,
/// Decoded form of a raw variable record.
416 #[derive(Clone, Debug)]
417 pub struct VariableRecord {
/// Variable name after validation; may be a generated replacement.
419 pub name: Identifier,
/// Print format, replaced by the default for the variable's width when the raw one is invalid.
420 pub print_format: Spec,
/// Write format, replaced by the default for the variable's width when the raw one is invalid.
421 pub write_format: Spec,
/// Missing values, copied unchanged from the raw record.
422 pub missing_values: MissingValues,
/// Decoded variable label, when the raw record has one.
423 pub label: Option<String>,
/// Converts a raw format spec into a checked `Spec` compatible with `width`.
///
/// If any conversion step fails, the default spec for `width` is substituted and `warn` is
/// called with that substitute and the error.
426 fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
427 UncheckedSpec::try_from(raw)
428 .and_then(Spec::try_from)
429 .and_then(|x| x.check_width_compatibility(width))
430 .unwrap_or_else(|error| {
431 let new_format = Spec::default_for_width(width);
432 warn(new_format, error);
/// Decoding of raw variable records; also registers each decoded variable with the decoder.
437 impl VariableRecord {
439 decoder: &mut Decoder,
440 input: &crate::raw::VariableRecord,
441 warn: impl Fn(Error),
442 ) -> Result<Option<VariableRecord>, Error> {
// Raw width: 0 is numeric, 1..=255 is a string of that width, -1 yields no record.
// Anything else is reported as InvalidVariableWidth.
443 let width = match input.width {
444 0 => VarWidth::Numeric,
445 w @ 1..=255 => VarWidth::String(w as u16),
446 -1 => return Ok(None),
448 return Err(Error::InvalidVariableWidth {
449 offset: input.offset,
// Invalid and duplicate names are both replaced by a generated name, with a warning.
454 let name = match decoder.decode_identifier(&input.name.0, &warn) {
456 if !decoder.var_names.contains_key(&name) {
459 let new_name = decoder.generate_name();
460 warn(Error::DuplicateVariableName {
461 duplicate_name: name.clone(),
462 new_name: new_name.clone(),
468 let new_name = decoder.generate_name();
469 warn(Error::InvalidVariableName {
471 new_name: new_name.clone(),
// The variable takes the next free dictionary index.
476 let variable = Variable {
477 dict_index: decoder.n_dict_indexes,
478 short_name: name.clone(),
// Reserve as many dictionary indexes as this width occupies.
482 decoder.n_dict_indexes += width.n_dict_indexes();
// Record the variable in both lookup tables (name to index, index to variable).
485 .insert(name.clone(), variable.dict_index)
489 .insert(variable.dict_index, variable)
492 let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
493 warn(Error::InvalidPrintFormat {
495 variable: name.clone(),
499 let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
500 warn(Error::InvalidWriteFormat {
502 variable: name.clone(),
509 .map(|label| decoder.decode_string(&label.0, &warn));
510 Ok(Some(VariableRecord {
515 missing_values: input.missing_values.clone(),
/// Decoded document record: a list of decoded strings.
521 #[derive(Clone, Debug)]
522 pub struct DocumentRecord(Vec<String>);
/// Decodes each raw string of the document record with the decoder's encoding.
524 impl TryDecode for DocumentRecord {
525 type Input = crate::raw::DocumentRecord;
530 warn: impl Fn(Error),
531 ) -> Result<Self, Error> {
536 .map(|s| decoder.decode_string(&s.0, &warn))
546 const NAME: &'static str;
547 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
/// A named set of variables.
550 #[derive(Clone, Debug)]
551 pub struct VariableSet {
/// Names of the member variables, kept as plain strings.
553 pub vars: Vec<String>,
/// Parses one `NAME=VAR VAR ...` line: the text before the first `=` is the set name and
/// the remainder is split on ASCII whitespace into variable names. A line without `=`
/// fails with `Error::TBD`.
557 fn parse(input: &str) -> Result<Self, Error> {
558 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
559 let vars = input.split_ascii_whitespace().map(String::from).collect();
/// Turns a `Result` into an `Option`, taking a `warn` callback for the error case.
567 trait WarnOnError<T> {
568 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
570 impl<T> WarnOnError<T> for Result<T, Error> {
571 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
// Success passes the value through.
// NOTE(review): the error arm presumably calls `warn` and returns `None` — confirm.
573 Ok(result) => Some(result),
/// A decoded value: either an optional number or a string.
582 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// Numeric value; wrapped in `OrderedFloat` so that `Value` can derive `Eq`, `Ord` and `Hash`.
584 Number(Option<OrderedFloat<f64>>),
/// Converts a raw value. Strings are decoded with `decode_exact_length`, so that
/// re-encoding them gives the same byte length as the raw input.
589 pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
591 raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
592 raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
/// A value paired with its label.
597 #[derive(Clone, Debug)]
598 pub struct ValueLabel {
/// Decoded value labels together with the variables they apply to.
603 #[derive(Clone, Debug)]
604 pub struct ValueLabelRecord {
/// Type of the first variable the labels apply to.
605 pub var_type: VarType,
/// The decoded value/label pairs.
606 pub labels: Vec<ValueLabel>,
/// Short names of the variables that carry these labels.
607 pub variables: Vec<Identifier>,
/// Decoding of a raw value-label record together with the record listing the dictionary
/// indexes it applies to.
610 impl ValueLabelRecord {
612 decoder: &mut Decoder,
613 raw_value_label: &crate::raw::ValueLabelRecord,
614 dict_indexes: &crate::raw::VarIndexRecord,
615 warn: impl Fn(Error),
616 ) -> Result<Option<ValueLabelRecord>, Error> {
// Resolve each dictionary index to a variable; lookup failures go through `warn_on_error`.
617 let variables: Vec<&Variable> = dict_indexes
620 .filter_map(|&dict_index| {
622 .get_var_by_index(dict_index as usize)
623 .warn_on_error(&warn)
// String variables wider than 8 get an InvalidLongStringValueLabel warning.
625 .filter(|&variable| match variable.width {
626 VarWidth::String(width) if width > 8 => {
627 warn(Error::InvalidLongStringValueLabel(
628 variable.short_name.clone(),
// The first variable determines the type used for the labels.
635 let mut i = variables.iter();
636 let Some(&first_var) = i.next() else {
639 let var_type: VarType = first_var.width.into();
641 let this_type: VarType = variable.width.into();
642 if var_type != this_type {
// Order the pair so the warning names the numeric and string variables correctly.
643 let (numeric_var, string_var) = match var_type {
644 VarType::Numeric => (first_var, variable),
645 VarType::String => (variable, first_var),
647 warn(Error::ValueLabelsDifferentTypes {
648 numeric_var: numeric_var.short_name.clone(),
649 string_var: string_var.short_name.clone(),
654 let labels = raw_value_label
657 .map(|(value, label)| {
658 let label = decoder.decode_string(&label.0, &warn);
// Raw values are interpreted according to `var_type` and the decoder's endianness.
659 let value = Value::decode(
660 raw::Value::from_raw(*value, var_type, decoder.endian),
663 ValueLabel { value, label }
666 let variables = variables
668 .map(|&variable| variable.short_name.clone())
670 Ok(Some(ValueLabelRecord {
/// Decoded variable-set record: one `VariableSet` per successfully parsed input line.
678 #[derive(Clone, Debug)]
679 pub struct VariableSetRecord(Vec<VariableSet>);
681 impl TextRecord for VariableSetRecord {
682 const NAME: &'static str = "variable set";
/// Parses `input` line by line; parse failures go through `warn_on_error` and do not make
/// this function return an error.
683 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
684 let mut sets = Vec::new();
685 for line in input.lines() {
686 if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
690 Ok(VariableSetRecord(sets))
/// Extra product info, kept as the unmodified text of the record.
694 #[derive(Clone, Debug)]
695 pub struct ProductInfoRecord(pub String);
697 impl TextRecord for ProductInfoRecord {
698 const NAME: &'static str = "extra product info";
/// Stores `input` verbatim; never warns and never fails.
699 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
700 Ok(ProductInfoRecord(input.into()))
/// A short variable name paired with its long name.
704 #[derive(Clone, Debug)]
705 pub struct LongName {
706 pub short_name: Identifier,
707 pub long_name: Identifier,
/// Validates both names as identifiers in the decoder's encoding, failing with
/// `InvalidShortName` or `InvalidLongName` respectively.
711 fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
712 let short_name = Identifier::new(short_name, decoder.encoding)
713 .map_err(|e| Error::InvalidShortName(e))?;
715 Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?;
/// Decoded long-variable-name record.
723 #[derive(Clone, Debug)]
724 pub struct LongNameRecord(Vec<LongName>);
726 impl LongNameRecord {
/// Parses tab-separated `SHORT=LONG` pairs. Empty segments are skipped, and pairs whose
/// names fail validation go through `warn_on_error` and are left out.
727 pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
728 let mut names = Vec::new();
729 for pair in input.split('\t').filter(|s| !s.is_empty()) {
730 if let Some((short_name, long_name)) = pair.split_once('=') {
731 if let Some(long_name) =
732 LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
734 names.push(long_name);
740 Ok(LongNameRecord(names))
/// One entry of the very-long-string record, identified by a variable's short name.
744 #[derive(Clone, Debug)]
745 pub struct VeryLongString {
746 pub short_name: Identifier,
750 impl VeryLongString {
/// Parses one `NAME=LENGTH` entry.
///
/// Fails with `Error::TBD` when `=` is missing, the length is not a valid `u16`, or the
/// length exceeds `VarWidth::MAX_STRING`; fails with `InvalidLongStringName` when the name
/// is not a valid identifier.
751 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
752 let Some((short_name, length)) = input.split_once('=') else {
753 return Err(Error::TBD);
755 let short_name = Identifier::new(short_name, decoder.encoding)
756 .map_err(|e| Error::InvalidLongStringName(e))?;
757 let length: u16 = length.parse().map_err(|_| Error::TBD)?;
758 if length > VarWidth::MAX_STRING {
759 return Err(Error::TBD);
762 short_name: short_name.into(),
/// Decoded very-long-string record.
768 #[derive(Clone, Debug)]
769 pub struct VeryLongStringRecord(Vec<VeryLongString>);
771 impl VeryLongStringRecord {
/// Parses every non-empty entry of `input`; entries that fail to parse go through
/// `warn_on_error` and are left out.
772 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
773 let mut very_long_strings = Vec::new();
// Drop trailing tabs from each entry and ignore entries that end up empty.
776 .map(|s| s.trim_end_matches('\t'))
777 .filter(|s| !s.is_empty())
779 if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
780 very_long_strings.push(vls)
783 Ok(VeryLongStringRecord(very_long_strings))
/// A named attribute with one or more string values.
787 #[derive(Clone, Debug)]
788 pub struct Attribute {
789 pub name: Identifier,
790 pub values: Vec<String>,
// Parser for a single attribute. On success it returns the attribute (or `None` when the
// name is not a valid identifier) together with the unparsed remainder of the input.
797 warn: &impl Fn(Error),
798 ) -> Result<(Option<Attribute>, &'a str), Error> {
// The attribute name is everything before the first '('.
799 let Some((name, mut input)) = input.split_once('(') else {
800 return Err(Error::TBD);
802 let mut values = Vec::new();
// Each value is terminated by a newline.
804 let Some((value, rest)) = input.split_once('\n') else {
805 return Err(Error::TBD);
// A value that has its surrounding quotes is stored without them; otherwise it is
// stored as written.
807 if let Some(stripped) = value
809 .and_then(|value| value.strip_suffix('\''))
811 values.push(stripped.into());
814 values.push(value.into());
// A ')' directly after a value ends the attribute.
816 if let Some(rest) = rest.strip_prefix(')') {
817 let attribute = Identifier::new(name, decoder.encoding)
818 .map_err(|e| Error::InvalidAttributeName(e))
820 .map(|name| Attribute { name, values });
821 return Ok((attribute, rest));
/// An ordered list of attributes.
828 #[derive(Clone, Debug)]
829 pub struct AttributeSet(pub Vec<Attribute>);
// Parser for a run of attributes. It stops at `sentinel` and returns the set together with
// the input that follows the sentinel.
835 sentinel: Option<char>,
836 warn: &impl Fn(Error),
837 ) -> Result<(AttributeSet, &'a str), Error> {
838 let mut attributes = Vec::new();
840 match input.chars().next() {
// On the sentinel, stop and skip one byte of input.
842 c if c == sentinel => break &input[1..],
844 let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
// Attributes that parsed to `None` are left out.
845 if let Some(attribute) = attribute {
846 attributes.push(attribute);
852 Ok((AttributeSet(attributes), rest))
/// Decoded file-level attribute record.
856 #[derive(Clone, Debug)]
857 pub struct FileAttributeRecord(AttributeSet);
859 impl FileAttributeRecord {
/// Parses `input` as a single attribute set with no sentinel character.
860 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
861 let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
// Input left over after the set is handled here.
// NOTE(review): presumably reported via `warn` — confirm.
862 if !rest.is_empty() {
865 Ok(FileAttributeRecord(set))
/// The attributes attached to one variable, identified by its long name.
869 #[derive(Clone, Debug)]
870 pub struct VarAttributeSet {
871 pub long_var_name: Identifier,
872 pub attributes: AttributeSet,
875 impl VarAttributeSet {
// Parser for one `NAME:attributes` group. It returns the group (or `None` when the variable
// name is not a valid identifier) together with the remaining input.
879 warn: &impl Fn(Error),
880 ) -> Result<(Option<VarAttributeSet>, &'a str), Error> {
// The variable name is everything before the first ':'.
881 let Some((long_var_name, rest)) = input.split_once(':') else {
882 return Err(Error::TBD);
// The attributes of one variable run up to a '/' sentinel.
884 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
885 let var_attribute = Identifier::new(long_var_name, decoder.encoding)
886 .map_err(|e| Error::InvalidAttributeVariableName(e))
888 .map(|name| VarAttributeSet {
892 Ok((var_attribute, rest))
/// Decoded variable-attribute record: one attribute set per variable.
896 #[derive(Clone, Debug)]
897 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
899 impl VariableAttributeRecord {
/// Parses per-variable attribute groups repeatedly until `input` is exhausted; groups that
/// parsed to `None` are left out.
900 pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
901 let mut var_attribute_sets = Vec::new();
902 while !input.is_empty() {
903 let Some((var_attribute, rest)) =
904 VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
908 if let Some(var_attribute) = var_attribute {
909 var_attribute_sets.push(var_attribute);
913 Ok(VariableAttributeRecord(var_attribute_sets))
917 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
924 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// Display settings for one variable; each setting is optional.
931 #[derive(Clone, Debug)]
932 pub struct VarDisplay {
933 pub measure: Option<Measure>,
935 pub align: Option<Alignment>,
/// Decoded variable-display record: a list of per-variable display settings.
938 #[derive(Clone, Debug)]
939 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
/// Kind of a multiple response set: multiple-dichotomy or multiple-category.
941 #[derive(Clone, Debug)]
942 pub enum MultipleResponseType {
/// Source of category labels for a multiple-dichotomy set.
945 labels: CategoryLabels,
/// Decoding of the raw multiple-response type. For multiple-dichotomy sets this includes
/// interpreting the counted value according to the set's narrowest variable width.
950 impl MultipleResponseType {
954 input: &raw::MultipleResponseType,
956 warn: &impl Fn(Error),
957 ) -> Result<Self, Error> {
958 let mr_type = match input {
959 raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
960 let value = decoder.decode_string_cow(&value.0, warn);
961 let value = match min_width {
// Numeric sets: the counted value must parse as a number once surrounding whitespace
// is trimmed.
962 VarWidth::Numeric => {
963 let number: f64 = value.trim().parse().map_err(|_| {
964 Error::InvalidMDGroupCountedValue {
965 mr_set: mr_set.clone(),
966 number: value.into(),
969 Value::Number(Some(number.into()))
// String sets: trailing spaces are ignored and the rest must fit the narrowest width.
971 VarWidth::String(max_width) => {
972 let value = value.trim_end_matches(' ');
973 let width = value.len();
974 if width > max_width as usize {
975 return Err(Error::TooWideMDGroupCountedValue {
976 mr_set: mr_set.clone(),
982 Value::String(value.into())
985 MultipleResponseType::MultipleDichotomy {
990 raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
/// A decoded multiple response set.
996 #[derive(Clone, Debug)]
997 pub struct MultipleResponseSet {
/// Name of the set.
998 pub name: Identifier,
/// Width of the narrowest member variable.
999 pub min_width: VarWidth,
/// Width of the widest member variable.
1000 pub max_width: VarWidth,
/// Kind of set, with its kind-specific data.
1002 pub mr_type: MultipleResponseType,
/// Dictionary indexes of the member variables that were found by short name.
1003 pub dict_indexes: Vec<DictIndex>,
/// Decoding of one raw multiple response set: resolves member variables by short name and
/// rejects sets with fewer than two variables or with mixed numeric and string members.
1006 impl MultipleResponseSet {
1009 input: &raw::MultipleResponseSet,
1010 warn: &impl Fn(Error),
1011 ) -> Result<Self, Error> {
1012 let mr_set_name = decoder
1013 .decode_identifier(&input.name.0, warn)
1014 .map_err(|error| Error::InvalidMrSetName(error))?;
1016 let label = decoder.decode_string(&input.label.0, warn);
1018 let mut dict_indexes = Vec::with_capacity(input.short_names.len());
1019 for short_name in input.short_names.iter() {
1020 let short_name = match decoder.decode_identifier(&short_name.0, warn) {
// NOTE(review): a bad *variable* short name is reported with the set-name error variant
// here — confirm that this is intended.
1023 warn(Error::InvalidMrSetName(error));
// A short name that matches no known variable produces a warning.
1027 let Some(&dict_index) = decoder.var_names.get(&short_name) else {
1028 warn(Error::UnknownMrSetVariable {
1029 mr_set: mr_set_name.clone(),
1030 short_name: short_name.clone(),
1034 dict_indexes.push(dict_index);
// Sets with no variables or only one variable are rejected.
1037 match dict_indexes.len() {
1038 0 => return Err(Error::EmptyMrSet(mr_set_name)),
1039 1 => return Err(Error::OneVarMrSet(mr_set_name)),
// Fold all member widths into (narrowest, widest). Mixing numeric and string widths makes
// either side `None`, which is reported as MixedMrSet.
1043 let Some((Some(min_width), Some(max_width))) = dict_indexes
1045 .map(|dict_index| decoder.variables[dict_index].width)
1046 .map(|w| (Some(w), Some(w)))
1047 .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
1049 return Err(Error::MixedMrSet(mr_set_name));
1053 MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
1055 Ok(MultipleResponseSet {
/// Decoded multiple-response record: the sets that decoded successfully.
1066 #[derive(Clone, Debug)]
1067 pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
/// Decodes every raw set. A set that fails to decode is reported through `warn` and left
/// out, so one bad set does not fail the whole record.
1069 impl TryDecode for MultipleResponseRecord {
1070 type Input = raw::MultipleResponseRecord;
1074 input: &Self::Input,
1075 warn: impl Fn(Error),
1076 ) -> Result<Self, Error> {
1077 let mut sets = Vec::with_capacity(input.0.len());
1078 for set in &input.0 {
1079 match MultipleResponseSet::decode(decoder, set, &warn) {
1080 Ok(set) => sets.push(set),
1081 Err(error) => warn(error),
1084 Ok(MultipleResponseRecord(sets))
/// Value labels for one long string variable.
1088 #[derive(Clone, Debug)]
1089 pub struct LongStringValueLabels {
/// Name of the variable the labels belong to.
1090 pub var_name: Identifier,
/// Width recorded for the labels; always a string width.
1091 pub width: VarWidth,
/// The decoded value/label pairs.
1092 pub labels: Vec<ValueLabel>,
/// Decoding of the raw long-string value labels for one variable.
1095 impl LongStringValueLabels {
1098 input: &raw::LongStringValueLabels,
1099 warn: &impl Fn(Error),
1100 ) -> Result<Self, Error> {
1101 let var_name = decoder
1102 .decode_identifier(&input.var_name.0, warn)
1103 .map_err(|e| Error::InvalidLongStringValueLabelName(e))?;
// The width must be at least 9 and at most VarWidth::MAX_STRING.
1106 let max_width = VarWidth::MAX_STRING;
1107 if input.width < 9 || input.width > max_width as u32 {
1108 return Err(Error::InvalidLongValueLabelWidth {
1109 name: var_name.into(),
// The range check above guarantees that the width fits in `u16`.
1115 let width = input.width as u16;
1117 let mut labels = Vec::with_capacity(input.labels.len());
1118 for (value, label) in input.labels.iter() {
// Values are decoded so that re-encoding keeps their byte length; labels are decoded normally.
1119 let value = Value::String(decoder.decode_exact_length(&value.0).into());
1120 let label = decoder.decode_string(&label.0, warn);
1121 labels.push(ValueLabel { value, label });
1124 Ok(LongStringValueLabels {
1126 width: VarWidth::String(width),
/// Decoded long-string value-label record: the per-variable label sets that decoded
/// successfully.
1132 #[derive(Clone, Debug)]
1133 pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
/// Decodes every per-variable label set. A set that fails to decode is reported through
/// `warn` and left out.
1135 impl TryDecode for LongStringValueLabelRecord {
1136 type Input = raw::LongStringValueLabelRecord;
1140 input: &Self::Input,
1141 warn: impl Fn(Error),
1142 ) -> Result<Self, Error> {
1143 let mut labels = Vec::with_capacity(input.0.len());
1144 for label in &input.0 {
1145 match LongStringValueLabels::decode(decoder, label, &warn) {
1146 Ok(set) => labels.push(set),
1147 Err(error) => warn(error),
1150 Ok(LongStringValueLabelRecord(labels))
1156 use encoding_rs::WINDOWS_1252;
1160 let mut s = String::new();
1161 s.push(char::REPLACEMENT_CHARACTER);
1162 let encoded = WINDOWS_1252.encode(&s).0;
1163 let decoded = WINDOWS_1252.decode(&encoded[..]).0;
1164 println!("{:?}", decoded);
1169 let charset: Vec<u8> = (0..=255).collect();
1170 println!("{}", charset.len());
1171 let decoded = WINDOWS_1252.decode(&charset[..]).0;
1172 println!("{}", decoded.len());
1173 let encoded = WINDOWS_1252.encode(&decoded[..]).0;
1174 println!("{}", encoded.len());
1175 assert_eq!(&charset[..], &encoded[..]);