1 use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
5 format::{Error as FormatError, Spec, UncheckedSpec},
6 identifier::{Error as IdError, Identifier},
7 raw::{self, MissingValues, UnencodedStr, VarType},
9 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
10 use encoding_rs::{DecoderResult, Encoding};
11 use num::integer::div_ceil;
12 use ordered_float::OrderedFloat;
13 use thiserror::Error as ThisError;
15 pub use crate::raw::{CategoryLabels, Compression};
17 #[derive(ThisError, Debug)]
19 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
20 InvalidVariableWidth { offset: u64, width: i32 },
22 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
23 InvalidLongMissingValueFormat,
25 #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format. Using 01 Jan 1970.")]
26 InvalidCreationDate { creation_date: String },
28 #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format. Using midnight.")]
29 InvalidCreationTime { creation_time: String },
31 #[error("{id_error} Renaming variable to {new_name}.")]
38 "Substituting {new_spec} for invalid print format on variable {variable}. {format_error}"
43 format_error: FormatError,
47 "Substituting {new_spec} for invalid write format on variable {variable}. {format_error}"
52 format_error: FormatError,
55 #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
56 DuplicateVariableName {
57 duplicate_name: Identifier,
61 #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
62 InvalidDictIndex { dict_index: usize, max_index: usize },
64 #[error("Dictionary index {0} refers to a long string continuation.")]
65 DictIndexIsContinuation(usize),
67 #[error("Variables associated with value label are not all of identical type. Variable {numeric_var} is numeric, but variable {string_var} is string.")]
68 ValueLabelsDifferentTypes {
69 numeric_var: Identifier,
70 string_var: Identifier,
74 "Value labels may not be added to long string variable {0} using record types 3 or 4."
76 InvalidLongStringValueLabel(Identifier),
78 #[error("Invalid multiple response set name. {0}")]
79 InvalidMrSetName(IdError),
81 #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
82 UnknownMrSetVariable {
84 short_name: Identifier,
87 #[error("Multiple response set {0} has no variables.")]
88 EmptyMrSet(Identifier),
90 #[error("Multiple response set {0} has only one variable.")]
91 OneVarMrSet(Identifier),
93 #[error("Multiple response set {0} contains both string and numeric variables.")]
94 MixedMrSet(Identifier),
97 "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
99 InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
101 #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
102 TooWideMDGroupCountedValue {
109 #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
110 InvalidLongValueLabelWidth {
117 #[error("Invalid attribute name. {0}")]
118 InvalidAttributeName(IdError),
120 #[error("Invalid short name in long variable name record. {0}")]
121 InvalidShortName(IdError),
123 #[error("Invalid name in long variable name record. {0}")]
124 InvalidLongName(IdError),
126 #[error("Invalid variable name in very long string record. {0}")]
127 InvalidLongStringName(IdError),
129 #[error("Invalid variable name in long string value label record. {0}")]
130 InvalidLongStringValueLabelName(IdError),
132 #[error("Invalid variable name in attribute record. {0}")]
133 InvalidAttributeVariableName(IdError),
135 #[error("Details TBD")]
139 #[derive(Clone, Debug)]
141 Header(HeaderRecord),
142 Variable(VariableRecord),
143 ValueLabel(ValueLabelRecord),
144 Document(DocumentRecord),
145 IntegerInfo(IntegerInfoRecord),
146 FloatInfo(FloatInfoRecord),
147 VariableSets(VariableSetRecord),
148 VarDisplay(VarDisplayRecord),
149 MultipleResponse(MultipleResponseRecord),
150 LongStringValueLabels(LongStringValueLabelRecord),
151 Encoding(EncodingRecord),
152 NumberOfCases(NumberOfCasesRecord),
153 ProductInfo(ProductInfoRecord),
154 LongNames(LongNameRecord),
155 VeryLongStrings(VeryLongStringRecord),
156 FileAttributes(FileAttributeRecord),
157 VariableAttributes(VariableAttributeRecord),
158 OtherExtension(Extension),
161 //ZTrailer(ZTrailer),
165 pub use crate::raw::EncodingRecord;
166 pub use crate::raw::FloatInfoRecord;
167 pub use crate::raw::IntegerInfoRecord;
168 pub use crate::raw::NumberOfCasesRecord;
169 pub use crate::raw::Extension;
/// Index type used for `Variable::dict_index` and as the key of the
/// `variables` map / the value of the `var_names` map.
171 type DictIndex = usize;
173 pub struct Variable {
174 pub dict_index: DictIndex,
175 pub short_name: Identifier,
176 pub long_name: Option<Identifier>,
181 pub compression: Option<Compression>,
183 pub encoding: &'static Encoding,
184 pub variables: HashMap<DictIndex, Variable>,
185 pub var_names: HashMap<Identifier, DictIndex>,
186 n_dict_indexes: usize,
187 n_generated_names: usize,
191 fn generate_name(&mut self) -> Identifier {
193 self.n_generated_names += 1;
194 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
196 if !self.var_names.contains_key(&name) {
199 assert!(self.n_generated_names < usize::MAX);
202 fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
203 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
209 fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String {
210 self.decode_string_cow(input, warn).into()
212 pub fn decode_identifier(
215 warn: &impl Fn(Error),
216 ) -> Result<Identifier, IdError> {
217 let s = self.decode_string_cow(input, warn);
218 Identifier::new(&s, self.encoding)
220 fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
221 let max_index = self.n_dict_indexes - 1;
222 if dict_index == 0 || dict_index as usize > max_index {
223 return Err(Error::InvalidDictIndex {
228 let Some(variable) = self.variables.get(&dict_index) else {
229 return Err(Error::DictIndexIsContinuation(dict_index));
234 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
235 /// re-encoding the result back into `self.encoding` will have exactly the
236 /// same length in bytes.
238 /// XXX warn about errors?
239 fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
240 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
241 // This is the common case. Usually there will be no errors.
244 // Unusual case. Don't bother to optimize it much.
245 let mut decoder = self.encoding.new_decoder_without_bom_handling();
246 let mut output = String::with_capacity(
248 .max_utf8_buffer_length_without_replacement(input.len())
251 let mut rest = input;
252 while !rest.is_empty() {
253 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
254 (DecoderResult::InputEmpty, _) => break,
255 (DecoderResult::OutputFull, _) => unreachable!(),
256 (DecoderResult::Malformed(a, b), consumed) => {
257 let skipped = a as usize + b as usize;
258 output.extend(repeat('?').take(skipped));
259 rest = &rest[consumed..];
263 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
269 pub trait TryDecode: Sized {
274 warn: impl Fn(Error),
275 ) -> Result<Self, Error>;
278 pub trait Decode<Input>: Sized {
279 fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self;
282 impl<const N: usize> Decode<UnencodedStr<N>> for String {
283 fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
284 decoder.decode_string(&input.0, &warn)
288 #[derive(Clone, Debug)]
289 pub struct HeaderRecord {
290 pub eye_catcher: String,
291 pub weight_index: Option<usize>,
292 pub n_cases: Option<u64>,
293 pub creation: NaiveDateTime,
294 pub file_label: String,
297 impl TryDecode for HeaderRecord {
298 type Input = crate::raw::HeaderRecord;
303 warn: impl Fn(Error),
304 ) -> Result<Self, Error> {
305 let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
306 let file_label = decoder.decode_string(&input.file_label.0, &warn);
307 let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
308 let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
309 warn(Error::InvalidCreationDate {
310 creation_date: creation_date.into(),
314 let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
316 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
317 warn(Error::InvalidCreationTime {
318 creation_time: creation_time.into(),
324 weight_index: input.weight_index.map(|n| n as usize),
325 n_cases: input.n_cases.map(|n| n as u64),
326 creation: NaiveDateTime::new(creation_date, creation_time),
332 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
338 impl PartialOrd for VarWidth {
339 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
340 match (self, other) {
341 (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
342 (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
/// Upper bound on string widths. `VeryLongString::parse` rejects lengths
/// above it, and `LongStringValueLabels::decode` uses it as the maximum
/// accepted label width.
349 const MAX_STRING: u16 = 32767;
351 fn n_dict_indexes(self) -> usize {
353 VarWidth::Numeric => 1,
354 VarWidth::String(w) => div_ceil(w as usize, 8),
361 f: impl Fn(u16, u16) -> u16,
362 ) -> Option<VarWidth> {
364 (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
365 (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
366 Some(VarWidth::String(f(a, b)))
372 /// Returns the wider of `a` and `b`:
373 /// - Numerical variable widths are equally wide.
374 /// - Longer strings are wider than shorter strings.
375 /// - Numerical and string types are incomparable, so result in `None`.
376 /// - Any `None` in the input yields `None` in the output.
377 pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
378 Self::width_predicate(a, b, |a, b| a.max(b))
381 /// Returns the narrower of `a` and `b` (see [`Self::wider`]).
382 pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
383 Self::width_predicate(a, b, |a, b| a.min(b))
387 impl From<VarWidth> for VarType {
388 fn from(source: VarWidth) -> Self {
390 VarWidth::Numeric => VarType::Numeric,
391 VarWidth::String(_) => VarType::String,
396 #[derive(Clone, Debug)]
397 pub struct VariableRecord {
399 pub name: Identifier,
400 pub print_format: Spec,
401 pub write_format: Spec,
402 pub missing_values: MissingValues,
403 pub label: Option<String>,
406 fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
407 UncheckedSpec::try_from(raw)
408 .and_then(Spec::try_from)
409 .and_then(|x| x.check_width_compatibility(width))
410 .unwrap_or_else(|error| {
411 let new_format = Spec::default_for_width(width);
412 warn(new_format, error);
417 impl VariableRecord {
419 decoder: &mut Decoder,
420 input: &crate::raw::VariableRecord,
421 warn: impl Fn(Error),
422 ) -> Result<Option<VariableRecord>, Error> {
423 let width = match input.width {
424 0 => VarWidth::Numeric,
425 w @ 1..=255 => VarWidth::String(w as u16),
426 -1 => return Ok(None),
428 return Err(Error::InvalidVariableWidth {
429 offset: input.offset,
434 let name = match decoder.decode_identifier(&input.name.0, &warn) {
436 if !decoder.var_names.contains_key(&name) {
439 let new_name = decoder.generate_name();
440 warn(Error::DuplicateVariableName {
441 duplicate_name: name.clone(),
442 new_name: new_name.clone(),
448 let new_name = decoder.generate_name();
449 warn(Error::InvalidVariableName {
451 new_name: new_name.clone(),
456 let variable = Variable {
457 dict_index: decoder.n_dict_indexes,
458 short_name: name.clone(),
462 decoder.n_dict_indexes += width.n_dict_indexes();
465 .insert(name.clone(), variable.dict_index)
469 .insert(variable.dict_index, variable)
472 let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
473 warn(Error::InvalidPrintFormat {
475 variable: name.clone(),
479 let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
480 warn(Error::InvalidWriteFormat {
482 variable: name.clone(),
489 .map(|label| decoder.decode_string(&label.0, &warn));
490 Ok(Some(VariableRecord {
495 missing_values: input.missing_values.clone(),
/// Decoded counterpart of `crate::raw::DocumentRecord`, holding a list of
/// strings. Its `TryDecode` impl runs the decoder's `decode_string` over raw
/// strings; presumably those results are what this vector stores (the
/// collecting lines are not visible in this view).
501 #[derive(Clone, Debug)]
502 pub struct DocumentRecord(Vec<String>);
504 impl TryDecode for DocumentRecord {
505 type Input = crate::raw::DocumentRecord;
510 warn: impl Fn(Error),
511 ) -> Result<Self, Error> {
516 .map(|s| decoder.decode_string(&s.0, &warn))
526 const NAME: &'static str;
527 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
530 #[derive(Clone, Debug)]
531 pub struct VariableSet {
533 pub vars: Vec<String>,
537 fn parse(input: &str) -> Result<Self, Error> {
538 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
539 let vars = input.split_ascii_whitespace().map(String::from).collect();
547 trait WarnOnError<T> {
548 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
550 impl<T> WarnOnError<T> for Result<T, Error> {
551 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
553 Ok(result) => Some(result),
562 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
564 Number(Option<OrderedFloat<f64>>),
569 pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
571 raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
572 raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
577 #[derive(Clone, Debug)]
578 pub struct ValueLabel {
583 #[derive(Clone, Debug)]
584 pub struct ValueLabelRecord {
585 pub var_type: VarType,
586 pub labels: Vec<ValueLabel>,
587 pub variables: Vec<Identifier>,
590 impl ValueLabelRecord {
592 decoder: &mut Decoder,
593 raw_value_label: &crate::raw::ValueLabelRecord,
594 dict_indexes: &crate::raw::VarIndexRecord,
595 warn: impl Fn(Error),
596 ) -> Result<Option<ValueLabelRecord>, Error> {
597 let variables: Vec<&Variable> = dict_indexes
600 .filter_map(|&dict_index| {
602 .get_var_by_index(dict_index as usize)
603 .warn_on_error(&warn)
605 .filter(|&variable| match variable.width {
606 VarWidth::String(width) if width > 8 => {
607 warn(Error::InvalidLongStringValueLabel(
608 variable.short_name.clone(),
615 let mut i = variables.iter();
616 let Some(&first_var) = i.next() else {
619 let var_type: VarType = first_var.width.into();
621 let this_type: VarType = variable.width.into();
622 if var_type != this_type {
623 let (numeric_var, string_var) = match var_type {
624 VarType::Numeric => (first_var, variable),
625 VarType::String => (variable, first_var),
627 warn(Error::ValueLabelsDifferentTypes {
628 numeric_var: numeric_var.short_name.clone(),
629 string_var: string_var.short_name.clone(),
634 let labels = raw_value_label
637 .map(|(value, label)| {
638 let label = decoder.decode_string(&label.0, &warn);
639 let value = Value::decode(
640 raw::Value::from_raw(*value, var_type, decoder.endian),
643 ValueLabel { value, label }
646 let variables = variables
648 .map(|&variable| variable.short_name.clone())
650 Ok(Some(ValueLabelRecord {
/// Parsed "variable set" text record: a list of `VariableSet`s.
///
/// `parse` walks the input line by line and hands each line to
/// `VariableSet::parse`, passing the result through `warn_on_error`.
658 #[derive(Clone, Debug)]
659 pub struct VariableSetRecord(Vec<VariableSet>);
661 impl TextRecord for VariableSetRecord {
662 const NAME: &'static str = "variable set";
663 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
664 let mut sets = Vec::new();
665 for line in input.lines() {
666 if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
670 Ok(VariableSetRecord(sets))
/// Contents of the "extra product info" text record. `parse` stores the
/// input text as-is, without further interpretation.
674 #[derive(Clone, Debug)]
675 pub struct ProductInfoRecord(pub String);
677 impl TextRecord for ProductInfoRecord {
678 const NAME: &'static str = "extra product info";
679 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
680 Ok(ProductInfoRecord(input.into()))
684 #[derive(Clone, Debug)]
685 pub struct LongName {
686 pub short_name: Identifier,
687 pub long_name: Identifier,
691 fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
692 let short_name = Identifier::new(short_name, decoder.encoding)
693 .map_err(|e| Error::InvalidShortName(e))?;
695 Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?;
/// Short-name/long-name pairs parsed from a long variable name record.
///
/// `parse` splits the input on tab characters, skips empty pieces, and
/// splits each remaining piece at its first `=` into short and long name.
703 #[derive(Clone, Debug)]
704 pub struct LongNameRecord(Vec<LongName>);
706 impl LongNameRecord {
707 pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
708 let mut names = Vec::new();
709 for pair in input.split('\t').filter(|s| !s.is_empty()) {
710 if let Some((short_name, long_name)) = pair.split_once('=') {
711 if let Some(long_name) =
712 LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
714 names.push(long_name);
720 Ok(LongNameRecord(names))
724 #[derive(Clone, Debug)]
725 pub struct VeryLongString {
726 pub short_name: Identifier,
730 impl VeryLongString {
731 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
732 let Some((short_name, length)) = input.split_once('=') else {
733 return Err(Error::TBD);
735 let short_name = Identifier::new(short_name, decoder.encoding)
736 .map_err(|e| Error::InvalidLongStringName(e))?;
737 let length: u16 = length.parse().map_err(|_| Error::TBD)?;
738 if length > VarWidth::MAX_STRING {
739 return Err(Error::TBD);
742 short_name: short_name.into(),
/// Entries parsed from a very long string record. Only entries for which
/// `VeryLongString::parse(..).warn_on_error(..)` yields `Some` are stored.
748 #[derive(Clone, Debug)]
749 pub struct VeryLongStringRecord(Vec<VeryLongString>);
751 impl VeryLongStringRecord {
752 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
753 let mut very_long_strings = Vec::new();
756 .map(|s| s.trim_end_matches('\t'))
757 .filter(|s| !s.is_empty())
759 if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
760 very_long_strings.push(vls)
763 Ok(VeryLongStringRecord(very_long_strings))
767 #[derive(Clone, Debug)]
768 pub struct Attribute {
769 pub name: Identifier,
770 pub values: Vec<String>,
777 warn: &impl Fn(Error),
778 ) -> Result<(Option<Attribute>, &'a str), Error> {
779 let Some((name, mut input)) = input.split_once('(') else {
780 return Err(Error::TBD);
782 let mut values = Vec::new();
784 let Some((value, rest)) = input.split_once('\n') else {
785 return Err(Error::TBD);
787 if let Some(stripped) = value
789 .and_then(|value| value.strip_suffix('\''))
791 values.push(stripped.into());
794 values.push(value.into());
796 if let Some(rest) = rest.strip_prefix(')') {
797 let attribute = Identifier::new(name, decoder.encoding)
798 .map_err(|e| Error::InvalidAttributeName(e))
800 .map(|name| Attribute { name, values });
801 return Ok((attribute, rest));
/// List of attributes, in the order `AttributeSet::parse` collected them.
/// Entries for which `Attribute::parse` returned `None` are not included.
808 #[derive(Clone, Debug)]
809 pub struct AttributeSet(pub Vec<Attribute>);
815 sentinel: Option<char>,
816 warn: &impl Fn(Error),
817 ) -> Result<(AttributeSet, &'a str), Error> {
818 let mut attributes = Vec::new();
820 match input.chars().next() {
822 c if c == sentinel => break &input[1..],
824 let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
825 if let Some(attribute) = attribute {
826 attributes.push(attribute);
832 Ok((AttributeSet(attributes), rest))
/// Attribute set parsed from a record's text. `parse` calls
/// `AttributeSet::parse` with no sentinel character (`None`).
836 #[derive(Clone, Debug)]
837 pub struct FileAttributeRecord(AttributeSet);
839 impl FileAttributeRecord {
840 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
841 let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
842 if !rest.is_empty() {
845 Ok(FileAttributeRecord(set))
849 #[derive(Clone, Debug)]
850 pub struct VarAttributeSet {
851 pub long_var_name: Identifier,
852 pub attributes: AttributeSet,
855 impl VarAttributeSet {
859 warn: &impl Fn(Error),
860 ) -> Result<(Option<VarAttributeSet>, &'a str), Error> {
861 let Some((long_var_name, rest)) = input.split_once(':') else {
862 return Err(Error::TBD);
864 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
865 let var_attribute = Identifier::new(long_var_name, decoder.encoding)
866 .map_err(|e| Error::InvalidAttributeVariableName(e))
868 .map(|name| VarAttributeSet {
872 Ok((var_attribute, rest))
/// Per-variable attribute sets. `parse` calls `VarAttributeSet::parse`
/// repeatedly while input remains and keeps each `Some` result.
876 #[derive(Clone, Debug)]
877 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
879 impl VariableAttributeRecord {
880 pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
881 let mut var_attribute_sets = Vec::new();
882 while !input.is_empty() {
883 let Some((var_attribute, rest)) =
884 VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
888 if let Some(var_attribute) = var_attribute {
889 var_attribute_sets.push(var_attribute);
893 Ok(VariableAttributeRecord(var_attribute_sets))
897 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
904 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
911 #[derive(Clone, Debug)]
912 pub struct VarDisplay {
913 pub measure: Option<Measure>,
915 pub align: Option<Alignment>,
918 #[derive(Clone, Debug)]
919 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
921 #[derive(Clone, Debug)]
922 pub enum MultipleResponseType {
925 labels: CategoryLabels,
930 impl MultipleResponseType {
934 input: &raw::MultipleResponseType,
936 warn: &impl Fn(Error),
937 ) -> Result<Self, Error> {
938 let mr_type = match input {
939 raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
940 let value = decoder.decode_string_cow(&value.0, warn);
941 let value = match min_width {
942 VarWidth::Numeric => {
943 let number: f64 = value.trim().parse().map_err(|_| {
944 Error::InvalidMDGroupCountedValue {
945 mr_set: mr_set.clone(),
946 number: value.into(),
949 Value::Number(Some(number.into()))
951 VarWidth::String(max_width) => {
952 let value = value.trim_end_matches(' ');
953 let width = value.len();
954 if width > max_width as usize {
955 return Err(Error::TooWideMDGroupCountedValue {
956 mr_set: mr_set.clone(),
962 Value::String(value.into())
965 MultipleResponseType::MultipleDichotomy {
970 raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
976 #[derive(Clone, Debug)]
977 pub struct MultipleResponseSet {
978 pub name: Identifier,
979 pub min_width: VarWidth,
980 pub max_width: VarWidth,
982 pub mr_type: MultipleResponseType,
983 pub dict_indexes: Vec<DictIndex>,
986 impl MultipleResponseSet {
989 input: &raw::MultipleResponseSet,
990 warn: &impl Fn(Error),
991 ) -> Result<Self, Error> {
992 let mr_set_name = decoder
993 .decode_identifier(&input.name.0, warn)
994 .map_err(|error| Error::InvalidMrSetName(error))?;
996 let label = decoder.decode_string(&input.label.0, warn);
998 let mut dict_indexes = Vec::with_capacity(input.short_names.len());
999 for short_name in input.short_names.iter() {
1000 let short_name = match decoder.decode_identifier(&short_name.0, warn) {
1003 warn(Error::InvalidMrSetName(error));
1007 let Some(&dict_index) = decoder.var_names.get(&short_name) else {
1008 warn(Error::UnknownMrSetVariable {
1009 mr_set: mr_set_name.clone(),
1010 short_name: short_name.clone(),
1014 dict_indexes.push(dict_index);
1017 match dict_indexes.len() {
1018 0 => return Err(Error::EmptyMrSet(mr_set_name)),
1019 1 => return Err(Error::OneVarMrSet(mr_set_name)),
1023 let Some((Some(min_width), Some(max_width))) = dict_indexes
1025 .map(|dict_index| decoder.variables[dict_index].width)
1026 .map(|w| (Some(w), Some(w)))
1027 .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
1029 return Err(Error::MixedMrSet(mr_set_name));
1033 MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
1035 Ok(MultipleResponseSet {
/// Multiple response sets that decoded successfully. A set whose
/// `MultipleResponseSet::decode` returns an error is passed to `warn` and
/// left out of the list.
1046 #[derive(Clone, Debug)]
1047 pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
1049 impl TryDecode for MultipleResponseRecord {
1050 type Input = raw::MultipleResponseRecord;
1054 input: &Self::Input,
1055 warn: impl Fn(Error),
1056 ) -> Result<Self, Error> {
1057 let mut sets = Vec::with_capacity(input.0.len());
1058 for set in &input.0 {
1059 match MultipleResponseSet::decode(decoder, set, &warn) {
1060 Ok(set) => sets.push(set),
1061 Err(error) => warn(error),
1064 Ok(MultipleResponseRecord(sets))
1068 #[derive(Clone, Debug)]
1069 pub struct LongStringValueLabels {
1070 pub var_name: Identifier,
1071 pub width: VarWidth,
1072 pub labels: Vec<ValueLabel>,
1075 impl LongStringValueLabels {
1078 input: &raw::LongStringValueLabels,
1079 warn: &impl Fn(Error),
1080 ) -> Result<Self, Error> {
1081 let var_name = decoder
1082 .decode_identifier(&input.var_name.0, warn)
1083 .map_err(|e| Error::InvalidLongStringValueLabelName(e))?;
1086 let max_width = VarWidth::MAX_STRING;
1087 if input.width < 9 || input.width > max_width as u32 {
1088 return Err(Error::InvalidLongValueLabelWidth {
1089 name: var_name.into(),
1095 let width = input.width as u16;
1097 let mut labels = Vec::with_capacity(input.labels.len());
1098 for (value, label) in input.labels.iter() {
1099 let value = Value::String(decoder.decode_exact_length(&value.0).into());
1100 let label = decoder.decode_string(&label.0, warn);
1101 labels.push(ValueLabel { value, label });
1104 Ok(LongStringValueLabels {
1106 width: VarWidth::String(width),
/// Long string value label sets that decoded successfully. An entry whose
/// `LongStringValueLabels::decode` returns an error is passed to `warn` and
/// left out of the list.
1112 #[derive(Clone, Debug)]
1113 pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
1115 impl TryDecode for LongStringValueLabelRecord {
1116 type Input = raw::LongStringValueLabelRecord;
1120 input: &Self::Input,
1121 warn: impl Fn(Error),
1122 ) -> Result<Self, Error> {
1123 let mut labels = Vec::with_capacity(input.0.len());
1124 for label in &input.0 {
1125 match LongStringValueLabels::decode(decoder, label, &warn) {
1126 Ok(set) => labels.push(set),
1127 Err(error) => warn(error),
1130 Ok(LongStringValueLabelRecord(labels))
1136 use encoding_rs::WINDOWS_1252;
1140 let mut s = String::new();
1141 s.push(char::REPLACEMENT_CHARACTER);
1142 let encoded = WINDOWS_1252.encode(&s).0;
1143 let decoded = WINDOWS_1252.decode(&encoded[..]).0;
1144 println!("{:?}", decoded);
1149 let charset: Vec<u8> = (0..=255).collect();
1150 println!("{}", charset.len());
1151 let decoded = WINDOWS_1252.decode(&charset[..]).0;
1152 println!("{}", decoded.len());
1153 let encoded = WINDOWS_1252.encode(&decoded[..]).0;
1154 println!("{}", encoded.len());
1155 assert_eq!(&charset[..], &encoded[..]);