1 use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
5 format::{Error as FormatError, Spec, UncheckedSpec},
6 identifier::{Error as IdError, Identifier},
7 raw::{self, MissingValues, UnencodedStr, VarType}, encoding::get_encoding,
9 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
10 use encoding_rs::{DecoderResult, Encoding};
11 use num::integer::div_ceil;
12 use ordered_float::OrderedFloat;
13 use thiserror::Error as ThisError;
15 pub use crate::raw::{CategoryLabels, Compression};
17 #[derive(ThisError, Debug)]
19 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
20 InvalidVariableWidth { offset: u64, width: i32 },
22 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
23 InvalidLongMissingValueFormat,
25 #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format. Using 01 Jan 1970.")]
26 InvalidCreationDate { creation_date: String },
28 #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format. Using midnight.")]
29 InvalidCreationTime { creation_time: String },
31 #[error("{id_error} Renaming variable to {new_name}.")]
38 "Substituting {new_spec} for invalid print format on variable {variable}. {format_error}"
43 format_error: FormatError,
47 "Substituting {new_spec} for invalid write format on variable {variable}. {format_error}"
52 format_error: FormatError,
55 #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
56 DuplicateVariableName {
57 duplicate_name: Identifier,
61 #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
62 InvalidDictIndex { dict_index: usize, max_index: usize },
64 #[error("Dictionary index {0} refers to a long string continuation.")]
65 DictIndexIsContinuation(usize),
67 #[error("Variables associated with value label are not all of identical type. Variable {numeric_var} is numeric, but variable {string_var} is string.")]
68 ValueLabelsDifferentTypes {
69 numeric_var: Identifier,
70 string_var: Identifier,
74 "Value labels may not be added to long string variable {0} using record types 3 or 4."
76 InvalidLongStringValueLabel(Identifier),
78 #[error("Invalid multiple response set name. {0}")]
79 InvalidMrSetName(IdError),
81 #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
82 UnknownMrSetVariable {
84 short_name: Identifier,
87 #[error("Multiple response set {0} has no variables.")]
88 EmptyMrSet(Identifier),
90 #[error("Multiple response set {0} has only one variable.")]
91 OneVarMrSet(Identifier),
93 #[error("Multiple response set {0} contains both string and numeric variables.")]
94 MixedMrSet(Identifier),
97 "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
99 InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
101 #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
102 TooWideMDGroupCountedValue {
109 #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
110 InvalidLongValueLabelWidth {
117 #[error("Invalid attribute name. {0}")]
118 InvalidAttributeName(IdError),
120 #[error("Invalid short name in long variable name record. {0}")]
121 InvalidShortName(IdError),
123 #[error("Invalid name in long variable name record. {0}")]
124 InvalidLongName(IdError),
126 #[error("Invalid variable name in very long string record. {0}")]
127 InvalidLongStringName(IdError),
129 #[error("Invalid variable name in long string value label record. {0}")]
130 InvalidLongStringValueLabelName(IdError),
132 #[error("Invalid variable name in attribute record. {0}")]
133 InvalidAttributeVariableName(IdError),
135 #[error("Details TBD")]
139 #[derive(Clone, Debug)]
141 Header(HeaderRecord),
142 Variable(VariableRecord),
143 ValueLabel(ValueLabelRecord),
144 Document(DocumentRecord),
145 IntegerInfo(IntegerInfoRecord),
146 FloatInfo(FloatInfoRecord),
147 VariableSets(VariableSetRecord),
148 VarDisplay(VarDisplayRecord),
149 MultipleResponse(MultipleResponseRecord),
150 LongStringValueLabels(LongStringValueLabelRecord),
151 Encoding(EncodingRecord),
152 NumberOfCases(NumberOfCasesRecord),
153 ProductInfo(ProductInfoRecord),
154 LongNames(LongNameRecord),
155 VeryLongStrings(VeryLongStringRecord),
156 FileAttributes(FileAttributeRecord),
157 VariableAttributes(VariableAttributeRecord),
158 OtherExtension(Extension),
161 //ZTrailer(ZTrailer),
165 pub use crate::raw::EncodingRecord;
166 pub use crate::raw::Extension;
167 pub use crate::raw::FloatInfoRecord;
168 pub use crate::raw::IntegerInfoRecord;
169 pub use crate::raw::NumberOfCasesRecord;
171 type DictIndex = usize;
173 pub struct Variable {
174 pub dict_index: DictIndex,
175 pub short_name: Identifier,
176 pub long_name: Option<Identifier>,
181 pub compression: Option<Compression>,
183 pub encoding: &'static Encoding,
184 pub variables: HashMap<DictIndex, Variable>,
185 pub var_names: HashMap<Identifier, DictIndex>,
186 n_dict_indexes: usize,
187 n_generated_names: usize,
/// Entry point: turns the raw records in `headers` into a `Vec<Record>`, handing
/// problems to the `warn` callback as `Error` values.
///
/// Only the beginning of the body is visible in this excerpt: it scans `headers`
/// for an encoding record and for the character code of an integer-info record,
/// then resolves a text encoding from those two optional hints via `get_encoding`
/// and builds a `Decoder`.
///
/// NOTE(review): the type parameter `T` does not occur in the visible parameter or
/// return types — confirm it is actually used; otherwise callers cannot infer it.
190 pub fn decode<T>(headers: Vec<raw::Record>, warn: &impl Fn(Error)) -> Vec<Record> {
// First encoding record found among the headers, if any.
191 let encoding = headers.iter().find_map(|rec| {
192 if let raw::Record::Encoding(ref e) = rec {
// Character code from the first integer-info record found, if any.
198 let character_code = headers.iter().find_map(|rec| {
199 if let raw::Record::IntegerInfo(ref r) = rec {
200 Some(r.character_code)
// Combine both optional hints into a single encoding.
205 let encoding = get_encoding(encoding, character_code)
207 let decoder = Decoder {
/// Returns a generated identifier of the form `VARnnn` (the running counter
/// `n_generated_names`, zero-padded to at least three digits) that is not yet a
/// key in `self.var_names`.
///
/// The counter is incremented before each candidate name is built, and the
/// `assert!` guards against the counter reaching `usize::MAX`.
/// NOTE(review): the retry loop itself is not visible in this excerpt —
/// presumably a candidate that already exists leads to another iteration; confirm.
214 fn generate_name(&mut self) -> Identifier {
216 self.n_generated_names += 1;
217 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
// Only a name that is not already taken is acceptable.
219 if !self.var_names.contains_key(&name) {
222 assert!(self.n_generated_names < usize::MAX);
/// Decodes `input` from `self.encoding` (without BOM handling) and returns the
/// text as a `Cow<str>` tied to the lifetime of `input`.
///
/// The decoder's `malformed` flag is captured here; what is done with it is in
/// lines not visible in this excerpt (presumably it is reported through `warn` —
/// confirm).
225 fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
226 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
/// Convenience wrapper around `decode_string_cow` that always returns an owned
/// `String`.
232 fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String {
233 self.decode_string_cow(input, warn).into()
/// Decodes `input` with `decode_string_cow` and validates the resulting text as an
/// `Identifier` for `self.encoding`.
///
/// # Errors
/// Returns the `IdError` produced by `Identifier::new` when the decoded text is
/// not accepted as an identifier.
235 pub fn decode_identifier(
238 warn: &impl Fn(Error),
239 ) -> Result<Identifier, IdError> {
240 let s = self.decode_string_cow(input, warn);
241 Identifier::new(&s, self.encoding)
/// Looks up the variable stored under `dict_index` in `self.variables`.
///
/// # Errors
/// - `Error::InvalidDictIndex` when `dict_index` is 0 or greater than
///   `n_dict_indexes - 1`.
/// - `Error::DictIndexIsContinuation` when the index is in range but has no entry
///   in `self.variables`.
243 fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
// NOTE(review): this subtraction underflows if `n_dict_indexes` is 0 — confirm
// that the counter's initial value makes that impossible.
244 let max_index = self.n_dict_indexes - 1;
// `dict_index` is already a `usize`, so the cast below is a no-op.
245 if dict_index == 0 || dict_index as usize > max_index {
246 return Err(Error::InvalidDictIndex {
// An in-range index without a map entry is reported as a continuation slot.
251 let Some(variable) = self.variables.get(&dict_index) else {
252 return Err(Error::DictIndexIsContinuation(dict_index));
257 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
258 /// re-encoding the result back into `self.encoding` will have exactly the
259 /// same length in bytes.
261 /// TODO: decide whether decoding errors should also be reported as warnings.
262 fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
263 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
264 // This is the common case. Usually there will be no errors.
267 // Unusual case. Don't bother to optimize it much.
268 let mut decoder = self.encoding.new_decoder_without_bom_handling();
// Pre-size the output from the decoder's own worst-case estimate for this input.
269 let mut output = String::with_capacity(
271 .max_utf8_buffer_length_without_replacement(input.len())
274 let mut rest = input;
275 while !rest.is_empty() {
276 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
277 (DecoderResult::InputEmpty, _) => break,
// Presumably unreachable because `output` was sized up front — confirm.
278 (DecoderResult::OutputFull, _) => unreachable!(),
279 (DecoderResult::Malformed(a, b), consumed) => {
// Emit one '?' per byte counted by the two `Malformed` fields, then resume
// decoding right after the bytes the decoder consumed.
280 let skipped = a as usize + b as usize;
281 output.extend(repeat('?').take(skipped));
282 rest = &rest[consumed..];
// Check the documented postcondition: re-encoding yields as many bytes as `input`.
286 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
292 pub trait TryDecode: Sized {
297 warn: impl Fn(Error),
298 ) -> Result<Self, Error>;
301 pub trait Decode<Input>: Sized {
302 fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self;
/// Lets any `UnencodedStr<N>` be decoded straight into an owned `String` by
/// delegating to `Decoder::decode_string` on the wrapped bytes (`input.0`).
305 impl<const N: usize> Decode<UnencodedStr<N>> for String {
306 fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
307 decoder.decode_string(&input.0, &warn)
/// Decoded form of the raw file header record.
311 #[derive(Clone, Debug)]
312 pub struct HeaderRecord {
/// Decoded text of the raw header's `eye_catcher` field.
313 pub eye_catcher: String,
/// The raw header's `weight_index` as a `usize`, when present.
/// Presumably the dictionary index of the weighting variable — confirm.
314 pub weight_index: Option<usize>,
/// The raw header's `n_cases` as a `u64`, when present.
315 pub n_cases: Option<u64>,
/// Creation timestamp assembled from the header's separate date and time strings.
316 pub creation: NaiveDateTime,
/// Decoded text of the raw header's `file_label` field.
317 pub file_label: String,
/// Decodes a raw header record: text fields are decoded with the decoder's
/// encoding, and the creation date and time strings are parsed with chrono.
/// A date or time that fails to parse produces a warning through `warn` instead
/// of an error; the substituted fallback values are in lines not visible here.
320 impl TryDecode for HeaderRecord {
321 type Input = crate::raw::HeaderRecord;
326 warn: impl Fn(Error),
327 ) -> Result<Self, Error> {
328 let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
329 let file_label = decoder.decode_string(&input.file_label.0, &warn);
330 let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
// NOTE(review): chrono documents `%v` as `%e-%b-%Y` (e.g. " 8-Jul-2001"), whereas
// the warning text describes the expected layout as "DD MMM YY". Confirm that
// this pattern really matches the dates found in files; `"%d %b %y"` may be intended.
331 let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
332 warn(Error::InvalidCreationDate {
333 creation_date: creation_date.into(),
337 let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
339 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
340 warn(Error::InvalidCreationTime {
341 creation_time: creation_time.into(),
// Plain `as` casts from the raw integer types.
347 weight_index: input.weight_index.map(|n| n as usize),
348 n_cases: input.n_cases.map(|n| n as u64),
349 creation: NaiveDateTime::new(creation_date, creation_time),
355 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
/// Partial ordering of variable widths: two numeric widths compare equal and two
/// string widths compare by their length. The arm for a numeric/string mix is not
/// visible in this excerpt (presumably it yields `None` — confirm).
361 impl PartialOrd for VarWidth {
362 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
363 match (self, other) {
364 (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
365 (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
372 const MAX_STRING: u16 = 32767;
/// Number of dictionary index slots a variable of this width occupies: one for a
/// numeric variable, and one per started group of 8 bytes for a string variable.
374 fn n_dict_indexes(self) -> usize {
376 VarWidth::Numeric => 1,
377 VarWidth::String(w) => div_ceil(w as usize, 8),
384 f: impl Fn(u16, u16) -> u16,
385 ) -> Option<VarWidth> {
387 (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
388 (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
389 Some(VarWidth::String(f(a, b)))
395 /// Returns the wider of `a` and `b`:
396 /// - Numerical variable widths are equally wide.
397 /// - Longer strings are wider than shorter strings.
398 /// - Numerical and string types are incomparable, so they result in `None`.
399 /// - Any `None` in the input yields `None` in the output.
400 pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
// For two string widths the larger length wins.
401 Self::width_predicate(a, b, |a, b| a.max(b))
404 /// Returns the narrower of `a` and `b` (see [`Self::wider`]).
405 pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
// For two string widths the smaller length wins.
406 Self::width_predicate(a, b, |a, b| a.min(b))
/// Reduces a width to its bare type: the string length is dropped, so every
/// string width maps to `VarType::String` and numeric maps to `VarType::Numeric`.
410 impl From<VarWidth> for VarType {
411 fn from(source: VarWidth) -> Self {
413 VarWidth::Numeric => VarType::Numeric,
414 VarWidth::String(_) => VarType::String,
419 #[derive(Clone, Debug)]
420 pub struct VariableRecord {
422 pub name: Identifier,
423 pub print_format: Spec,
424 pub write_format: Spec,
425 pub missing_values: MissingValues,
426 pub label: Option<String>,
/// Converts a raw format specification into a checked `Spec` for a variable of
/// the given `width`.
///
/// The raw value is converted to an `UncheckedSpec`, then to a `Spec`, and finally
/// checked for compatibility with `width`. If any of these steps fails, the
/// default format for `width` is computed and `warn` is called with that
/// replacement and the error. The closure's result is not visible in this
/// excerpt (presumably the replacement format is returned — confirm).
429 fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
430 UncheckedSpec::try_from(raw)
431 .and_then(Spec::try_from)
432 .and_then(|x| x.check_width_compatibility(width))
433 .unwrap_or_else(|error| {
434 let new_format = Spec::default_for_width(width);
435 warn(new_format, error);
440 impl VariableRecord {
442 decoder: &mut Decoder,
443 input: &crate::raw::VariableRecord,
444 warn: impl Fn(Error),
445 ) -> Result<Option<VariableRecord>, Error> {
446 let width = match input.width {
447 0 => VarWidth::Numeric,
448 w @ 1..=255 => VarWidth::String(w as u16),
449 -1 => return Ok(None),
451 return Err(Error::InvalidVariableWidth {
452 offset: input.offset,
457 let name = match decoder.decode_identifier(&input.name.0, &warn) {
459 if !decoder.var_names.contains_key(&name) {
462 let new_name = decoder.generate_name();
463 warn(Error::DuplicateVariableName {
464 duplicate_name: name.clone(),
465 new_name: new_name.clone(),
471 let new_name = decoder.generate_name();
472 warn(Error::InvalidVariableName {
474 new_name: new_name.clone(),
479 let variable = Variable {
480 dict_index: decoder.n_dict_indexes,
481 short_name: name.clone(),
485 decoder.n_dict_indexes += width.n_dict_indexes();
488 .insert(name.clone(), variable.dict_index)
492 .insert(variable.dict_index, variable)
495 let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
496 warn(Error::InvalidPrintFormat {
498 variable: name.clone(),
502 let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
503 warn(Error::InvalidWriteFormat {
505 variable: name.clone(),
512 .map(|label| decoder.decode_string(&label.0, &warn));
513 Ok(Some(VariableRecord {
518 missing_values: input.missing_values.clone(),
524 #[derive(Clone, Debug)]
525 pub struct DocumentRecord(Vec<String>);
527 impl TryDecode for DocumentRecord {
528 type Input = crate::raw::DocumentRecord;
533 warn: impl Fn(Error),
534 ) -> Result<Self, Error> {
539 .map(|s| decoder.decode_string(&s.0, &warn))
549 const NAME: &'static str;
550 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
553 #[derive(Clone, Debug)]
554 pub struct VariableSet {
556 pub vars: Vec<String>,
/// Parses one `NAME=var var ...` line: the text before the first `=` is the set
/// name and the remainder is split on ASCII whitespace into variable names.
///
/// # Errors
/// Returns `Error::TBD` when the line contains no `=`.
560 fn parse(input: &str) -> Result<Self, Error> {
561 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
562 let vars = input.split_ascii_whitespace().map(String::from).collect();
/// Extension trait for turning a fallible value into an `Option<T>` while giving
/// a `warn` callback access to the failure.
570 trait WarnOnError<T> {
571 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
/// `Result<T, Error>` implementation: a success becomes `Some(value)`. The error
/// arm is not visible in this excerpt (presumably the error is passed to `warn`
/// and `None` is returned — confirm).
573 impl<T> WarnOnError<T> for Result<T, Error> {
574 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
576 Ok(result) => Some(result),
585 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
587 Number(Option<OrderedFloat<f64>>),
592 pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
594 raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
595 raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
600 #[derive(Clone, Debug)]
601 pub struct ValueLabel {
606 #[derive(Clone, Debug)]
607 pub struct ValueLabelRecord {
608 pub var_type: VarType,
609 pub labels: Vec<ValueLabel>,
610 pub variables: Vec<Identifier>,
613 impl ValueLabelRecord {
615 decoder: &mut Decoder,
616 raw_value_label: &crate::raw::ValueLabelRecord,
617 dict_indexes: &crate::raw::VarIndexRecord,
618 warn: impl Fn(Error),
619 ) -> Result<Option<ValueLabelRecord>, Error> {
620 let variables: Vec<&Variable> = dict_indexes
623 .filter_map(|&dict_index| {
625 .get_var_by_index(dict_index as usize)
626 .warn_on_error(&warn)
628 .filter(|&variable| match variable.width {
629 VarWidth::String(width) if width > 8 => {
630 warn(Error::InvalidLongStringValueLabel(
631 variable.short_name.clone(),
638 let mut i = variables.iter();
639 let Some(&first_var) = i.next() else {
642 let var_type: VarType = first_var.width.into();
644 let this_type: VarType = variable.width.into();
645 if var_type != this_type {
646 let (numeric_var, string_var) = match var_type {
647 VarType::Numeric => (first_var, variable),
648 VarType::String => (variable, first_var),
650 warn(Error::ValueLabelsDifferentTypes {
651 numeric_var: numeric_var.short_name.clone(),
652 string_var: string_var.short_name.clone(),
657 let labels = raw_value_label
660 .map(|(value, label)| {
661 let label = decoder.decode_string(&label.0, &warn);
662 let value = Value::decode(
663 raw::Value::from_raw(*value, var_type, decoder.endian),
666 ValueLabel { value, label }
669 let variables = variables
671 .map(|&variable| variable.short_name.clone())
673 Ok(Some(ValueLabelRecord {
681 #[derive(Clone, Debug)]
682 pub struct VariableSetRecord(Vec<VariableSet>);
684 impl TextRecord for VariableSetRecord {
685 const NAME: &'static str = "variable set";
686 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
687 let mut sets = Vec::new();
688 for line in input.lines() {
689 if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
693 Ok(VariableSetRecord(sets))
697 #[derive(Clone, Debug)]
698 pub struct ProductInfoRecord(pub String);
700 impl TextRecord for ProductInfoRecord {
701 const NAME: &'static str = "extra product info";
702 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
703 Ok(ProductInfoRecord(input.into()))
707 #[derive(Clone, Debug)]
708 pub struct LongName {
709 pub short_name: Identifier,
710 pub long_name: Identifier,
714 fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
715 let short_name = Identifier::new(short_name, decoder.encoding)
716 .map_err(|e| Error::InvalidShortName(e))?;
718 Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?;
726 #[derive(Clone, Debug)]
727 pub struct LongNameRecord(Vec<LongName>);
/// Parsing of the long variable name record text.
729 impl LongNameRecord {
/// Parses `input` as tab-separated `short=long` pairs. Empty segments are
/// skipped. A pair whose names fail validation in `LongName::new` is run through
/// `warn_on_error` and left out of the result. How a segment without `=` is
/// handled is in lines not visible in this excerpt.
///
/// The visible code always finishes with `Ok`, wrapping the accepted pairs.
730 pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
731 let mut names = Vec::new();
732 for pair in input.split('\t').filter(|s| !s.is_empty()) {
733 if let Some((short_name, long_name)) = pair.split_once('=') {
734 if let Some(long_name) =
735 LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
737 names.push(long_name);
743 Ok(LongNameRecord(names))
747 #[derive(Clone, Debug)]
748 pub struct VeryLongString {
749 pub short_name: Identifier,
/// Parsing of a single very-long-string entry.
753 impl VeryLongString {
/// Parses one `short_name=length` entry.
///
/// # Errors
/// - `Error::TBD` when there is no `=`, when the length is not a valid `u16`,
///   or when it exceeds `VarWidth::MAX_STRING`.
/// - `Error::InvalidLongStringName` when the name is not a valid identifier for
///   the decoder's encoding.
754 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
755 let Some((short_name, length)) = input.split_once('=') else {
756 return Err(Error::TBD);
758 let short_name = Identifier::new(short_name, decoder.encoding)
759 .map_err(|e| Error::InvalidLongStringName(e))?;
760 let length: u16 = length.parse().map_err(|_| Error::TBD)?;
761 if length > VarWidth::MAX_STRING {
762 return Err(Error::TBD);
765 short_name: short_name.into(),
771 #[derive(Clone, Debug)]
772 pub struct VeryLongStringRecord(Vec<VeryLongString>);
774 impl VeryLongStringRecord {
775 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
776 let mut very_long_strings = Vec::new();
779 .map(|s| s.trim_end_matches('\t'))
780 .filter(|s| !s.is_empty())
782 if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
783 very_long_strings.push(vls)
786 Ok(VeryLongStringRecord(very_long_strings))
790 #[derive(Clone, Debug)]
791 pub struct Attribute {
792 pub name: Identifier,
793 pub values: Vec<String>,
800 warn: &impl Fn(Error),
801 ) -> Result<(Option<Attribute>, &'a str), Error> {
802 let Some((name, mut input)) = input.split_once('(') else {
803 return Err(Error::TBD);
805 let mut values = Vec::new();
807 let Some((value, rest)) = input.split_once('\n') else {
808 return Err(Error::TBD);
810 if let Some(stripped) = value
812 .and_then(|value| value.strip_suffix('\''))
814 values.push(stripped.into());
817 values.push(value.into());
819 if let Some(rest) = rest.strip_prefix(')') {
820 let attribute = Identifier::new(name, decoder.encoding)
821 .map_err(|e| Error::InvalidAttributeName(e))
823 .map(|name| Attribute { name, values });
824 return Ok((attribute, rest));
831 #[derive(Clone, Debug)]
832 pub struct AttributeSet(pub Vec<Attribute>);
838 sentinel: Option<char>,
839 warn: &impl Fn(Error),
840 ) -> Result<(AttributeSet, &'a str), Error> {
841 let mut attributes = Vec::new();
843 match input.chars().next() {
845 c if c == sentinel => break &input[1..],
847 let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
848 if let Some(attribute) = attribute {
849 attributes.push(attribute);
855 Ok((AttributeSet(attributes), rest))
859 #[derive(Clone, Debug)]
860 pub struct FileAttributeRecord(AttributeSet);
862 impl FileAttributeRecord {
863 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
864 let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
865 if !rest.is_empty() {
868 Ok(FileAttributeRecord(set))
872 #[derive(Clone, Debug)]
873 pub struct VarAttributeSet {
874 pub long_var_name: Identifier,
875 pub attributes: AttributeSet,
878 impl VarAttributeSet {
882 warn: &impl Fn(Error),
883 ) -> Result<(Option<VarAttributeSet>, &'a str), Error> {
884 let Some((long_var_name, rest)) = input.split_once(':') else {
885 return Err(Error::TBD);
887 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
888 let var_attribute = Identifier::new(long_var_name, decoder.encoding)
889 .map_err(|e| Error::InvalidAttributeVariableName(e))
891 .map(|name| VarAttributeSet {
895 Ok((var_attribute, rest))
899 #[derive(Clone, Debug)]
900 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
902 impl VariableAttributeRecord {
903 pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
904 let mut var_attribute_sets = Vec::new();
905 while !input.is_empty() {
906 let Some((var_attribute, rest)) =
907 VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
911 if let Some(var_attribute) = var_attribute {
912 var_attribute_sets.push(var_attribute);
916 Ok(VariableAttributeRecord(var_attribute_sets))
920 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
927 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
934 #[derive(Clone, Debug)]
935 pub struct VarDisplay {
936 pub measure: Option<Measure>,
938 pub align: Option<Alignment>,
941 #[derive(Clone, Debug)]
942 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
944 #[derive(Clone, Debug)]
945 pub enum MultipleResponseType {
948 labels: CategoryLabels,
/// Decoding of the raw multiple response set type. For a multiple-dichotomy set
/// the counted value is decoded and interpreted according to `min_width`: as a
/// number when the variables are numeric, or as a right-trimmed string that must
/// fit within the given string width. A multiple-category set carries no extra
/// data and maps across directly.
953 impl MultipleResponseType {
957 input: &raw::MultipleResponseType,
959 warn: &impl Fn(Error),
960 ) -> Result<Self, Error> {
961 let mr_type = match input {
962 raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
963 let value = decoder.decode_string_cow(&value.0, warn);
964 let value = match min_width {
965 VarWidth::Numeric => {
// Surrounding whitespace is ignored; anything that is not an `f64` is an error.
966 let number: f64 = value.trim().parse().map_err(|_| {
967 Error::InvalidMDGroupCountedValue {
968 mr_set: mr_set.clone(),
969 number: value.into(),
972 Value::Number(Some(number.into()))
974 VarWidth::String(max_width) => {
// Only trailing spaces are stripped before measuring.
975 let value = value.trim_end_matches(' ');
// NOTE(review): `len()` is the UTF-8 byte length of the decoded text, which may
// differ from the width in the file's own encoding — confirm this is intended.
976 let width = value.len();
977 if width > max_width as usize {
978 return Err(Error::TooWideMDGroupCountedValue {
979 mr_set: mr_set.clone(),
985 Value::String(value.into())
988 MultipleResponseType::MultipleDichotomy {
993 raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
999 #[derive(Clone, Debug)]
1000 pub struct MultipleResponseSet {
1001 pub name: Identifier,
1002 pub min_width: VarWidth,
1003 pub max_width: VarWidth,
1005 pub mr_type: MultipleResponseType,
1006 pub dict_indexes: Vec<DictIndex>,
/// Decoding of one raw multiple response set.
///
/// The set name must be a valid identifier (`Error::InvalidMrSetName` otherwise).
/// Each member short name is decoded and looked up in `decoder.var_names`; a name
/// that is invalid or unknown produces a warning (what happens to it afterwards
/// is in lines not visible here). A set that ends up with zero or one variable is
/// rejected with `Error::EmptyMrSet` / `Error::OneVarMrSet`, and a set mixing
/// numeric and string variables is rejected with `Error::MixedMrSet`.
1009 impl MultipleResponseSet {
1012 input: &raw::MultipleResponseSet,
1013 warn: &impl Fn(Error),
1014 ) -> Result<Self, Error> {
1015 let mr_set_name = decoder
1016 .decode_identifier(&input.name.0, warn)
1017 .map_err(|error| Error::InvalidMrSetName(error))?;
1019 let label = decoder.decode_string(&input.label.0, warn);
1021 let mut dict_indexes = Vec::with_capacity(input.short_names.len());
1022 for short_name in input.short_names.iter() {
1023 let short_name = match decoder.decode_identifier(&short_name.0, warn) {
// NOTE(review): an invalid *member* name is reported with the set-name error
// variant — confirm that message is the intended one.
1026 warn(Error::InvalidMrSetName(error));
1030 let Some(&dict_index) = decoder.var_names.get(&short_name) else {
1031 warn(Error::UnknownMrSetVariable {
1032 mr_set: mr_set_name.clone(),
1033 short_name: short_name.clone(),
1037 dict_indexes.push(dict_index);
1040 match dict_indexes.len() {
1041 0 => return Err(Error::EmptyMrSet(mr_set_name)),
1042 1 => return Err(Error::OneVarMrSet(mr_set_name)),
// Fold all member widths into (narrowest, widest); either side becomes `None`
// as soon as numeric and string widths are combined.
1046 let Some((Some(min_width), Some(max_width))) = dict_indexes
1048 .map(|dict_index| decoder.variables[dict_index].width)
1049 .map(|w| (Some(w), Some(w)))
1050 .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
1052 return Err(Error::MixedMrSet(mr_set_name));
// The counted value is validated against the narrowest member width.
1056 MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
1058 Ok(MultipleResponseSet {
1069 #[derive(Clone, Debug)]
1070 pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
1072 impl TryDecode for MultipleResponseRecord {
1073 type Input = raw::MultipleResponseRecord;
1077 input: &Self::Input,
1078 warn: impl Fn(Error),
1079 ) -> Result<Self, Error> {
1080 let mut sets = Vec::with_capacity(input.0.len());
1081 for set in &input.0 {
1082 match MultipleResponseSet::decode(decoder, set, &warn) {
1083 Ok(set) => sets.push(set),
1084 Err(error) => warn(error),
1087 Ok(MultipleResponseRecord(sets))
1091 #[derive(Clone, Debug)]
1092 pub struct LongStringValueLabels {
1093 pub var_name: Identifier,
1094 pub width: VarWidth,
1095 pub labels: Vec<ValueLabel>,
/// Decoding of the value labels attached to one long string variable.
///
/// The variable name must be a valid identifier
/// (`Error::InvalidLongStringValueLabelName` otherwise) and the declared width
/// must lie between 9 and `VarWidth::MAX_STRING` inclusive
/// (`Error::InvalidLongValueLabelWidth` otherwise). Values are decoded with
/// `decode_exact_length`, labels with `decode_string`.
1098 impl LongStringValueLabels {
1101 input: &raw::LongStringValueLabels,
1102 warn: &impl Fn(Error),
1103 ) -> Result<Self, Error> {
1104 let var_name = decoder
1105 .decode_identifier(&input.var_name.0, warn)
1106 .map_err(|e| Error::InvalidLongStringValueLabelName(e))?;
1109 let max_width = VarWidth::MAX_STRING;
1110 if input.width < 9 || input.width > max_width as u32 {
1111 return Err(Error::InvalidLongValueLabelWidth {
1112 name: var_name.into(),
// The range check above guarantees the value fits in a `u16`.
1118 let width = input.width as u16;
1120 let mut labels = Vec::with_capacity(input.labels.len());
1121 for (value, label) in input.labels.iter() {
1122 let value = Value::String(decoder.decode_exact_length(&value.0).into());
1123 let label = decoder.decode_string(&label.0, warn);
1124 labels.push(ValueLabel { value, label });
1127 Ok(LongStringValueLabels {
1129 width: VarWidth::String(width),
1135 #[derive(Clone, Debug)]
1136 pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
1138 impl TryDecode for LongStringValueLabelRecord {
1139 type Input = raw::LongStringValueLabelRecord;
1143 input: &Self::Input,
1144 warn: impl Fn(Error),
1145 ) -> Result<Self, Error> {
1146 let mut labels = Vec::with_capacity(input.0.len());
1147 for label in &input.0 {
1148 match LongStringValueLabels::decode(decoder, label, &warn) {
1149 Ok(set) => labels.push(set),
1150 Err(error) => warn(error),
1153 Ok(LongStringValueLabelRecord(labels))
1159 use encoding_rs::WINDOWS_1252;
1163 let mut s = String::new();
1164 s.push(char::REPLACEMENT_CHARACTER);
1165 let encoded = WINDOWS_1252.encode(&s).0;
1166 let decoded = WINDOWS_1252.decode(&encoded[..]).0;
1167 println!("{:?}", decoded);
1172 let charset: Vec<u8> = (0..=255).collect();
1173 println!("{}", charset.len());
1174 let decoded = WINDOWS_1252.decode(&charset[..]).0;
1175 println!("{}", decoded.len());
1176 let encoded = WINDOWS_1252.encode(&decoded[..]).0;
1177 println!("{}", encoded.len());
1178 assert_eq!(&charset[..], &encoded[..]);