1 use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
5 format::{Error as FormatError, Spec, UncheckedSpec},
6 identifier::{Error as IdError, Identifier},
7 raw::{self, MissingValues, UnencodedStr, VarType},
9 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
10 use encoding_rs::{DecoderResult, Encoding};
11 use num::integer::div_ceil;
12 use ordered_float::OrderedFloat;
13 use thiserror::Error as ThisError;
15 pub use crate::raw::{CategoryLabels, Compression};
17 #[derive(ThisError, Debug)]
19 #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
20 InvalidVariableWidth { offset: u64, width: i32 },
22 #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
23 InvalidLongMissingValueFormat,
25 #[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format. Using 01 Jan 1970.")]
26 InvalidCreationDate { creation_date: String },
28 #[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format. Using midnight.")]
29 InvalidCreationTime { creation_time: String },
31 #[error("{id_error} Renaming variable to {new_name}.")]
38 "Substituting {new_spec} for invalid print format on variable {variable}. {format_error}"
43 format_error: FormatError,
47 "Substituting {new_spec} for invalid write format on variable {variable}. {format_error}"
52 format_error: FormatError,
55 #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
56 DuplicateVariableName {
57 duplicate_name: Identifier,
61 #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
62 InvalidDictIndex { dict_index: usize, max_index: usize },
64 #[error("Dictionary index {0} refers to a long string continuation.")]
65 DictIndexIsContinuation(usize),
67 #[error("Variables associated with value label are not all of identical type. Variable {numeric_var} is numeric, but variable {string_var} is string.")]
68 ValueLabelsDifferentTypes {
69 numeric_var: Identifier,
70 string_var: Identifier,
74 "Value labels may not be added to long string variable {0} using record types 3 or 4."
76 InvalidLongStringValueLabel(Identifier),
78 #[error("Invalid multiple response set name. {0}")]
79 InvalidMrSetName(IdError),
81 #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
82 UnknownMrSetVariable {
84 short_name: Identifier,
87 #[error("Multiple response set {0} has no variables.")]
88 EmptyMrSet(Identifier),
90 #[error("Multiple response set {0} has only one variable.")]
91 OneVarMrSet(Identifier),
93 #[error("Multiple response set {0} contains both string and numeric variables.")]
94 MixedMrSet(Identifier),
97 "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
99 InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
101 #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
102 TooWideMDGroupCountedValue {
109 #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
110 InvalidLongValueLabelWidth {
117 #[error("Invalid attribute name. {0}")]
118 InvalidAttributeName(IdError),
120 #[error("Invalid short name in long variable name record. {0}")]
121 InvalidShortName(IdError),
123 #[error("Invalid name in long variable name record. {0}")]
124 InvalidLongName(IdError),
126 #[error("Invalid variable name in very long string record. {0}")]
127 InvalidLongStringName(IdError),
129 #[error("Invalid variable name in long string value label record. {0}")]
130 InvalidLongStringValueLabelName(IdError),
132 #[error("Details TBD")]
136 #[derive(Clone, Debug)]
138 Header(HeaderRecord),
139 Variable(VariableRecord),
140 ValueLabel(ValueLabelRecord),
141 Document(DocumentRecord),
142 IntegerInfo(IntegerInfoRecord),
143 FloatInfo(FloatInfoRecord),
144 VariableSets(VariableSetRecord),
145 VarDisplay(VarDisplayRecord),
146 MultipleResponse(MultipleResponseRecord),
147 LongStringValueLabels(LongStringValueLabelRecord),
148 Encoding(EncodingRecord),
149 NumberOfCases(NumberOfCasesRecord),
150 ProductInfo(ProductInfoRecord),
151 LongNames(LongNameRecord),
152 VeryLongStrings(VeryLongStringRecord),
153 FileAttributes(FileAttributeRecord),
154 //VariableAttributes(UnencodedString),
155 //OtherExtension(Extension),
158 //ZTrailer(ZTrailer),
162 pub use crate::raw::EncodingRecord;
163 pub use crate::raw::FloatInfoRecord;
164 pub use crate::raw::IntegerInfoRecord;
165 pub use crate::raw::NumberOfCasesRecord;
/// Index of a variable within the dictionary. Used for `Variable::dict_index`
/// and as the key type of the `variables` map.
167 type DictIndex = usize;
169 pub struct Variable {
170 pub dict_index: DictIndex,
171 pub short_name: Identifier,
172 pub long_name: Option<Identifier>,
177 pub compression: Option<Compression>,
179 pub encoding: &'static Encoding,
180 pub variables: HashMap<DictIndex, Variable>,
181 pub var_names: HashMap<Identifier, DictIndex>,
182 n_dict_indexes: usize,
183 n_generated_names: usize,
/// Builds a generated variable name of the form `VAR` followed by the
/// counter zero-padded to at least three digits, and checks it against
/// `self.var_names` for collisions.
///
/// NOTE(review): the looping and return statements are not visible in this
/// excerpt; presumably candidates are tried until an unused one is found.
187 fn generate_name(&mut self) -> Identifier {
// Advance the counter before formatting the next candidate.
189 self.n_generated_names += 1;
190 let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
192 if !self.var_names.contains_key(&name) {
// The counter must stay below `usize::MAX`.
195 assert!(self.n_generated_names < usize::MAX);
/// Decodes `input` from `self.encoding` without BOM handling, returning a
/// `Cow` tied to the lifetime of `input`.
///
/// NOTE(review): the `malformed` flag is presumably reported through `warn`;
/// the code doing so is not visible in this excerpt.
198 fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
199 let (output, malformed) = self.encoding.decode_without_bom_handling(input);
/// Owned-`String` convenience wrapper around [`Self::decode_string_cow`].
205 fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String {
206 self.decode_string_cow(input, warn).into()
/// Decodes `input` with [`Self::decode_string_cow`] and then builds an
/// [`Identifier`] from the decoded text and `self.encoding`.
///
/// # Errors
///
/// Returns whatever [`IdError`] `Identifier::new` reports.
208 pub fn decode_identifier(
211 warn: &impl Fn(Error),
212 ) -> Result<Identifier, IdError> {
213 let s = self.decode_string_cow(input, warn);
214 Identifier::new(&s, self.encoding)
/// Looks up the variable registered under `dict_index`.
///
/// # Errors
///
/// - [`Error::InvalidDictIndex`] when `dict_index` is 0 or greater than
///   `self.n_dict_indexes - 1`.
/// - [`Error::DictIndexIsContinuation`] when the index is in range but no
///   variable is stored under it in `self.variables`.
216 fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
// NOTE(review): this subtraction underflows if `n_dict_indexes` is 0 —
// confirm the counter is initialized to at least 1.
217 let max_index = self.n_dict_indexes - 1;
// NOTE(review): `dict_index` is already `usize`, so the cast is a no-op.
218 if dict_index == 0 || dict_index as usize > max_index {
219 return Err(Error::InvalidDictIndex {
224 let Some(variable) = self.variables.get(&dict_index) else {
225 return Err(Error::DictIndexIsContinuation(dict_index));
230 /// Returns `input` decoded from `self.encoding` into UTF-8 such that
231 /// re-encoding the result back into `self.encoding` will have exactly the
232 /// same length in bytes.
234 /// XXX warn about errors?
235 fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
// Fast path: a single decode pass that reports no malformed input.
236 if let (s, false) = self.encoding.decode_without_bom_handling(input) {
237 // This is the common case. Usually there will be no errors.
240 // Unusual case. Don't bother to optimize it much.
241 let mut decoder = self.encoding.new_decoder_without_bom_handling();
// Capacity comes from the decoder's own worst-case estimate for this input
// length, which is why `OutputFull` below is treated as unreachable.
242 let mut output = String::with_capacity(
244 .max_utf8_buffer_length_without_replacement(input.len())
247 let mut rest = input;
248 while !rest.is_empty() {
249 match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
250 (DecoderResult::InputEmpty, _) => break,
251 (DecoderResult::OutputFull, _) => unreachable!(),
252 (DecoderResult::Malformed(a, b), consumed) => {
// Emit one `?` for each byte counted by the two values in `Malformed`, then
// resume after the bytes the decoder consumed.
253 let skipped = a as usize + b as usize;
254 output.extend(repeat('?').take(skipped));
255 rest = &rest[consumed..];
// Checks the documented length guarantee: re-encoding must give back exactly
// `input.len()` bytes.
259 assert_eq!(self.encoding.encode(&output).0.len(), input.len());
/// Fallible decoding of a record. The decoding method takes a `warn`
/// callback and returns `Result<Self, Error>`.
///
/// NOTE(review): the associated `Input` type and the method name are not
/// visible in this excerpt; see the implementations for their shape.
265 pub trait TryDecode: Sized {
270 warn: impl Fn(Error),
271 ) -> Result<Self, Error>;
/// Infallible decoding of an `Input` value into `Self`, using the given
/// [`Decoder`]. `warn` is available for reporting problems.
274 pub trait Decode<Input>: Sized {
275 fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self;
/// Decodes a fixed-size `UnencodedStr<N>` into an owned `String` by passing
/// its bytes to `Decoder::decode_string`.
278 impl<const N: usize> Decode<UnencodedStr<N>> for String {
279 fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
280 decoder.decode_string(&input.0, &warn)
/// Decoded form of the raw file header record.
284 #[derive(Clone, Debug)]
285 pub struct HeaderRecord {
// Decoded text of the raw header's `eye_catcher` field.
286 pub eye_catcher: String,
// Raw `weight_index` widened to `usize`, if present.
287 pub weight_index: Option<usize>,
// Raw `n_cases` converted to `u64`, if present.
288 pub n_cases: Option<u64>,
// Creation date and time parsed from the raw header and combined.
289 pub creation: NaiveDateTime,
// Decoded text of the raw header's `file_label` field.
290 pub file_label: String,
/// Decodes a raw header: text fields are decoded with the decoder's
/// encoding, and the creation date and time are parsed with chrono. A date or
/// time that fails to parse is reported through `warn` and not treated as a
/// hard error.
293 impl TryDecode for HeaderRecord {
294 type Input = crate::raw::HeaderRecord;
299 warn: impl Fn(Error),
300 ) -> Result<Self, Error> {
301 let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
302 let file_label = decoder.decode_string(&input.file_label.0, &warn);
303 let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
// NOTE(review): chrono documents `%v` as `%e-%b-%Y` (e.g. " 8-Jul-2001"),
// while the warning text describes "DD MMM YY" — confirm the format string
// matches what the file actually stores.
304 let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
305 warn(Error::InvalidCreationDate {
306 creation_date: creation_date.into(),
310 let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
312 NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
313 warn(Error::InvalidCreationTime {
314 creation_time: creation_time.into(),
320 weight_index: input.weight_index.map(|n| n as usize),
321 n_cases: input.n_cases.map(|n| n as u64),
322 creation: NaiveDateTime::new(creation_date, creation_time),
328 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
334 impl PartialOrd for VarWidth {
335 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
336 match (self, other) {
337 (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
338 (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
/// Largest string width accepted by the width checks in this file, which
/// compare parsed widths against this constant.
345 const MAX_STRING: u16 = 32767;
/// Number of dictionary indexes a variable of this width occupies: 1 for a
/// numeric variable, and the string width divided by 8, rounded up, for a
/// string.
347 fn n_dict_indexes(self) -> usize {
349 VarWidth::Numeric => 1,
350 VarWidth::String(w) => div_ceil(w as usize, 8),
357 f: impl Fn(u16, u16) -> u16,
358 ) -> Option<VarWidth> {
360 (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
361 (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
362 Some(VarWidth::String(f(a, b)))
368 /// Returns the wider of `a` and `b`:
369 /// - Numerical variable widths are equally wide.
370 /// - Longer strings are wider than shorter strings.
371 /// - Numerical and string types are incomparable, so result in `None`.
372 /// - Any `None` in the input yields `None` in the output.
373 pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
374 Self::width_predicate(a, b, |a, b| a.max(b))
377 /// Returns the narrower of `a` and `b` (see [`Self::wider`]).
378 pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
379 Self::width_predicate(a, b, |a, b| a.min(b))
/// Drops the width and keeps only the kind: `Numeric` maps to
/// `VarType::Numeric`, and any `String(_)` maps to `VarType::String`.
383 impl From<VarWidth> for VarType {
384 fn from(source: VarWidth) -> Self {
386 VarWidth::Numeric => VarType::Numeric,
387 VarWidth::String(_) => VarType::String,
392 #[derive(Clone, Debug)]
393 pub struct VariableRecord {
395 pub name: Identifier,
396 pub print_format: Spec,
397 pub write_format: Spec,
398 pub missing_values: MissingValues,
399 pub label: Option<String>,
/// Converts a raw format spec into a checked [`Spec`] that is compatible
/// with `width`.
///
/// The raw value goes through `UncheckedSpec::try_from`, `Spec::try_from`
/// and `check_width_compatibility` in turn. If any step fails, `warn` is
/// called with the default spec for `width` and the error.
///
/// NOTE(review): the fallback's return expression is not visible in this
/// excerpt; presumably the default spec is returned.
402 fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
403 UncheckedSpec::try_from(raw)
404 .and_then(Spec::try_from)
405 .and_then(|x| x.check_width_compatibility(width))
406 .unwrap_or_else(|error| {
407 let new_format = Spec::default_for_width(width);
408 warn(new_format, error);
/// Decoding of a raw variable record. The steps visible here are: classify
/// the raw width, decode the name (replacing it with a generated one when it
/// is invalid or a duplicate), register the variable with the decoder, and
/// decode the print format, write format and label.
///
/// Returns `Ok(None)` for width -1 and `Error::InvalidVariableWidth` for a
/// width outside the accepted values.
413 impl VariableRecord {
415 decoder: &mut Decoder,
416 input: &crate::raw::VariableRecord,
417 warn: impl Fn(Error),
418 ) -> Result<Option<VariableRecord>, Error> {
// 0 is numeric, 1..=255 is a string of that width, and -1 yields no record.
419 let width = match input.width {
420 0 => VarWidth::Numeric,
421 w @ 1..=255 => VarWidth::String(w as u16),
422 -1 => return Ok(None),
424 return Err(Error::InvalidVariableWidth {
425 offset: input.offset,
430 let name = match decoder.decode_identifier(&input.name.0, &warn) {
// A name already present in `var_names` is a duplicate and gets replaced.
432 if !decoder.var_names.contains_key(&name) {
435 let new_name = decoder.generate_name();
436 warn(Error::DuplicateVariableName {
437 duplicate_name: name.clone(),
438 new_name: new_name.clone(),
444 let new_name = decoder.generate_name();
445 warn(Error::InvalidVariableName {
447 new_name: new_name.clone(),
// The variable takes the current index, and the counter then advances by the
// number of indexes this width occupies.
452 let variable = Variable {
453 dict_index: decoder.n_dict_indexes,
454 short_name: name.clone(),
458 decoder.n_dict_indexes += width.n_dict_indexes();
461 .insert(name.clone(), variable.dict_index)
465 .insert(variable.dict_index, variable)
// Invalid formats are reported through `warn` along with the substitute spec.
468 let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
469 warn(Error::InvalidPrintFormat {
471 variable: name.clone(),
475 let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
476 warn(Error::InvalidWriteFormat {
478 variable: name.clone(),
485 .map(|label| decoder.decode_string(&label.0, &warn));
486 Ok(Some(VariableRecord {
491 missing_values: input.missing_values.clone(),
/// Decoded document record, stored as a list of decoded strings.
497 #[derive(Clone, Debug)]
498 pub struct DocumentRecord(Vec<String>);
500 impl TryDecode for DocumentRecord {
501 type Input = crate::raw::DocumentRecord;
506 warn: impl Fn(Error),
507 ) -> Result<Self, Error> {
512 .map(|s| decoder.decode_string(&s.0, &warn))
522 const NAME: &'static str;
523 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
526 #[derive(Clone, Debug)]
527 pub struct VariableSet {
529 pub vars: Vec<String>,
/// Parses one `name=var1 var2 ...` line: the text before the first `=` is
/// the set name, and the remainder is split on ASCII whitespace into
/// variable names.
///
/// # Errors
///
/// Returns `Error::TBD` when the line contains no `=`.
533 fn parse(input: &str) -> Result<Self, Error> {
534 let (name, input) = input.split_once('=').ok_or(Error::TBD)?;
535 let vars = input.split_ascii_whitespace().map(String::from).collect();
/// Extension for turning a fallible value into an `Option`, given a `warn`
/// callback that can receive the error.
543 trait WarnOnError<T> {
544 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
/// `Ok(value)` becomes `Some(value)`.
///
/// NOTE(review): the `Err` arm is not visible in this excerpt; presumably it
/// passes the error to `warn` and yields `None`.
546 impl<T> WarnOnError<T> for Result<T, Error> {
547 fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
549 Ok(result) => Some(result),
558 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
560 Number(Option<OrderedFloat<f64>>),
/// Converts a raw value into a decoded [`Value`]: numbers are wrapped via
/// `into()`, and strings are decoded with `Decoder::decode_exact_length`.
565 pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
567 raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
568 raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
573 #[derive(Clone, Debug)]
574 pub struct ValueLabel {
579 #[derive(Clone, Debug)]
580 pub struct ValueLabelRecord {
581 pub var_type: VarType,
582 pub labels: Vec<ValueLabel>,
583 pub variables: Vec<Identifier>,
/// Decoding of a raw value-label record together with the variable-index
/// record naming the variables it applies to.
///
/// The visible steps are: resolve each index to a variable (failures become
/// warnings), warn about string variables wider than 8, warn when the
/// variables do not all share the first variable's type, then decode every
/// label and value.
586 impl ValueLabelRecord {
588 decoder: &mut Decoder,
589 raw_value_label: &crate::raw::ValueLabelRecord,
590 dict_indexes: &crate::raw::VarIndexRecord,
591 warn: impl Fn(Error),
592 ) -> Result<Option<ValueLabelRecord>, Error> {
593 let variables: Vec<&Variable> = dict_indexes
// Lookup failures are turned into warnings by `warn_on_error`.
596 .filter_map(|&dict_index| {
598 .get_var_by_index(dict_index as usize)
599 .warn_on_error(&warn)
// String variables wider than 8 trigger InvalidLongStringValueLabel.
601 .filter(|&variable| match variable.width {
602 VarWidth::String(width) if width > 8 => {
603 warn(Error::InvalidLongStringValueLabel(
604 variable.short_name.clone(),
611 let mut i = variables.iter();
612 let Some(&first_var) = i.next() else {
// The first variable's type is the reference type for the whole record.
615 let var_type: VarType = first_var.width.into();
617 let this_type: VarType = variable.width.into();
618 if var_type != this_type {
// Order the pair so the warning names the numeric variable first.
619 let (numeric_var, string_var) = match var_type {
620 VarType::Numeric => (first_var, variable),
621 VarType::String => (variable, first_var),
623 warn(Error::ValueLabelsDifferentTypes {
624 numeric_var: numeric_var.short_name.clone(),
625 string_var: string_var.short_name.clone(),
630 let labels = raw_value_label
633 .map(|(value, label)| {
634 let label = decoder.decode_string(&label.0, &warn);
635 let value = Value::decode(
636 raw::Value::from_raw(*value, var_type, decoder.endian),
639 ValueLabel { value, label }
642 let variables = variables
644 .map(|&variable| variable.short_name.clone())
646 Ok(Some(ValueLabelRecord {
/// Parsed "variable set" text record, holding a list of [`VariableSet`]s.
654 #[derive(Clone, Debug)]
655 pub struct VariableSetRecord(Vec<VariableSet>);
/// Parses the record line by line. Each line goes through
/// `VariableSet::parse`, and a line that fails to parse is handed to
/// `warn_on_error` rather than failing the whole record.
657 impl TextRecord for VariableSetRecord {
658 const NAME: &'static str = "variable set";
659 fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
660 let mut sets = Vec::new();
661 for line in input.lines() {
662 if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
666 Ok(VariableSetRecord(sets))
/// "Extra product info" text record, holding the record text as a `String`.
670 #[derive(Clone, Debug)]
671 pub struct ProductInfoRecord(pub String);
/// Stores the input text unchanged. `_warn` is unused and parsing always
/// succeeds.
673 impl TextRecord for ProductInfoRecord {
674 const NAME: &'static str = "extra product info";
675 fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
676 Ok(ProductInfoRecord(input.into()))
680 #[derive(Clone, Debug)]
681 pub struct LongName {
682 pub short_name: Identifier,
683 pub long_name: Identifier,
/// Validates both names as identifiers in the decoder's encoding.
///
/// # Errors
///
/// Returns `Error::InvalidShortName` or `Error::InvalidLongName` when the
/// corresponding name is rejected by `Identifier::new`.
687 fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
688 let short_name = Identifier::new(short_name, decoder.encoding)
689 .map_err(|e| Error::InvalidShortName(e))?;
691 Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?;
/// Parsed long variable name record, holding a list of [`LongName`] pairs.
699 #[derive(Clone, Debug)]
700 pub struct LongNameRecord(Vec<LongName>);
/// Parses tab-separated `short=long` pairs. Empty entries are skipped, and a
/// pair rejected by `LongName::new` is handed to `warn_on_error` instead of
/// failing the record.
702 impl LongNameRecord {
703 pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
704 let mut names = Vec::new();
705 for pair in input.split('\t').filter(|s| !s.is_empty()) {
706 if let Some((short_name, long_name)) = pair.split_once('=') {
707 if let Some(long_name) =
708 LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
710 names.push(long_name);
716 Ok(LongNameRecord(names))
720 #[derive(Clone, Debug)]
721 pub struct VeryLongString {
722 pub short_name: Identifier,
/// Parsing of a single `short_name=length` entry.
///
/// # Errors
///
/// - `Error::TBD` when there is no `=`, when the length is not a valid
///   `u16`, or when it exceeds `VarWidth::MAX_STRING`.
/// - `Error::InvalidLongStringName` when the name is not a valid identifier.
726 impl VeryLongString {
727 fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
728 let Some((short_name, length)) = input.split_once('=') else {
729 return Err(Error::TBD);
731 let short_name = Identifier::new(short_name, decoder.encoding)
732 .map_err(|e| Error::InvalidLongStringName(e))?;
733 let length: u16 = length.parse().map_err(|_| Error::TBD)?;
734 if length > VarWidth::MAX_STRING {
735 return Err(Error::TBD);
738 short_name: short_name.into(),
/// Parsed very long string record, holding a list of [`VeryLongString`]
/// entries.
744 #[derive(Clone, Debug)]
745 pub struct VeryLongStringRecord(Vec<VeryLongString>);
/// Parses each non-empty entry, with trailing tabs trimmed, through
/// `VeryLongString::parse`. An entry that fails is handed to `warn_on_error`
/// instead of failing the record.
///
/// NOTE(review): the expression that splits `input` into entries is not
/// visible in this excerpt.
747 impl VeryLongStringRecord {
748 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
749 let mut very_long_strings = Vec::new();
752 .map(|s| s.trim_end_matches('\t'))
753 .filter(|s| !s.is_empty())
755 if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
756 very_long_strings.push(vls)
759 Ok(VeryLongStringRecord(very_long_strings))
763 #[derive(Clone, Debug)]
764 pub struct Attribute {
765 pub name: Identifier,
766 pub values: Vec<String>,
773 warn: &impl Fn(Error),
774 ) -> Result<(Option<Attribute>, &'a str), Error> {
775 let Some((name, mut input)) = input.split_once('(') else {
776 return Err(Error::TBD);
778 let mut values = Vec::new();
780 let Some((value, rest)) = input.split_once('\n') else {
781 return Err(Error::TBD);
783 if let Some(stripped) = value
785 .and_then(|value| value.strip_suffix('\''))
787 values.push(stripped.into());
790 values.push(value.into());
792 if let Some(rest) = rest.strip_prefix(')') {
793 let attribute = Identifier::new(name, decoder.encoding)
794 .map_err(|e| Error::InvalidAttributeName(e))
796 .map(|name| Attribute { name, values });
797 return Ok((attribute, rest));
/// A list of parsed [`Attribute`]s.
804 #[derive(Clone, Debug)]
805 pub struct AttributeSet(pub Vec<Attribute>);
811 sentinel: Option<char>,
812 warn: &impl Fn(Error),
813 ) -> Result<(AttributeSet, &'a str), Error> {
814 let mut attributes = Vec::new();
816 match input.chars().next() {
818 c if c == sentinel => break &input[1..],
820 let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
821 if let Some(attribute) = attribute {
822 attributes.push(attribute);
828 Ok((AttributeSet(attributes), rest))
/// Parsed file attribute record, wrapping one [`AttributeSet`].
832 #[derive(Clone, Debug)]
833 pub struct FileAttributeRecord(AttributeSet);
/// Parses the whole input as a single attribute set with no sentinel
/// character.
///
/// NOTE(review): the handling of non-empty leftover input is not visible in
/// this excerpt.
835 impl FileAttributeRecord {
836 pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
837 let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
838 if !rest.is_empty() {
841 Ok(FileAttributeRecord(set))
845 pub struct VarAttributeSet {
846 pub long_var_name: String,
847 pub attributes: AttributeSet,
850 impl VarAttributeSet {
854 warn: &impl Fn(Error),
855 ) -> Result<(VarAttributeSet, &'a str), Error> {
856 let Some((long_var_name, rest)) = input.split_once(':') else {
857 return Err(Error::TBD);
859 let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
862 long_var_name: long_var_name.into(),
/// Parsed variable attribute record, holding a list of [`VarAttributeSet`]s.
870 pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
/// Repeatedly parses a [`VarAttributeSet`] from the front of `input` until
/// the input is empty, collecting each set that parses.
///
/// NOTE(review): what happens when a set fails to parse, and how `input` is
/// advanced, is not visible in this excerpt.
872 impl VariableAttributeRecord {
873 pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
874 let mut var_attribute_sets = Vec::new();
875 while !input.is_empty() {
876 let Some((var_attribute, rest)) =
877 VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
881 var_attribute_sets.push(var_attribute);
884 Ok(VariableAttributeRecord(var_attribute_sets))
888 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
895 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
902 #[derive(Clone, Debug)]
903 pub struct VarDisplay {
904 pub measure: Option<Measure>,
906 pub align: Option<Alignment>,
/// Variable display record, holding a list of [`VarDisplay`] entries.
909 #[derive(Clone, Debug)]
910 pub struct VarDisplayRecord(pub Vec<VarDisplay>);
912 #[derive(Clone, Debug)]
913 pub enum MultipleResponseType {
916 labels: CategoryLabels,
/// Decoding of a raw multiple response type.
///
/// For a multiple-dichotomy set the counted value is decoded and then
/// interpreted according to `min_width`. With a numeric width it is parsed
/// as an `f64`. With a string width, trailing spaces are trimmed and the
/// length is checked against that width. Multiple-category sets map across
/// directly.
921 impl MultipleResponseType {
925 input: &raw::MultipleResponseType,
927 warn: &impl Fn(Error),
928 ) -> Result<Self, Error> {
929 let mr_type = match input {
930 raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
931 let value = decoder.decode_string_cow(&value.0, warn);
932 let value = match min_width {
933 VarWidth::Numeric => {
// A value that does not parse as a number becomes InvalidMDGroupCountedValue.
934 let number: f64 = value.trim().parse().map_err(|_| {
935 Error::InvalidMDGroupCountedValue {
936 mr_set: mr_set.clone(),
937 number: value.into(),
940 Value::Number(Some(number.into()))
942 VarWidth::String(max_width) => {
943 let value = value.trim_end_matches(' ');
// NOTE(review): `len()` counts bytes of the decoded UTF-8 text, which may
// differ from the byte length in the file's encoding — confirm intended.
944 let width = value.len();
945 if width > max_width as usize {
946 return Err(Error::TooWideMDGroupCountedValue {
947 mr_set: mr_set.clone(),
953 Value::String(value.into())
956 MultipleResponseType::MultipleDichotomy {
961 raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
967 #[derive(Clone, Debug)]
968 pub struct MultipleResponseSet {
969 pub name: Identifier,
970 pub min_width: VarWidth,
971 pub max_width: VarWidth,
973 pub mr_type: MultipleResponseType,
974 pub dict_indexes: Vec<DictIndex>,
/// Decoding of a raw multiple response set.
///
/// The visible steps are: decode the set name and label, resolve each short
/// name to a dictionary index through `decoder.var_names` (problems become
/// warnings), reject sets with zero or one resolved variable, compute the
/// narrowest and widest member widths, and decode the set type against the
/// narrowest width.
///
/// # Errors
///
/// `InvalidMrSetName`, `EmptyMrSet`, `OneVarMrSet`, `MixedMrSet`, or any
/// error from `MultipleResponseType::decode`.
977 impl MultipleResponseSet {
980 input: &raw::MultipleResponseSet,
981 warn: &impl Fn(Error),
982 ) -> Result<Self, Error> {
983 let mr_set_name = decoder
984 .decode_identifier(&input.name.0, warn)
985 .map_err(|error| Error::InvalidMrSetName(error))?;
987 let label = decoder.decode_string(&input.label.0, warn);
989 let mut dict_indexes = Vec::with_capacity(input.short_names.len());
990 for short_name in input.short_names.iter() {
991 let short_name = match decoder.decode_identifier(&short_name.0, warn) {
// NOTE(review): this reports an invalid *variable* short name using the
// set-name variant, so the message will read "Invalid multiple response set
// name" — confirm whether a dedicated variant is wanted.
994 warn(Error::InvalidMrSetName(error));
998 let Some(&dict_index) = decoder.var_names.get(&short_name) else {
999 warn(Error::UnknownMrSetVariable {
1000 mr_set: mr_set_name.clone(),
1001 short_name: short_name.clone(),
1005 dict_indexes.push(dict_index);
1008 match dict_indexes.len() {
1009 0 => return Err(Error::EmptyMrSet(mr_set_name)),
1010 1 => return Err(Error::OneVarMrSet(mr_set_name)),
// Fold every member width into a (narrowest, widest) pair. `narrower` and
// `wider` return `None` for a numeric/string mix, which fails the pattern.
1014 let Some((Some(min_width), Some(max_width))) = dict_indexes
1016 .map(|dict_index| decoder.variables[dict_index].width)
1017 .map(|w| (Some(w), Some(w)))
1018 .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
1020 return Err(Error::MixedMrSet(mr_set_name));
1024 MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
1026 Ok(MultipleResponseSet {
/// Decoded multiple response record, holding a list of
/// [`MultipleResponseSet`]s.
1037 #[derive(Clone, Debug)]
1038 pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
/// Decodes every raw set. A set that fails to decode is reported through
/// `warn` and left out, so the record itself always decodes.
1040 impl TryDecode for MultipleResponseRecord {
1041 type Input = raw::MultipleResponseRecord;
1045 input: &Self::Input,
1046 warn: impl Fn(Error),
1047 ) -> Result<Self, Error> {
1048 let mut sets = Vec::with_capacity(input.0.len());
1049 for set in &input.0 {
1050 match MultipleResponseSet::decode(decoder, set, &warn) {
1051 Ok(set) => sets.push(set),
1052 Err(error) => warn(error),
1055 Ok(MultipleResponseRecord(sets))
1059 #[derive(Clone, Debug)]
1060 pub struct LongStringValueLabels {
1061 pub var_name: Identifier,
1062 pub width: VarWidth,
1063 pub labels: Vec<ValueLabel>,
/// Decoding of the value labels for one long string variable.
///
/// # Errors
///
/// - `InvalidLongStringValueLabelName` when the variable name is not a valid
///   identifier.
/// - `InvalidLongValueLabelWidth` when the width is below 9 or above
///   `VarWidth::MAX_STRING`.
1066 impl LongStringValueLabels {
1069 input: &raw::LongStringValueLabels,
1070 warn: &impl Fn(Error),
1071 ) -> Result<Self, Error> {
1072 let var_name = decoder
1073 .decode_identifier(&input.var_name.0, warn)
1074 .map_err(|e| Error::InvalidLongStringValueLabelName(e))?;
1077 let max_width = VarWidth::MAX_STRING;
1078 if input.width < 9 || input.width > max_width as u32 {
1079 return Err(Error::InvalidLongValueLabelWidth {
1080 name: var_name.into(),
// The range check above keeps this narrowing cast from truncating.
1086 let width = input.width as u16;
1088 let mut labels = Vec::with_capacity(input.labels.len());
1089 for (value, label) in input.labels.iter() {
// Values go through `decode_exact_length`, and labels through the ordinary
// string decoder.
1090 let value = Value::String(decoder.decode_exact_length(&value.0).into());
1091 let label = decoder.decode_string(&label.0, warn);
1092 labels.push(ValueLabel { value, label });
1095 Ok(LongStringValueLabels {
1097 width: VarWidth::String(width),
/// Decoded long string value label record, holding a list of
/// [`LongStringValueLabels`].
1103 #[derive(Clone, Debug)]
1104 pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
/// Decodes every raw entry. An entry that fails to decode is reported
/// through `warn` and left out, so the record itself always decodes.
1106 impl TryDecode for LongStringValueLabelRecord {
1107 type Input = raw::LongStringValueLabelRecord;
1111 input: &Self::Input,
1112 warn: impl Fn(Error),
1113 ) -> Result<Self, Error> {
1114 let mut labels = Vec::with_capacity(input.0.len());
1115 for label in &input.0 {
1116 match LongStringValueLabels::decode(decoder, label, &warn) {
1117 Ok(set) => labels.push(set),
1118 Err(error) => warn(error),
1121 Ok(LongStringValueLabelRecord(labels))
1127 use encoding_rs::WINDOWS_1252;
1131 let mut s = String::new();
1132 s.push(char::REPLACEMENT_CHARACTER);
1133 let encoded = WINDOWS_1252.encode(&s).0;
1134 let decoded = WINDOWS_1252.decode(&encoded[..]).0;
1135 println!("{:?}", decoded);
1140 let charset: Vec<u8> = (0..=255).collect();
1141 println!("{}", charset.len());
1142 let decoded = WINDOWS_1252.decode(&charset[..]).0;
1143 println!("{}", decoded.len());
1144 let encoded = WINDOWS_1252.encode(&decoded[..]).0;
1145 println!("{}", encoded.len());
1146 assert_eq!(&charset[..], &encoded[..]);