-use std::{borrow::Cow, collections::{HashSet, HashMap}};
+use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
-use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
-use encoding_rs::Encoding;
-use num::integer::div_ceil;
use crate::{
- format::{Spec, UncheckedSpec, Width},
+ encoding::{default_encoding, get_encoding, Error as EncodingError},
+ endian::Endian,
+ format::{Error as FormatError, Spec, UncheckedSpec},
identifier::{Error as IdError, Identifier},
- raw::{self, MissingValues},
- {endian::Endian, Compression},
+ raw::{self, MissingValues, UnencodedStr, VarType},
};
+use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
+use encoding_rs::{DecoderResult, Encoding};
+use num::integer::div_ceil;
+use ordered_float::OrderedFloat;
use thiserror::Error as ThisError;
+pub use crate::raw::{CategoryLabels, Compression};
+
#[derive(ThisError, Debug)]
pub enum Error {
+ // XXX this is really an internal error and maybe we should change the
+ // interfaces to make it impossible
+ #[error("Missing header record")]
+ MissingHeaderRecord,
+
+ #[error("{0}")]
+ EncodingError(EncodingError),
+
+ #[error("Using default encoding {0}.")]
+ UsingDefaultEncoding(String),
+
#[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
- BadVariableWidth { offset: u64, width: i32 },
+ InvalidVariableWidth { offset: u64, width: i32 },
#[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
- BadLongMissingValueFormat,
+ InvalidLongMissingValueFormat,
#[error("File creation date {creation_date} is not in the expected format \"DD MMM YY\" format. Using 01 Jan 1970.")]
InvalidCreationDate { creation_date: String },
#[error("File creation time {creation_time} is not in the expected format \"HH:MM:SS\" format. Using midnight.")]
InvalidCreationTime { creation_time: String },
- #[error("Invalid variable name: {0}")]
- BadIdentifier(#[from] IdError),
+ #[error("{id_error} Renaming variable to {new_name}.")]
+ InvalidVariableName {
+ id_error: IdError,
+ new_name: Identifier,
+ },
+
+ #[error(
+ "Substituting {new_spec} for invalid print format on variable {variable}. {format_error}"
+ )]
+ InvalidPrintFormat {
+ new_spec: Spec,
+ variable: Identifier,
+ format_error: FormatError,
+ },
+
+ #[error(
+ "Substituting {new_spec} for invalid write format on variable {variable}. {format_error}"
+ )]
+ InvalidWriteFormat {
+ new_spec: Spec,
+ variable: Identifier,
+ format_error: FormatError,
+ },
+
+ #[error("Renaming variable with duplicate name {duplicate_name} to {new_name}.")]
+ DuplicateVariableName {
+ duplicate_name: Identifier,
+ new_name: Identifier,
+ },
+
+ #[error("Dictionary index {dict_index} is outside valid range [1,{max_index}].")]
+ InvalidDictIndex { dict_index: usize, max_index: usize },
+
+ #[error("Dictionary index {0} refers to a long string continuation.")]
+ DictIndexIsContinuation(usize),
+
+ #[error("Variables associated with value label are not all of identical type. Variable {numeric_var} is numeric, but variable {string_var} is string.")]
+ ValueLabelsDifferentTypes {
+ numeric_var: Identifier,
+ string_var: Identifier,
+ },
+
+ #[error(
+ "Value labels may not be added to long string variable {0} using record types 3 or 4."
+ )]
+ InvalidLongStringValueLabel(Identifier),
+
+ #[error("Invalid multiple response set name. {0}")]
+ InvalidMrSetName(IdError),
+
+ #[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
+ UnknownMrSetVariable {
+ mr_set: Identifier,
+ short_name: Identifier,
+ },
+
+ #[error("Multiple response set {0} has no variables.")]
+ EmptyMrSet(Identifier),
+
+ #[error("Multiple response set {0} has only one variable.")]
+ OneVarMrSet(Identifier),
+
+ #[error("Multiple response set {0} contains both string and numeric variables.")]
+ MixedMrSet(Identifier),
+
+ #[error(
+ "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
+ )]
+ InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
+
+ #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
+ TooWideMDGroupCountedValue {
+ mr_set: Identifier,
+ value: String,
+ width: usize,
+ max_width: u16,
+ },
+
+ #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
+ InvalidLongValueLabelWidth {
+ name: Identifier,
+ width: u32,
+ min_width: u16,
+ max_width: u16,
+ },
+
+ #[error("Invalid attribute name. {0}")]
+ InvalidAttributeName(IdError),
+
+ #[error("Invalid short name in long variable name record. {0}")]
+ InvalidShortName(IdError),
+
+ #[error("Invalid name in long variable name record. {0}")]
+ InvalidLongName(IdError),
+
+ #[error("Invalid variable name in very long string record. {0}")]
+ InvalidLongStringName(IdError),
+
+ #[error("Invalid variable name in long string value label record. {0}")]
+ InvalidLongStringValueLabelName(IdError),
+
+ #[error("Invalid variable name in attribute record. {0}")]
+ InvalidAttributeVariableName(IdError),
+
+ // XXX This is risky because `text` might be arbitrarily long.
+ #[error("Text string contains invalid bytes for {encoding} encoding: {text}")]
+ MalformedString { encoding: String, text: String },
+
+ #[error("Invalid variable measurement level value {0}")]
+ InvalidMeasurement(u32),
+
+ #[error("Invalid variable display alignment value {0}")]
+ InvalidAlignment(u32),
#[error("Details TBD")]
TBD,
}
+/// One decoded record, as collected into the output of [`decode`].
+///
+/// Each variant wraps the decoded form of the raw record kind with the same
+/// name. Some payload types are re-exported unchanged from `crate::raw`.
+#[derive(Clone, Debug)]
+pub enum Record {
+ Header(HeaderRecord),
+ Variable(VariableRecord),
+ ValueLabel(ValueLabelRecord),
+ Document(DocumentRecord),
+ IntegerInfo(IntegerInfoRecord),
+ FloatInfo(FloatInfoRecord),
+ VariableSets(VariableSetRecord),
+ VarDisplay(VarDisplayRecord),
+ MultipleResponse(MultipleResponseRecord),
+ LongStringValueLabels(LongStringValueLabelRecord),
+ Encoding(EncodingRecord),
+ NumberOfCases(NumberOfCasesRecord),
+ ProductInfo(ProductInfoRecord),
+ LongNames(LongNameRecord),
+ VeryLongStrings(VeryLongStringRecord),
+ FileAttributes(FileAttributeRecord),
+ VariableAttributes(VariableAttributeRecord),
+ OtherExtension(Extension),
+ // Raw record kinds that currently have no decoded counterpart:
+ //EndOfHeaders(u32),
+ //ZHeader(ZHeader),
+ //ZTrailer(ZTrailer),
+ //Case(Vec<Value>),
+}
+
+pub use crate::raw::EncodingRecord;
+pub use crate::raw::Extension;
+pub use crate::raw::FloatInfoRecord;
+pub use crate::raw::IntegerInfoRecord;
+pub use crate::raw::NumberOfCasesRecord;
+
+type DictIndex = usize;
+
+/// Per-variable bookkeeping that [`Decoder`] keeps while decoding records.
+pub struct Variable {
+ /// Zero-based index of the first dictionary slot the variable occupies.
+ pub dict_index: DictIndex,
+ /// Name under which the variable was registered from its variable record.
+ pub short_name: Identifier,
+ /// Long name, if one is known; `None` when the variable is first registered.
+ pub long_name: Option<Identifier>,
+ /// Numeric or string width of the variable.
+ pub width: VarWidth,
+}
+
pub struct Decoder {
pub compression: Option<Compression>,
pub endian: Endian,
pub encoding: &'static Encoding,
- pub var_names: HashSet<Identifier>,
- pub dict_indexes: HashMap<usize, Identifier>,
+ pub variables: HashMap<DictIndex, Variable>,
+ pub var_names: HashMap<Identifier, DictIndex>,
n_dict_indexes: usize,
n_generated_names: usize,
}
-impl Decoder {
- fn take_name(&mut self, id: &Identifier) -> bool {
- self.var_names.insert(id.clone())
+/// Decodes the raw records in `headers` into higher-level [`Record`]s.
+///
+/// The character encoding is `encoding` when it is given. Otherwise it is
+/// looked up with `get_encoding` from the first encoding record and the
+/// character code of the first integer info record found in `headers`. If
+/// that lookup fails for any reason other than `EncodingError::Ebcdic`, the
+/// failure is reported through `warn`, followed by a second warning that
+/// names the default encoding used instead.
+///
+/// Non-fatal problems are reported through `warn` and decoding continues.
+/// Raw end-of-headers, compression header/trailer and case records produce
+/// no output.
+///
+/// # Errors
+///
+/// Returns [`Error::MissingHeaderRecord`] if `headers` has no header record,
+/// [`Error::EncodingError`] if `get_encoding` fails with
+/// `EncodingError::Ebcdic`, and any error returned while decoding an
+/// individual record.
+pub fn decode(
+ headers: Vec<raw::Record>,
+ encoding: Option<&'static Encoding>,
+ warn: &impl Fn(Error),
+) -> Result<Vec<Record>, Error> {
+ let Some(header_record) = headers.iter().find_map(|rec| {
+ if let raw::Record::Header(header) = rec {
+ Some(header)
+ } else {
+ None
+ }
+ }) else {
+ return Err(Error::MissingHeaderRecord);
+ };
+ let encoding = match encoding {
+ Some(encoding) => encoding,
+ None => {
+ let encoding = headers.iter().find_map(|rec| {
+ if let raw::Record::Encoding(ref e) = rec {
+ Some(e.0.as_str())
+ } else {
+ None
+ }
+ });
+ let character_code = headers.iter().find_map(|rec| {
+ if let raw::Record::IntegerInfo(ref r) = rec {
+ Some(r.character_code)
+ } else {
+ None
+ }
+ });
+ match get_encoding(encoding, character_code) {
+ Ok(encoding) => encoding,
+ Err(err @ EncodingError::Ebcdic) => return Err(Error::EncodingError(err)),
+ Err(err) => {
+ warn(Error::EncodingError(err));
+ // Also tell the caller which encoding is being substituted.
+ let fallback = default_encoding();
+ warn(Error::UsingDefaultEncoding(fallback.name().into()));
+ fallback
+ }
+ }
+ }
+ };
+
+ let mut decoder = Decoder {
+ compression: header_record.compression,
+ endian: header_record.endian,
+ encoding,
+ variables: HashMap::new(),
+ var_names: HashMap::new(),
+ n_dict_indexes: 0,
+ n_generated_names: 0,
+ };
+
+ let mut output = Vec::with_capacity(headers.len());
+ for header in &headers {
+ match header {
+ raw::Record::Header(ref input) => {
+ if let Some(header) = HeaderRecord::try_decode(&mut decoder, input, warn)? {
+ output.push(Record::Header(header))
+ }
+ }
+ raw::Record::Variable(ref input) => {
+ if let Some(variable) = VariableRecord::try_decode(&mut decoder, input, warn)? {
+ output.push(Record::Variable(variable));
+ }
+ }
+ raw::Record::ValueLabel(ref input) => {
+ if let Some(value_label) = ValueLabelRecord::try_decode(&mut decoder, input, warn)?
+ {
+ output.push(Record::ValueLabel(value_label));
+ }
+ }
+ raw::Record::Document(ref input) => {
+ if let Some(document) = DocumentRecord::try_decode(&mut decoder, input, warn)? {
+ output.push(Record::Document(document))
+ }
+ }
+ raw::Record::IntegerInfo(ref input) => output.push(Record::IntegerInfo(input.clone())),
+ raw::Record::FloatInfo(ref input) => output.push(Record::FloatInfo(input.clone())),
+ raw::Record::VariableSets(ref input) => {
+ let s = decoder.decode_string_cow(&input.text.0, warn);
+ output.push(Record::VariableSets(VariableSetRecord::parse(&s, warn)?));
+ }
+ raw::Record::VarDisplay(ref input) => {
+ if let Some(vdr) = VarDisplayRecord::try_decode(&mut decoder, input, warn)? {
+ output.push(Record::VarDisplay(vdr))
+ }
+ }
+ raw::Record::MultipleResponse(ref input) => {
+ if let Some(mrr) = MultipleResponseRecord::try_decode(&mut decoder, input, warn)? {
+ output.push(Record::MultipleResponse(mrr))
+ }
+ }
+ raw::Record::LongStringValueLabels(ref input) => {
+ if let Some(mrr) =
+ LongStringValueLabelRecord::try_decode(&mut decoder, input, warn)?
+ {
+ output.push(Record::LongStringValueLabels(mrr))
+ }
+ }
+ raw::Record::Encoding(ref input) => output.push(Record::Encoding(input.clone())),
+ raw::Record::NumberOfCases(ref input) => {
+ output.push(Record::NumberOfCases(input.clone()))
+ }
+ raw::Record::ProductInfo(ref input) => {
+ let s = decoder.decode_string_cow(&input.text.0, warn);
+ output.push(Record::ProductInfo(ProductInfoRecord::parse(&s, warn)?));
+ }
+ raw::Record::LongNames(ref input) => {
+ let s = decoder.decode_string_cow(&input.text.0, warn);
+ output.push(Record::LongNames(LongNameRecord::parse(
+ &mut decoder,
+ &s,
+ warn,
+ )?));
+ }
+ raw::Record::VeryLongStrings(ref input) => {
+ let s = decoder.decode_string_cow(&input.text.0, warn);
+ output.push(Record::VeryLongStrings(VeryLongStringRecord::parse(
+ &mut decoder,
+ &s,
+ warn,
+ )?));
+ }
+ raw::Record::FileAttributes(ref input) => {
+ let s = decoder.decode_string_cow(&input.text.0, warn);
+ output.push(Record::FileAttributes(FileAttributeRecord::parse(
+ &decoder, &s, warn,
+ )?));
+ }
+ raw::Record::VariableAttributes(ref input) => {
+ let s = decoder.decode_string_cow(&input.text.0, warn);
+ output.push(Record::VariableAttributes(VariableAttributeRecord::parse(
+ &decoder, &s, warn,
+ )?));
+ }
+ raw::Record::OtherExtension(ref input) => {
+ output.push(Record::OtherExtension(input.clone()))
+ }
+ // These raw record kinds have no decoded form and are dropped.
+ raw::Record::EndOfHeaders(_) => (),
+ raw::Record::ZHeader(_) => (),
+ raw::Record::ZTrailer(_) => (),
+ raw::Record::Case(_) => (),
+ };
}
+ Ok(output)
+}
+
+impl Decoder {
fn generate_name(&mut self) -> Identifier {
loop {
self.n_generated_names += 1;
let name = Identifier::new(&format!("VAR{:03}", self.n_generated_names), self.encoding)
.unwrap();
- if self.take_name(&name) {
+ if !self.var_names.contains_key(&name) {
return name;
}
assert!(self.n_generated_names < usize::MAX);
}
}
- fn take_dict_indexes(&mut self, id: &Identifier, width: Width) -> usize {
- let n = match width {
- 0 => 1,
- w => div_ceil(w, 8) as usize,
- };
- let dict_index = self.n_dict_indexes;
- self.dict_indexes.insert(self.n_dict_indexes, id.clone());
- self.n_dict_indexes += n;
- dict_index
-
- }
- fn decode_string<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
+ fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
let (output, malformed) = self.encoding.decode_without_bom_handling(input);
if malformed {
- warn(Error::TBD);
+ warn(Error::MalformedString {
+ encoding: self.encoding.name().into(),
+ text: output.clone().into(),
+ });
}
output
}
+ /// Decodes `input` from `self.encoding` like [`Self::decode_string_cow`],
+ /// but always returns an owned `String`.
+ fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String {
+ self.decode_string_cow(input, warn).into()
+ }
+ /// Decodes `input` from `self.encoding` and validates the result as an
+ /// [`Identifier`].
+ ///
+ /// Malformed bytes in `input` are reported through `warn`; a decoded
+ /// string that `Identifier::new` rejects is returned as the `Err` value.
+ pub fn decode_identifier(
+ &self,
+ input: &[u8],
+ warn: &impl Fn(Error),
+ ) -> Result<Identifier, IdError> {
+ let s = self.decode_string_cow(input, warn);
+ Identifier::new(&s, self.encoding)
+ }
+ /// Looks up the variable whose first dictionary slot is the 1-based
+ /// `dict_index`.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`Error::InvalidDictIndex`] if `dict_index` is 0 or greater
+ /// than the number of dictionary slots allocated so far, and
+ /// [`Error::DictIndexIsContinuation`] if the index is in range but no
+ /// variable starts at that slot.
+ fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
+ let max_index = self.n_dict_indexes;
+ if dict_index == 0 || dict_index > max_index {
+ return Err(Error::InvalidDictIndex {
+ dict_index,
+ max_index,
+ });
+ }
+ // `self.variables` is keyed by zero-based slot, hence the `- 1`.
+ let Some(variable) = self.variables.get(&(dict_index - 1)) else {
+ return Err(Error::DictIndexIsContinuation(dict_index));
+ };
+ Ok(variable)
+ }
+
+ /// Returns `input` decoded from `self.encoding` into UTF-8 such that
+ /// re-encoding the result back into `self.encoding` will have exactly the
+ /// same length in bytes.
+ ///
+ /// When `input` contains malformed sequences, every skipped input byte is
+ /// replaced by one `?` in the output.
+ ///
+ /// # Panics
+ ///
+ /// On the malformed-input path, panics if the decoder cannot compute an
+ /// output buffer size for `input.len()`, or if re-encoding the result
+ /// does not yield exactly `input.len()` bytes.
+ ///
+ /// XXX warn about errors?
+ fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
+ if let (s, false) = self.encoding.decode_without_bom_handling(input) {
+ // This is the common case. Usually there will be no errors.
+ s.into()
+ } else {
+ // Unusual case. Don't bother to optimize it much.
+ let mut decoder = self.encoding.new_decoder_without_bom_handling();
+ let mut output = String::with_capacity(
+ decoder
+ .max_utf8_buffer_length_without_replacement(input.len())
+ .unwrap(),
+ );
+ let mut rest = input;
+ while !rest.is_empty() {
+ match decoder.decode_to_string_without_replacement(rest, &mut output, true) {
+ (DecoderResult::InputEmpty, _) => break,
+ // Presumably cannot happen because `output` was sized from the
+ // decoder's own maximum-length estimate above.
+ (DecoderResult::OutputFull, _) => unreachable!(),
+ (DecoderResult::Malformed(a, b), consumed) => {
+ // Per the encoding_rs documentation, `a` is the length of the
+ // malformed sequence and `b` the number of bytes consumed
+ // after it; emit one `?` for each of those bytes.
+ let skipped = a as usize + b as usize;
+ output.extend(repeat('?').take(skipped));
+ rest = &rest[consumed..];
+ }
+ }
+ }
+ // Check the length guarantee stated in the doc comment.
+ assert_eq!(self.encoding.encode(&output).0.len(), input.len());
+ output.into()
+ }
+ }
}
-pub trait Decode: Sized {
+pub trait TryDecode: Sized {
type Input;
- fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error>;
+ fn try_decode(
+ decoder: &mut Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Option<Self>, Error>;
+}
+
+/// Infallible decoding of a raw `Input` value into `Self`.
+///
+/// Problems are reported through `warn` instead of being returned.
+pub trait Decode<Input>: Sized {
+ fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self;
+}
-#[derive(Clone)]
-pub struct Header {
+/// Decodes a fixed-size raw string into an owned `String` using the
+/// decoder's encoding.
+impl<const N: usize> Decode<UnencodedStr<N>> for String {
+ fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
+ decoder.decode_string(&input.0, &warn)
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct HeaderRecord {
pub eye_catcher: String,
pub weight_index: Option<usize>,
pub n_cases: Option<u64>,
pub file_label: String,
}
-impl Decode for Header {
- type Input = crate::raw::Header;
+/// Removes trailing `' '` characters from `s` in place and returns it.
+///
+/// Only the space character is stripped; other trailing whitespace is kept.
+fn trim_end_spaces(mut s: String) -> String {
+ // Truncating reuses the existing allocation instead of copying.
+ s.truncate(s.trim_end_matches(' ').len());
+ s
+}
+
+impl TryDecode for HeaderRecord {
+ type Input = crate::raw::HeaderRecord;
- fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
- let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
- let file_label = decoder.decode_string(&input.file_label.0, &warn);
- let creation_date = decoder.decode_string(&input.creation_date.0, &warn);
- let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
- warn(Error::InvalidCreationDate {
- creation_date: creation_date.into(),
+ fn try_decode(
+ decoder: &mut Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Option<Self>, Error> {
+ let eye_catcher = trim_end_spaces(decoder.decode_string(&input.eye_catcher.0, &warn));
+ let file_label = trim_end_spaces(decoder.decode_string(&input.file_label.0, &warn));
+ let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
+ let creation_date =
+ NaiveDate::parse_from_str(&creation_date, "%e %b %Y").unwrap_or_else(|_| {
+ warn(Error::InvalidCreationDate {
+ creation_date: creation_date.into(),
+ });
+ Default::default()
});
- Default::default()
- });
- let creation_time = decoder.decode_string(&input.creation_time.0, &warn);
+ let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
let creation_time =
NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
warn(Error::InvalidCreationTime {
});
Default::default()
});
- Ok(Header {
- eye_catcher: eye_catcher.into(),
+ Ok(Some(HeaderRecord {
+ eye_catcher,
weight_index: input.weight_index.map(|n| n as usize),
n_cases: input.n_cases.map(|n| n as u64),
creation: NaiveDateTime::new(creation_date, creation_time),
- file_label: file_label.into(),
- })
+ file_label,
+ }))
}
}
-pub struct Variable {
- pub width: Width,
+/// The width of a variable: either numeric or a string of a given width.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum VarWidth {
+ Numeric,
+ /// A string variable; the payload is its width.
+ String(u16),
+}
+
+/// Partial order on widths: two numeric widths are equal, two string widths
+/// compare by their width, and a numeric width is incomparable with a string
+/// width.
+impl PartialOrd for VarWidth {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ match (self, other) {
+ (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
+ (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
+ _ => None,
+ }
+ }
+}
+
+impl VarWidth {
+ /// Upper bound on string width; lengths in very long string records that
+ /// exceed it are rejected.
+ const MAX_STRING: u16 = 32767;
+
+ /// Returns the number of dictionary index slots a variable of this width
+ /// occupies: 1 for numeric, and the string width divided by 8, rounded up,
+ /// for strings.
+ fn n_dict_indexes(self) -> usize {
+ match self {
+ VarWidth::Numeric => 1,
+ VarWidth::String(w) => div_ceil(w as usize, 8),
+ }
+ }
+
+ /// Combines two optional widths of the same kind.
+ ///
+ /// Two numeric widths yield numeric; two string widths yield a string
+ /// whose width is `f(a, b)`. Mixed kinds or any `None` input yield `None`.
+ /// Despite the name, `f` selects a width rather than testing a condition.
+ fn width_predicate(
+ a: Option<VarWidth>,
+ b: Option<VarWidth>,
+ f: impl Fn(u16, u16) -> u16,
+ ) -> Option<VarWidth> {
+ match (a, b) {
+ (Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
+ (Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
+ Some(VarWidth::String(f(a, b)))
+ }
+ _ => None,
+ }
+ }
+
+ /// Returns the wider of `a` and `b`:
+ /// - Numerical variable widths are equally wide.
+ /// - Longer strings are wider than shorter strings.
+ /// - Numerical and string types are incomparable, so result in `None`.
+ /// - Any `None` in the input yields `None` in the output.
+ pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+ Self::width_predicate(a, b, |a, b| a.max(b))
+ }
+
+ /// Returns the narrower of `a` and `b` (see [`Self::wider`]).
+ pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+ Self::width_predicate(a, b, |a, b| a.min(b))
+ }
+}
+
+/// Reduces a width to its type, discarding the width of a string.
+impl From<VarWidth> for VarType {
+ fn from(source: VarWidth) -> Self {
+ match source {
+ VarWidth::Numeric => VarType::Numeric,
+ VarWidth::String(_) => VarType::String,
+ }
+ }
+}
+
+/// Decoded form of a raw variable record.
+#[derive(Clone, Debug)]
+pub struct VariableRecord {
+ pub width: VarWidth,
pub name: Identifier,
pub print_format: Spec,
pub write_format: Spec,
pub label: Option<String>,
}
-fn decode_format(raw: raw::Spec, name: &str, width: Width) -> Spec {
+fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec {
UncheckedSpec::try_from(raw)
.and_then(Spec::try_from)
- .and_then(|x| x.check_width_compatibility(Some(name), width))
- .unwrap_or_else(|_warning| {
- /*warn(warning);*/
- Spec::default_for_width(width)
+ .and_then(|x| x.check_width_compatibility(width))
+ .unwrap_or_else(|error| {
+ let new_format = Spec::default_for_width(width);
+ warn(new_format, error);
+ new_format
})
}
-impl Variable {
- pub fn decode(
+impl TryDecode for VariableRecord {
+ type Input = raw::VariableRecord;
+
+ fn try_decode(
decoder: &mut Decoder,
- input: &crate::raw::Variable,
+ input: &crate::raw::VariableRecord,
warn: impl Fn(Error),
- ) -> Result<Option<Variable>, Error> {
- match input.width {
- 0..=255 => (),
+ ) -> Result<Option<VariableRecord>, Error> {
+ let width = match input.width {
+ 0 => VarWidth::Numeric,
+ w @ 1..=255 => VarWidth::String(w as u16),
-1 => return Ok(None),
_ => {
- return Err(Error::BadVariableWidth {
+ return Err(Error::InvalidVariableWidth {
offset: input.offset,
width: input.width,
})
}
};
- let width = input.width as Width;
- let name = decoder.decode_string(&input.name.0, &warn);
+ let name = trim_end_spaces(decoder.decode_string(&input.name.0, &warn));
let name = match Identifier::new(&name, decoder.encoding) {
Ok(name) => {
- if !decoder.take_name(&name) {
- decoder.generate_name()
- } else {
+ if !decoder.var_names.contains_key(&name) {
name
+ } else {
+ let new_name = decoder.generate_name();
+ warn(Error::DuplicateVariableName {
+ duplicate_name: name.clone(),
+ new_name: new_name.clone(),
+ });
+ new_name
}
}
- Err(error) => {
- warn(error.into());
- decoder.generate_name()
+ Err(id_error) => {
+ let new_name = decoder.generate_name();
+ warn(Error::InvalidVariableName {
+ id_error,
+ new_name: new_name.clone(),
+ });
+ new_name
}
};
- let print_format = decode_format(input.print_format, &name.0, width);
- let write_format = decode_format(input.write_format, &name.0, width);
+ let variable = Variable {
+ dict_index: decoder.n_dict_indexes,
+ short_name: name.clone(),
+ long_name: None,
+ width,
+ };
+ decoder.n_dict_indexes += width.n_dict_indexes();
+ assert!(decoder
+ .var_names
+ .insert(name.clone(), variable.dict_index)
+ .is_none());
+ assert!(decoder
+ .variables
+ .insert(variable.dict_index, variable)
+ .is_none());
+
+ let print_format = decode_format(input.print_format, width, |new_spec, format_error| {
+ warn(Error::InvalidPrintFormat {
+ new_spec,
+ variable: name.clone(),
+ format_error,
+ })
+ });
+ let write_format = decode_format(input.write_format, width, |new_spec, format_error| {
+ warn(Error::InvalidWriteFormat {
+ new_spec,
+ variable: name.clone(),
+ format_error,
+ })
+ });
let label = input
.label
.as_ref()
- .map(|label| decoder.decode_string(&label.0, &warn).into());
- decoder.take_dict_indexes(&name, width);
- Ok(Some(Variable {
+ .map(|label| decoder.decode_string(&label.0, &warn));
+ Ok(Some(VariableRecord {
width,
name,
print_format,
}
}
-#[derive(Clone)]
-pub struct Document(Vec<String>);
+#[derive(Clone, Debug)]
+pub struct DocumentRecord(Vec<String>);
-impl Decode for Document {
- type Input = crate::raw::Document;
+impl TryDecode for DocumentRecord {
+ type Input = crate::raw::DocumentRecord;
- fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
- Ok(Document(
+ fn try_decode(
+ decoder: &mut Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Option<Self>, Error> {
+ Ok(Some(DocumentRecord(
input
.lines
.iter()
- .map(|s| decoder.decode_string(&s.0, &warn).into())
+ .map(|s| trim_end_spaces(decoder.decode_string(&s.0, &warn)))
.collect(),
- ))
+ )))
}
}
-pub use crate::raw::FloatInfo;
-pub use crate::raw::IntegerInfo;
-
trait TextRecord
where
Self: Sized,
fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error>;
}
+#[derive(Clone, Debug)]
pub struct VariableSet {
pub name: String,
pub vars: Vec<String>,
}
}
+/// Converts a fallible result into an `Option` by reporting the error through
+/// a warning callback instead of propagating it.
+trait WarnOnError<T> {
+ /// Returns the success value, or passes the error to `warn` and returns
+ /// `None`.
+ fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
+}
+impl<T> WarnOnError<T> for Result<T, Error> {
+ /// `Ok(x)` becomes `Some(x)`; `Err(e)` calls `warn(e)` and becomes `None`.
+ fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
+ match self {
+ Ok(result) => Some(result),
+ Err(error) => {
+ warn(error);
+ None
+ }
+ }
+ }
+}
+
+/// A decoded data value: an optional number or a string.
+///
+/// Numbers are wrapped in `OrderedFloat` so that the type can derive `Eq`,
+/// `Ord` and `Hash`.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Value {
+ Number(Option<OrderedFloat<f64>>),
+ String(String),
+}
+
+impl Value {
+ /// Converts a raw value into a decoded [`Value`].
+ ///
+ /// Numbers are carried over as-is. Strings are decoded with
+ /// `Decoder::decode_exact_length`, which emits no warnings.
+ pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self {
+ match raw {
+ raw::Value::Number(x) => Value::Number(x.map(|x| x.into())),
+ raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
+ }
+ }
+}
+
+/// A decoded value paired with its decoded label text.
+#[derive(Clone, Debug)]
+pub struct ValueLabel {
+ pub value: Value,
+ pub label: String,
+}
+
+/// Decoded value label record: a set of labels and the variables they apply
+/// to.
+#[derive(Clone, Debug)]
+pub struct ValueLabelRecord {
+ /// Type shared by every variable in `variables`.
+ pub var_type: VarType,
+ pub labels: Vec<ValueLabel>,
+ /// Short names of the variables the labels apply to.
+ pub variables: Vec<Identifier>,
+}
+
+impl TryDecode for ValueLabelRecord {
+ type Input = crate::raw::ValueLabelRecord;
+ /// Decodes a raw value label record.
+ ///
+ /// Dictionary indexes that do not resolve to a variable, and string
+ /// variables wider than 8, are reported through `warn` and dropped.
+ /// Returns `Ok(None)` if no variables remain, or (after a warning) if the
+ /// remaining variables are not all of the same type.
+ fn try_decode(
+ decoder: &mut Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Option<ValueLabelRecord>, Error> {
+ let variables: Vec<&Variable> = input
+ .dict_indexes
+ .iter()
+ .filter_map(|&dict_index| {
+ decoder
+ .get_var_by_index(dict_index as usize)
+ .warn_on_error(&warn)
+ })
+ .filter(|&variable| match variable.width {
+ VarWidth::String(width) if width > 8 => {
+ warn(Error::InvalidLongStringValueLabel(
+ variable.short_name.clone(),
+ ));
+ false
+ }
+ _ => true,
+ })
+ .collect();
+ let mut i = variables.iter();
+ let Some(&first_var) = i.next() else {
+ return Ok(None);
+ };
+ // The first surviving variable fixes the type for the whole record.
+ let var_type: VarType = first_var.width.into();
+ for &variable in i {
+ let this_type: VarType = variable.width.into();
+ if var_type != this_type {
+ // Order the pair so that the warning names the numeric variable
+ // and the string variable in the right slots.
+ let (numeric_var, string_var) = match var_type {
+ VarType::Numeric => (first_var, variable),
+ VarType::String => (variable, first_var),
+ };
+ warn(Error::ValueLabelsDifferentTypes {
+ numeric_var: numeric_var.short_name.clone(),
+ string_var: string_var.short_name.clone(),
+ });
+ return Ok(None);
+ }
+ }
+ let labels = input
+ .labels
+ .iter()
+ .map(|(value, label)| {
+ let label = decoder.decode_string(&label.0, &warn);
+ let value = Value::decode(
+ raw::Value::from_raw(*value, var_type, decoder.endian),
+ &decoder,
+ );
+ ValueLabel { value, label }
+ })
+ .collect();
+ let variables = variables
+ .iter()
+ .map(|&variable| variable.short_name.clone())
+ .collect();
+ Ok(Some(ValueLabelRecord {
+ var_type,
+ labels,
+ variables,
+ }))
+ }
+}
+
+#[derive(Clone, Debug)]
pub struct VariableSetRecord(Vec<VariableSet>);
impl TextRecord for VariableSetRecord {
fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
let mut sets = Vec::new();
for line in input.lines() {
- match VariableSet::parse(line) {
- Ok(set) => sets.push(set),
- Err(error) => warn(error),
+ if let Some(set) = VariableSet::parse(line).warn_on_error(&warn) {
+ sets.push(set)
}
}
Ok(VariableSetRecord(sets))
}
}
-pub struct ProductInfo(pub String);
+#[derive(Clone, Debug)]
+pub struct ProductInfoRecord(pub String);
-impl TextRecord for ProductInfo {
+impl TextRecord for ProductInfoRecord {
const NAME: &'static str = "extra product info";
fn parse(input: &str, _warn: impl Fn(Error)) -> Result<Self, Error> {
- Ok(ProductInfo(input.into()))
+ Ok(ProductInfoRecord(input.into()))
}
}
-pub struct LongVariableName {
- pub short_name: String,
- pub long_name: String,
+#[derive(Clone, Debug)]
+pub struct LongName {
+ pub short_name: Identifier,
+ pub long_name: Identifier,
}
-pub struct LongVariableNameRecord(Vec<LongVariableName>);
+impl LongName {
+ /// Validates `short_name` and `long_name` as identifiers in the decoder's
+ /// encoding and pairs them up.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`Error::InvalidShortName`] if `short_name` is rejected, or
+ /// else [`Error::InvalidLongName`] if `long_name` is rejected.
+ fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
+ let encoding = decoder.encoding;
+ let short_name = Identifier::new(short_name, encoding).map_err(Error::InvalidShortName)?;
+ let long_name = Identifier::new(long_name, encoding).map_err(Error::InvalidLongName)?;
+ Ok(LongName {
+ short_name,
+ long_name,
+ })
+ }
+}
-impl TextRecord for LongVariableNameRecord {
- const NAME: &'static str = "long variable names";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+#[derive(Clone, Debug)]
+pub struct LongNameRecord(Vec<LongName>);
+
+impl LongNameRecord {
+ pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
let mut names = Vec::new();
for pair in input.split('\t').filter(|s| !s.is_empty()) {
if let Some((short_name, long_name)) = pair.split_once('=') {
- let name = LongVariableName {
- short_name: short_name.into(),
- long_name: long_name.into(),
- };
- names.push(name);
+ if let Some(long_name) =
+ LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
+ {
+ names.push(long_name);
+ }
} else {
warn(Error::TBD)
}
}
- Ok(LongVariableNameRecord(names))
+ Ok(LongNameRecord(names))
}
}
+#[derive(Clone, Debug)]
pub struct VeryLongString {
- pub short_name: String,
- pub length: usize,
+ pub short_name: Identifier,
+ pub length: u16,
}
impl VeryLongString {
- fn parse(input: &str) -> Result<VeryLongString, Error> {
+ fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
let Some((short_name, length)) = input.split_once('=') else {
return Err(Error::TBD);
};
- let length: usize = length.parse().map_err(|_| Error::TBD)?;
+ let short_name = Identifier::new(short_name, decoder.encoding)
+ .map_err(|e| Error::InvalidLongStringName(e))?;
+ let length: u16 = length.parse().map_err(|_| Error::TBD)?;
+ if length > VarWidth::MAX_STRING {
+ return Err(Error::TBD);
+ }
Ok(VeryLongString {
short_name: short_name.into(),
length,
}
}
+#[derive(Clone, Debug)]
pub struct VeryLongStringRecord(Vec<VeryLongString>);
-impl TextRecord for VeryLongStringRecord {
- const NAME: &'static str = "very long strings";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+impl VeryLongStringRecord {
+ pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
let mut very_long_strings = Vec::new();
for tuple in input
.split('\0')
.map(|s| s.trim_end_matches('\t'))
.filter(|s| !s.is_empty())
{
- match VeryLongString::parse(tuple) {
- Ok(vls) => very_long_strings.push(vls),
- Err(error) => warn(error),
+ if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
+ very_long_strings.push(vls)
}
}
Ok(VeryLongStringRecord(very_long_strings))
}
}
+#[derive(Clone, Debug)]
pub struct Attribute {
- pub name: String,
+ pub name: Identifier,
pub values: Vec<String>,
}
impl Attribute {
- fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
+ fn parse<'a>(
+ decoder: &Decoder,
+ input: &'a str,
+ warn: &impl Fn(Error),
+ ) -> Result<(Option<Attribute>, &'a str), Error> {
let Some((name, mut input)) = input.split_once('(') else {
return Err(Error::TBD);
};
values.push(value.into());
}
if let Some(rest) = rest.strip_prefix(')') {
- return Ok((
- Attribute {
- name: name.into(),
- values,
- },
- rest,
- ));
- }
+ let attribute = Identifier::new(name, decoder.encoding)
+ .map_err(|e| Error::InvalidAttributeName(e))
+ .warn_on_error(warn)
+ .map(|name| Attribute { name, values });
+ return Ok((attribute, rest));
+ };
input = rest;
}
}
}
+#[derive(Clone, Debug)]
pub struct AttributeSet(pub Vec<Attribute>);
impl AttributeSet {
fn parse<'a>(
+ decoder: &Decoder,
mut input: &'a str,
sentinel: Option<char>,
warn: &impl Fn(Error),
None => break input,
c if c == sentinel => break &input[1..],
_ => {
- let (attribute, rest) = Attribute::parse(input, &warn)?;
- attributes.push(attribute);
+ let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
+ if let Some(attribute) = attribute {
+ attributes.push(attribute);
+ }
input = rest;
}
}
}
}
+#[derive(Clone, Debug)]
pub struct FileAttributeRecord(AttributeSet);
-impl TextRecord for FileAttributeRecord {
- const NAME: &'static str = "data file attributes";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let (set, rest) = AttributeSet::parse(input, None, &warn)?;
+impl FileAttributeRecord {
+ pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+ let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
if !rest.is_empty() {
warn(Error::TBD);
}
}
}
+#[derive(Clone, Debug)]
pub struct VarAttributeSet {
- pub long_var_name: String,
+ pub long_var_name: Identifier,
pub attributes: AttributeSet,
}
impl VarAttributeSet {
fn parse<'a>(
+ decoder: &Decoder,
input: &'a str,
warn: &impl Fn(Error),
- ) -> Result<(VarAttributeSet, &'a str), Error> {
+ ) -> Result<(Option<VarAttributeSet>, &'a str), Error> {
let Some((long_var_name, rest)) = input.split_once(':') else {
return Err(Error::TBD);
};
- let (attributes, rest) = AttributeSet::parse(rest, Some('/'), warn)?;
- Ok((
- VarAttributeSet {
- long_var_name: long_var_name.into(),
+ let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
+ let var_attribute = Identifier::new(long_var_name, decoder.encoding)
+ .map_err(|e| Error::InvalidAttributeVariableName(e))
+ .warn_on_error(warn)
+ .map(|name| VarAttributeSet {
+ long_var_name: name,
attributes,
- },
- rest,
- ))
+ });
+ Ok((var_attribute, rest))
}
}
+#[derive(Clone, Debug)]
pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
-impl TextRecord for VariableAttributeRecord {
- const NAME: &'static str = "variable attributes";
- fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+impl VariableAttributeRecord {
+ pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
let mut var_attribute_sets = Vec::new();
while !input.is_empty() {
- match VarAttributeSet::parse(input, &warn) {
- Ok((var_attribute, rest)) => {
- var_attribute_sets.push(var_attribute);
- input = rest;
- }
- Err(error) => {
- warn(error);
- break;
- }
+ let Some((var_attribute, rest)) =
+ VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
+ else {
+ break;
+ };
+ if let Some(var_attribute) = var_attribute {
+ var_attribute_sets.push(var_attribute);
}
+ input = rest;
}
Ok(VariableAttributeRecord(var_attribute_sets))
}
}
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Measure {
Nominal,
Ordinal,
Scale,
}
+impl Measure {
+ fn try_decode(source: u32) -> Result<Option<Measure>, Error> {
+ match source {
+ 0 => Ok(None),
+ 1 => Ok(Some(Measure::Nominal)),
+ 2 => Ok(Some(Measure::Ordinal)),
+ 3 => Ok(Some(Measure::Scale)),
+ _ => Err(Error::InvalidMeasurement(source)),
+ }
+ }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Alignment {
Left,
Right,
Center,
}
+impl Alignment {
+ fn try_decode(source: u32) -> Result<Option<Alignment>, Error> {
+ match source {
+ 0 => Ok(None),
+ 1 => Ok(Some(Alignment::Left)),
+ 2 => Ok(Some(Alignment::Right)),
+ 3 => Ok(Some(Alignment::Center)),
+ _ => Err(Error::InvalidAlignment(source)),
+ }
+ }
+}
+
+#[derive(Clone, Debug)]
pub struct VarDisplay {
pub measure: Option<Measure>,
- pub width: u32,
- pub align: Option<Alignment>,
+ pub width: Option<u32>,
+ pub alignment: Option<Alignment>,
}
+#[derive(Clone, Debug)]
pub struct VarDisplayRecord(pub Vec<VarDisplay>);
+
+impl TryDecode for VarDisplayRecord {
+ type Input = raw::VarDisplayRecord;
+ fn try_decode(
+ decoder: &mut Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Option<Self>, Error> {
+ let n_vars = decoder.variables.len();
+ let n_per_var = if input.0.len() == 3 * n_vars {
+ 3
+ } else if input.0.len() == 2 * n_vars {
+ 2
+ } else {
+ return Err(Error::TBD);
+ };
+
+ let var_displays = input
+ .0
+ .chunks(n_per_var)
+ .map(|chunk| {
+ let (measure, width, alignment) = match n_per_var == 3 {
+ true => (chunk[0], Some(chunk[1]), chunk[2]),
+ false => (chunk[0], None, chunk[1]),
+ };
+ let measure = Measure::try_decode(measure).warn_on_error(&warn).flatten();
+ let alignment = Alignment::try_decode(alignment)
+ .warn_on_error(&warn)
+ .flatten();
+ VarDisplay {
+ measure,
+ width,
+ alignment,
+ }
+ })
+ .collect();
+ Ok(Some(VarDisplayRecord(var_displays)))
+ }
+}
+
+#[derive(Clone, Debug)]
+pub enum MultipleResponseType {
+ MultipleDichotomy {
+ value: Value,
+ labels: CategoryLabels,
+ },
+ MultipleCategory,
+}
+
+impl MultipleResponseType {
+ fn decode(
+ decoder: &Decoder,
+ mr_set: &Identifier,
+ input: &raw::MultipleResponseType,
+ min_width: VarWidth,
+ warn: &impl Fn(Error),
+ ) -> Result<Self, Error> {
+ let mr_type = match input {
+ raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
+ let value = decoder.decode_string_cow(&value.0, warn);
+ let value = match min_width {
+ VarWidth::Numeric => {
+ let number: f64 = value.trim().parse().map_err(|_| {
+ Error::InvalidMDGroupCountedValue {
+ mr_set: mr_set.clone(),
+ number: value.into(),
+ }
+ })?;
+ Value::Number(Some(number.into()))
+ }
+ VarWidth::String(max_width) => {
+ let value = value.trim_end_matches(' ');
+ let width = value.len();
+ if width > max_width as usize {
+ return Err(Error::TooWideMDGroupCountedValue {
+ mr_set: mr_set.clone(),
+ value: value.into(),
+ width,
+ max_width,
+ });
+ };
+ Value::String(value.into())
+ }
+ };
+ MultipleResponseType::MultipleDichotomy {
+ value,
+ labels: *labels,
+ }
+ }
+ raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
+ };
+ Ok(mr_type)
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct MultipleResponseSet {
+ pub name: Identifier,
+ pub min_width: VarWidth,
+ pub max_width: VarWidth,
+ pub label: String,
+ pub mr_type: MultipleResponseType,
+ pub dict_indexes: Vec<DictIndex>,
+}
+
+impl MultipleResponseSet {
+ fn decode(
+ decoder: &Decoder,
+ input: &raw::MultipleResponseSet,
+ warn: &impl Fn(Error),
+ ) -> Result<Self, Error> {
+ let mr_set_name = decoder
+ .decode_identifier(&input.name.0, warn)
+ .map_err(|error| Error::InvalidMrSetName(error))?;
+
+ let label = decoder.decode_string(&input.label.0, warn);
+
+ let mut dict_indexes = Vec::with_capacity(input.short_names.len());
+ for short_name in input.short_names.iter() {
+ let short_name = match decoder.decode_identifier(&short_name.0, warn) {
+ Ok(name) => name,
+ Err(error) => {
+ warn(Error::InvalidMrSetName(error));
+ continue;
+ }
+ };
+ let Some(&dict_index) = decoder.var_names.get(&short_name) else {
+ warn(Error::UnknownMrSetVariable {
+ mr_set: mr_set_name.clone(),
+ short_name: short_name.clone(),
+ });
+ continue;
+ };
+ dict_indexes.push(dict_index);
+ }
+
+ match dict_indexes.len() {
+ 0 => return Err(Error::EmptyMrSet(mr_set_name)),
+ 1 => return Err(Error::OneVarMrSet(mr_set_name)),
+ _ => (),
+ }
+
+ let Some((Some(min_width), Some(max_width))) = dict_indexes
+ .iter()
+ .map(|dict_index| decoder.variables[dict_index].width)
+ .map(|w| (Some(w), Some(w)))
+ .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
+ else {
+ return Err(Error::MixedMrSet(mr_set_name));
+ };
+
+ let mr_type =
+ MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
+
+ Ok(MultipleResponseSet {
+ name: mr_set_name,
+ min_width,
+ max_width,
+ label,
+ mr_type,
+ dict_indexes,
+ })
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
+
+impl TryDecode for MultipleResponseRecord {
+ type Input = raw::MultipleResponseRecord;
+
+ fn try_decode(
+ decoder: &mut Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Option<Self>, Error> {
+ let mut sets = Vec::with_capacity(input.0.len());
+ for set in &input.0 {
+ match MultipleResponseSet::decode(decoder, set, &warn) {
+ Ok(set) => sets.push(set),
+ Err(error) => warn(error),
+ }
+ }
+ Ok(Some(MultipleResponseRecord(sets)))
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabels {
+ pub var_name: Identifier,
+ pub width: VarWidth,
+ pub labels: Vec<ValueLabel>,
+}
+
+impl LongStringValueLabels {
+ fn decode(
+ decoder: &Decoder,
+ input: &raw::LongStringValueLabels,
+ warn: &impl Fn(Error),
+ ) -> Result<Self, Error> {
+ let var_name = decoder.decode_string(&input.var_name.0, warn);
+ let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
+ .map_err(|e| Error::InvalidLongStringValueLabelName(e))?;
+
+ let min_width = 9;
+ let max_width = VarWidth::MAX_STRING;
+ if input.width < 9 || input.width > max_width as u32 {
+ return Err(Error::InvalidLongValueLabelWidth {
+ name: var_name.into(),
+ width: input.width,
+ min_width,
+ max_width,
+ });
+ }
+ let width = input.width as u16;
+
+ let mut labels = Vec::with_capacity(input.labels.len());
+ for (value, label) in input.labels.iter() {
+ let value = Value::String(decoder.decode_exact_length(&value.0).into());
+ let label = decoder.decode_string(&label.0, warn);
+ labels.push(ValueLabel { value, label });
+ }
+
+ Ok(LongStringValueLabels {
+ var_name,
+ width: VarWidth::String(width),
+ labels,
+ })
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
+
+impl TryDecode for LongStringValueLabelRecord {
+ type Input = raw::LongStringValueLabelRecord;
+
+ fn try_decode(
+ decoder: &mut Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Option<Self>, Error> {
+ let mut labels = Vec::with_capacity(input.0.len());
+ for label in &input.0 {
+ match LongStringValueLabels::decode(decoder, label, &warn) {
+ Ok(set) => labels.push(set),
+ Err(error) => warn(error),
+ }
+ }
+ Ok(Some(LongStringValueLabelRecord(labels)))
+ }
+}
+
#[cfg(test)]
mod test {
    use encoding_rs::WINDOWS_1252;

    /// Prints the result of encoding U+FFFD as windows-1252 and decoding it
    /// again.  Exploratory only: nothing is asserted.
    #[test]
    fn test() {
        let s = String::from(char::REPLACEMENT_CHARACTER);
        let (encoded, _, _) = WINDOWS_1252.encode(&s);
        let (decoded, _, _) = WINDOWS_1252.decode(&encoded);
        println!("{:?}", decoded);
    }

    /// Checks that all 256 byte values survive a windows-1252 decode followed
    /// by a windows-1252 encode.
    #[test]
    fn test2() {
        let charset: Vec<u8> = (u8::MIN..=u8::MAX).collect();
        println!("{}", charset.len());
        let (decoded, _, _) = WINDOWS_1252.decode(&charset);
        println!("{}", decoded.len());
        let (encoded, _, _) = WINDOWS_1252.encode(&decoded);
        println!("{}", encoded.len());
        assert_eq!(&charset[..], &encoded[..]);
    }
}