endian::Endian,
format::{Error as FormatError, Spec, UncheckedSpec},
identifier::{Error as IdError, Identifier},
- raw::{self, MissingValues, VarType},
+ raw::{self, MissingValues, UnencodedStr, VarType},
};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::{DecoderResult, Encoding};
InvalidLongStringValueLabel(Identifier),
#[error("Invalid multiple response set name. {0}")]
- InvalidMrSetName(#[from] IdError),
+ InvalidMrSetName(IdError),
#[error("Multiple response set {mr_set} includes unknown variable {short_name}.")]
UnknownMrSetVariable {
#[error("Multiple response set {0} contains both string and numeric variables.")]
MixedMrSet(Identifier),
- #[error("Invalid numeric format for counted value {number} in multiple response set {mr_set}.")]
+ #[error(
+ "Invalid numeric format for counted value {number} in multiple response set {mr_set}."
+ )]
InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
#[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
- TooWideMDGroupCountedValue { mr_set: Identifier, value: String, width: usize, max_width: u16 },
+ TooWideMDGroupCountedValue {
+ mr_set: Identifier,
+ value: String,
+ width: usize,
+ max_width: u16,
+ },
+
+ #[error("Long string value label for variable {name} has width {width}, which is not in the valid range [{min_width},{max_width}].")]
+ InvalidLongValueLabelWidth {
+ name: Identifier,
+ width: u32,
+ min_width: u16,
+ max_width: u16,
+ },
+
+ #[error("Invalid attribute name. {0}")]
+ InvalidAttributeName(IdError),
+
+ #[error("Invalid short name in long variable name record. {0}")]
+ InvalidShortName(IdError),
+
+ #[error("Invalid name in long variable name record. {0}")]
+ InvalidLongName(IdError),
+
+ #[error("Invalid variable name in very long string record. {0}")]
+ InvalidLongStringName(IdError),
+
+ #[error("Invalid variable name in long string value label record. {0}")]
+ InvalidLongStringValueLabelName(IdError),
#[error("Details TBD")]
TBD,
VariableSets(VariableSetRecord),
VarDisplay(VarDisplayRecord),
MultipleResponse(MultipleResponseRecord),
- //LongStringValueLabels(LongStringValueLabelRecord),
+ LongStringValueLabels(LongStringValueLabelRecord),
Encoding(EncodingRecord),
NumberOfCases(NumberOfCasesRecord),
ProductInfo(ProductInfoRecord),
- //LongNames(UnencodedString),
- //LongStrings(UnencodedString),
- //FileAttributes(UnencodedString),
+ LongNames(LongNameRecord),
+ VeryLongStrings(VeryLongStringRecord),
+ FileAttributes(FileAttributeRecord),
//VariableAttributes(UnencodedString),
//OtherExtension(Extension),
//EndOfHeaders(u32),
assert!(self.n_generated_names < usize::MAX);
}
}
+ /// Decodes `input` using this decoder's encoding, without BOM handling,
+ /// and returns the decoded text as a `Cow`.
+ ///
+ /// If the decoder reports the input as malformed, `Error::TBD` is passed
+ /// to `warn`; the decoded output is still returned.
- fn decode_string<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
+ fn decode_string_cow<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
let (output, malformed) = self.encoding.decode_without_bom_handling(input);
if malformed {
warn(Error::TBD);
}
output
}
+    /// Decodes `input` exactly as `decode_string_cow` does, but always hands
+    /// back an owned `String`.
+    fn decode_string(&self, input: &[u8], warn: &impl Fn(Error)) -> String {
+        let decoded = self.decode_string_cow(input, warn);
+        decoded.into_owned()
+    }
+ /// Decodes `input` as text and builds an `Identifier` from it using this
+ /// decoder's encoding.
+ ///
+ /// Decoding problems are reported through `warn`; whatever `IdError`
+ /// `Identifier::new` produces is returned to the caller unchanged.
pub fn decode_identifier(
&self,
input: &[u8],
warn: &impl Fn(Error),
) -> Result<Identifier, IdError> {
- let s = self.decode_string(input, warn);
+ // Borrowing is enough here: the text is only passed on to `Identifier::new`.
+ let s = self.decode_string_cow(input, warn);
Identifier::new(&s, self.encoding)
}
fn get_var_by_index(&self, dict_index: usize) -> Result<&Variable, Error> {
}
}
+/// Decoding that can fail: converts a `Self::Input` into `Self`, returning an
+/// `Error` on failure and reporting other problems through `warn`.
-pub trait Decode: Sized {
+pub trait TryDecode: Sized {
type Input;
- fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error>;
+ fn try_decode(
+ decoder: &Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Self, Error>;
+}
+
+/// Decoding that always yields a value: converts an `Input` into `Self`, with
+/// `warn` as the only channel for reporting problems.
+pub trait Decode<Input>: Sized {
+ fn decode(decoder: &Decoder, input: &Input, warn: impl Fn(Error)) -> Self;
+}
+
+/// Decodes the bytes wrapped by an `UnencodedStr<N>` into an owned `String`
+/// by delegating to `Decoder::decode_string`.
+impl<const N: usize> Decode<UnencodedStr<N>> for String {
+ fn decode(decoder: &Decoder, input: &UnencodedStr<N>, warn: impl Fn(Error)) -> Self {
+ decoder.decode_string(&input.0, &warn)
+ }
}
#[derive(Clone, Debug)]
pub file_label: String,
}
-impl Decode for HeaderRecord {
+impl TryDecode for HeaderRecord {
type Input = crate::raw::HeaderRecord;
- fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
+ fn try_decode(
+ decoder: &Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Self, Error> {
let eye_catcher = decoder.decode_string(&input.eye_catcher.0, &warn);
let file_label = decoder.decode_string(&input.file_label.0, &warn);
- let creation_date = decoder.decode_string(&input.creation_date.0, &warn);
+ let creation_date = decoder.decode_string_cow(&input.creation_date.0, &warn);
let creation_date = NaiveDate::parse_from_str(&creation_date, "%v").unwrap_or_else(|_| {
warn(Error::InvalidCreationDate {
creation_date: creation_date.into(),
});
Default::default()
});
- let creation_time = decoder.decode_string(&input.creation_time.0, &warn);
+ let creation_time = decoder.decode_string_cow(&input.creation_time.0, &warn);
let creation_time =
NaiveTime::parse_from_str(&creation_time, "%H:%M:%S").unwrap_or_else(|_| {
warn(Error::InvalidCreationTime {
Default::default()
});
Ok(HeaderRecord {
- eye_catcher: eye_catcher.into(),
+ eye_catcher,
weight_index: input.weight_index.map(|n| n as usize),
n_cases: input.n_cases.map(|n| n as u64),
creation: NaiveDateTime::new(creation_date, creation_time),
- file_label: file_label.into(),
+ file_label,
})
}
}
}
impl VarWidth {
+ const MAX_STRING: u16 = 32767;
+
fn n_dict_indexes(self) -> usize {
match self {
VarWidth::Numeric => 1,
let label = input
.label
.as_ref()
- .map(|label| decoder.decode_string(&label.0, &warn).into());
+ .map(|label| decoder.decode_string(&label.0, &warn));
Ok(Some(VariableRecord {
width,
name,
+/// Decoded document record: one `String` for each line of the raw record.
#[derive(Clone, Debug)]
pub struct DocumentRecord(Vec<String>);
-impl Decode for DocumentRecord {
+impl TryDecode for DocumentRecord {
type Input = crate::raw::DocumentRecord;
- fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
+ /// Decodes every line of `input` with `decoder`, in order.
+ ///
+ /// This implementation only ever returns `Ok`; decoding problems are
+ /// reported through `warn` by `Decoder::decode_string`.
+ fn try_decode(
+ decoder: &Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Self, Error> {
Ok(DocumentRecord(
input
.lines
.iter()
- .map(|s| decoder.decode_string(&s.0, &warn).into())
+ .map(|s| decoder.decode_string(&s.0, &warn))
.collect(),
))
}
}
}
+/// Extension for results whose errors should be downgraded to warnings.
+trait WarnOnError<T> {
+    /// Returns the success value as `Some`, or passes the error to `warn` and
+    /// returns `None`.
+    fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
+}
+impl<T> WarnOnError<T> for Result<T, Error> {
+    fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
+        // `map_err` hands the error to `warn` (leaving `()` behind), and `ok`
+        // then drops that unit error.
+        self.map_err(warn).ok()
+    }
+}
+
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Value {
Number(Option<OrderedFloat<f64>>),
}
}
+/// A value paired with the label assigned to it.
+#[derive(Clone, Debug)]
+pub struct ValueLabel {
+ /// The value being labeled.
+ pub value: Value,
+ /// The label text for `value`.
+ pub label: String,
+}
+
+/// Decoded value label record: a set of value labels plus a list of variables.
#[derive(Clone, Debug)]
pub struct ValueLabelRecord {
+ /// Type of the values in `labels`.
pub var_type: VarType,
- pub labels: Vec<(Value, String)>,
+ /// The value/label pairs.
+ pub labels: Vec<ValueLabel>,
+ /// Variable names; presumably the variables the labels apply to (confirm
+ /// against `ValueLabelRecord::decode`).
pub variables: Vec<Identifier>,
}
-trait WarnOnError<T> {
- fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T>;
-}
-impl<T> WarnOnError<T> for Result<T, Error> {
- fn warn_on_error<F: Fn(Error)>(self, warn: &F) -> Option<T> {
- match self {
- Ok(result) => Some(result),
- Err(error) => {
- warn(error);
- None
- }
- }
- }
-}
-
impl ValueLabelRecord {
pub fn decode(
decoder: &mut Decoder,
raw::Value::from_raw(*value, var_type, decoder.endian),
&decoder,
);
- (value, label.into())
+ ValueLabel { value, label }
})
.collect();
let variables = variables
}
}
-pub struct LongVariableName {
- pub short_name: String,
- pub long_name: String,
+/// One `short=long` entry from a long variable name record, with both names
+/// validated as identifiers.
+#[derive(Clone, Debug)]
+pub struct LongName {
+ /// The name on the left-hand side of the `=`.
+ pub short_name: Identifier,
+ /// The name on the right-hand side of the `=`.
+ pub long_name: Identifier,
+}
+
+impl LongName {
+    /// Builds a `LongName` from the two halves of a `short=long` pair.
+    ///
+    /// Both names are validated with `Identifier::new` in the decoder's
+    /// encoding.  The decoder is only read, so a shared borrow suffices;
+    /// callers holding a `&mut Decoder` can still pass it unchanged.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::InvalidShortName` if `short_name` is rejected, or
+    /// `Error::InvalidLongName` if `long_name` is rejected.
+    fn new(decoder: &Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
+        let short_name =
+            Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidShortName)?;
+        let long_name =
+            Identifier::new(long_name, decoder.encoding).map_err(Error::InvalidLongName)?;
+        Ok(LongName {
+            short_name,
+            long_name,
+        })
+    }
}
-pub struct LongVariableNameRecord(Vec<LongVariableName>);
+/// Decoded long variable name record: the list of valid `short=long` pairs.
+#[derive(Clone, Debug)]
+pub struct LongNameRecord(Vec<LongName>);
-impl TextRecord for LongVariableNameRecord {
- const NAME: &'static str = "long variable names";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+impl LongNameRecord {
+ /// Parses `input` as tab-separated `short=long` pairs.
+ ///
+ /// Empty segments are skipped.  A segment without `=` is reported to
+ /// `warn` as `Error::TBD`, and a pair whose names fail identifier
+ /// validation is reported to `warn` and dropped.  As written, this
+ /// function always returns `Ok`.
+ pub fn parse(decoder: &mut Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
let mut names = Vec::new();
for pair in input.split('\t').filter(|s| !s.is_empty()) {
if let Some((short_name, long_name)) = pair.split_once('=') {
- let name = LongVariableName {
- short_name: short_name.into(),
- long_name: long_name.into(),
- };
- names.push(name);
+ // Invalid names become warnings rather than aborting the whole record.
+ if let Some(long_name) =
+ LongName::new(decoder, short_name, long_name).warn_on_error(&warn)
+ {
+ names.push(long_name);
+ }
} else {
warn(Error::TBD)
}
}
- Ok(LongVariableNameRecord(names))
+ Ok(LongNameRecord(names))
}
}
+/// One `name=length` entry from a very long string record.
+#[derive(Clone, Debug)]
pub struct VeryLongString {
- pub short_name: String,
- pub length: usize,
+ /// The variable name from the left-hand side of the `=`, validated as an
+ /// identifier.
+ pub short_name: Identifier,
+ /// The length from the right-hand side of the `=`; the parser rejects
+ /// values above `VarWidth::MAX_STRING`.
+ pub length: u16,
}
impl VeryLongString {
- fn parse(input: &str) -> Result<VeryLongString, Error> {
+ fn parse(decoder: &Decoder, input: &str) -> Result<VeryLongString, Error> {
let Some((short_name, length)) = input.split_once('=') else {
return Err(Error::TBD);
};
- let length: usize = length.parse().map_err(|_| Error::TBD)?;
+ let short_name = Identifier::new(short_name, decoder.encoding)
+ .map_err(|e| Error::InvalidLongStringName(e))?;
+ let length: u16 = length.parse().map_err(|_| Error::TBD)?;
+ if length > VarWidth::MAX_STRING {
+ return Err(Error::TBD);
+ }
Ok(VeryLongString {
short_name: short_name.into(),
length,
}
}
+#[derive(Clone, Debug)]
pub struct VeryLongStringRecord(Vec<VeryLongString>);
-impl TextRecord for VeryLongStringRecord {
- const NAME: &'static str = "very long strings";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+impl VeryLongStringRecord {
+ pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
let mut very_long_strings = Vec::new();
for tuple in input
.split('\0')
.map(|s| s.trim_end_matches('\t'))
.filter(|s| !s.is_empty())
{
- if let Some(vls) = VeryLongString::parse(tuple).warn_on_error(&warn) {
+ if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&warn) {
very_long_strings.push(vls)
}
}
}
}
+/// A named attribute together with its list of values.
+#[derive(Clone, Debug)]
pub struct Attribute {
- pub name: String,
+ /// Attribute name, validated as an identifier.
+ pub name: Identifier,
+ /// The attribute's values, in the order they were parsed.
pub values: Vec<String>,
}
impl Attribute {
- fn parse<'a>(input: &'a str, warn: &impl Fn(Error)) -> Result<(Attribute, &'a str), Error> {
+ fn parse<'a>(
+ decoder: &Decoder,
+ input: &'a str,
+ warn: &impl Fn(Error),
+ ) -> Result<(Option<Attribute>, &'a str), Error> {
let Some((name, mut input)) = input.split_once('(') else {
return Err(Error::TBD);
};
values.push(value.into());
}
if let Some(rest) = rest.strip_prefix(')') {
- return Ok((
- Attribute {
- name: name.into(),
- values,
- },
- rest,
- ));
- }
+ let attribute = Identifier::new(name, decoder.encoding)
+ .map_err(|e| Error::InvalidAttributeName(e))
+ .warn_on_error(warn)
+ .map(|name| Attribute { name, values });
+ return Ok((attribute, rest));
+ };
input = rest;
}
}
}
+#[derive(Clone, Debug)]
pub struct AttributeSet(pub Vec<Attribute>);
impl AttributeSet {
fn parse<'a>(
+ decoder: &Decoder,
mut input: &'a str,
sentinel: Option<char>,
warn: &impl Fn(Error),
None => break input,
c if c == sentinel => break &input[1..],
_ => {
- let (attribute, rest) = Attribute::parse(input, &warn)?;
- attributes.push(attribute);
+ let (attribute, rest) = Attribute::parse(decoder, input, &warn)?;
+ if let Some(attribute) = attribute {
+ attributes.push(attribute);
+ }
input = rest;
}
}
}
}
+#[derive(Clone, Debug)]
pub struct FileAttributeRecord(AttributeSet);
-impl TextRecord for FileAttributeRecord {
- const NAME: &'static str = "data file attributes";
- fn parse(input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
- let (set, rest) = AttributeSet::parse(input, None, &warn)?;
+impl FileAttributeRecord {
+ pub fn parse(decoder: &Decoder, input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+ let (set, rest) = AttributeSet::parse(decoder, input, None, &warn)?;
if !rest.is_empty() {
warn(Error::TBD);
}
impl VarAttributeSet {
fn parse<'a>(
+ decoder: &Decoder,
input: &'a str,
warn: &impl Fn(Error),
) -> Result<(VarAttributeSet, &'a str), Error> {
let Some((long_var_name, rest)) = input.split_once(':') else {
return Err(Error::TBD);
};
- let (attributes, rest) = AttributeSet::parse(rest, Some('/'), warn)?;
+ let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
Ok((
VarAttributeSet {
long_var_name: long_var_name.into(),
pub struct VariableAttributeRecord(Vec<VarAttributeSet>);
-impl TextRecord for VariableAttributeRecord {
- const NAME: &'static str = "variable attributes";
- fn parse(mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
+impl VariableAttributeRecord {
+ pub fn parse(decoder: &Decoder, mut input: &str, warn: impl Fn(Error)) -> Result<Self, Error> {
let mut var_attribute_sets = Vec::new();
while !input.is_empty() {
let Some((var_attribute, rest)) =
- VarAttributeSet::parse(input, &warn).warn_on_error(&warn)
+ VarAttributeSet::parse(decoder, input, &warn).warn_on_error(&warn)
else {
break;
};
) -> Result<Self, Error> {
let mr_type = match input {
raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
- let value = decoder.decode_string(&value.0, warn);
+ let value = decoder.decode_string_cow(&value.0, warn);
let value = match min_width {
VarWidth::Numeric => {
- let number: f64 = value.trim().parse()
- .map_err(|_| Error::InvalidMDGroupCountedValue { mr_set: mr_set.clone(), number: value.into() })?;
+ let number: f64 = value.trim().parse().map_err(|_| {
+ Error::InvalidMDGroupCountedValue {
+ mr_set: mr_set.clone(),
+ number: value.into(),
+ }
+ })?;
Value::Number(Some(number.into()))
- },
+ }
VarWidth::String(max_width) => {
let value = value.trim_end_matches(' ');
let width = value.len();
if width > max_width as usize {
- return Err(Error::TooWideMDGroupCountedValue { mr_set: mr_set.clone(), value: value.into(), width, max_width });
+ return Err(Error::TooWideMDGroupCountedValue {
+ mr_set: mr_set.clone(),
+ value: value.into(),
+ width,
+ max_width,
+ });
};
Value::String(value.into())
}
};
- MultipleResponseType::MultipleDichotomy { value, labels: *labels }
- },
+ MultipleResponseType::MultipleDichotomy {
+ value,
+ labels: *labels,
+ }
+ }
raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
};
Ok(mr_type)
.decode_identifier(&input.name.0, warn)
.map_err(|error| Error::InvalidMrSetName(error))?;
- let label = decoder.decode_string(&input.label.0, warn).into();
+ let label = decoder.decode_string(&input.label.0, warn);
let mut dict_indexes = Vec::with_capacity(input.short_names.len());
for short_name in input.short_names.iter() {
return Err(Error::MixedMrSet(mr_set_name));
};
- let mr_type = MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
+ let mr_type =
+ MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
Ok(MultipleResponseSet {
name: mr_set_name,
#[derive(Clone, Debug)]
pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
-impl Decode for MultipleResponseRecord {
+impl TryDecode for MultipleResponseRecord {
type Input = raw::MultipleResponseRecord;
- fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
+ fn try_decode(
+ decoder: &Decoder,
+ input: &Self::Input,
+ warn: impl Fn(Error),
+ ) -> Result<Self, Error> {
let mut sets = Vec::with_capacity(input.0.len());
for set in &input.0 {
match MultipleResponseSet::decode(decoder, set, &warn) {
}
}
+/// Value labels attached to a single long string variable.
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabels {
+    /// Name of the variable the labels belong to.
+    pub var_name: Identifier,
+    /// Width declared for the variable; `decode` always stores a
+    /// `VarWidth::String` here.
+    pub width: VarWidth,
+    /// The decoded value/label pairs.
+    pub labels: Vec<ValueLabel>,
+}
+
+impl LongStringValueLabels {
+    /// Decodes one raw set of long string value labels.
+    ///
+    /// Label text is decoded with `decoder`, and decoding problems are
+    /// reported through `warn`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::InvalidLongStringValueLabelName` if the variable name
+    /// is not a valid identifier, or `Error::InvalidLongValueLabelWidth` if
+    /// the declared width lies outside `[9, VarWidth::MAX_STRING]`.
+    fn decode(
+        decoder: &Decoder,
+        input: &raw::LongStringValueLabels,
+        warn: &impl Fn(Error),
+    ) -> Result<Self, Error> {
+        let var_name = decoder
+            .decode_identifier(&input.var_name.0, warn)
+            .map_err(Error::InvalidLongStringValueLabelName)?;
+
+        // The same bounds drive both the check and the error message, so the
+        // two cannot drift apart.
+        let min_width: u16 = 9;
+        let max_width = VarWidth::MAX_STRING;
+        if input.width < u32::from(min_width) || input.width > u32::from(max_width) {
+            return Err(Error::InvalidLongValueLabelWidth {
+                name: var_name,
+                width: input.width,
+                min_width,
+                max_width,
+            });
+        }
+        // Cannot truncate: the range check above bounds the width by
+        // `max_width`, which is itself a `u16`.
+        let width = input.width as u16;
+
+        let labels = input
+            .labels
+            .iter()
+            .map(|(value, label)| ValueLabel {
+                value: Value::String(decoder.decode_exact_length(&value.0).into()),
+                label: decoder.decode_string(&label.0, warn),
+            })
+            .collect();
+
+        Ok(LongStringValueLabels {
+            var_name,
+            width: VarWidth::String(width),
+            labels,
+        })
+    }
+}
+
+/// Decoded long string value label record: one entry per variable whose
+/// labels decoded successfully.
+#[derive(Clone, Debug)]
+pub struct LongStringValueLabelRecord(pub Vec<LongStringValueLabels>);
+
+impl TryDecode for LongStringValueLabelRecord {
+    type Input = raw::LongStringValueLabelRecord;
+
+    /// Decodes each set of labels in `input`.
+    ///
+    /// A set that fails to decode is reported through `warn` and left out of
+    /// the result, so this implementation only ever returns `Ok`.
+    fn try_decode(
+        decoder: &Decoder,
+        input: &Self::Input,
+        warn: impl Fn(Error),
+    ) -> Result<Self, Error> {
+        let mut decoded = Vec::with_capacity(input.0.len());
+        for raw_labels in &input.0 {
+            if let Some(set) =
+                LongStringValueLabels::decode(decoder, raw_labels, &warn).warn_on_error(&warn)
+            {
+                decoded.push(set);
+            }
+        }
+        Ok(Self(decoded))
+    }
+}
+
#[cfg(test)]
mod test {
use encoding_rs::WINDOWS_1252;