use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
use crate::{
+ endian::Endian,
format::{Error as FormatError, Spec, UncheckedSpec},
identifier::{Error as IdError, Identifier},
raw::{self, MissingValues, VarType},
- CategoryLabels,
- {endian::Endian, Compression},
};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::{DecoderResult, Encoding};
use ordered_float::OrderedFloat;
use thiserror::Error as ThisError;
+pub use crate::raw::{CategoryLabels, Compression};
+
#[derive(ThisError, Debug)]
pub enum Error {
#[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
#[error("Multiple response set {0} contains both string and numeric variables.")]
MixedMrSet(Identifier),
+ #[error("Invalid numeric format for counted value {number} in multiple response set {mr_set}.")]
+ InvalidMDGroupCountedValue { mr_set: Identifier, number: String },
+
+ #[error("Counted value {value} has width {width}, but it must be no wider than {max_width}, the width of the narrowest variable in multiple response set {mr_set}.")]
+ TooWideMDGroupCountedValue { mr_set: Identifier, value: String, width: usize, max_width: u16 },
+
#[error("Details TBD")]
TBD,
}
}
}
- /// Returns the wider of `self` and `other`:
- /// - Numerical variable widths are equally wide.
- /// - Longer strings are wider than shorter strings.
- /// - Numerical and string types are incomparable, so result in `None`.
- /// - Any `None` in the input yields `None` in the output.
- pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
/// Combines two optional widths into one, using `f` to choose the resulting
/// string width:
/// - Two numeric widths yield a numeric width.
/// - Two string widths yield a string width of `f(a, b)`.
/// - Any other combination (mixed types, or a `None` input) yields `None`.
+ fn width_predicate(
+ a: Option<VarWidth>,
+ b: Option<VarWidth>,
+ f: impl Fn(u16, u16) -> u16,
+ ) -> Option<VarWidth> {
match (a, b) {
(Some(VarWidth::Numeric), Some(VarWidth::Numeric)) => Some(VarWidth::Numeric),
(Some(VarWidth::String(a)), Some(VarWidth::String(b))) => {
- Some(VarWidth::String(a.max(b)))
+ Some(VarWidth::String(f(a, b)))
}
_ => None,
}
}
+
+ /// Returns the wider of `a` and `b`:
+ /// - Numerical variable widths are equally wide.
+ /// - Longer strings are wider than shorter strings.
+ /// - Numerical and string types are incomparable, so result in `None`.
+ /// - Any `None` in the input yields `None` in the output.
+ pub fn wider(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+ Self::width_predicate(a, b, |a, b| a.max(b))
+ }
+
+ /// Returns the narrower of `a` and `b`; numeric, mixed, and `None` inputs are handled as in [`Self::wider`].
+ pub fn narrower(a: Option<VarWidth>, b: Option<VarWidth>) -> Option<VarWidth> {
+ Self::width_predicate(a, b, |a, b| a.min(b))
+ }
}
impl From<VarWidth> for VarType {
MultipleCategory,
}
+impl MultipleResponseType {
/// Decodes the raw multiple response type `input` belonging to `mr_set`.
///
/// For a multiple-dichotomy set the counted value is first decoded to text
/// and then interpreted according to `min_width`:
/// - `VarWidth::Numeric`: the text, trimmed of surrounding whitespace, must
///   parse as an `f64`; otherwise `Error::InvalidMDGroupCountedValue` is
///   returned.
/// - `VarWidth::String`: trailing spaces are removed and the remaining
///   length must not exceed the given width; otherwise
///   `Error::TooWideMDGroupCountedValue` is returned.
///
/// A multiple-category set carries no extra data and is passed through.
+ fn decode(
+ decoder: &Decoder,
+ mr_set: &Identifier,
+ input: &raw::MultipleResponseType,
+ min_width: VarWidth,
+ warn: &impl Fn(Error),
+ ) -> Result<Self, Error> {
+ let mr_type = match input {
+ raw::MultipleResponseType::MultipleDichotomy { value, labels } => {
+ let value = decoder.decode_string(&value.0, warn);
+ let value = match min_width {
+ VarWidth::Numeric => {
+ let number: f64 = value.trim().parse()
+ .map_err(|_| Error::InvalidMDGroupCountedValue { mr_set: mr_set.clone(), number: value.into() })?;
+ Value::Number(Some(number.into()))
+ },
// The bound comes from `min_width`; it is bound as `max_width` here
// because it is the upper limit for the counted value's width.
+ VarWidth::String(max_width) => {
+ let value = value.trim_end_matches(' ');
// `str::len` counts bytes of the decoded text.
// NOTE(review): confirm that bytes of the decoded string, rather than
// bytes in the file's own encoding, is the intended measure.
+ let width = value.len();
+ if width > max_width as usize {
+ return Err(Error::TooWideMDGroupCountedValue { mr_set: mr_set.clone(), value: value.into(), width, max_width });
+ };
+ Value::String(value.into())
+ }
+ };
+ MultipleResponseType::MultipleDichotomy { value, labels: *labels }
+ },
+ raw::MultipleResponseType::MultipleCategory => MultipleResponseType::MultipleCategory,
+ };
+ Ok(mr_type)
+ }
+}
+
#[derive(Clone, Debug)]
pub struct MultipleResponseSet {
pub name: Identifier,
+ pub min_width: VarWidth,
+ pub max_width: VarWidth,
pub label: String,
pub mr_type: MultipleResponseType,
pub dict_indexes: Vec<DictIndex>,
decoder: &Decoder,
input: &raw::MultipleResponseSet,
warn: &impl Fn(Error),
- ) -> Result<Option<Self>, Error> {
+ ) -> Result<Self, Error> {
let mr_set_name = decoder
.decode_identifier(&input.name.0, warn)
.map_err(|error| Error::InvalidMrSetName(error))?;
let label = decoder.decode_string(&input.label.0, warn).into();
- let dict_indexes = Vec::with_capacity(input.short_names.len());
- for &short_name in input.short_names.iter() {
+ let mut dict_indexes = Vec::with_capacity(input.short_names.len());
+ for short_name in input.short_names.iter() {
let short_name = match decoder.decode_identifier(&short_name.0, warn) {
Ok(name) => name,
Err(error) => {
continue;
}
};
- let Some(dict_index) = decoder.var_names.get(&short_name) else {
+ let Some(&dict_index) = decoder.var_names.get(&short_name) else {
warn(Error::UnknownMrSetVariable {
mr_set: mr_set_name.clone(),
short_name: short_name.clone(),
_ => (),
}
- let Some(var_width) = dict_indexes
+ let Some((Some(min_width), Some(max_width))) = dict_indexes
.iter()
- .map(|&dict_index| Some(decoder.variables[dict_index].width))
- .reduce(|a, b| VarWidth::wider(a, b))
- .flatten()
+ .map(|dict_index| decoder.variables[dict_index].width)
+ .map(|w| (Some(w), Some(w)))
+ .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
else {
return Err(Error::MixedMrSet(mr_set_name));
};
+
+ let mr_type = MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
+
+ Ok(MultipleResponseSet {
+ name: mr_set_name,
+ min_width,
+ max_width,
+ label,
+ mr_type,
+ dict_indexes,
+ })
}
}
#[derive(Clone, Debug)]
/// The decoded multiple response sets of one record, in a publicly
/// accessible list.
-pub struct MultipleResponseRecord(Vec<MultipleResponseSet>);
+pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
impl Decode for MultipleResponseRecord {
type Input = raw::MultipleResponseRecord;
/// Decodes every set in `input`, keeping those that decode successfully.
///
/// A set that fails to decode is reported through `warn` and left out of
/// the result, so this function itself always returns `Ok`.
fn decode(decoder: &Decoder, input: &Self::Input, warn: impl Fn(Error)) -> Result<Self, Error> {
+ let mut sets = Vec::with_capacity(input.0.len());
+ for set in &input.0 {
+ match MultipleResponseSet::decode(decoder, set, &warn) {
+ Ok(set) => sets.push(set),
// Downgrade the per-set error to a warning and carry on with the rest.
+ Err(error) => warn(error),
+ }
+ }
+ Ok(MultipleResponseRecord(sets))
}
}
use crate::endian::{Endian, Parse, ToBytes};
-use crate::{CategoryLabels, Compression};
use encoding_rs::mem::decode_latin1;
use flate2::read::ZlibDecoder;
from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
}
+#[derive(Copy, Clone, Debug)]
/// Kind of compression.
///
/// NOTE(review): presumably the compression scheme of the file's data, with
/// `Simple` and `ZLib` naming the two supported schemes — confirm against the
/// code that reads the header.
+pub enum Compression {
+ Simple,
+ ZLib,
+}
+
#[derive(Clone)]
pub struct HeaderRecord {
/// Magic number.
}
}
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// Source of category labels.
///
/// NOTE(review): presumably the type of the `labels` field of a
/// multiple-dichotomy set, choosing between variable labels and counted
/// values as the label source — confirm against the record parser.
+pub enum CategoryLabels {
+ VarLabels,
+ CountedValues,
+}
+
#[derive(Clone, Debug)]
pub enum MultipleResponseType {
MultipleDichotomy {
},
MultipleCategory,
}
-#[derive(Clone, Debug)]
-pub struct MultipleResponseSet {
- pub name: UnencodedString,
- pub label: UnencodedString,
- pub mr_type: MultipleResponseType,
- pub short_names: Vec<UnencodedString>,
-}
-impl MultipleResponseSet {
- fn parse(input: &[u8]) -> Result<(MultipleResponseSet, &[u8]), Error> {
- let Some(equals) = input.iter().position(|&b| b == b'=') else {
- return Err(Error::TBD);
- };
- let (name, input) = input.split_at(equals);
+impl MultipleResponseType {
+ fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Error> {
let (mr_type, input) = match input.get(0) {
Some(b'C') => (MultipleResponseType::MultipleCategory, &input[1..]),
Some(b'D') => {
}
_ => return Err(Error::TBD),
};
+ Ok((mr_type, input))
+ }
+}
+
+#[derive(Clone, Debug)]
/// A multiple response set as parsed from the raw record, with all text
/// still held as `UnencodedString`.
+pub struct MultipleResponseSet {
// Name of the set, not yet decoded.
+ pub name: UnencodedString,
// Label of the set, not yet decoded.
+ pub label: UnencodedString,
// Kind of set (multiple dichotomy or multiple category).
+ pub mr_type: MultipleResponseType,
// Short names of the set's variables, not yet decoded.
+ pub short_names: Vec<UnencodedString>,
+}
+
+impl MultipleResponseSet {
/// Parses one multiple response set from `input`.
///
/// The set's name is everything before the first `=`; a missing `=` is
/// reported as `Error::TBD`.
+ fn parse(input: &[u8]) -> Result<(MultipleResponseSet, &[u8]), Error> {
+ let Some(equals) = input.iter().position(|&b| b == b'=') else {
+ return Err(Error::TBD);
+ };
+ let (name, input) = input.split_at(equals);
// NOTE(review): `split_at(equals)` leaves `input` starting at the `=` byte
// itself, while `MultipleResponseType::parse` matches the first byte
// against `b'C'`/`b'D'` — confirm whether the `=` must be skipped first.
+ let (mr_type, input) = MultipleResponseType::parse(input)?;
// The type must be followed by a single space.
let Some(b' ') = input.get(0) else {
return Err(Error::TBD);
};
}
#[derive(Clone, Debug)]
/// The raw multiple response sets held by one extension record, in a
/// publicly accessible list.
-pub struct MultipleResponseRecord(Vec<MultipleResponseSet>);
+pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
impl ExtensionRecord for MultipleResponseRecord {
const SUBTYPE: u32 = 7;
impl From<Extension> for TextRecord {
/// Builds a `TextRecord` from an extension record, carrying over its
/// `offset` and converting its `data` into `text`.
fn from(source: Extension) -> Self {
- TextRecord { offset: source.offset, text: source.data.into() }
+ TextRecord {
+ offset: source.offset,
+ text: source.data.into(),
+ }
}
}