+use core::str;
use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc};
use crate::{
- dictionary::{Dictionary, Value, VarWidth, Variable},
+ dictionary::{
+ Dictionary, MultipleResponseSet, MultipleResponseType, Value, VarWidth, Variable,
+ },
encoding::Error as EncodingError,
endian::Endian,
format::{Error as FormatError, Format, UncheckedFormat},
- identifier::{Error as IdError, Identifier},
+ identifier::{ByIdentifier, Error as IdError, Identifier},
raw::{
self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension, FileAttributeRecord,
FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongNamesRecord,
LongStringMissingValueRecord, LongStringValueLabelRecord, MultipleResponseRecord,
- NumberOfCasesRecord, ProductInfoRecord, RawStr, ValueLabel, ValueLabelRecord,
+ NumberOfCasesRecord, ProductInfoRecord, RawStr, RawWidth, ValueLabel, ValueLabelRecord,
VarDisplayRecord, VariableAttributeRecord, VariableRecord, VariableSetRecord,
VeryLongStringsRecord, ZHeader, ZTrailer,
},
.variable
.iter()
.enumerate()
- .filter(|(_index, record)| record.width != -1)
+ .filter(|(_index, record)| record.width != RawWidth::Continuation)
{
let name = trim_end_spaces(input.name.to_string());
let name = match Identifier::from_encoding(&name, encoding) {
);
// Check for long string continuation records.
- let n_values = input.n_values().unwrap();
+ let n_values = input.width.n_values().unwrap();
for offset in 1..n_values {
if headers
.variable
.get(index + offset)
- .is_none_or(|record| record.width != -1)
+ .is_none_or(|record| record.width != RawWidth::Continuation)
{
warn(Error::TBD);
break;
}
}
+ if let Some(display) = &headers.var_display {
+ for (index, display) in display.0.iter().enumerate() {
+ if let Some(variable) = dictionary.variables.get_index_mut2(index) {
+ if let Some(width) = display.width {
+ variable.display_width = width;
+ }
+ if let Some(alignment) = display.alignment {
+ variable.alignment = alignment;
+ }
+ if let Some(measure) = display.measure {
+ variable.measure = Some(measure);
+ }
+ } else {
+ warn(Error::TBD);
+ }
+ }
+ }
+
+ for record in headers
+ .multiple_response
+ .iter()
+ .flat_map(|record| record.0.iter())
+ {
+ match MultipleResponseSet::decode(&dictionary, record, &warn) {
+ Ok(mrset) => {
+ dictionary.mrsets.insert(ByIdentifier::new(mrset));
+ }
+ Err(error) => warn(error),
+ }
+ }
+
let metadata = Metadata::decode(&headers, warn);
Ok((dictionary, metadata))
}
+impl MultipleResponseSet {
+    /// Builds a dictionary-level multiple response set from its raw record.
+    ///
+    /// Every short name listed in `input` is looked up in `dictionary`. Names
+    /// that cannot be found are reported through `warn` and left out of the
+    /// resulting set; decoding then continues with the remaining names.
+    ///
+    /// # Errors
+    ///
+    /// * `Error::EmptyMrSet` if no listed variable could be resolved.
+    /// * `Error::OneVarMrSet` if exactly one variable could be resolved.
+    /// * `Error::MixedMrSet` if combining the member widths with
+    ///   `VarWidth::narrower` / `VarWidth::wider` does not yield both a
+    ///   minimum and a maximum width (presumably a numeric/string mix --
+    ///   confirm against `VarWidth`).
+    /// * Any error returned by `MultipleResponseType::decode`.
+    fn decode(
+        dictionary: &Dictionary,
+        input: &raw::MultipleResponseSet<Identifier, String>,
+        warn: &impl Fn(Error),
+    ) -> Result<Self, Error> {
+        let name = input.name.clone();
+
+        // Resolve short names to dictionary indexes, dropping unknown ones.
+        let mut variables = Vec::with_capacity(input.short_names.len());
+        for short_name in input.short_names.iter() {
+            match dictionary.variables.get_index_of(&short_name.0) {
+                Some(dict_index) => variables.push(dict_index),
+                None => warn(Error::UnknownMrSetVariable {
+                    mr_set: name.clone(),
+                    short_name: short_name.clone(),
+                }),
+            }
+        }
+
+        // A set needs at least two member variables.
+        if variables.is_empty() {
+            return Err(Error::EmptyMrSet(name));
+        }
+        if variables.len() == 1 {
+            return Err(Error::OneVarMrSet(name));
+        }
+
+        // Fold the member widths into a (narrowest, widest) pair.
+        let widths = variables
+            .iter()
+            .map(|&dict_index| dictionary.variables[dict_index].width)
+            .map(|width| (Some(width), Some(width)))
+            .reduce(|(min_a, max_a), (min_b, max_b)| {
+                (
+                    VarWidth::narrower(min_a, min_b),
+                    VarWidth::wider(max_a, max_b),
+                )
+            });
+        let Some((Some(min_width), Some(max_width))) = widths else {
+            return Err(Error::MixedMrSet(name));
+        };
+
+        let mr_type = MultipleResponseType::decode(&name, &input.mr_type, min_width, warn)?;
+
+        Ok(Self {
+            name,
+            min_width,
+            max_width,
+            label: input.label.to_string(),
+            mr_type,
+            variables,
+        })
+    }
+}
+
/// Returns `s` with every trailing ASCII space (`' '`) removed.
///
/// Only the space character is stripped; other trailing whitespace such as
/// tabs or newlines is kept. The input string's buffer is reused.
fn trim_end_spaces(mut s: String) -> String {
    while s.ends_with(' ') {
        s.pop();
    }
    s
}
})
}
+impl MultipleResponseType {
+    /// Converts a raw multiple response type into its decoded form.
+    ///
+    /// A multiple category set carries no extra data and is passed through.
+    /// For a multiple dichotomy set, the counted value is interpreted
+    /// according to `min_width`:
+    ///
+    /// * numeric: the bytes are read as (lossy) UTF-8, surrounding whitespace
+    ///   is trimmed, and the text is parsed as an `f64`;
+    /// * string: trailing spaces are removed and the remaining length must
+    ///   not exceed the string width.
+    ///
+    /// `mr_set` is only used to label errors. `warn` is not used by the
+    /// current implementation.
+    ///
+    /// # Errors
+    ///
+    /// * `Error::InvalidMDGroupCountedValue` if a numeric counted value does
+    ///   not parse.
+    /// * `Error::TooWideMDGroupCountedValue` if a string counted value is
+    ///   longer than the width allows.
+    fn decode(
+        mr_set: &Identifier,
+        input: &raw::MultipleResponseType,
+        min_width: VarWidth,
+        warn: &impl Fn(Error),
+    ) -> Result<Self, Error> {
+        // Only the dichotomy case needs further work.
+        let (value, labels) = match input {
+            raw::MultipleResponseType::MultipleCategory => {
+                return Ok(Self::MultipleCategory);
+            }
+            raw::MultipleResponseType::MultipleDichotomy { value, labels } => (value, labels),
+        };
+
+        let counted = match min_width {
+            VarWidth::Numeric => {
+                let text = String::from_utf8_lossy(&value.0);
+                let parsed = text.trim().parse::<f64>();
+                match parsed {
+                    Ok(number) => Value::Number(Some(number)),
+                    Err(_) => {
+                        return Err(Error::InvalidMDGroupCountedValue {
+                            mr_set: mr_set.clone(),
+                            number: text.into(),
+                        });
+                    }
+                }
+            }
+            VarWidth::String(max_width) => {
+                let bytes = value.0.as_slice();
+                // Length once trailing spaces are dropped: one past the last
+                // non-space byte, or zero if there is none.
+                let width = bytes
+                    .iter()
+                    .rposition(|&byte| byte != b' ')
+                    .map_or(0, |last| last + 1);
+                let trimmed = &bytes[..width];
+                if width > max_width as usize {
+                    return Err(Error::TooWideMDGroupCountedValue {
+                        mr_set: mr_set.clone(),
+                        value: String::from_utf8_lossy(trimmed).into(),
+                        width,
+                        max_width,
+                    });
+                }
+                Value::String(trimmed.into())
+            }
+        };
+
+        Ok(Self::MultipleDichotomy {
+            value: counted,
+            labels: *labels,
+        })
+    }
+}
+
/*
impl Decoder {
fn generate_name(&mut self) -> Identifier {
pub dict_indexes: Vec<DictIndex>,
}
-impl MultipleResponseSet {
- fn decode(
- decoder: &Decoder,
- input: &raw::MultipleResponseSet<Identifier, Cow<str>>,
- warn: &impl Fn(Error),
- ) -> Result<Self, Error> {
- let mr_set_name = input.name.clone();
- let mut dict_indexes = Vec::with_capacity(input.short_names.len());
- for short_name in input.short_names.iter() {
- let Some(&dict_index) = decoder.var_names.get(&short_name) else {
- warn(Error::UnknownMrSetVariable {
- mr_set: mr_set_name.clone(),
- short_name: short_name.clone(),
- });
- continue;
- };
- dict_indexes.push(dict_index);
- }
-
- match dict_indexes.len() {
- 0 => return Err(Error::EmptyMrSet(mr_set_name)),
- 1 => return Err(Error::OneVarMrSet(mr_set_name)),
- _ => (),
- }
-
- let Some((Some(min_width), Some(max_width))) = dict_indexes
- .iter()
- .map(|dict_index| decoder.variables[dict_index].width)
- .map(|w| (Some(w), Some(w)))
- .reduce(|(na, wa), (nb, wb)| (VarWidth::narrower(na, nb), VarWidth::wider(wa, wb)))
- else {
- return Err(Error::MixedMrSet(mr_set_name));
- };
-
- let mr_type =
- MultipleResponseType::decode(decoder, &mr_set_name, &input.mr_type, min_width, warn)?;
-
- Ok(MultipleResponseSet {
- name: mr_set_name,
- min_width,
- max_width,
- label: input.label.to_string(),
- mr_type,
- dict_indexes,
- })
- }
-}
/// Tuple struct wrapping a vector of `MultipleResponseSet` values; the vector
/// is exposed directly as the public field `.0`.
#[derive(Clone, Debug)]
pub struct MultipleResponseRecord(pub Vec<MultipleResponseSet>);
use crate::{
format::Format,
identifier::{ByIdentifier, HasIdentifier, Identifier},
- raw::{Alignment, CategoryLabels, Measure, MissingValues, VarType},
+ raw::{Alignment, CategoryLabels, Measure, MissingValues, RawWidth, VarType},
};
pub type DictIndex = usize;
}
}
- pub fn from_raw(raw: impl Into<i32>) -> Result<Self, ()> {
- let raw: i32 = raw.into();
+ pub fn from_raw(raw: RawWidth) -> Result<Self, ()> {
match raw {
- 0 => Ok(Self::Numeric),
- 1..=255 => Ok(Self::String(raw as u16)),
- _ => Err(()),
+ RawWidth::Continuation => Err(()),
+ RawWidth::Numeric => Ok(Self::Numeric),
+ RawWidth::String(width) => Ok(Self::String(width.get() as u16)),
}
}
pub struct MultipleResponseSet {
pub name: Identifier,
pub label: String,
+ pub min_width: VarWidth,
+ pub max_width: VarWidth,
pub mr_type: MultipleResponseType,
pub variables: Vec<DictIndex>,
}
use std::{
borrow::Cow,
cell::RefCell,
- cmp::Ordering,
collections::{HashMap, VecDeque},
fmt::{Debug, Display, Formatter, Result as FmtResult},
io::{Error as IoError, Read, Seek, SeekFrom},
iter::repeat,
mem::take,
+ num::NonZeroU8,
ops::Range,
rc::Rc,
str::from_utf8,
}
}
+impl TryFrom<RawWidth> for VarType {
+    type Error = ();
+
+    /// Maps a raw width to its variable type.
+    ///
+    /// Numeric and string widths convert to the matching `VarType`; a
+    /// continuation width has no type of its own and yields `Err(())`.
+    fn try_from(value: RawWidth) -> Result<Self, Self::Error> {
+        let var_type = match value {
+            RawWidth::Numeric => Self::Numeric,
+            RawWidth::String(_) => Self::String,
+            RawWidth::Continuation => return Err(()),
+        };
+        Ok(var_type)
+    }
+}
+
#[derive(Copy, Clone)]
pub enum Value<S>
where
) -> Result<Option<Vec<Self>>, Error> {
let case_start = reader.stream_position()?;
let mut values = Vec::with_capacity(var_types.n_values());
- for (i, (var_type, _)) in var_types.types.iter().enumerate() {
+ for (i, var_type) in var_types.iter().enumerate() {
let Some(raw) = try_read_bytes(reader)? else {
if i == 0 {
return Ok(None);
});
}
};
- values.push(Value::from_raw(&UntypedValue(raw), *var_type, endian));
+ values.push(Value::from_raw(&UntypedValue(raw), var_type, endian));
}
Ok(Some(values))
}
) -> Result<Option<Vec<Self>>, Error> {
let case_start = reader.stream_position()?;
let mut values = Vec::with_capacity(var_types.n_values());
- for (i, (var_type, _)) in var_types.types.iter().enumerate() {
+ for (i, var_type) in var_types.iter().enumerate() {
let value = loop {
let Some(code) = codes.pop_front() else {
let Some(new_codes): Option<[u8; 8]> = try_read_bytes(reader)? else {
};
match code {
0 => (),
- 1..=251 => match *var_type {
+ 1..=251 => match var_type {
VarType::Numeric => break Self::Number(Some(code as f64 - bias)),
VarType::String => {
break Self::String(RawStr(endian.to_bytes(code as f64 - bias)))
}
}
253 => {
- break Self::from_raw(&UntypedValue(read_bytes(reader)?), *var_type, endian)
+ break Self::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian)
}
- 254 => match *var_type {
+ 254 => match var_type {
VarType::String => break Self::String(RawStr(*b" ")), // XXX EBCDIC
VarType::Numeric => {
return Err(Error::CompressedStringExpected {
})
}
},
- 255 => match *var_type {
+ 255 => match var_type {
VarType::Numeric => break Self::Number(None),
VarType::String => {
return Err(Error::CompressedNumberExpected {
fn read<R: Read + Seek>(
r: &mut R,
offset: u64,
- width: i32,
+ width: RawWidth,
code: i32,
endian: Endian,
) -> Result<Self, Error> {
let (n_values, has_range) = match (width, code) {
(_, 0..=3) => (code, false),
- (0, -2) => (0, true),
- (0, -3) => (1, true),
- (0, _) => return Err(Error::BadNumericMissingValueCode { offset, code }),
+ (RawWidth::Numeric, -2) => (0, true),
+ (RawWidth::Numeric, -3) => (1, true),
+ (RawWidth::Numeric, _) => {
+ return Err(Error::BadNumericMissingValueCode { offset, code })
+ }
(_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
};
- let var_type = if width == 0 {
- VarType::Numeric
- } else {
- VarType::String
- };
+ let var_type = VarType::try_from(width).unwrap();
let mut values = Vec::new();
for _ in 0..n_values {
pub offsets: Range<u64>,
/// Variable width, in the range -1..=255.
- pub width: i32,
+ pub width: RawWidth,
/// Variable name, padded on the right with spaces.
pub name: S,
pub label: Option<S>,
}
-impl<S, V> VariableRecord<S, V>
-where
- S: Debug,
- V: Debug,
-{
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum RawWidth { // Width of a variable record, decoded from its raw `i32` form.
+ Continuation, // Raw value -1: long string continuation.
+ Numeric, // Raw value 0: numeric variable.
+ String(NonZeroU8), // Raw values 1..=255: string of that many bytes.
+}
+
+impl RawWidth {
pub fn n_values(&self) -> Option<usize> {
- match self.width {
- 0 => Some(1),
- 1..=255 => Some((self.width as usize).div_ceil(8)),
+ match self {
+ RawWidth::Numeric => Some(1),
+ RawWidth::String(width) => Some((width.get() as usize).div_ceil(8)),
_ => None,
}
}
}
+impl TryFrom<i32> for RawWidth {
+    type Error = ();
+
+    /// Decodes a raw integer width.
+    ///
+    /// `-1` is a continuation, `0` is numeric, and `1..=255` is a string of
+    /// that many bytes. Every other value is rejected with `Err(())`.
+    fn try_from(value: i32) -> Result<Self, Self::Error> {
+        match value {
+            -1 => Ok(Self::Continuation),
+            0 => Ok(Self::Numeric),
+            // Zero was handled above, so anything that fits in a `u8` here is
+            // in 1..=255; negative or larger values fail the conversion.
+            _ => u8::try_from(value)
+                .ok()
+                .and_then(NonZeroU8::new)
+                .map(Self::String)
+                .ok_or(()),
+        }
+    }
+}
+
+impl Display for RawWidth {
+    /// Writes a short human-readable description of the width.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        match *self {
+            Self::Continuation => f.write_str("long string continuation"),
+            Self::Numeric => f.write_str("numeric"),
+            Self::String(width) => write!(f, "{width}-byte string"),
+        }
+    }
+}
+
impl<S, V> Debug for VariableRecord<S, V>
where
S: Debug,
V: Debug,
{
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- writeln!(
- f,
- "Width: {} ({})",
- self.width,
- match self.width.cmp(&0) {
- Ordering::Greater => "string",
- Ordering::Equal => "numeric",
- Ordering::Less => "long string continuation record",
- }
- )?;
+ writeln!(f, "Width: {}", self.width,)?;
writeln!(f, "Print format: {:?}", self.print_format)?;
writeln!(f, "Write format: {:?}", self.write_format)?;
writeln!(f, "Name: {:?}", &self.name)?;
fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
let start_offset = r.stream_position()?;
let width: i32 = endian.parse(read_bytes(r)?);
- if !(-1..=255).contains(&width) {
- return Err(Error::BadVariableWidth {
- start_offset,
- width,
- });
- }
+ let width: RawWidth = width.try_into().map_err(|_| Error::BadVariableWidth {
+ start_offset,
+ width,
+ })?;
let code_offset = r.stream_position()?;
let has_variable_label: u32 = endian.parse(read_bytes(r)?);
let missing_value_code: i32 = endian.parse(read_bytes(r)?);
let Some(&first_index) = dict_indexes.first() else {
return Ok(None);
};
- let var_type = var_types.types[first_index as usize - 1].0;
+ let var_type = var_types.types[first_index as usize - 1].unwrap();
let mut wrong_type_indexes = Vec::new();
dict_indexes.retain(|&index| {
- if var_types.types[index as usize - 1].0 != var_type {
+ if var_types.types[index as usize - 1] != Some(var_type) {
wrong_type_indexes.push(index);
false
} else {
#[derive(Default)]
pub struct VarTypes {
- pub types: Vec<(VarType, usize)>,
+ pub types: Vec<Option<VarType>>,
}
impl VarTypes {
Self::default()
}
- pub fn push(&mut self, width: i32) {
- let var_type = match width {
- -1 => return,
- 0 => VarType::Numeric,
- 1..=255 => VarType::String,
- _ => unreachable!(),
- };
- let n_values = (width as usize).div_ceil(8).max(1);
- for i in 0..n_values {
- self.types.push((var_type, i));
+ pub fn push(&mut self, width: RawWidth) {
+ if let Ok(var_type) = VarType::try_from(width) {
+ self.types.push(Some(var_type));
+ for _ in 1..width.n_values().unwrap() {
+ self.types.push(None);
+ }
}
}
pub fn var_type_at(&self, index: usize) -> Option<VarType> {
if index >= 1 && index <= self.types.len() {
- if let (var_type, 0) = self.types[index - 1] {
- return Some(var_type);
- }
+ self.types[index - 1]
+ } else {
+ None
}
- None
+ }
+
+    /// Iterates over one `VarType` per entry in `types`.
+    ///
+    /// Entries stored as `None` (which `push` adds for the extra value slots
+    /// of a string wider than one slot) are reported as `VarType::String`.
+    pub fn iter(&self) -> impl Iterator<Item = VarType> + use<'_> {
+        self.types.iter().map(|slot| match *slot {
+            Some(var_type) => var_type,
+            None => VarType::String,
+        })
+    }
}