use crate::{
dictionary::{
Dictionary, MultipleResponseSet, MultipleResponseType, Value, VarWidth, Variable,
+ VariableSet,
},
encoding::Error as EncodingError,
endian::Endian,
raw::{
self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension, FileAttributeRecord,
FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, LongNamesRecord,
- LongStringMissingValueRecord, LongStringValueLabelRecord, MultipleResponseRecord,
- NumberOfCasesRecord, ProductInfoRecord, RawStr, RawWidth, ValueLabel, ValueLabelRecord,
- VarDisplayRecord, VariableAttributeRecord, VariableRecord, VariableSetRecord,
- VeryLongStringsRecord, ZHeader, ZTrailer,
+ LongStringMissingValueRecord, LongStringValueLabelRecord, MissingValues,
+ MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord, RawStr, RawWidth,
+ ValueLabel, ValueLabelRecord, VarDisplayRecord, VariableAttributeRecord, VariableRecord,
+ VariableSetRecord, VeryLongStringsRecord, ZHeader, ZTrailer,
},
};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
#[derive(Clone, Debug)]
pub struct Headers {
pub header: HeaderRecord<String>,
- pub variable: Vec<VariableRecord<String, String>>,
+ pub variable: Vec<VariableRecord<String>>,
pub value_label: Vec<ValueLabelRecord<RawStr<8>, String>>,
pub document: Vec<DocumentRecord<String>>,
pub integer_info: Option<IntegerInfoRecord>,
pub var_display: Option<VarDisplayRecord>,
pub multiple_response: Vec<MultipleResponseRecord<Identifier, String>>,
pub long_string_value_labels: Vec<LongStringValueLabelRecord<Identifier, String>>,
- pub long_string_missing_values: Vec<LongStringMissingValueRecord<Identifier, String>>,
+ pub long_string_missing_values: Vec<LongStringMissingValueRecord<Identifier>>,
pub encoding: Option<EncodingRecord>,
pub number_of_cases: Option<NumberOfCasesRecord>,
pub variable_sets: Vec<VariableSetRecord>,
new_name
}
};
- let mut variable = Variable::new(name.clone(), VarWidth::from_raw(input.width).unwrap());
+ let mut variable = Variable::new(name.clone(), VarWidth::try_from(input.width).unwrap());
// Set the short name the same as the long name (even if we renamed it).
variable.short_names = vec![name];
for dict_index in dict_indexes {
let variable = dictionary.variables.get_index_mut2(dict_index).unwrap();
for ValueLabel { value, label } in record.labels.iter().cloned() {
- let value = match value {
- raw::Value::Number(number) => Value::Number(number.map(|n| n.into())),
- raw::Value::String(string) => {
- string.0[..variable.width.as_string_width().unwrap()].into()
- }
- };
+ let value = value.decode(variable.width);
variable.value_labels.insert(value, label);
}
}
for (mut value, label) in record.labels.into_iter() {
// XXX warn about too-long value?
value.0.resize(width, b' ');
- // XXX warn abouat duplicat value labels?
+ // XXX warn about duplicate value labels?
variable
.value_labels
.insert(Value::String(value.0.into_boxed_slice()), label);
}
}
+ // Apply the long string missing value records to their variables.
+ //
+ // `value` is a scratch buffer reused across values so that padding each one
+ // to the variable's width does not allocate a fresh `Vec` every time.
+ let mut value = Vec::new();
+ for record in headers
+ .long_string_missing_values
+ .drain(..)
+ .flat_map(|record| record.0.into_iter())
+ {
+ let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
+ warn(Error::TBD);
+ continue;
+ };
+ // The record comes straight from the file, so it may name a variable that
+ // is not a string. Warn and skip it, as for an unknown variable, rather
+ // than panicking on input we do not control.
+ let Some(width) = variable.width.as_string_width() else {
+ warn(Error::TBD);
+ continue;
+ };
+ let values = record
+ .missing_values
+ .into_iter()
+ .map(|v| {
+ // Each stored value is space-padded (or cut) to the variable's width.
+ value.clear();
+ value.extend_from_slice(v.0.as_slice());
+ value.resize(width, b' ');
+ Value::String(Box::from(value.as_slice()))
+ })
+ .collect::<Vec<_>>();
+ variable.missing_values = MissingValues {
+ values,
+ range: None,
+ };
+ }
+
+ for record in headers
+ .variable_sets
+ .drain(..)
+ .flat_map(|record| record.sets.into_iter())
+ {
+ let mut variables = Vec::with_capacity(record.variable_names.len());
+ for variable_name in record.variable_names {
+ let Some((dict_index, _)) = dictionary.variables.get_full_mut2(&variable_name.0) else {
+ warn(Error::TBD);
+ continue;
+ };
+ variables.push(dict_index);
+ }
+ if !variables.is_empty() {
+ let variable_set = VariableSet {
+ name: record.name,
+ variables,
+ };
+ dictionary
+ .variable_sets
+ .insert(ByIdentifier::new(variable_set));
+ }
+ }
+
let metadata = Metadata::decode(&headers, warn);
Ok((dictionary, metadata))
}
use crate::{
- dictionary::{Attributes, VarWidth},
+ dictionary::{Attributes, Value, VarWidth},
encoding::{default_encoding, get_encoding, Error as EncodingError},
endian::{Endian, Parse, ToBytes},
identifier::{Error as IdError, Identifier},
code: u32,
},
+ #[error("At offset {offset:#x}, missing value code ({code}) is not -3, -2, 0, 1, 2, or 3.")]
+ BadMissingValueCode { offset: u64, code: i32 },
+
#[error(
"At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
)]
#[derive(Clone, Debug)]
pub enum Record {
Header(HeaderRecord<RawString>),
- Variable(VariableRecord<RawString, RawStr<8>>),
+ Variable(VariableRecord<RawString>),
ValueLabel(ValueLabelRecord<RawStr<8>, RawString>),
Document(DocumentRecord<RawDocumentLine>),
IntegerInfo(IntegerInfoRecord),
VarDisplay(VarDisplayRecord),
MultipleResponse(MultipleResponseRecord<RawString, RawString>),
LongStringValueLabels(LongStringValueLabelRecord<RawString, RawString>),
- LongStringMissingValues(LongStringMissingValueRecord<RawString, RawStr<8>>),
+ LongStringMissingValues(LongStringMissingValueRecord<RawString>),
Encoding(EncodingRecord),
NumberOfCases(NumberOfCasesRecord),
Text(TextRecord),
#[derive(Clone, Debug)]
pub enum DecodedRecord {
Header(HeaderRecord<String>),
- Variable(VariableRecord<String, String>),
+ Variable(VariableRecord<String>),
ValueLabel(ValueLabelRecord<RawStr<8>, String>),
Document(DocumentRecord<String>),
IntegerInfo(IntegerInfoRecord),
VarDisplay(VarDisplayRecord),
MultipleResponse(MultipleResponseRecord<Identifier, String>),
LongStringValueLabels(LongStringValueLabelRecord<Identifier, String>),
- LongStringMissingValues(LongStringMissingValueRecord<Identifier, String>),
+ LongStringMissingValues(LongStringMissingValueRecord<Identifier>),
Encoding(EncodingRecord),
NumberOfCases(NumberOfCasesRecord),
VariableSets(VariableSetRecord),
{
let rec_type: u32 = endian.parse(read_bytes(reader)?);
match rec_type {
- 2 => Ok(Some(VariableRecord::read(reader, endian)?)),
+ 2 => Ok(Some(VariableRecord::read(reader, endian, warn)?)),
3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
7 => Extension::read(reader, endian, var_types.n_values(), warn),
}
impl VarType {
- pub fn from_width(width: VarWidth) -> VarType {
- match width {
- VarWidth::Numeric => Self::Numeric,
- VarWidth::String(_) => Self::String,
- }
- }
-
pub fn opposite(self) -> VarType {
match self {
Self::Numeric => Self::String,
}
}
-#[derive(Copy, Clone)]
-pub enum Value<S>
-where
- S: Debug,
-{
- Number(Option<f64>),
- String(S),
-}
-
-type RawValue = Value<RawStr<8>>;
+impl TryFrom<RawWidth> for VarWidth {
+ type Error = ();
-impl<S> Debug for Value<S>
-where
- S: Debug,
-{
- fn fmt(&self, f: &mut Formatter) -> FmtResult {
- match self {
- Value::Number(Some(number)) => write!(f, "{number:?}"),
- Value::Number(None) => write!(f, "SYSMIS"),
- Value::String(s) => write!(f, "{:?}", s),
+ fn try_from(value: RawWidth) -> Result<Self, Self::Error> {
+ match value {
+ RawWidth::Continuation => Err(()),
+ RawWidth::Numeric => Ok(Self::Numeric),
+ RawWidth::String(width) => Ok(Self::String(width.get() as u16)),
}
}
}
+type RawValue = Value<RawStr<8>>;
+
impl RawValue {
fn read<R: Read>(r: &mut R, var_type: VarType, endian: Endian) -> Result<Self, IoError> {
Ok(Self::from_raw(
pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Self {
match var_type {
VarType::String => Value::String(RawStr(raw.0)),
- VarType::Numeric => {
- let number: f64 = endian.parse(raw.0);
- Value::Number((number != -f64::MAX).then_some(number))
- }
+ VarType::Numeric => Value::Number(endian.parse(raw.0)),
}
}
Ok(Some(values))
}
- pub fn decode(self, decoder: &Decoder) -> Value<String> {
+ pub fn decode(&self, width: VarWidth) -> Value {
match self {
- Self::Number(x) => Value::Number(x),
- Self::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()),
+ Self::Number(x) => Value::Number(*x),
+ Self::String(s) => {
+ let width = width.as_string_width().unwrap();
+ Value::String(Box::from(&s.0[..width]))
+ }
}
}
}
}
#[derive(Clone)]
-pub struct MissingValues<S = String>
+pub struct MissingValues<S = Box<[u8]>>
where
S: Debug,
{
}
}
-impl MissingValues<RawStr<8>> {
+impl MissingValues {
+ /// Reads the missing values that follow a variable record's fixed fields.
+ ///
+ /// `code` selects the layout: 0 means no missing values, 1 to 3 mean that
+ /// many discrete values, -2 means a range, and -3 means a range plus one
+ /// discrete value. Each value is read as an 8-byte field from `r`.
+ ///
+ /// Numeric values are decoded with `endian`. For strings of width 8 or
+ /// less without a range, each value is cut to the variable's width. Any
+ /// other combination (a wider string, a string range, or a `width` that
+ /// does not convert to a `VarWidth`) is reported through `warn` and
+ /// `Self::default()` is returned instead.
+ ///
+ /// # Errors
+ ///
+ /// Returns `Error::BadMissingValueCode` (carrying `offset`) when `code` is
+ /// none of the values above, and propagates failures from reading `r`.
fn read<R: Read + Seek>(
r: &mut R,
offset: u64,
width: RawWidth,
code: i32,
endian: Endian,
+ warn: &dyn Fn(Warning),
) -> Result<Self, Error> {
- let (n_values, has_range) = match (width, code) {
- (_, 0..=3) => (code, false),
- (RawWidth::Numeric, -2) => (0, true),
- (RawWidth::Numeric, -3) => (1, true),
- (RawWidth::Numeric, _) => {
- return Err(Error::BadNumericMissingValueCode { offset, code })
- }
- (_, _) => return Err(Error::BadStringMissingValueCode { offset, code }),
+ let (individual_values, has_range) = match code {
+ // No missing values: nothing to read and nothing to complain about.
+ // Returning here keeps the width checks below from warning for every
+ // record whose width cannot carry missing values but has none anyway.
+ 0 => return Ok(Self::default()),
+ 1..=3 => (code as usize, false),
+ -2 => (0, true),
+ -3 => (1, true),
+ _ => return Err(Error::BadMissingValueCode { offset, code }),
};
- let var_type = VarType::try_from(width).unwrap();
-
- let mut values = Vec::new();
- for _ in 0..n_values {
- values.push(RawValue::read(r, var_type, endian)?);
+ let mut values = Vec::with_capacity(individual_values);
+ for _ in 0..individual_values {
+ values.push(read_bytes::<8, _>(r)?);
}
let range = if has_range {
- let low = RawValue::read(r, var_type, endian)?;
- let high = RawValue::read(r, var_type, endian)?;
+ let low = read_bytes::<8, _>(r)?;
+ let high = read_bytes::<8, _>(r)?;
Some((low, high))
} else {
None
};
- Ok(Self { values, range })
- }
- fn decode(&self, decoder: &Decoder) -> MissingValues<String> {
- MissingValues {
- values: self
- .values
- .iter()
- .map(|value| value.decode(decoder))
- .collect(),
- range: self
- .range
- .as_ref()
- .map(|(low, high)| (low.decode(decoder), high.decode(decoder))),
+
+ // All fields have been consumed from `r` by this point; the width only
+ // decides how the raw bytes are interpreted, or whether they are dropped.
+ match VarWidth::try_from(width) {
+ Ok(VarWidth::Numeric) => {
+ let values = values
+ .into_iter()
+ .map(|v| Value::Number(endian.parse(v)))
+ .collect();
+ let range = range.map(|(low, high)| {
+ (
+ Value::Number(endian.parse(low)),
+ Value::Number(endian.parse(high)),
+ )
+ });
+ return Ok(Self { values, range });
+ }
+ Ok(VarWidth::String(width)) if width <= 8 && range.is_none() => {
+ let values = values
+ .into_iter()
+ .map(|value| Value::String(Box::from(&value[..width as usize])))
+ .collect();
+ return Ok(Self {
+ values,
+ range: None,
+ });
+ }
+ Ok(VarWidth::String(width)) if width > 8 => warn(Warning::TBD),
+ Ok(VarWidth::String(_)) => warn(Warning::TBD),
+ Err(()) => warn(Warning::TBD),
}
+ Ok(Self::default())
}
}
#[derive(Clone)]
-pub struct VariableRecord<S, V>
+pub struct VariableRecord<S>
where
S: Debug,
- V: Debug,
{
/// Range of offsets in file.
pub offsets: Range<u64>,
pub write_format: Spec,
/// Missing values.
- pub missing_values: MissingValues<V>,
+ pub missing_values: MissingValues,
/// Optional variable label.
pub label: Option<S>,
}
}
-impl<S, V> Debug for VariableRecord<S, V>
+impl<S> Debug for VariableRecord<S>
where
S: Debug,
- V: Debug,
{
fn fmt(&self, f: &mut Formatter) -> FmtResult {
writeln!(f, "Width: {}", self.width,)?;
}
}
-impl VariableRecord<RawString, RawStr<8>> {
- fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<Record, Error> {
+impl VariableRecord<RawString> {
+ fn read<R: Read + Seek>(
+ r: &mut R,
+ endian: Endian,
+ warn: &dyn Fn(Warning),
+ ) -> Result<Record, Error> {
let start_offset = r.stream_position()?;
let width: i32 = endian.parse(read_bytes(r)?);
let width: RawWidth = width.try_into().map_err(|_| Error::BadVariableWidth {
};
let missing_values =
- MissingValues::read(r, start_offset, width, missing_value_code, endian)?;
+ MissingValues::read(r, start_offset, width, missing_value_code, endian, warn)?;
let end_offset = r.stream_position()?;
name: decoder.decode(&self.name).to_string(),
print_format: self.print_format,
write_format: self.write_format,
- missing_values: self.missing_values.decode(decoder),
+ missing_values: self.missing_values,
label: self
.label
.as_ref()
.labels
.iter()
.map(|ValueLabel { value, label }| ValueLabel {
- value: *value,
+ value: value.clone(),
label: decoder.decode(label).to_string(),
})
.collect();
}
#[derive(Clone, Debug)]
-pub struct LongStringMissingValues<N, V>
+pub struct LongStringMissingValues<N>
where
N: Debug,
- V: Debug,
{
/// Variable name.
pub var_name: N,
/// Missing values.
- pub missing_values: MissingValues<V>,
+ pub missing_values: Vec<RawStr<8>>,
}
-impl LongStringMissingValues<RawString, RawStr<8>> {
- fn decode(
- &self,
- decoder: &Decoder,
- ) -> Result<LongStringMissingValues<Identifier, String>, IdError> {
+impl LongStringMissingValues<RawString> {
+ fn decode(&self, decoder: &Decoder) -> Result<LongStringMissingValues<Identifier>, IdError> {
Ok(LongStringMissingValues {
var_name: decoder.decode_identifier(&self.var_name)?,
- missing_values: self.missing_values.decode(decoder),
+ missing_values: self.missing_values.clone(),
})
}
}
#[derive(Clone, Debug)]
-pub struct LongStringMissingValueRecord<N, V>(pub Vec<LongStringMissingValues<N, V>>)
+pub struct LongStringMissingValueRecord<N>(pub Vec<LongStringMissingValues<N>>)
where
- N: Debug,
- V: Debug;
+ N: Debug;
-impl ExtensionRecord for LongStringMissingValueRecord<RawString, RawStr<8>> {
+impl ExtensionRecord for LongStringMissingValueRecord<RawString> {
const SUBTYPE: u32 = 22;
const SIZE: Option<u32> = Some(1);
const COUNT: Option<u32> = None;
value_len,
});
}
- let mut values = Vec::new();
+ let mut missing_values = Vec::new();
for i in 0..n_missing_values {
let value: [u8; 8] = read_bytes(&mut input)?;
let numeric_value: u64 = endian.parse(value);
} else {
value
};
- values.push(Value::String(RawStr(value)));
+ missing_values.push(RawStr(value));
}
- let missing_values = MissingValues {
- values,
- range: None,
- };
missing_value_set.push(LongStringMissingValues {
var_name,
missing_values,
}
}
-impl LongStringMissingValueRecord<RawString, RawStr<8>> {
- pub fn decode(self, decoder: &Decoder) -> LongStringMissingValueRecord<Identifier, String> {
+impl LongStringMissingValueRecord<RawString> {
+ pub fn decode(self, decoder: &Decoder) -> LongStringMissingValueRecord<Identifier> {
let mut mvs = Vec::with_capacity(self.0.len());
for mv in self.0.iter() {
if let Some(mv) = mv
}
#[derive(Clone, Debug)]
pub struct VariableSet {
- pub name: String,
- pub vars: Vec<Identifier>,
+ pub name: Identifier,
+ pub variable_names: Vec<Identifier>,
}
impl VariableSet {
fn parse(input: &str, decoder: &Decoder) -> Result<Self, Warning> {
let (name, input) = input.split_once('=').ok_or(Warning::TBD)?;
+ let name = decoder.new_identifier(name).map_err(|_| Warning::TBD)?;
let mut vars = Vec::new();
for var in input.split_ascii_whitespace() {
if let Some(identifier) = decoder
}
}
Ok(VariableSet {
- name: name.into(),
- vars,
+ name,
+ variable_names: vars,
})
}
}