From 6cf498ea4c01dde65fd70ba7da04f319ca2158cb Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 17 Nov 2023 17:07:33 -0800 Subject: [PATCH] long string missing values --- rust/src/cooked.rs | 89 +++++++++++++++++++++++++++++++++++++++++++--- rust/src/raw.rs | 10 +++--- 2 files changed, 90 insertions(+), 9 deletions(-) diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index f3c6adbbc4..d00f3f3c34 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -5,7 +5,7 @@ use crate::{ endian::Endian, format::{Error as FormatError, Spec, UncheckedSpec}, identifier::{Error as IdError, Identifier}, - raw::{self, MissingValues, UnencodedStr, VarType}, + raw::{self, UnencodedStr, VarType}, }; use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; use encoding_rs::{DecoderResult, Encoding}; @@ -169,6 +169,7 @@ pub enum Record { VariableSets(VariableSetRecord), VarDisplay(VarDisplayRecord), MultipleResponse(MultipleResponseRecord), + LongStringMissingValues(LongStringMissingValuesRecord), LongStringValueLabels(LongStringValueLabelRecord), Encoding(EncodingRecord), NumberOfCases(NumberOfCasesRecord), @@ -302,6 +303,11 @@ pub fn decode( output.push(Record::MultipleResponse(mrr)) } } + raw::Record::LongStringMissingValues(ref input) => { + if let Some(mrr) = LongStringMissingValuesRecord::try_decode(&mut decoder, input, warn)? { + output.push(Record::LongStringMissingValues(mrr)) + } + } raw::Record::LongStringValueLabels(ref input) => { if let Some(mrr) = LongStringValueLabelRecord::try_decode(&mut decoder, input, warn)? @@ -580,6 +586,31 @@ pub struct VariableRecord { pub label: Option, } +#[derive(Clone, Debug)] +pub struct MissingValues { + /// Individual missing values, up to 3 of them. + pub values: Vec, + + /// Optional range of missing values. + pub range: Option<(Value, Value)>, +} + +impl Decode for MissingValues { + fn decode(decoder: &Decoder, input: &raw::MissingValues, _warn: impl Fn(Error)) -> Self { + MissingValues { + values: input + .values + .iter() + .map(|value| Value::decode(value, decoder)) + .collect(), + range: input + .range + .as_ref() + .map(|(low, high)| (Value::decode(low, decoder), Value::decode(high, decoder))), + } + } +} + fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec { UncheckedSpec::try_from(raw) .and_then(Spec::try_from) @@ -672,7 +703,7 @@ impl TryDecode for VariableRecord { name, print_format, write_format, - missing_values: input.missing_values.clone(), + missing_values: MissingValues::decode(decoder, &input.missing_values, warn), label, })) } @@ -746,7 +777,7 @@ pub enum Value { } impl Value { - pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self { + pub fn decode(raw: &raw::Value, decoder: &Decoder) -> Self { match raw { raw::Value::Number(x) => Value::Number(x.map(|x| x.into())), raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()), @@ -817,7 +848,7 @@ impl TryDecode for ValueLabelRecord { .map(|(value, label)| { let label = decoder.decode_string(&label.0, &warn); let value = Value::decode( - raw::Value::from_raw(*value, var_type, decoder.endian), + &raw::Value::from_raw(value, var_type, decoder.endian), decoder, ); ValueLabel { value, label } @@ -1305,6 +1336,56 @@ impl TryDecode for MultipleResponseRecord { } } +#[derive(Clone, Debug)] +pub struct LongStringMissingValues { + /// Variable name. + pub var_name: Identifier, + + /// Missing values. + pub missing_values: MissingValues, +} + +impl LongStringMissingValues { + fn decode( + decoder: &Decoder, + input: &raw::LongStringMissingValues, + warn: &impl Fn(Error), + ) -> Result { + let var_name = decoder.decode_string(&input.var_name.0, warn); + let var_name = Identifier::new(var_name.trim_end(), decoder.encoding) + .map_err(Error::InvalidLongStringValueLabelName)?; + + let missing_values = MissingValues::decode(decoder, &input.missing_values, warn); + + Ok(LongStringMissingValues { + var_name, + missing_values + }) + } +} + +#[derive(Clone, Debug)] +pub struct LongStringMissingValuesRecord(Vec); + +impl TryDecode for LongStringMissingValuesRecord { + type Input = raw::LongStringMissingValueSet; + + fn try_decode( + decoder: &mut Decoder, + input: &Self::Input, + warn: impl Fn(Error), + ) -> Result, Error> { + let mut labels = Vec::with_capacity(input.0.len()); + for label in &input.0 { + match LongStringMissingValues::decode(decoder, label, &warn) { + Ok(set) => labels.push(set), + Err(error) => warn(error), + } + } + Ok(Some(LongStringMissingValuesRecord(labels))) + } +} + #[derive(Clone, Debug)] pub struct LongStringValueLabels { pub var_name: Identifier, diff --git a/rust/src/raw.rs b/rust/src/raw.rs index db508ae93f..8722febe88 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -543,13 +543,13 @@ impl Debug for Value { impl Value { fn read(r: &mut R, var_type: VarType, endian: Endian) -> Result { Ok(Self::from_raw( - UntypedValue(read_bytes(r)?), + &UntypedValue(read_bytes(r)?), var_type, endian, )) } - pub fn from_raw(raw: UntypedValue, var_type: VarType, endian: Endian) -> Value { + pub fn from_raw(raw: &UntypedValue, var_type: VarType, endian: Endian) -> Value { match var_type { VarType::String => Value::String(UnencodedStr(raw.0)), VarType::Numeric => { @@ -579,7 +579,7 @@ impl Value { }); } }; - values.push(Value::from_raw(UntypedValue(raw), var_type, endian)); + values.push(Value::from_raw(&UntypedValue(raw), var_type, endian)); } Ok(Some(values)) } @@ -630,7 +630,7 @@ impl Value { } } 253 => { - break Value::from_raw(UntypedValue(read_bytes(reader)?), var_type, endian) + break Value::from_raw(&UntypedValue(read_bytes(reader)?), var_type, endian) } 254 => match var_type { VarType::String => break Value::String(UnencodedStr(*b" ")), // XXX EBCDIC @@ -1410,7 +1410,7 @@ pub struct LongStringMissingValues { } #[derive(Clone, Debug)] -pub struct LongStringMissingValueSet(Vec); +pub struct LongStringMissingValueSet(pub Vec); impl ExtensionRecord for LongStringMissingValueSet { const SUBTYPE: u32 = 22; -- 2.30.2