X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=rust%2Fsrc%2Fcooked.rs;h=d00f3f3c34f7a6087570c6315e98b0539f82c2ab;hb=e0cbdf0daefcca81be9572aab0deedf945687f5a;hp=2e67965e41ff730f7fed916b22cc9d3f31cedc1a;hpb=a8331d2f67af24ce1f9f5da99641b8d1cdc21300;p=pspp diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index 2e67965e41..d00f3f3c34 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -1,11 +1,11 @@ -use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat}; +use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat, ops::Range}; use crate::{ encoding::{default_encoding, get_encoding, Error as EncodingError}, endian::Endian, format::{Error as FormatError, Spec, UncheckedSpec}, identifier::{Error as IdError, Identifier}, - raw::{self, MissingValues, UnencodedStr, VarType}, + raw::{self, UnencodedStr, VarType}, }; use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; use encoding_rs::{DecoderResult, Encoding}; @@ -28,8 +28,8 @@ pub enum Error { #[error("Using default encoding {0}.")] UsingDefaultEncoding(String), - #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")] - InvalidVariableWidth { offset: u64, width: i32 }, + #[error("Variable record from offset {:x} to {:x} specifies width {width} not in valid range [-1,255).", offsets.start, offsets.end)] + InvalidVariableWidth { offsets: Range, width: i32 }, #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")] InvalidLongMissingValueFormat, @@ -169,6 +169,7 @@ pub enum Record { VariableSets(VariableSetRecord), VarDisplay(VarDisplayRecord), MultipleResponse(MultipleResponseRecord), + LongStringMissingValues(LongStringMissingValuesRecord), LongStringValueLabels(LongStringValueLabelRecord), Encoding(EncodingRecord), NumberOfCases(NumberOfCasesRecord), @@ -302,6 +303,11 @@ pub fn decode( output.push(Record::MultipleResponse(mrr)) } } + raw::Record::LongStringMissingValues(ref input) => { + if let Some(mrr) = LongStringMissingValuesRecord::try_decode(&mut decoder, input, warn)? { + output.push(Record::LongStringMissingValues(mrr)) + } + } raw::Record::LongStringValueLabels(ref input) => { if let Some(mrr) = LongStringValueLabelRecord::try_decode(&mut decoder, input, warn)? @@ -328,9 +334,7 @@ pub fn decode( raw::Record::VeryLongStrings(ref input) => { let s = decoder.decode_string_cow(&input.text.0, warn); output.push(Record::VeryLongStrings(VeryLongStringRecord::parse( - &mut decoder, - &s, - warn, + &decoder, &s, warn, )?)); } raw::Record::FileAttributes(ref input) => { @@ -412,7 +416,7 @@ impl Decoder { fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> { if let (s, false) = self.encoding.decode_without_bom_handling(input) { // This is the common case. Usually there will be no errors. - s.into() + s } else { // Unusual case. Don't bother to optimize it much. let mut decoder = self.encoding.new_decoder_without_bom_handling(); @@ -582,6 +586,31 @@ pub struct VariableRecord { pub label: Option, } +#[derive(Clone, Debug)] +pub struct MissingValues { + /// Individual missing values, up to 3 of them. + pub values: Vec, + + /// Optional range of missing values. + pub range: Option<(Value, Value)>, +} + +impl Decode for MissingValues { + fn decode(decoder: &Decoder, input: &raw::MissingValues, _warn: impl Fn(Error)) -> Self { + MissingValues { + values: input + .values + .iter() + .map(|value| Value::decode(value, decoder)) + .collect(), + range: input + .range + .as_ref() + .map(|(low, high)| (Value::decode(low, decoder), Value::decode(high, decoder))), + } + } +} + fn decode_format(raw: raw::Spec, width: VarWidth, warn: impl Fn(Spec, FormatError)) -> Spec { UncheckedSpec::try_from(raw) .and_then(Spec::try_from) @@ -607,7 +636,7 @@ impl TryDecode for VariableRecord { -1 => return Ok(None), _ => { return Err(Error::InvalidVariableWidth { - offset: input.offset, + offsets: input.offsets.clone(), width: input.width, }) } @@ -674,7 +703,7 @@ impl TryDecode for VariableRecord { name, print_format, write_format, - missing_values: input.missing_values.clone(), + missing_values: MissingValues::decode(decoder, &input.missing_values, warn), label, })) } @@ -748,7 +777,7 @@ pub enum Value { } impl Value { - pub fn decode(raw: raw::Value, decoder: &Decoder) -> Self { + pub fn decode(raw: &raw::Value, decoder: &Decoder) -> Self { match raw { raw::Value::Number(x) => Value::Number(x.map(|x| x.into())), raw::Value::String(s) => Value::String(decoder.decode_exact_length(&s.0).into()), @@ -819,8 +848,8 @@ impl TryDecode for ValueLabelRecord { .map(|(value, label)| { let label = decoder.decode_string(&label.0, &warn); let value = Value::decode( - raw::Value::from_raw(*value, var_type, decoder.endian), - &decoder, + &raw::Value::from_raw(value, var_type, decoder.endian), + decoder, ); ValueLabel { value, label } }) @@ -871,10 +900,10 @@ pub struct LongName { impl LongName { fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result { - let short_name = Identifier::new(short_name, decoder.encoding) - .map_err(|e| Error::InvalidShortName(e))?; + let short_name = + Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidShortName)?; let long_name = - Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?; + Identifier::new(long_name, decoder.encoding).map_err(Error::InvalidLongName)?; Ok(LongName { short_name, long_name, @@ -914,16 +943,13 @@ impl VeryLongString { let Some((short_name, length)) = input.split_once('=') else { return Err(Error::TBD); }; - let short_name = Identifier::new(short_name, decoder.encoding) - .map_err(|e| Error::InvalidLongStringName(e))?; + let short_name = + Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidLongStringName)?; let length: u16 = length.parse().map_err(|_| Error::TBD)?; if length > VarWidth::MAX_STRING { return Err(Error::TBD); } - Ok(VeryLongString { - short_name: short_name.into(), - length, - }) + Ok(VeryLongString { short_name, length }) } } @@ -977,7 +1003,7 @@ impl Attribute { } if let Some(rest) = rest.strip_prefix(')') { let attribute = Identifier::new(name, decoder.encoding) - .map_err(|e| Error::InvalidAttributeName(e)) + .map_err(Error::InvalidAttributeName) .warn_on_error(warn) .map(|name| Attribute { name, values }); return Ok((attribute, rest)); @@ -1045,7 +1071,7 @@ impl VarAttributeSet { }; let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?; let var_attribute = Identifier::new(long_var_name, decoder.encoding) - .map_err(|e| Error::InvalidAttributeVariableName(e)) + .map_err(Error::InvalidAttributeVariableName) .warn_on_error(warn) .map(|name| VarAttributeSet { long_var_name: name, @@ -1236,7 +1262,7 @@ impl MultipleResponseSet { ) -> Result { let mr_set_name = decoder .decode_identifier(&input.name.0, warn) - .map_err(|error| Error::InvalidMrSetName(error))?; + .map_err(Error::InvalidMrSetName)?; let label = decoder.decode_string(&input.label.0, warn); @@ -1310,6 +1336,56 @@ impl TryDecode for MultipleResponseRecord { } } +#[derive(Clone, Debug)] +pub struct LongStringMissingValues { + /// Variable name. + pub var_name: Identifier, + + /// Missing values. + pub missing_values: MissingValues, +} + +impl LongStringMissingValues { + fn decode( + decoder: &Decoder, + input: &raw::LongStringMissingValues, + warn: &impl Fn(Error), + ) -> Result { + let var_name = decoder.decode_string(&input.var_name.0, warn); + let var_name = Identifier::new(var_name.trim_end(), decoder.encoding) + .map_err(Error::InvalidLongStringValueLabelName)?; + + let missing_values = MissingValues::decode(decoder, &input.missing_values, warn); + + Ok(LongStringMissingValues { + var_name, + missing_values + }) + } +} + +#[derive(Clone, Debug)] +pub struct LongStringMissingValuesRecord(Vec); + +impl TryDecode for LongStringMissingValuesRecord { + type Input = raw::LongStringMissingValueSet; + + fn try_decode( + decoder: &mut Decoder, + input: &Self::Input, + warn: impl Fn(Error), + ) -> Result, Error> { + let mut labels = Vec::with_capacity(input.0.len()); + for label in &input.0 { + match LongStringMissingValues::decode(decoder, label, &warn) { + Ok(set) => labels.push(set), + Err(error) => warn(error), + } + } + Ok(Some(LongStringMissingValuesRecord(labels))) + } +} + #[derive(Clone, Debug)] pub struct LongStringValueLabels { pub var_name: Identifier, @@ -1325,13 +1401,13 @@ impl LongStringValueLabels { ) -> Result { let var_name = decoder.decode_string(&input.var_name.0, warn); let var_name = Identifier::new(var_name.trim_end(), decoder.encoding) - .map_err(|e| Error::InvalidLongStringValueLabelName(e))?; + .map_err(Error::InvalidLongStringValueLabelName)?; let min_width = 9; let max_width = VarWidth::MAX_STRING; if input.width < 9 || input.width > max_width as u32 { return Err(Error::InvalidLongValueLabelWidth { - name: var_name.into(), + name: var_name, width: input.width, min_width, max_width,