From 919a00e01c2248b5a8ace12a5b5d8ab67462f0ea Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 25 Dec 2023 09:54:03 -0800 Subject: [PATCH] work --- rust/src/cooked.rs | 14 +- rust/src/raw.rs | 344 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 321 insertions(+), 37 deletions(-) diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index c6eabecdbf..4c0135b949 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -215,8 +215,8 @@ struct Headers<'a> { variable_sets: Vec<&'a raw::TextRecord>, var_display: Option<&'a raw::VarDisplayRecord>, multiple_response: Vec<&'a raw::MultipleResponseRecord>, - long_string_value_labels: Vec<&'a raw::LongStringValueLabelRecord>, - long_string_missing_values: Vec<&'a raw::LongStringMissingValueRecord>, + long_string_value_labels: Vec<&'a raw::LongStringValueLabelRecord>, + long_string_missing_values: Vec<&'a raw::LongStringMissingValueRecord>>, encoding: Option<&'a raw::EncodingRecord>, number_of_cases: Option<&'a raw::NumberOfCasesRecord>, product_info: Option<&'a raw::TextRecord>, @@ -239,6 +239,7 @@ fn set_or_warn(option: &mut Option, value: T, warn: &impl Fn(Error)) { impl<'a> Headers<'a> { fn new(headers: &'a Vec, warn: &impl Fn(Error)) -> Headers<'a> { let mut h = Headers::default(); +/* for header in headers { match header { raw::Record::Header(record) => set_or_warn(&mut h.header, record, warn), @@ -272,6 +273,7 @@ impl<'a> Headers<'a> { raw::Record::Cases(record) => set_or_warn(&mut h.cases, record, warn), } } +*/ h } } @@ -1316,7 +1318,7 @@ pub struct LongStringMissingValues { impl LongStringMissingValues { fn decode( decoder: &Decoder, - input: &raw::LongStringMissingValues, + input: &raw::LongStringMissingValues>, warn: &impl Fn(Error), ) -> Result { let var_name = decoder.decode_string(&input.var_name.0, warn); @@ -1336,7 +1338,7 @@ impl LongStringMissingValues { pub struct LongStringMissingValuesRecord(Vec); impl TryDecode for LongStringMissingValuesRecord { - type Input<'a> = raw::LongStringMissingValueRecord; + type Input<'a> = raw::LongStringMissingValueRecord>; fn try_decode( decoder: &mut Decoder, @@ -1364,7 +1366,7 @@ pub struct LongStringValueLabels { impl LongStringValueLabels { fn decode( decoder: &Decoder, - input: &raw::LongStringValueLabels, + input: &raw::LongStringValueLabels, warn: &impl Fn(Error), ) -> Result { let var_name = decoder.decode_string(&input.var_name.0, warn); @@ -1402,7 +1404,7 @@ impl LongStringValueLabels { pub struct LongStringValueLabelRecord(pub Vec); impl TryDecode for LongStringValueLabelRecord { - type Input<'a> = raw::LongStringValueLabelRecord; + type Input<'a> = raw::LongStringValueLabelRecord; fn try_decode( decoder: &mut Decoder, diff --git a/rust/src/raw.rs b/rust/src/raw.rs index 85312ef4c9..4ab11c3043 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -170,18 +170,19 @@ pub enum Record { Document(DocumentRecord), IntegerInfo(IntegerInfoRecord), FloatInfo(FloatInfoRecord), - VariableSets(TextRecord), + VariableSets(VariableSetRecord), VarDisplay(VarDisplayRecord), MultipleResponse(MultipleResponseRecord), - LongStringValueLabels(LongStringValueLabelRecord), - LongStringMissingValues(LongStringMissingValueRecord), + LongStringValueLabels(LongStringValueLabelRecord), + LongStringMissingValues(LongStringMissingValueRecord>), Encoding(EncodingRecord), NumberOfCases(NumberOfCasesRecord), - ProductInfo(TextRecord), - LongNames(TextRecord), - VeryLongStrings(TextRecord), - FileAttributes(TextRecord), - VariableAttributes(TextRecord), + ProductInfo(ProductInfoRecord), + LongNames(LongNamesRecord), + VeryLongStrings(VeryLongStringsRecord), + FileAttributes(FileAttributeRecord), + VariableAttributes(VariableAttributeRecord), + Text(TextRecord), OtherExtension(Extension), EndOfHeaders(u32), ZHeader(ZHeader), @@ -399,10 +400,13 @@ struct Decoder { } impl Decoder { + fn warn(&self, error: Error) { + (self.warn)(error) + } fn decode_slice<'a>(&self, input: &'a [u8]) -> Cow<'a, str> { let (output, malformed) = self.encoding.decode_without_bom_handling(input); if malformed { - (self.warn)(Error::MalformedString { + self.warn(Error::MalformedString { encoding: self.encoding.name().into(), text: output.clone().into(), }); @@ -1817,18 +1821,34 @@ impl VarDisplayRecord { } #[derive(Clone, Debug)] -pub struct LongStringMissingValues { +pub struct LongStringMissingValues +where + N: Debug, + V: Debug, +{ /// Variable name. - pub var_name: RawString, + pub var_name: N, /// Missing values. - pub missing_values: MissingValues>, + pub missing_values: MissingValues, +} + +impl LongStringMissingValues> { + fn decode<'a>(&self, decoder: &Decoder) -> LongStringMissingValues { + LongStringMissingValues { + var_name: decoder.decode(&self.var_name).to_string(), + missing_values: self.missing_values.decode(decoder), + } + } } #[derive(Clone, Debug)] -pub struct LongStringMissingValueRecord(pub Vec); +pub struct LongStringMissingValueRecord(pub Vec>) +where + N: Debug, + V: Debug; -impl ExtensionRecord for LongStringMissingValueRecord { +impl ExtensionRecord for LongStringMissingValueRecord> { const SUBTYPE: u32 = 22; const SIZE: Option = Some(1); const COUNT: Option = None; @@ -1880,6 +1900,12 @@ impl ExtensionRecord for LongStringMissingValueRecord { } } +impl LongStringMissingValueRecord> { + fn decode<'a>(&self, decoder: &Decoder) -> LongStringMissingValueRecord { + LongStringMissingValueRecord(self.0.iter().map(|mv| mv.decode(decoder)).collect()) + } +} + #[derive(Clone, Debug)] pub struct EncodingRecord(pub String); @@ -1930,19 +1956,252 @@ impl ExtensionRecord for NumberOfCasesRecord { pub struct TextRecord { pub offsets: Range, + /// Type of record. + pub rec_type: TextRecordType, + /// The text content of the record. pub text: RawString, } -impl From for TextRecord { - fn from(source: Extension) -> Self { - TextRecord { - offsets: source.offsets, - text: source.data.into(), +#[derive(Clone, Copy, Debug)] +pub enum TextRecordType { + VariableSets, + ProductInfo, + LongNames, + VeryLongStrings, + FileAttributes, + VariableAttributes, +} + +impl TextRecord { + fn new(extension: Extension, rec_type: TextRecordType) -> Self { + Self { + offsets: extension.offsets, + rec_type, + text: extension.data.into(), + } + } + fn decode<'a>(&self, decoder: &Decoder) -> Result, Error> { + match self.rec_type { + TextRecordType::VariableSets => Ok(Some(Record::VariableSets( + VariableSetRecord::decode(self, decoder), + ))), + TextRecordType::ProductInfo => Ok(Some(Record::ProductInfo( + ProductInfoRecord::decode(self, decoder), + ))), + TextRecordType::LongNames => Ok(Some(Record::LongNames(LongNamesRecord::decode( + self, decoder, + )))), + TextRecordType::VeryLongStrings => Ok(Some(Record::VeryLongStrings( + VeryLongStringsRecord::decode(self, decoder), + ))), + TextRecordType::FileAttributes => { + Ok(FileAttributeRecord::decode(self, decoder).map(|fa| Record::FileAttributes(fa))) + } + TextRecordType::VariableAttributes => { + Ok(Some(Record::VariableAttributes( +VariableAttributeRecord::decode(self, decoder)))) + } + } + } +} + +#[derive(Clone, Debug)] +pub struct VeryLongString { + pub short_name: String, + pub length: u16, +} + +impl VeryLongString { + fn parse(decoder: &Decoder, input: &str) -> Result { + let Some((short_name, length)) = input.split_once('=') else { + return Err(Error::TBD); + }; + let length = length.parse().map_err(|_| Error::TBD)?; + Ok(VeryLongString { + short_name: short_name.into(), + length, + }) + } +} + +#[derive(Clone, Debug)] +pub struct Attribute { + pub name: String, + pub values: Vec, +} + +impl Attribute { + fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(Attribute, &'a str), Error> { + let Some((name, mut input)) = input.split_once('(') else { + return Err(Error::TBD); + }; + let mut values = Vec::new(); + loop { + let Some((value, rest)) = input.split_once('\n') else { + return Err(Error::TBD); + }; + if let Some(stripped) = value + .strip_prefix('\'') + .and_then(|value| value.strip_suffix('\'')) + { + values.push(stripped.into()); + } else { + decoder.warn(Error::TBD); + values.push(value.into()); + } + if let Some(rest) = rest.strip_prefix(')') { + let attribute = Attribute { + name: name.into(), + values, + }; + return Ok((attribute, rest)); + }; + input = rest; + } + } +} + +#[derive(Clone, Debug)] +pub struct AttributeSet(pub Vec); + +impl AttributeSet { + fn parse<'a>( + decoder: &Decoder, + mut input: &'a str, + sentinel: Option, + ) -> Result<(AttributeSet, &'a str), Error> { + let mut attributes = Vec::new(); + let rest = loop { + match input.chars().next() { + None => break input, + c if c == sentinel => break &input[1..], + _ => { + let (attribute, rest) = Attribute::parse(decoder, input)?; + attributes.push(attribute); + input = rest; + } + } + }; + Ok((AttributeSet(attributes), rest)) + } +} + +#[derive(Clone, Debug)] +pub struct FileAttributeRecord(AttributeSet); + +impl FileAttributeRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> Option { + let input = decoder.decode(&source.text); + match AttributeSet::parse(decoder, &input, None).warn_on_error(&decoder.warn) { + Some((set, rest)) => { + if !rest.is_empty() { + decoder.warn(Error::TBD); + } + Some(FileAttributeRecord(set)) + } + None => None, } } } +#[derive(Clone, Debug)] +pub struct VarAttributeSet { + pub long_var_name: String, + pub attributes: AttributeSet, +} + +impl VarAttributeSet { + fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(VarAttributeSet, &'a str), Error> { + let Some((long_var_name, rest)) = input.split_once(':') else { + return Err(Error::TBD); + }; + let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'))?; + let var_attribute = VarAttributeSet { + long_var_name: long_var_name.into(), + attributes, + }; + Ok((var_attribute, rest)) + } +} + +#[derive(Clone, Debug)] +pub struct VariableAttributeRecord(Vec); + +impl VariableAttributeRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> Self { + let decoded = decoder.decode(&source.text); + let mut input = decoded.as_ref(); + let mut var_attribute_sets = Vec::new(); + while !input.is_empty() { + let Some((var_attribute, rest)) = + VarAttributeSet::parse(decoder, &input).warn_on_error(&decoder.warn) + else { + break; + }; + var_attribute_sets.push(var_attribute); + input = rest.into(); + } + VariableAttributeRecord(var_attribute_sets) + } +} + +#[derive(Clone, Debug)] +pub struct VeryLongStringsRecord(Vec); + +impl VeryLongStringsRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> Self { + let input = decoder.decode(&source.text); + let mut very_long_strings = Vec::new(); + for tuple in input + .split('\0') + .map(|s| s.trim_end_matches('\t')) + .filter(|s| !s.is_empty()) + { + if let Some(vls) = VeryLongString::parse(decoder, tuple).warn_on_error(&decoder.warn) { + very_long_strings.push(vls) + } + } + VeryLongStringsRecord(very_long_strings) + } +} + +#[derive(Clone, Debug)] +pub struct LongName { + pub short_name: String, + pub long_name: String, +} + +#[derive(Clone, Debug)] +pub struct LongNamesRecord(Vec); + +impl LongNamesRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> Self { + let input = decoder.decode(&source.text); + let mut names = Vec::new(); + for pair in input.split('\t').filter(|s| !s.is_empty()) { + if let Some((short_name, long_name)) = pair.split_once('=') { + names.push(LongName { + short_name: short_name.into(), + long_name: long_name.into(), + }); + } else { + decoder.warn(Error::TBD) + } + } + LongNamesRecord(names) + } +} + +#[derive(Clone, Debug)] +pub struct ProductInfoRecord(pub String); + +impl ProductInfoRecord { + const NAME: &'static str = "extra product info"; + fn decode(source: &TextRecord, decoder: &Decoder) -> Self { + Self(decoder.decode(&source.text).into()) + } +} #[derive(Clone, Debug)] pub struct VariableSet { pub name: String, @@ -1967,7 +2226,7 @@ pub struct VariableSetRecord { } impl VariableSetRecord { - fn decode<'a>(source: &TextRecord, decoder: &Decoder) -> VariableSetRecord { + fn decode(source: &TextRecord, decoder: &Decoder) -> VariableSetRecord { let mut sets = Vec::new(); let input = decoder.decode(&source.text); for line in input.lines() { @@ -2079,12 +2338,30 @@ impl Extension { } EncodingRecord::SUBTYPE => EncodingRecord::parse(&extension, endian), NumberOfCasesRecord::SUBTYPE => NumberOfCasesRecord::parse(&extension, endian), - 5 => Ok(Record::VariableSets(extension.into())), - 10 => Ok(Record::ProductInfo(extension.into())), - 13 => Ok(Record::LongNames(extension.into())), - 14 => Ok(Record::VeryLongStrings(extension.into())), - 17 => Ok(Record::FileAttributes(extension.into())), - 18 => Ok(Record::VariableAttributes(extension.into())), + 5 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::VariableSets, + ))), + 10 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::ProductInfo, + ))), + 13 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::LongNames, + ))), + 14 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::VeryLongStrings, + ))), + 17 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::FileAttributes, + ))), + 18 => Ok(Record::Text(TextRecord::new( + extension, + TextRecordType::VariableAttributes, + ))), _ => Ok(Record::OtherExtension(extension)), }; match result { @@ -2244,18 +2521,23 @@ fn read_string(r: &mut R, endian: Endian) -> Result } #[derive(Clone, Debug)] -pub struct LongStringValueLabels { - pub var_name: RawString, +pub struct LongStringValueLabels +where + S: Debug, +{ + pub var_name: S, pub width: u32, /// `(value, label)` pairs, where each value is `width` bytes. - pub labels: Vec<(RawString, RawString)>, + pub labels: Vec<(S, S)>, } #[derive(Clone, Debug)] -pub struct LongStringValueLabelRecord(pub Vec); +pub struct LongStringValueLabelRecord(pub Vec>) +where + S: Debug; -impl ExtensionRecord for LongStringValueLabelRecord { +impl ExtensionRecord for LongStringValueLabelRecord { const SUBTYPE: u32 = 21; const SIZE: Option = Some(1); const COUNT: Option = None; -- 2.30.2