From 3fe450e9e97b0270dba0ec216f5db30309e08151 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 24 Dec 2024 10:24:12 -0800 Subject: [PATCH] finish with variable roles, start with long string value labels --- rust/pspp/src/cooked.rs | 47 +++++++++++++++++---------- rust/pspp/src/dictionary.rs | 63 ++++++++++++++++++++++++++++++++----- rust/pspp/src/macros.rs | 10 ++++-- rust/pspp/src/raw.rs | 36 ++++++++++----------- 4 files changed, 110 insertions(+), 46 deletions(-) diff --git a/rust/pspp/src/cooked.rs b/rust/pspp/src/cooked.rs index 5cf07f67fa..959515a02f 100644 --- a/rust/pspp/src/cooked.rs +++ b/rust/pspp/src/cooked.rs @@ -429,8 +429,8 @@ pub fn decode( dictionary.file_label = Some(file_label); } - for attributes in headers.file_attributes.drain(..) { - dictionary.attributes.extend(attributes.0 .0.into_iter()) + for mut attributes in headers.file_attributes.drain(..) { + dictionary.attributes.append(&mut attributes.0) } // Concatenate all the document records (really there should only be one) @@ -647,31 +647,46 @@ pub fn decode( } } - for (k, v) in headers - .file_attributes - .iter() - .flat_map(|map| map.0 .0.iter()) - { - dictionary.attributes.insert(k.clone(), v.clone()); - } - - for attr_set in headers + for mut attr_set in headers .variable_attributes - .iter() - .flat_map(|record| record.0.iter()) + .drain(..) + .flat_map(|record| record.0.into_iter()) { if let Some((_, variable)) = dictionary .variables .get_full_mut2(&attr_set.long_var_name.0) { - for (k, v) in attr_set.attributes.0.iter() { - variable.attributes.insert(k.clone(), v.clone()); - } + variable.attributes.append(&mut attr_set.attributes); } else { warn(Error::TBD); } } + // Assign variable roles. + for index in 0..dictionary.variables.len() { + let mut variable = dictionary.variables.get_index_mut2(index).unwrap(); + match variable.attributes.role() { + Ok(role) => variable.role = role, + Err(()) => warn(Error::TBD), + } + } + + // Long string value labels. + for record in headers + .long_string_value_labels + .drain(..) + .flat_map(|record| record.0.into_iter()) + { + let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else { + warn(Error::TBD); + continue; + }; + for (value, label) in record.labels.into_iter() { + let value = Value:: + variable.value_labels.insert(value) + } + } + let metadata = Metadata::decode(&headers, warn); Ok((dictionary, metadata)) } diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index ffe63571fa..bed1c50bc7 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -215,7 +215,7 @@ pub struct Dictionary { pub file_label: Option, pub documents: Vec, pub vectors: HashSet>, - pub attributes: HashMap>, + pub attributes: Attributes, pub mrsets: HashSet>, pub variable_sets: HashSet>, pub encoding: &'static Encoding, @@ -235,7 +235,7 @@ impl Dictionary { file_label: None, documents: Vec::new(), vectors: HashSet::new(), - attributes: HashMap::new(), + attributes: Attributes::new(), mrsets: HashSet::new(), variable_sets: HashSet::new(), encoding, @@ -412,11 +412,60 @@ pub enum Role { Input, Target, Both, - None, Partition, Split, } +impl Role { + fn try_from_str(input: &str) -> Result, ()> { + for (string, value) in [ + ("input", Some(Role::Input)), + ("target", Some(Role::Target)), + ("both", Some(Role::Both)), + ("partition", Some(Role::Partition)), + ("split", Some(Role::Split)), + ("none", None), + ] { + if string.eq_ignore_ascii_case(input) { + return Ok(value); + } + } + Err(()) + } +} + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct Attributes(pub HashMap>); + +impl Attributes { + pub fn new() -> Self { + Self(HashMap::new()) + } + + pub fn append(&mut self, other: &mut Self) { + self.0.extend(other.0.drain()) + } + + pub fn role(&self) -> Result, ()> { + self.try_into() + } +} + +impl TryFrom<&Attributes> for Option { + type Error = (); + + fn try_from(value: &Attributes) -> Result { + let role = Identifier::new("$@Role").unwrap(); + value.0.get(&role).map_or(Ok(None), |attribute| { + if let Ok([string]) = <&[String; 1]>::try_from(attribute.as_slice()) { + Role::try_from_str(string) + } else { + Err(()) + } + }) + } +} + #[derive(Clone, Debug)] pub struct Variable { pub name: Identifier, @@ -427,12 +476,12 @@ pub struct Variable { pub value_labels: HashMap, pub label: Option, pub measure: Option, - pub role: Role, + pub role: Option, pub display_width: u32, pub alignment: Alignment, pub leave: bool, pub short_names: Vec, - pub attributes: HashMap>, + pub attributes: Attributes, } impl Variable { @@ -448,12 +497,12 @@ impl Variable { value_labels: HashMap::new(), label: None, measure: Measure::default_for_type(var_type), - role: Role::default(), + role: None, display_width: width.default_display_width(), alignment: Alignment::default_for_type(var_type), leave, short_names: Vec::new(), - attributes: HashMap::new(), + attributes: Attributes::new(), } } diff --git a/rust/pspp/src/macros.rs b/rust/pspp/src/macros.rs index debf9c3daf..bcd223c151 100644 --- a/rust/pspp/src/macros.rs +++ b/rust/pspp/src/macros.rs @@ -1627,8 +1627,13 @@ impl<'a> Call<'a> { return None; } - pub fn expand(&self, mode: Syntax, call_loc: Location, output: &mut Vec, error: F) - where + pub fn expand( + &self, + mode: Syntax, + call_loc: Location, + output: &mut Vec, + error: F, + ) where F: Fn(MacroError) + 'a, { let error: Box = Box::new(error); @@ -1666,4 +1671,3 @@ impl<'a> Call<'a> { self.0.n_tokens } } - diff --git a/rust/pspp/src/raw.rs b/rust/pspp/src/raw.rs index a34d05bbd8..fc716dbc4f 100644 --- a/rust/pspp/src/raw.rs +++ b/rust/pspp/src/raw.rs @@ -1,5 +1,5 @@ use crate::{ - dictionary::VarWidth, + dictionary::{Attributes, VarWidth}, encoding::{default_encoding, get_encoding, Error as EncodingError}, endian::{Endian, Parse, ToBytes}, identifier::{Error as IdError, Identifier}, @@ -2365,15 +2365,12 @@ impl Attribute { } } -#[derive(Clone, Debug, Default)] -pub struct AttributeSet(pub HashMap>); - -impl AttributeSet { +impl Attributes { fn parse<'a>( decoder: &Decoder, mut input: &'a str, sentinel: Option, - ) -> Result<(AttributeSet, &'a str), Warning> { + ) -> Result<(Attributes, &'a str), Warning> { let mut attributes = HashMap::new(); let rest = loop { match input.chars().next() { @@ -2387,17 +2384,17 @@ impl AttributeSet { } } }; - Ok((AttributeSet(attributes), rest)) + Ok((Attributes(attributes), rest)) } } #[derive(Clone, Debug, Default)] -pub struct FileAttributeRecord(pub AttributeSet); +pub struct FileAttributeRecord(pub Attributes); impl FileAttributeRecord { fn decode(source: &TextRecord, decoder: &Decoder) -> Self { let input = decoder.decode(&source.text); - match AttributeSet::parse(decoder, &input, None).issue_warning(&decoder.warn) { + match Attributes::parse(decoder, &input, None).issue_warning(&decoder.warn) { Some((set, rest)) => { if !rest.is_empty() { decoder.warn(Warning::TBD); @@ -2410,13 +2407,13 @@ impl FileAttributeRecord { } #[derive(Clone, Debug)] -pub struct VarAttributeSet { +pub struct VarAttributes { pub long_var_name: Identifier, - pub attributes: AttributeSet, + pub attributes: Attributes, } -impl VarAttributeSet { - fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(VarAttributeSet, &'a str), Warning> { +impl VarAttributes { + fn parse<'a>(decoder: &Decoder, input: &'a str) -> Result<(VarAttributes, &'a str), Warning> { let Some((long_var_name, rest)) = input.split_once(':') else { return Err(Warning::TBD); }; @@ -2424,8 +2421,8 @@ impl VarAttributeSet { .new_identifier(long_var_name) .and_then(Identifier::must_be_ordinary) .map_err(Warning::InvalidAttributeVariableName)?; - let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'))?; - let var_attribute = VarAttributeSet { + let (attributes, rest) = Attributes::parse(decoder, rest, Some('/'))?; + let var_attribute = VarAttributes { long_var_name, attributes, }; @@ -2434,7 +2431,7 @@ impl VarAttributeSet { } #[derive(Clone, Debug)] -pub struct VariableAttributeRecord(pub Vec); +pub struct VariableAttributeRecord(pub Vec); impl VariableAttributeRecord { fn decode(source: &TextRecord, decoder: &Decoder) -> Self { @@ -2443,7 +2440,7 @@ impl VariableAttributeRecord { let mut var_attribute_sets = Vec::new(); while !input.is_empty() { let Some((var_attribute, rest)) = - VarAttributeSet::parse(decoder, input).issue_warning(&decoder.warn) + VarAttributes::parse(decoder, input).issue_warning(&decoder.warn) else { break; }; @@ -2846,7 +2843,7 @@ where pub width: u32, /// `(value, label)` pairs, where each value is `width` bytes. - pub labels: Vec<(S, S)>, + pub labels: Vec<(RawString, S)>, } impl LongStringValueLabels { @@ -2860,9 +2857,8 @@ impl LongStringValueLabels { let mut labels = Vec::with_capacity(self.labels.len()); for (value, label) in self.labels.iter() { - let value = decoder.decode_exact_length(&value.0).to_string(); let label = decoder.decode(label).to_string(); - labels.push((value, label)); + labels.push((value.clone(), label)); } Ok(LongStringValueLabels { -- 2.30.2