From ae1950f5f3b7297e65adc58c14374d05e5cc4fef Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 3 Mar 2024 14:55:15 -0800 Subject: [PATCH] work on value labels --- rust/src/cooked.rs | 48 ++++++++++++++++++++++++++++++++++++------ rust/src/dictionary.rs | 8 +++++++ rust/src/identifier.rs | 12 +++++++++++ rust/src/raw.rs | 7 ++---- 4 files changed, 64 insertions(+), 11 deletions(-) diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index ee66890027..71d65adddd 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -1,4 +1,4 @@ -use std::{cell::RefCell, ops::Range, rc::Rc, collections::HashMap}; +use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc}; use crate::{ dictionary::{Dictionary, VarWidth, Variable}, @@ -85,17 +85,23 @@ pub enum Error { #[error("Dictionary index {0} refers to a long string continuation.")] DictIndexIsContinuation(usize), + #[error("At offset {offset:#x}, one or more variable indexes for value labels referred to long string continuation records: {indexes:?}")] + LongStringContinuationIndexes { offset: u64, indexes: Vec }, + + #[error( + "At offsets {:#x}...{:#x}, record types 3 and 4 may not add value labels to one or more long string variables: {variables:?}", .offsets.start, .offsets.end + )] + InvalidLongStringValueLabels { + offsets: Range, + variables: Vec, + }, + #[error("Variables associated with value label are not all of identical type. Variable {numeric_var} is numeric, but variable {string_var} is string.")] ValueLabelsDifferentTypes { numeric_var: Identifier, string_var: Identifier, }, - #[error( - "Value labels may not be added to long string variable {0} using record types 3 or 4." - )] - InvalidLongStringValueLabel(Identifier), - #[error("Invalid multiple response set name. {0}")] InvalidMrSetName(IdError), @@ -512,6 +518,36 @@ pub fn decode( assert_eq!(var_index_map.insert(value_index, dict_index), None); } + for record in headers.value_label.drain(..) { + let mut dict_indexes = Vec::with_capacity(record.dict_indexes.len()); + let mut continuation_indexes = Vec::new(); + let mut long_string_variables = Vec::new(); + for value_index in record.dict_indexes.iter() { + if let Some(dict_index) = var_index_map.get(&(*value_index as usize - 1)) { + let variable = &dictionary.variables[*dict_index]; + if variable.width.is_long_string() { + long_string_variables.push(variable.name.clone()); + } else { + dict_indexes.push(*dict_index); + } + } else { + continuation_indexes.push(*value_index); + } + } + if !continuation_indexes.is_empty() { + warn(Error::LongStringContinuationIndexes { + offset: record.offsets.start, + indexes: continuation_indexes, + }); + } + if !long_string_variables.is_empty() { + warn(Error::InvalidLongStringValueLabels { + offsets: record.offsets.clone(), + variables: long_string_variables, + }); + } + } + let metadata = Metadata::decode(&headers, warn); Ok((dictionary, metadata)) } diff --git a/rust/src/dictionary.rs b/rust/src/dictionary.rs index 042a294452..8d28ff329b 100644 --- a/rust/src/dictionary.rs +++ b/rust/src/dictionary.rs @@ -87,6 +87,14 @@ impl VarWidth { _ => Err(()), } } + + pub fn is_long_string(&self) -> bool { + if let Self::String(width) = self { + *width > 8 + } else { + false + } + } } impl From for VarType { diff --git a/rust/src/identifier.rs b/rust/src/identifier.rs index 70fbc00aa1..1108a46a7e 100644 --- a/rust/src/identifier.rs +++ b/rust/src/identifier.rs @@ -3,6 +3,7 @@ use std::{ cmp::Ordering, fmt::{Debug, Display, Formatter, Result as FmtResult}, hash::{Hash, Hasher}, + ops::Deref, }; use encoding_rs::{EncoderResult, Encoding, UTF_8}; @@ -237,3 +238,14 @@ where Self(self.0.clone()) } } + +impl Deref for ByIdentifier +where + T: HasIdentifier + Clone, +{ + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/rust/src/raw.rs b/rust/src/raw.rs index e8a279f5e8..f492710378 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -47,10 +47,7 @@ pub enum Error { BadRecordType { offset: u64, rec_type: u32 }, #[error("In variable record starting at offset {start_offset:#x}, variable width is not in the valid range -1 to 255.")] - BadVariableWidth { - start_offset: u64, - width: i32, - }, + BadVariableWidth { start_offset: u64, width: i32 }, #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")] BadVariableLabelCode { @@ -1544,7 +1541,7 @@ impl ValueLabelRecord, RawString> { max: Self::MAX_INDEXES, }); } - + let index_offset = r.stream_position()?; let mut dict_indexes = Vec::with_capacity(n as usize); let mut invalid_indexes = Vec::new(); -- 2.30.2