From: Ben Pfaff Date: Mon, 23 Dec 2024 03:26:34 +0000 (-0800) Subject: work on sysfile reader X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7147d153d8f5de6ac4dbb67b9d80ca6b9d553542;p=pspp work on sysfile reader --- diff --git a/rust/pspp/src/cooked.rs b/rust/pspp/src/cooked.rs index 7a3c62e70c..84b473fdf7 100644 --- a/rust/pspp/src/cooked.rs +++ b/rust/pspp/src/cooked.rs @@ -11,7 +11,7 @@ use crate::{ identifier::{ByIdentifier, Error as IdError, Identifier}, raw::{ self, Cases, DecodedRecord, DocumentRecord, EncodingRecord, Extension, FileAttributeRecord, - FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongNamesRecord, + FloatInfoRecord, HeaderRecord, IntegerInfoRecord, LongName, LongNamesRecord, LongStringMissingValueRecord, LongStringValueLabelRecord, MultipleResponseRecord, NumberOfCasesRecord, ProductInfoRecord, RawStr, RawWidth, ValueLabel, ValueLabelRecord, VarDisplayRecord, VariableAttributeRecord, VariableRecord, VariableSetRecord, @@ -405,7 +405,7 @@ impl Decoder { loop { self.n_generated_names += 1; let name = Identifier::from_encoding( - &format!("VAR{:03}", self.n_generated_names), + format!("VAR{:03}", self.n_generated_names), self.encoding, ) .unwrap(); @@ -459,7 +459,7 @@ pub fn decode( .filter(|(_index, record)| record.width != RawWidth::Continuation) { let name = trim_end_spaces(input.name.to_string()); - let name = match Identifier::from_encoding(&name, encoding) { + let name = match Identifier::from_encoding(name, encoding) { Ok(name) => { if !dictionary.variables.contains(&name.0) { name @@ -610,6 +610,43 @@ pub fn decode( } } + if headers.long_names.is_empty() { + // There are no long variable names. Use the short variable names, + // converted to lowercase, as the long variable names. + for index in 0..dictionary.variables.len() { + let lower = dictionary.variables[index].name.0.as_ref().to_lowercase(); + if let Ok(new_name) = Identifier::from_encoding(lower, dictionary.encoding) { + dictionary.try_rename_var(index, new_name); + } + } + } else { + // Rename each of the variables, one by one. (In a correctly + // constructed system file, this cannot create any intermediate + // duplicate variable names, because all of the new variable names are + // longer than any of the old variable names and thus there cannot be + // any overlaps.) + for renaming in headers + .long_names + .iter() + .flat_map(|record| record.0.iter().cloned()) + { + let LongName { + short_name, + long_name, + } = renaming; + if let Some(index) = dictionary.variables.get_index_of(&short_name.0) { + dictionary.try_rename_var(index, long_name); + dictionary + .variables + .get_index_mut2(index) + .unwrap() + .short_names = vec![short_name]; + } else { + warn(Error::TBD); + } + } + } + let metadata = Metadata::decode(&headers, warn); Ok((dictionary, metadata)) } diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 5e7f25d208..e8988ca2c1 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -374,6 +374,22 @@ impl Dictionary { }) .collect(); } + + pub fn try_rename_var(&mut self, index: usize, new_name: Identifier) -> bool { + let mut variable = self.variables.swap_remove_index(index).unwrap(); + let may_rename = !self.variables.contains(&new_name.0); + if may_rename { + variable.name = new_name; + variable.short_names = Vec::new(); + }; + assert!(self.variables.insert(variable)); + self.variables.swap_indices(self.variables.len() - 1, index); + may_rename + } + + pub fn rename_var(&mut self, index: usize, new_name: Identifier) { + assert!(self.try_rename_var(index, new_name)); + } } fn update_dict_index_vec(dict_indexes: &mut Vec, f: F) @@ -390,8 +406,9 @@ where }); } -#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub enum Role { + #[default] Input, Target, Both, @@ -400,12 +417,7 @@ pub enum Role { Split, } -impl Default for Role { - fn default() -> Self { - Self::Input - } -} - +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum DictClass { Ordinary, System, @@ -413,7 +425,13 @@ pub enum DictClass { } impl DictClass { - pub fn from_identifier(id: &Identifier) -> Self { + pub fn must_leave(self) -> bool { + self == DictClass::Scratch + } +} + +impl From<&Identifier> for DictClass { + fn from(id: &Identifier) -> Self { if id.0.starts_with('$') { Self::System } else if id.0.starts_with('#') { @@ -422,14 +440,6 @@ impl DictClass { Self::Ordinary } } - - pub fn must_leave(self) -> bool { - match self { - DictClass::Ordinary => false, - DictClass::System => false, - DictClass::Scratch => true, - } - } } #[derive(Clone, Debug)] @@ -453,7 +463,7 @@ pub struct Variable { impl Variable { pub fn new(name: Identifier, width: VarWidth) -> Self { let var_type = VarType::from_width(width); - let leave = DictClass::from_identifier(&name).must_leave(); + let leave = DictClass::from(&name).must_leave(); Self { name, width, diff --git a/rust/pspp/src/identifier.rs b/rust/pspp/src/identifier.rs index c6909fd58a..1f6694b633 100644 --- a/rust/pspp/src/identifier.rs +++ b/rust/pspp/src/identifier.rs @@ -11,6 +11,8 @@ use finl_unicode::categories::{CharacterCategories, MajorCategory}; use thiserror::Error as ThisError; use unicase::UniCase; +use crate::dictionary::DictClass; + pub trait IdentifierChar { /// Returns true if `self` is an ASCII character that may be the first /// character in an identifier. @@ -153,12 +155,13 @@ impl Identifier { /// encoding used by the dictionary, not in UTF-8. pub const MAX_LEN: usize = 64; - pub fn new(s: &str) -> Result { + pub fn new(s: impl Into>) -> Result { Self::from_encoding(s, UTF_8) } - pub fn from_encoding(s: &str, encoding: &'static Encoding) -> Result { - Self::is_plausible(s)?; - let identifier = Identifier(s.into()); + pub fn from_encoding(s: impl Into>, encoding: &'static Encoding) -> Result { + let s: UniCase = s.into(); + Self::is_plausible(&s)?; + let identifier = Identifier(s); identifier.check_encoding(encoding)?; Ok(identifier) } @@ -241,6 +244,17 @@ impl Identifier { pub fn matches_keyword_n(&self, keyword: &str, n: usize) -> bool { id_match_n_nonstatic(keyword, self.0.as_str(), n) } + + pub fn must_be_ordinary(self) -> Result { + match DictClass::from(&self) { + DictClass::Ordinary => Ok(self), + _ => { + let s = self.0.into_inner(); + let first = s.chars().next().unwrap(); + Err(Error::BadFirstCharacter(s, first)) + } + } + } } impl PartialEq for Identifier { diff --git a/rust/pspp/src/raw.rs b/rust/pspp/src/raw.rs index e5933d3756..63b7c04c3e 100644 --- a/rust/pspp/src/raw.rs +++ b/rust/pspp/src/raw.rs @@ -2301,6 +2301,7 @@ impl VeryLongString { }; let short_name = decoder .new_identifier(short_name) + .and_then(Identifier::must_be_ordinary) .map_err(Warning::InvalidLongStringName)?; let length = length.parse().map_err(|_| Warning::TBD)?; Ok(VeryLongString { short_name, length }) @@ -2421,6 +2422,7 @@ impl VarAttributeSet { }; let long_var_name = decoder .new_identifier(long_var_name) + .and_then(Identifier::must_be_ordinary) .map_err(Warning::InvalidAttributeVariableName)?; let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'))?; let var_attribute = VarAttributeSet { @@ -2465,9 +2467,11 @@ impl LongName { }; let short_name = decoder .new_identifier(short_name) + .and_then(Identifier::must_be_ordinary) .map_err(Warning::InvalidShortName)?; let long_name = decoder .new_identifier(long_name) + .and_then(Identifier::must_be_ordinary) .map_err(Warning::InvalidLongName)?; Ok(LongName { short_name, @@ -2477,7 +2481,7 @@ impl LongName { } #[derive(Clone, Debug)] -pub struct LongNamesRecord(Vec); +pub struct LongNamesRecord(pub Vec); impl LongNamesRecord { fn decode(source: &TextRecord, decoder: &Decoder) -> Self { @@ -2513,6 +2517,7 @@ impl VariableSet { for var in input.split_ascii_whitespace() { if let Some(identifier) = decoder .new_identifier(var) + .and_then(Identifier::must_be_ordinary) .map_err(Warning::InvalidVariableSetName) .issue_warning(&decoder.warn) { diff --git a/rust/pspp/src/settings.rs b/rust/pspp/src/settings.rs index de51951202..b48420aefa 100644 --- a/rust/pspp/src/settings.rs +++ b/rust/pspp/src/settings.rs @@ -68,9 +68,9 @@ impl Default for Settings { testing: false, fuzz_bits: 6, scale_min: 24, - commands: Compatibility::Enhanced, - global: Compatibility::Enhanced, - syntax: Compatibility::Enhanced, + commands: Compatibility::default(), + global: Compatibility::default(), + syntax: Compatibility::default(), formats: FormatSettings::default(), small: 0.0001, } @@ -84,9 +84,14 @@ impl Settings { } } +#[derive(Copy, Clone, PartialEq, Eq, Default)] pub enum Compatibility { - Compatible, + /// Use improved PSPP behavior. + #[default] Enhanced, + + /// Be as compatible as possible. + Compatible, } pub struct MacroSettings {