From: Ben Pfaff Date: Wed, 23 Jul 2025 14:29:56 +0000 (-0700) Subject: make encoding private X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a62a2ff22f1cb608bc4fcc748b80c256f536d237;p=pspp make encoding private --- diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index b0a3efcf6e..0b6e7fa47d 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -522,7 +522,7 @@ pub enum EncodedStr<'a> { /// The string's encoding. /// - /// THis can be [UTF_8]. + /// This can be [UTF_8]. encoding: &'static Encoding, }, diff --git a/rust/pspp/src/dictionary.rs b/rust/pspp/src/dictionary.rs index 181639c3c1..e1a98d6afb 100644 --- a/rust/pspp/src/dictionary.rs +++ b/rust/pspp/src/dictionary.rs @@ -284,7 +284,7 @@ pub struct Dictionary { pub documents: Vec, /// Named collections of variables within the dictionary. - pub vectors: HashSet>, + vectors: HashSet>, /// Attributes for the dictionary itself. /// @@ -297,10 +297,10 @@ pub struct Dictionary { /// Variable sets. /// /// Only the GUI makes use of variable sets. - pub variable_sets: Vec<DictIndexVariableSet>, + variable_sets: Vec<DictIndexVariableSet>, /// Character encoding for the dictionary and the data. - pub encoding: &'static Encoding, + encoding: &'static Encoding, } impl Serialize for Dictionary { @@ -364,6 +364,10 @@ impl Dictionary { } } + pub fn encoding(&self) -> &'static Encoding { + self.encoding + } + /// Returns a reference to the weight variable, if any. pub fn weight_var(&self) -> Option<&Variable> { self.weight.map(|index| &self.variables[index].0) @@ -441,6 +445,18 @@ impl Dictionary { VariableSets::new(self) } + pub fn add_variable_set(&mut self, set: DictIndexVariableSet) { + assert!(set + .variables + .iter() + .all(|dict_index| *dict_index < self.variables.len())); + self.variable_sets.push(set); + } + + pub fn remove_variable_set(&mut self, var_set_index: usize) { + self.variable_sets.remove(var_set_index); + } + /// Adds `variable` at the end of the dictionary and returns its index. 
/// /// The operation fails if the dictionary already contains a variable with @@ -1316,7 +1332,7 @@ pub struct Variable { /// /// The variables in a [Dictionary] must all use the same encoding as the /// dictionary. - pub encoding: &'static Encoding, + encoding: &'static Encoding, } pub fn escape_value_label(unescaped: &str) -> Cow<'_, str> { @@ -1358,6 +1374,10 @@ impl Variable { } } + pub fn encoding(&self) -> &'static Encoding { + self.encoding + } + pub fn is_numeric(&self) -> bool { self.width.is_numeric() } diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index 7067ef0c38..972775998f 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -162,7 +162,7 @@ impl Convert { case?.0.into_iter().zip(dictionary.variables.iter()).map( |(datum, variable)| { datum - .display(variable.print_format, variable.encoding) + .display(variable.print_format, variable.encoding()) .to_string() }, ), diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs index 2f670a572c..6062028663 100644 --- a/rust/pspp/src/output/pivot/mod.rs +++ b/rust/pspp/src/output/pivot/mod.rs @@ -1890,7 +1890,7 @@ impl Value { Datum::String(string) => Self::new(ValueInner::String(StringValue { show: None, hex: variable.print_format.type_() == Type::AHex, - s: string.decode(variable.encoding).into_owned(), + s: string.decode(variable.encoding()).into_owned(), var_name, value_label, })), diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 14aafcf167..4c51c9cf9e 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -1035,7 +1035,7 @@ impl Records { .map(|value| { value .decode(variable.width) - .display(variable.print_format, variable.encoding) + .display(variable.print_format, variable.encoding()) .with_trimming() .with_quoted_string() .to_string() @@ -1139,7 +1139,7 @@ impl Records { // converted to lowercase, as the long variable names. 
for index in 0..dictionary.variables.len() { let lower = dictionary.variables[index].name.0.as_ref().to_lowercase(); - if let Ok(new_name) = Identifier::from_encoding(lower, dictionary.encoding) { + if let Ok(new_name) = Identifier::from_encoding(lower, dictionary.encoding()) { let _ = dictionary.try_rename_var(index, new_name); } } @@ -1292,7 +1292,7 @@ impl Records { name: record.name, variables, }; - dictionary.variable_sets.push(variable_set); + dictionary.add_variable_set(variable_set); } for record in self.other_extension.drain(..) { diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index 722bcf04eb..eda4aebd67 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -667,7 +667,7 @@ where data.push( case.0 .into_iter() - .map(|datum| Value::new_datum(&datum, dictionary.encoding)) + .map(|datum| Value::new_datum(&datum, dictionary.encoding())) .collect::<Vec<_>>(), ); } diff --git a/rust/pspp/src/sys/write.rs b/rust/pspp/src/sys/write.rs index 859bdad154..eed16fca49 100644 --- a/rust/pspp/src/sys/write.rs +++ b/rust/pspp/src/sys/write.rs @@ -245,13 +245,13 @@ where }, print_format: to_raw_format(variable.print_format, seg0_width), write_format: to_raw_format(variable.write_format, seg0_width), - name: encode_fixed_string(name0, variable.encoding), + name: encode_fixed_string(name0, variable.encoding()), }; (2u32, record).write_le(self.writer)?; // Variable label. if let Some(label) = variable.label() { - let label = variable.encoding.encode(&label).0; + let label = variable.encoding().encode(&label).0; let len = label.len().min(255) as u32; let padded_len = len.next_multiple_of(4); (len, &*label, Zeros((padded_len - len) as usize)).write_le(self.writer)?; @@ -281,7 +281,7 @@ where missing_value_code: 0, print_format: format, write_format: format, - name: encode_fixed_string(name, variable.encoding), + name: encode_fixed_string(name, variable.encoding()), }, ) .write_le(self.writer)?; @@ -345,7 +345,7 @@ where // Label record. 
(3u32, value_labels.0.len() as u32).write_le(self.writer)?; for (datum, label) in &value_labels.0 { - let label = &*self.dictionary.encoding.encode(&label).0; + let label = &*self.dictionary.encoding().encode(&label).0; let label = if label.len() > 255 { &label[..255] } else { @@ -365,7 +365,7 @@ where if !self.dictionary.documents.is_empty() { (6u32, self.dictionary.documents.len() as u32).write_le(self.writer)?; for line in &self.dictionary.documents { - Padded::exact(&*self.dictionary.encoding.encode(&line).0, 80, b' ') + Padded::exact(&*self.dictionary.encoding().encode(&line).0, 80, b' ') .write_le(self.writer)?; } } @@ -387,7 +387,7 @@ where // We always write files in little-endian. 2 }, - character_code: codepage_from_encoding(self.dictionary.encoding) as i32, + character_code: codepage_from_encoding(self.dictionary.encoding()) as i32, }, ) .write_le(self.writer) @@ -410,16 +410,11 @@ where fn write_var_sets(&mut self) -> Result<(), BinError> { let mut s = String::new(); - for set in &self.dictionary.variable_sets { - write!(&mut s, "{}= ", set.name).unwrap(); - for (index, variable) in set.variables.iter().enumerate() { + for set in &self.dictionary.variable_sets() { + write!(&mut s, "{}= ", set.name()).unwrap(); + for (index, variable) in set.variables().iter().enumerate() { let prefix = if index > 0 { " " } else { "" }; - write!( - &mut s, - "{prefix}{}", - self.dictionary.variables[*variable].name - ) - .unwrap(); + write!(&mut s, "{prefix}{}", &variable.name).unwrap(); } writeln!(&mut s).unwrap(); } @@ -436,7 +431,7 @@ where .iter() .filter(|set| set.mr_type.supported_before_v14() == pre_v14) { - output.extend_from_slice(&self.dictionary.encoding.encode(&set.name).0[..]); + output.extend_from_slice(&self.dictionary.encoding().encode(&set.name).0[..]); output.push(b'='); match &set.mr_type { MultipleResponseType::MultipleDichotomy { datum, labels } => { @@ -467,7 +462,7 @@ where let label = if set.mr_type.label_from_var_label() { Cow::from(&[]) } else { 
- self.dictionary.encoding.encode(&set.label).0 + self.dictionary.encoding().encode(&set.label).0 }; write!(&mut output, "{} ", label.len()).unwrap(); output.extend_from_slice(&label[..]); @@ -477,7 +472,7 @@ where // might expand upon lowercasing. let short_name = self.short_names[variable][0].as_str().to_ascii_lowercase(); output.push(b' '); - output.extend_from_slice(&self.dictionary.encoding.encode(&short_name).0); + output.extend_from_slice(&self.dictionary.encoding().encode(&short_name).0); } output.push(b'\n'); } @@ -542,7 +537,7 @@ where if variable.value_labels.is_empty() || !variable.width.is_long_string() { break; } - let name = self.dictionary.encoding.encode(&variable.name).0; + let name = self.dictionary.encoding().encode(&variable.name).0; ( name.len() as u32, &name[..], @@ -553,7 +548,7 @@ where for (value, label) in &variable.value_labels.0 { let value = value.as_string().unwrap(); - let label = self.dictionary.encoding.encode(&label).0; + let label = self.dictionary.encoding().encode(&label).0; ( value.len() as u32, value.as_bytes(), @@ -573,7 +568,7 @@ where if variable.missing_values.is_empty() || !variable.width.is_long_string() { break; } - let name = self.dictionary.encoding.encode(&variable.name).0; + let name = self.dictionary.encoding().encode(&variable.name).0; ( name.len() as u32, &name[..], @@ -620,7 +615,7 @@ where } fn write_encoding(&mut self) -> Result<(), BinError> { - self.write_string_record(20, self.dictionary.encoding.name()) + self.write_string_record(20, self.dictionary.encoding().name()) } fn write_bytes_record(&mut self, subtype: u32, bytes: &[u8]) -> Result<(), BinError> { @@ -632,7 +627,7 @@ where } fn write_string_record(&mut self, subtype: u32, s: &str) -> Result<(), BinError> { - self.write_bytes_record(subtype, &self.dictionary.encoding.encode(&s).0) + self.write_bytes_record(subtype, &self.dictionary.encoding().encode(&s).0) } }