From: Ben Pfaff Date: Sun, 24 Aug 2025 22:16:19 +0000 (-0700) Subject: rust: Switch to `hashbrown` for value labels to allow more flexible lookup. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3056cdef720e856688302be95c70519689e1a109;p=pspp rust: Switch to `hashbrown` for value labels to allow more flexible lookup. The `Equivalent` trait allows us to look up keys that cannot directly `Borrow` the values in the hash table, which increases the flexibility and makes value label lookup much easier. --- diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 00e1ff6e1a..f2b1417615 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -37,6 +37,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -683,6 +689,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -903,9 +915,15 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "hashbrown" -version = "0.15.3" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", + "serde", +] [[package]] name = "heck" @@ -1085,7 +1103,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.3", + 
"hashbrown 0.15.5", "serde", ] @@ -1618,6 +1636,7 @@ dependencies = [ "enum-map", "flagset", "flate2", + "hashbrown 0.15.5", "hexplay", "indexmap", "itertools", diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index 31b1d743b3..8f15d0a7af 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -52,6 +52,7 @@ unicode-properties = "0.1.3" unicode-segmentation = "1.12.0" serde_json = "1.0.141" toml = "0.9.5" +hashbrown = { version = "0.15.5", features = ["serde"] } [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/pspp/src/data.rs b/rust/pspp/src/data.rs index 45208176ae..72b1db8cfd 100644 --- a/rust/pspp/src/data.rs +++ b/rust/pspp/src/data.rs @@ -143,6 +143,12 @@ impl RawString for &'_ String { #[repr(transparent)] pub struct ByteStr(pub [u8]); +impl PartialEq for &ByteStr { + fn eq(&self, other: &ByteString) -> bool { + self.raw_string_bytes() == other.raw_string_bytes() + } +} + impl ByteStr { pub fn new(s: &[u8]) -> &ByteStr { // SAFETY: ByteStr is just a wrapper of [u8], @@ -625,6 +631,10 @@ where } } + pub fn as_raw(&self) -> Datum<&ByteStr> { + self.as_ref().map_string(|s| s.as_ref()) + } + pub fn as_encoded(&self, encoding: &'static Encoding) -> Datum> { self.as_ref().map_string(|s| s.as_encoded(encoding)) } diff --git a/rust/pspp/src/variable.rs b/rust/pspp/src/variable.rs index 866de4f836..a596fb02d6 100644 --- a/rust/pspp/src/variable.rs +++ b/rust/pspp/src/variable.rs @@ -17,7 +17,7 @@ //! Variables. 
use std::{ - collections::{BTreeMap, HashMap}, + collections::BTreeMap, fmt::{Debug, Display}, hash::{DefaultHasher, Hash, Hasher}, ops::{Deref, Not}, @@ -25,13 +25,17 @@ use std::{ }; use encoding_rs::{Encoding, UTF_8}; +use hashbrown::HashMap; +use indexmap::Equivalent; use num::integer::div_ceil; use serde::{ser::SerializeSeq, Serialize}; use thiserror::Error as ThisError; use unicase::UniCase; use crate::{ - data::{ByteString, Datum, Encoded, EncodedString, ResizeError, WithEncoding}, + data::{ + ByteStr, ByteString, Datum, Encoded, EncodedString, RawString, ResizeError, WithEncoding, + }, format::{DisplayPlain, Format}, identifier::{HasIdentifier, Identifier}, }; @@ -585,6 +589,12 @@ impl HasIdentifier for Variable { #[derive(Clone, Default, PartialEq, Eq)] pub struct ValueLabels(pub HashMap<Datum<ByteString>, String>); +impl<'a> Equivalent<Datum<ByteString>> for Datum<&'a ByteStr> { + fn equivalent(&self, key: &Datum<ByteString>) -> bool { + self == key + } +} + impl ValueLabels { pub fn new() -> Self { Self::default() @@ -594,8 +604,11 @@ impl ValueLabels { self.0.is_empty() } - pub fn get(&self, value: &Datum<ByteString>) -> Option<&str> { - self.0.get(value).map(|s| s.as_str()) + pub fn get<T>(&self, value: &Datum<T>) -> Option<&str> + where + T: RawString, + { + self.0.get(&value.as_raw()).map(|s| s.as_str()) } pub fn insert(&mut self, value: Datum<ByteString>, label: impl Into<String>) -> Option<String> {