rust: Switch to `hashbrown` for value labels to allow more flexible lookup.
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 24 Aug 2025 22:16:19 +0000 (15:16 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 24 Aug 2025 22:18:25 +0000 (15:18 -0700)
The `Equivalent` trait allows us to look up entries using a key type
that the keys stored in the hash table cannot be `Borrow`ed as (here,
looking up a `Datum<&ByteStr>` in a table keyed by `Datum<ByteString>`),
which increases flexibility and makes value label lookup much easier.

rust/Cargo.lock
rust/pspp/Cargo.toml
rust/pspp/src/data.rs
rust/pspp/src/variable.rs

index 00e1ff6e1abae79967c21a6846f46db5ed252dd1..f2b1417615011d2be36e853d3aae022c0c9db536 100644 (file)
@@ -37,6 +37,12 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "allocator-api2"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
+
 [[package]]
 name = "android-tzdata"
 version = "0.1.1"
@@ -683,6 +689,12 @@ dependencies = [
  "miniz_oxide",
 ]
 
+[[package]]
+name = "foldhash"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+
 [[package]]
 name = "form_urlencoded"
 version = "1.2.1"
@@ -903,9 +915,15 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
 
 [[package]]
 name = "hashbrown"
-version = "0.15.3"
+version = "0.15.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3"
+checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
+dependencies = [
+ "allocator-api2",
+ "equivalent",
+ "foldhash",
+ "serde",
+]
 
 [[package]]
 name = "heck"
@@ -1085,7 +1103,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
 dependencies = [
  "equivalent",
- "hashbrown 0.15.3",
+ "hashbrown 0.15.5",
  "serde",
 ]
 
@@ -1618,6 +1636,7 @@ dependencies = [
  "enum-map",
  "flagset",
  "flate2",
+ "hashbrown 0.15.5",
  "hexplay",
  "indexmap",
  "itertools",
index 31b1d743b3ff8bd176e95d9557f6cb3d721c7bef..8f15d0a7af413beaaebbf8d43885699f9e57ea58 100644 (file)
@@ -52,6 +52,7 @@ unicode-properties = "0.1.3"
 unicode-segmentation = "1.12.0"
 serde_json = "1.0.141"
 toml = "0.9.5"
+hashbrown = { version = "0.15.5", features = ["serde"] }
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
index 45208176ae3907b077057ead1d4bc9aef27dd11d..72b1db8cfd5940ee85e5f912a25486c4b0ab6f55 100644 (file)
@@ -143,6 +143,12 @@ impl RawString for &'_ String {
 #[repr(transparent)]
 pub struct ByteStr(pub [u8]);
 
+impl PartialEq<ByteString> for &ByteStr {
+    fn eq(&self, other: &ByteString) -> bool {
+        self.raw_string_bytes() == other.raw_string_bytes()
+    }
+}
+
 impl ByteStr {
     pub fn new(s: &[u8]) -> &ByteStr {
         // SAFETY: ByteStr is just a wrapper of [u8],
@@ -625,6 +631,10 @@ where
         }
     }
 
+    pub fn as_raw(&self) -> Datum<&ByteStr> {
+        self.as_ref().map_string(|s| s.as_ref())
+    }
+
     pub fn as_encoded(&self, encoding: &'static Encoding) -> Datum<WithEncoding<&ByteStr>> {
         self.as_ref().map_string(|s| s.as_encoded(encoding))
     }
index 866de4f836f2e19613284ce8ebdc5ca3513d2709..a596fb02d6e027fda4884c531fb9a550ada12ac7 100644 (file)
@@ -17,7 +17,7 @@
 //! Variables.
 
 use std::{
-    collections::{BTreeMap, HashMap},
+    collections::BTreeMap,
     fmt::{Debug, Display},
     hash::{DefaultHasher, Hash, Hasher},
     ops::{Deref, Not},
@@ -25,13 +25,17 @@ use std::{
 };
 
 use encoding_rs::{Encoding, UTF_8};
+use hashbrown::HashMap;
+use indexmap::Equivalent;
 use num::integer::div_ceil;
 use serde::{ser::SerializeSeq, Serialize};
 use thiserror::Error as ThisError;
 use unicase::UniCase;
 
 use crate::{
-    data::{ByteString, Datum, Encoded, EncodedString, ResizeError, WithEncoding},
+    data::{
+        ByteStr, ByteString, Datum, Encoded, EncodedString, RawString, ResizeError, WithEncoding,
+    },
     format::{DisplayPlain, Format},
     identifier::{HasIdentifier, Identifier},
 };
@@ -585,6 +589,12 @@ impl HasIdentifier for Variable {
 #[derive(Clone, Default, PartialEq, Eq)]
 pub struct ValueLabels(pub HashMap<Datum<ByteString>, String>);
 
+impl<'a> Equivalent<Datum<ByteString>> for Datum<&'a ByteStr> {
+    fn equivalent(&self, key: &Datum<ByteString>) -> bool {
+        self == key
+    }
+}
+
 impl ValueLabels {
     pub fn new() -> Self {
         Self::default()
@@ -594,8 +604,11 @@ impl ValueLabels {
         self.0.is_empty()
     }
 
-    pub fn get(&self, value: &Datum<ByteString>) -> Option<&str> {
-        self.0.get(value).map(|s| s.as_str())
+    pub fn get<T>(&self, value: &Datum<T>) -> Option<&str>
+    where
+        T: RawString,
+    {
+        self.0.get(&value.as_raw()).map(|s| s.as_str())
     }
 
     pub fn insert(&mut self, value: Datum<ByteString>, label: impl Into<String>) -> Option<String> {