From aa72b08d411ffbd5cbb4d63848b943a2a26d7c24 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 1 Jun 2009 15:19:22 -0700 Subject: [PATCH] hash: New function hash_int() for hashing a single 32-bit integer value. --- lib/hash.h | 14 +++++++ lib/mac-learning.c | 3 +- tests/test-classifier.c | 14 +++---- tests/test-hash.c | 86 +++++++++++++++++++++++++++----------- tests/test-hmap.c | 3 +- vswitchd/proc-net-compat.c | 2 +- 6 files changed, 85 insertions(+), 37 deletions(-) diff --git a/lib/hash.h b/lib/hash.h index 0dabb439..d85f29b1 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -71,4 +71,18 @@ static inline uint32_t hash_string(const char *s, uint32_t basis) return hash_bytes(s, strlen(s), basis); } +/* This is Bob Jenkins' integer hash from + * http://burtleburtle.net/bob/hash/integer.html, modified for style. */ +static inline uint32_t hash_int(uint32_t x, uint32_t basis) +{ + x -= x << 6; + x ^= x >> 17; + x -= x << 9; + x ^= x << 4; + x -= x << 3; + x ^= x << 10; + x ^= x >> 15; + return x + basis; +} + #endif /* hash.h */ diff --git a/lib/mac-learning.c b/lib/mac-learning.c index f82e030b..4a25a1c1 100644 --- a/lib/mac-learning.c +++ b/lib/mac-learning.c @@ -95,8 +95,7 @@ static tag_type make_unknown_mac_tag(const struct mac_learning *ml, const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan) { - uint32_t h = hash_bytes(&ml->secret, sizeof ml->secret, - mac_table_hash(mac, vlan)); + uint32_t h = hash_int(ml->secret, mac_table_hash(mac, vlan)); return tag_create_deterministic(h); } diff --git a/tests/test-classifier.c b/tests/test-classifier.c index a47e8380..0a6806c6 100644 --- a/tests/test-classifier.c +++ b/tests/test-classifier.c @@ -586,7 +586,7 @@ table_mask(int table) static int random_wcf_in_table(int table, int seed) { - int wc_fields = (1u << table) | hash_bytes(&seed, sizeof seed, 0); + int wc_fields = (1u << table) | hash_int(seed, 0); return wc_fields & table_mask(table); } @@ -719,11 +719,10 @@ test_two_rules_in_one_table(void) /* Generate value patterns that will put the two rules into * different buckets. */ value_mask = ((1u << table) - 1); - value_pat1 = hash_bytes(&pri1, sizeof pri1, 1) & value_mask; + value_pat1 = hash_int(pri1, 1) & value_mask; i = 0; do { - value_pat2 = (hash_bytes(&pri2, sizeof pri2, i++) - & value_mask); + value_pat2 = (hash_int(pri2, i++) & value_mask); } while (value_pat1 == value_pat2); rule1 = make_rule(wcf1, pri1, value_pat1); rule2 = make_rule(wcf2, pri2, value_pat2); @@ -840,7 +839,7 @@ test_many_rules_in_one_bucket(void) struct tcls tcls; int i; - srand(hash_bytes(&table, sizeof table, iteration)); + srand(hash_int(table, iteration)); for (i = 0; i < MAX_RULES; i++) { priorities[i] = i * 129; } @@ -884,7 +883,7 @@ test_many_rules_in_one_table(void) struct tcls tcls; int i; - srand(hash_bytes(&table, sizeof table, iteration)); + srand(hash_int(table, iteration)); for (i = 0; i < MAX_RULES; i++) { priorities[i] = i * 129; } @@ -899,8 +898,7 @@ test_many_rules_in_one_table(void) int wcf; wcf = random_wcf_in_table(table, priority); - rule = make_rule(wcf, priority, - hash_bytes(&priority, sizeof priority, 1)); + rule = make_rule(wcf, priority, hash_int(priority, 1)); tcls_insert(&tcls, rule); assert(!classifier_insert(&cls, &rule->cls_rule)); check_tables(&cls, 1, -1, i + 1); diff --git a/tests/test-hash.c b/tests/test-hash.c index 610fad41..473c7b0f 100644 --- a/tests/test-hash.c +++ b/tests/test-hash.c @@ -51,43 +51,39 @@ set_bit(uint32_t array[3], int bit) } } -int -main(void) +static uint32_t +hash_words_cb(uint32_t input) +{ + return hash_words(&input, 1, 0); +} + +static uint32_t +hash_int_cb(uint32_t input) +{ + return hash_int(input, 0); +} + +static void +check_word_hash(uint32_t (*hash)(uint32_t), const char *name, + int min_unique) { int i, j; - /* Check that all hash functions of with one 1-bit (or no 1-bits) set - * within a single 32-bit word have different values in all 11-bit - * consecutive runs. - * - * Given a random distribution, the probability of at least one collision - * in any set of 11 bits is approximately - * - * 1 - ((2**11 - 1)/2**11)**C(33,2) - * == 1 - (2047/2048)**528 - * =~ 0.22 - * - * There are 21 ways to pick 11 consecutive bits in a 32-bit word, so - * if we assumed independence then the chance of having no collisions in - * any of those 11-bit runs would be 0.22**21 =~ 1.5e-14. Obviously - * independence must be a bad assumption :-) - */ for (i = 0; i <= 32; i++) { uint32_t in1 = i < 32 ? UINT32_C(1) << i : 0; for (j = i + 1; j <= 32; j++) { uint32_t in2 = j < 32 ? UINT32_C(1) << j : 0; - uint32_t out1 = hash_words(&in1, 1, 0); - uint32_t out2 = hash_words(&in2, 1, 0); - const int min_unique = 11; + uint32_t out1 = hash(in1); + uint32_t out2 = hash(in2); const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1; int ofs; for (ofs = 0; ofs < 32 - min_unique; ofs++) { uint32_t bits1 = (out1 >> ofs) & unique_mask; uint32_t bits2 = (out2 >> ofs) & unique_mask; if (bits1 == bits2) { - printf("Partial collision:\n"); - printf("hash(%08"PRIx32") == %08"PRIx32"\n", in1, out1); - printf("hash(%08"PRIx32") == %08"PRIx32"\n", in2, out2); + printf("Partial collision for '%s':\n", name); + printf("%s(%08"PRIx32") = %08"PRIx32"\n", name, in1, out1); + printf("%s(%08"PRIx32") = %08"PRIx32"\n", name, in2, out2); printf("%d bits of output starting at bit %d " "are both 0x%"PRIx32"\n", min_unique, ofs, bits1); exit(1); @@ -95,6 +91,30 @@ main(void) } } } +} + +int +main(void) +{ + int i, j; + + /* Check that all hashes computed with hash_words with one 1-bit (or no + * 1-bits) set within a single 32-bit word have different values in all + * 11-bit consecutive runs. + * + * Given a random distribution, the probability of at least one collision + * in any set of 11 bits is approximately + * + * 1 - ((2**11 - 1)/2**11)**C(33,2) + * == 1 - (2047/2048)**528 + * =~ 0.22 + * + * There are 21 ways to pick 11 consecutive bits in a 32-bit word, so if we + * assumed independence then the chance of having no collisions in any of + * those 11-bit runs would be (1-0.22)**21 =~ .0044. Obviously + * independence must be a bad assumption :-) + */ + check_word_hash(hash_words_cb, "hash_words", 11); /* Check that all hash functions of with one 1-bit (or no 1-bits) set * within three 32-bit words have different values in their lowest 12 @@ -130,5 +150,23 @@ main(void) } } } + + /* Check that all hashes computed with hash_int with one 1-bit (or no + * 1-bits) set within a single 32-bit word have different values in all + * 14-bit consecutive runs. + * + * Given a random distribution, the probability of at least one collision + * in any set of 14 bits is approximately + * + * 1 - ((2**14 - 1)/2**14)**C(33,2) + * == 1 - (16,383/16,834)**528 + * =~ 0.031 + * + * There are 18 ways to pick 14 consecutive bits in a 32-bit word, so if we + * assumed independence then the chance of having no collisions in any of + * those 14-bit runs would be (1-0.03)**18 =~ 0.56. This seems reasonable. + */ + check_word_hash(hash_int_cb, "hash_int", 14); + return 0; } diff --git a/tests/test-hmap.c b/tests/test-hmap.c index 962389fa..684a5bae 100644 --- a/tests/test-hmap.c +++ b/tests/test-hmap.c @@ -137,8 +137,7 @@ identity_hash(int value) static size_t good_hash(int value) { - const uint32_t x = value; - return hash_words(&x, 1, 0x1234abcd); + return hash_int(value, 0x1234abcd); } static size_t diff --git a/vswitchd/proc-net-compat.c b/vswitchd/proc-net-compat.c index a5b86ea8..b74883eb 100644 --- a/vswitchd/proc-net-compat.c +++ b/vswitchd/proc-net-compat.c @@ -296,7 +296,7 @@ remove_tagged_dev(struct shash_node *node, const char *tagged_dev) static uint32_t hash_vlan(const char *trunk_dev, uint32_t vid) { - return hash_words(&vid, 1, hash_string(trunk_dev, 0)); + return hash_int(vid, hash_string(trunk_dev, 0)); } /* Update /proc/net/vlan/ for 'vlan'. */ -- 2.30.2