X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fhash.c;h=9961612ccf0ec2f5b24e61c2dd2650e9250cfc16;hb=ccf2f45c091ce1555b4e2a36186c501675c18a59;hp=94e6995b5635722ac19301855812f8b517576c8d;hpb=2e0595dd8e344dbdcab740d7d2a3b67d153d6b39;p=pspp-builds.git diff --git a/src/hash.c b/src/hash.c index 94e6995b..9961612c 100644 --- a/src/hash.c +++ b/src/hash.c @@ -18,13 +18,28 @@ 02111-1307, USA. */ #include +#include "hash.h" #include #include #include +#include "algorithm.h" #include "alloc.h" -#include "hash.h" +#include "misc.h" #include "str.h" +/* Hash table. */ +struct hsh_table + { + size_t used; /* Number of filled entries. */ + size_t size; /* Number of entries (a power of 2). */ + void **entries; /* Hash table proper. */ + + void *aux; /* Auxiliary data for comparison functions. */ + hsh_compare_func *compare; + hsh_hash_func *hash; + hsh_free_func *free; + }; + /* Note for constructing hash functions: You can store the hash values in the records, then compare hash @@ -41,79 +56,105 @@ /* Prime numbers and hash functions. */ -static int hsh_prime_tab[] = +/* Returns smallest power of 2 greater than X. */ +static size_t +next_power_of_2 (size_t x) { - 13, 31, 47, 67, 131, 257, 521, 1031, 2053, 4099, 8209, 16411, - 32771, 65537, 131101, 262147, 524309, 1048583, 2097169, 4194319, - 8388617, 16777259, 33554467, 67108879, 134217757, 268435459, - 536870923, 1073741827, INT_MAX, -}; - -/* Returns pointer into hsh_prime_tab[], pointing to the first prime - in the table greater than X. */ -int * -hsh_next_prime (int x) + assert (x != 0); + + for (;;) + { + /* Turn off rightmost 1-bit in x. */ + size_t y = x & (x - 1); + + /* If y is 0 then x only had a single 1-bit. */ + if (y == 0) + return 2 * x; + + /* Otherwise turn off the next. */ + x = y; + } +} + +/* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */ +#define FNV_32_PRIME 16777619u +#define FNV_32_BASIS 2166136261u + +/* Fowler-Noll-Vo 32-bit hash, for bytes. */ +unsigned +hsh_hash_bytes (const void *buf_, size_t size) { - int *p; + const unsigned char *buf = buf_; + unsigned hash; + + assert (buf != NULL); - assert (x >= 0); + hash = FNV_32_BASIS; + while (size-- > 0) + hash = (hash * FNV_32_PRIME) ^ *buf++; - for (p = hsh_prime_tab; *p < x; p++) - ; + return hash; +} - assert (*p != INT_MAX); +/* Fowler-Noll-Vo 32-bit hash, for strings. */ +unsigned +hsh_hash_string (const char *s_) +{ + const unsigned char *s = s_; + unsigned hash; + + assert (s != NULL); - return p; + hash = FNV_32_BASIS; + while (*s != '\0') + hash = (hash * FNV_32_PRIME) ^ *s++; + + return hash; } -/* P.J. Weinberger's hash function, recommended by the Red Dragon - Book. Hashes the d-string between S1 and S2. Returns unbounded - nonnegative result. */ -int -hashpjw_d (const char *s1, const char *s2) +/* Hash for ints. */ +unsigned +hsh_hash_int (int i) { - const char *p; - unsigned g, h; - - for (h = 0, p = s1; p < s2; p++) - { - h = (h << 4) + *(unsigned char *) p; - g = h & 0xf0000000; - h ^= (g >> 24) | g; - } - return abs ((int) h); + return hsh_hash_bytes (&i, sizeof i); } -/* Alternate entry point for hashpjw_d() that takes an s-string. */ -int -hashpjw (const char *s) +/* Hash for double. */ +unsigned +hsh_hash_double (double d) { - return hashpjw_d (s, &s[strlen (s)]); + if (!isnan (d)) + return hsh_hash_bytes (&d, sizeof d); + else + return 0; } -/*hash tables. */ +/* Hash tables. */ /* Creates a hash table with at least M entries. COMPARE is a function that compares two entries and returns 0 if they are identical, nonzero otherwise; HASH returns a nonnegative hash value for an entry; FREE destroys an entry. */ struct hsh_table * -hsh_create (int m, - int (*compare) (const void *, const void *, void *param), - unsigned (*hash) (const void *, void *param), - void (*free) (void *, void *param), - void *param) +hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash, + hsh_free_func *free, void *aux) { - struct hsh_table *h = xmalloc (sizeof *h); + struct hsh_table *h; int i; - h->n = 0; - h->mp = hsh_next_prime (m); - h->m = *h->mp++; - h->table = xmalloc (sizeof *h->table * h->m); - for (i = 0; i < h->m; i++) - h->table[i] = NULL; - h->param = param; + assert (size > 0); + assert (compare != NULL); + assert (hash != NULL); + + h = xmalloc (sizeof *h); + h->used = 0; + if (size < 4) + size = 4; + h->size = next_power_of_2 (size); + h->entries = xmalloc (sizeof *h->entries * h->size); + for (i = 0; i < h->size; i++) + h->entries[i] = NULL; + h->aux = aux; h->compare = compare; h->hash = hash; h->free = free; @@ -126,20 +167,14 @@ hsh_clear (struct hsh_table *h) { int i; + assert (h != NULL); if (h->free) - for (i = 0; i < h->m; i++) - h->free (h->table[i], h->param); - - if (h->m >= 128) - { - free (h->table); - h->mp = hsh_next_prime (31); - h->m = *h->mp++; - h->table = xmalloc (sizeof *h->table * h->m); - } + for (i = 0; i < h->size; i++) + if (h->entries[i] != NULL) + h->free (h->entries[i], h->aux); - for (i = 0; i < h->m; i++) - h->table[i] = NULL; + for (i = 0; i < h->size; i++) + h->entries[i] = NULL; } /* Destroys table H and all its contents. */ @@ -148,173 +183,314 @@ hsh_destroy (struct hsh_table *h) { int i; - if (h == NULL) - return; - if (h->free) - for (i = 0; i < h->m; i++) - { - void *p = h->table[i]; - if (p) - h->free (p, h->param); - } - free (h->table); - free (h); + if (h != NULL) + { + if (h->free) + for (i = 0; i < h->size; i++) + if (h->entries[i] != NULL) + h->free (h->entries[i], h->aux); + free (h->entries); + free (h); + } } -/* Increases the capacity of H. */ -void -hsh_rehash (struct hsh_table *h) +/* Changes the capacity of H to NEW_SIZE. */ +static void +hsh_rehash (struct hsh_table *h, size_t new_size) { - void **begin = h->table; - void **end = &h->table[h->m]; - void **table_p; + void **begin, **end, **table_p; int i; - h->m = *h->mp++; - h->table = xmalloc (sizeof *h->table * h->m); - for (i = 0; i < h->m; i++) - h->table[i] = NULL; + assert (h != NULL); + assert (new_size >= h->used); + + begin = h->entries; + end = begin + h->size; + + h->size = new_size; + h->entries = xmalloc (sizeof *h->entries * h->size); + for (i = 0; i < h->size; i++) + h->entries[i] = NULL; for (table_p = begin; table_p < end; table_p++) { void **entry; if (*table_p == NULL) continue; - entry = &h->table[h->hash (*table_p, h->param) % h->m]; + entry = &h->entries[h->hash (*table_p, h->aux) & (h->size - 1)]; while (*entry) - if (--entry < h->table) - entry = &h->table[h->m - 1]; + if (--entry < h->entries) + entry = &h->entries[h->size - 1]; *entry = *table_p; } free (begin); } -/* Static variables for hsh_sort(). */ -static void *hsh_param; -static int (*hsh_compare) (const void *, const void *, void *param); +/* A "algo_predicate_func" that returns nonzero if DATA points + to a non-null void. */ +static int +not_null (const void *data_, void *aux UNUSED) +{ + void *const *data = data_; + + return *data != NULL; +} + +/* Compacts hash table H and returns a pointer to its data. The + returned data consists of hsh_count(H) non-null pointers, in + no particular order, followed by a null pointer. After + calling this function, only hsh_destroy() and hsh_count() may + be applied to H. */ +void ** +hsh_data (struct hsh_table *h) +{ + size_t n; -/* hsh_sort() helper function that ensures NULLs are sorted after the - rest of the table. */ + assert (h != NULL); + n = partition (h->entries, h->size, sizeof *h->entries, + not_null, NULL); + assert (n == h->used); + return h->entries; +} + +/* Dereferences void ** pointers and passes them to the hash + comparison function. */ static int -internal_comparison_fn (const void *pa, const void *pb) +comparison_helper (const void *a_, const void *b_, void *h_) { - void *a = *(void **) pa; - void *b = *(void **) pb; - return a == NULL ? 1 : (b == NULL ? -1 : hsh_compare (a, b, hsh_param)); + void *const *a = a_; + void *const *b = b_; + struct hsh_table *h = h_; + + return h->compare (*a, *b, h->aux); } -/* Sorts hash table H based on function COMPARE. NULLs are sent to - the end of the table. The resultant table is returned (it is - guaranteed to be NULL-terminated). H should not be used again as a - hash table until and unless hsh_clear() called. */ +/* Sorts hash table H based on hash comparison function. The + returned data consists of hsh_count(H) non-null pointers, + sorted in order of the hash comparison function, followed by a + null pointer. After calling this function, only hsh_destroy() + and hsh_count() may be applied to H. */ void ** -hsh_sort (struct hsh_table *h, - int (*compare) (const void *, const void *, void *param)) +hsh_sort (struct hsh_table *h) { -#if GLOBAL_DEBUGGING - static int reentrant; - if (reentrant) - abort (); - reentrant++; -#endif - hsh_param = h->param; - hsh_compare = compare ? compare : h->compare; - qsort (h->table, h->m, sizeof *h->table, internal_comparison_fn); -#if GLOBAL_DEBUGGING - reentrant--; -#endif - return h->table; + assert (h != NULL); + + hsh_data (h); + sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h); + return h->entries; +} + +/* Makes and returns a copy of the pointers to the data in H. + The returned data consists of hsh_count(H) non-null pointers, + in no particular order, followed by a null pointer. The hash + table is not modified. The caller is responsible for freeing + the allocated data. */ +void ** +hsh_data_copy (struct hsh_table *h) +{ + void **copy; + + assert (h != NULL); + copy = xmalloc ((h->used + 1) * sizeof *copy); + copy_if (h->entries, h->size, sizeof *h->entries, copy, + not_null, NULL); + copy[h->used] = NULL; + return copy; +} + +/* Makes and returns a copy of the pointers to the data in H. + The returned data consists of hsh_count(H) non-null pointers, + sorted in order of the hash comparison function, followed by a + null pointer. The hash table is not modified. The caller is + responsible for freeing the allocated data. */ +void ** +hsh_sort_copy (struct hsh_table *h) +{ + void **copy; + + assert (h != NULL); + copy = hsh_data_copy (h); + sort (copy, h->used, sizeof *copy, comparison_helper, h); + return copy; } /* Hash entries. */ -/* Searches hash table H for TARGET. If found, returns a pointer to a - pointer to that entry; otherwise returns a pointer to a NULL entry - which _must_ be used to insert a new entry having the same key - data. */ +/* Searches hash table H for TARGET. If found, returns a pointer + to a pointer to that entry; otherwise returns a pointer to a + NULL entry which *must* be used to insert a new entry having + the same key data. */ inline void ** hsh_probe (struct hsh_table *h, const void *target) { void **entry; + assert (h != NULL); + assert (target != NULL); + /* Order of these statements is important! */ - if (h->n > h->m / 2) - hsh_rehash (h); - entry = &h->table[h->hash (target, h->param) % h->m]; + if (h->used > h->size / 2) + hsh_rehash (h, h->size * 2); + entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)]; while (*entry) { - if (!h->compare (*entry, target, h->param)) + if (!h->compare (*entry, target, h->aux)) return entry; - - if (--entry < h->table) - entry = &h->table[h->m - 1]; + if (--entry < h->entries) + entry = &h->entries[h->size - 1]; } - h->n++; + h->used++; return entry; } -/* Returns the entry in hash table H that matches TARGET, NULL if - there is none. */ +/* Searches hash table H for TARGET. If not found, inserts + TARGET and returns a null pointer. If found, returns the + match, without replacing it in the table. */ void * -hsh_find (struct hsh_table *h, const void *target) +hsh_insert (struct hsh_table *h, void *target) +{ + void **entry; + + assert (h != NULL); + assert (target != NULL); + + entry = hsh_probe (h, target); + if (*entry == NULL) + { + *entry = target; + return NULL; + } + else + return *entry; +} + +/* Searches hash table H for TARGET. If not found, inserts + TARGET and returns a null pointer. If found, returns the + match, after replacing it in the table by TARGET. */ +void * +hsh_replace (struct hsh_table *h, void *target) +{ + void **entry = hsh_probe (h, target); + void *old = *entry; + *entry = target; + return old; +} + +/* Locates an entry matching TARGET. Returns a pointer to the + entry, or a null pointer on failure. */ +static inline void ** +locate_matching_entry (struct hsh_table *h, const void *target) { - void **entry = &h->table[h->hash (target, h->param) % h->m]; + void **entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)]; while (*entry) { - if (!h->compare (*entry, target, h->param)) - return *entry; - if (--entry < h->table) - entry = &h->table[h->m - 1]; + if (!h->compare (*entry, target, h->aux)) + return entry; + if (--entry < h->entries) + entry = &h->entries[h->size - 1]; } return NULL; } -/* Iterates throught hash table TABLE with iterator ITER. Returns the - next non-NULL entry in TABLE, or NULL after the last non-NULL - entry. After NULL is returned, ITER is returned to a condition in - which hsh_foreach() will return the first non-NULL entry if any on - the next call. Do not add entries to TABLE between call to - hsh_foreach() between NULL returns. - - Before calling hsh_foreach with a particular iterator for the first - time, you must initialize the iterator with a call to - hsh_iterator_init. */ +/* Returns the entry in hash table H that matches TARGET, or NULL + if there is none. */ void * -hsh_foreach (struct hsh_table *table, struct hsh_iterator *iter) +hsh_find (struct hsh_table *h, const void *target) { - int i; + void **entry = locate_matching_entry (h, target); + return entry != NULL ? *entry : NULL; +} + +/* Deletes the entry in hash table H that matches TARGET. + Returns nonzero if an entry was deleted. - if (!table) - return NULL; - if (!iter->init) + Note: this function is very slow because it rehashes the + entire table. Don't use this hash table implementation if + deletion is a common operation. */ +int +hsh_delete (struct hsh_table *h, const void *target) +{ + void **entry = locate_matching_entry (h, target); + if (entry != NULL) { - iter->init = 1; - iter->next = 0; + if (h->free != NULL) + h->free (*entry, h->aux); + *entry = 0; + hsh_rehash (h, h->size); + return 1; } - for (i = iter->next; i < table->m; i++) - if (table->table[i]) + else + return 0; +} + +/* Iteration. */ + +/* Finds and returns an entry in TABLE, and initializes ITER for + use with hsh_next(). If TABLE is empty, returns a null + pointer. */ +void * +hsh_first (struct hsh_table *h, struct hsh_iterator *iter) +{ + assert (h != NULL); + assert (iter != NULL); + + iter->next = 0; + return hsh_next (h, iter); +} + +/* Iterates through TABLE with iterator ITER. Returns the next + entry in TABLE, or a null pointer after the last entry. + + Entries are returned in an undefined order. Modifying TABLE + during iteration may cause some entries to be returned + multiple times or not at all. */ +void * +hsh_next (struct hsh_table *h, struct hsh_iterator *iter) +{ + size_t i; + + assert (h != NULL); + assert (iter != NULL); + assert (iter->next <= h->size); + + for (i = iter->next; i < h->size; i++) + if (h->entries[i]) { iter->next = i + 1; - return table->table[i]; + return h->entries[i]; } - iter->init = 0; + + iter->next = h->size; return NULL; } + +/* Returns the number of items in H. */ +size_t +hsh_count (struct hsh_table *h) +{ + assert (h != NULL); + + return h->used; +} + +/* Debug helpers. */ #if GLOBAL_DEBUGGING +#undef NDEBUG +#include #include /* Displays contents of hash table H on stdout. */ void hsh_dump (struct hsh_table *h) { - void **entry = h->table; + void **entry = h->entries; int i; printf (_("hash table:")); - for (i = 0; i < h->m; i++) + for (i = 0; i < h->size; i++) printf (" %p", *entry++); printf ("\n"); } @@ -323,11 +499,10 @@ hsh_dump (struct hsh_table *h) to a NULL pointer. This function is used when it is known that the entry to be inserted does not already exist in the table. */ void -force_hsh_insert (struct hsh_table *h, void *p) +hsh_force_insert (struct hsh_table *h, void *p) { void **pp = hsh_probe (h, p); - if (*pp != NULL) - assert (0); + assert (*pp == NULL); *pp = p; } @@ -335,11 +510,19 @@ force_hsh_insert (struct hsh_table *h, void *p) This function is for use when it is known that the entry being searched for must exist in the table. */ void * -force_hsh_find (struct hsh_table *h, const void *p) +hsh_force_find (struct hsh_table *h, const void *target) +{ + void *found = hsh_find (h, target); + assert (found != NULL); + return found; +} + +/* This wrapper for hsh_delete() verifies that an item really was + deleted. */ +void +hsh_force_delete (struct hsh_table *h, const void *target) { - p = hsh_find (h, p); - if (p == NULL) - assert (0); - return (void *) p; + int found = hsh_delete (h, target); + assert (found != 0); } #endif