X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fhash.c;h=588b311b45774c37cc52d5476b4a79b24b589b17;hb=f9ce8d75ae81553d9aec05351cb2d12d0df33e5e;hp=39d47cb625bb158871d21652c53efcea16542281;hpb=b18e1b9c95a478d434e9fcef9d8579d0b96b9a8d;p=pspp diff --git a/src/hash.c b/src/hash.c index 39d47cb625..588b311b45 100644 --- a/src/hash.c +++ b/src/hash.c @@ -18,12 +18,13 @@ 02111-1307, USA. */ #include -#include +#include "hash.h" +#include "error.h" #include #include +#include "algorithm.h" #include "alloc.h" -#include "hash.h" -#include "quicksort.h" +#include "misc.h" #include "str.h" /* Hash table. */ @@ -33,7 +34,7 @@ struct hsh_table size_t size; /* Number of entries (a power of 2). */ void **entries; /* Hash table proper. */ - void *param; + void *aux; /* Auxiliary data for comparison functions. */ hsh_compare_func *compare; hsh_hash_func *hash; hsh_free_func *free; @@ -75,39 +76,39 @@ next_power_of_2 (size_t x) } } -/* Colin Plumb's "one-at-a-time" hash, for bytes. */ +/* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */ +#define FNV_32_PRIME 16777619u +#define FNV_32_BASIS 2166136261u + +/* Fowler-Noll-Vo 32-bit hash, for bytes. */ unsigned hsh_hash_bytes (const void *buf_, size_t size) { const unsigned char *buf = buf_; - unsigned hash = 0; - while (size-- > 0) - { - hash += *buf++; - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); + unsigned hash; + + assert (buf != NULL); + + hash = FNV_32_BASIS; + while (size-- > 0) + hash = (hash * FNV_32_PRIME) ^ *buf++; + return hash; } -/* Colin Plumb's "one-at-a-time" hash, for strings. */ +/* Fowler-Noll-Vo 32-bit hash, for strings. */ unsigned hsh_hash_string (const char *s_) { const unsigned char *s = s_; - unsigned hash = 0; - while (*s != '\0') - { - hash += *s++; - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); + unsigned hash; + + assert (s != NULL); + + hash = FNV_32_BASIS; + while (*s != '\0') + hash = (hash * FNV_32_PRIME) ^ *s++; + return hash; } @@ -117,6 +118,16 @@ hsh_hash_int (int i) { return hsh_hash_bytes (&i, sizeof i); } + +/* Hash for double. */ +unsigned +hsh_hash_double (double d) +{ + if (!isnan (d)) + return hsh_hash_bytes (&d, sizeof d); + else + return 0; +} /* Hash tables. */ @@ -126,17 +137,24 @@ hsh_hash_int (int i) for an entry; FREE destroys an entry. */ struct hsh_table * hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash, - hsh_free_func *free, void *param) + hsh_free_func *free, void *aux) { - struct hsh_table *h = xmalloc (sizeof *h); + struct hsh_table *h; int i; + assert (size > 0); + assert (compare != NULL); + assert (hash != NULL); + + h = xmalloc (sizeof *h); h->used = 0; + if (size < 4) + size = 4; h->size = next_power_of_2 (size); h->entries = xmalloc (sizeof *h->entries * h->size); for (i = 0; i < h->size; i++) h->entries[i] = NULL; - h->param = param; + h->aux = aux; h->compare = compare; h->hash = hash; h->free = free; @@ -149,10 +167,11 @@ hsh_clear (struct hsh_table *h) { int i; + assert (h != NULL); if (h->free) for (i = 0; i < h->size; i++) if (h->entries[i] != NULL) - h->free (h->entries[i], h->param); + h->free (h->entries[i], h->aux); for (i = 0; i < h->size; i++) h->entries[i] = NULL; @@ -164,121 +183,210 @@ hsh_destroy (struct hsh_table *h) { int i; - if (h == NULL) - return; - if (h->free) - for (i = 0; i < h->size; i++) - if (h->entries[i] != NULL) - h->free (h->entries[i], h->param); - free (h->entries); - free (h); + if (h != NULL) + { + if (h->free) + for (i = 0; i < h->size; i++) + if (h->entries[i] != NULL) + h->free (h->entries[i], h->aux); + free (h->entries); + free (h); + } } /* Changes the capacity of H to NEW_SIZE. */ static void hsh_rehash (struct hsh_table *h, size_t new_size) { - void **begin = h->entries; - void **end = &h->entries[h->size]; - void **table_p; + void **begin, **end, **table_p; int i; + assert (h != NULL); + assert (new_size >= h->used); + + begin = h->entries; + end = begin + h->size; + h->size = new_size; h->entries = xmalloc (sizeof *h->entries * h->size); for (i = 0; i < h->size; i++) h->entries[i] = NULL; for (table_p = begin; table_p < end; table_p++) - { - void **entry; - - if (*table_p == NULL) - continue; - entry = &h->entries[h->hash (*table_p, h->param) & (h->size - 1)]; - while (*entry) - if (--entry < h->entries) - entry = &h->entries[h->size - 1]; - *entry = *table_p; - } + if (*table_p != NULL) + { + void **entry = &h->entries[h->hash (*table_p, h->aux) & (h->size - 1)]; + while (*entry != NULL) + if (++entry >= h->entries + h->size) + entry = h->entries; + *entry = *table_p; + } free (begin); } -/* hsh_sort() helper function that ensures NULLs are sorted after the - rest of the table. */ +/* A "algo_predicate_func" that returns nonzero if DATA points + to a non-null void. */ static int -sort_nulls_last (const void *a_, const void *b_, void *h_) +not_null (const void *data_, void *aux UNUSED) { - void *a = *(void **) a_; - void *b = *(void **) b_; + void *const *data = data_; + + return *data != NULL; +} + +/* Compacts hash table H and returns a pointer to its data. The + returned data consists of hsh_count(H) non-null pointers, in + no particular order, followed by a null pointer. After + calling this function, only hsh_destroy() and hsh_count() may + be applied to H. */ +void ** +hsh_data (struct hsh_table *h) +{ + size_t n; + + assert (h != NULL); + n = partition (h->entries, h->size, sizeof *h->entries, + not_null, NULL); + assert (n == h->used); + return h->entries; +} + +/* Dereferences void ** pointers and passes them to the hash + comparison function. */ +static int +comparison_helper (const void *a_, const void *b_, void *h_) +{ + void *const *a = a_; + void *const *b = b_; struct hsh_table *h = h_; - if (a != NULL) - { - if (b != NULL) - return h->compare (a, b, h->param); - else - return -1; - } - else - { - if (b != NULL) - return +1; - else - return 0; - } + return h->compare (*a, *b, h->aux); } -/* Sorts hash table H based on hash comparison function. NULLs - are sent to the end of the table. The resultant table is - returned (it is guaranteed to be NULL-terminated). H should - not be used again as a hash table until and unless hsh_clear() - called. */ +/* Sorts hash table H based on hash comparison function. The + returned data consists of hsh_count(H) non-null pointers, + sorted in order of the hash comparison function, followed by a + null pointer. After calling this function, only hsh_destroy() + and hsh_count() may be applied to H. */ void ** hsh_sort (struct hsh_table *h) { - quicksort (h->entries, h->size, sizeof *h->entries, sort_nulls_last, h); + assert (h != NULL); + + hsh_data (h); + sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h); return h->entries; } + +/* Makes and returns a copy of the pointers to the data in H. + The returned data consists of hsh_count(H) non-null pointers, + in no particular order, followed by a null pointer. The hash + table is not modified. The caller is responsible for freeing + the allocated data. */ +void ** +hsh_data_copy (struct hsh_table *h) +{ + void **copy; + + assert (h != NULL); + copy = xmalloc ((h->used + 1) * sizeof *copy); + copy_if (h->entries, h->size, sizeof *h->entries, copy, + not_null, NULL); + copy[h->used] = NULL; + return copy; +} + +/* Makes and returns a copy of the pointers to the data in H. + The returned data consists of hsh_count(H) non-null pointers, + sorted in order of the hash comparison function, followed by a + null pointer. The hash table is not modified. The caller is + responsible for freeing the allocated data. */ +void ** +hsh_sort_copy (struct hsh_table *h) +{ + void **copy; + + assert (h != NULL); + copy = hsh_data_copy (h); + sort (copy, h->used, sizeof *copy, comparison_helper, h); + return copy; +} /* Hash entries. */ -/* Searches hash table H for TARGET. If found, returns a pointer to a - pointer to that entry; otherwise returns a pointer to a NULL entry - which _must_ be used to insert a new entry having the same key - data. */ +/* Searches hash table H for TARGET. If found, returns a pointer + to a pointer to that entry; otherwise returns a pointer to a + NULL entry which *must* be used to insert a new entry having + the same key data. */ inline void ** hsh_probe (struct hsh_table *h, const void *target) { void **entry; + assert (h != NULL); + assert (target != NULL); + /* Order of these statements is important! */ if (h->used > h->size / 2) hsh_rehash (h, h->size * 2); - entry = &h->entries[h->hash (target, h->param) & (h->size - 1)]; + entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)]; while (*entry) { - if (!h->compare (*entry, target, h->param)) + if (!h->compare (*entry, target, h->aux)) return entry; - if (--entry < h->entries) - entry = &h->entries[h->size - 1]; + if (++entry >= h->entries + h->size) + entry = h->entries; } h->used++; return entry; } +/* Searches hash table H for TARGET. If not found, inserts + TARGET and returns a null pointer. If found, returns the + match, without replacing it in the table. */ +void * +hsh_insert (struct hsh_table *h, void *target) +{ + void **entry; + + assert (h != NULL); + assert (target != NULL); + + entry = hsh_probe (h, target); + if (*entry == NULL) + { + *entry = target; + return NULL; + } + else + return *entry; +} + +/* Searches hash table H for TARGET. If not found, inserts + TARGET and returns a null pointer. If found, returns the + match, after replacing it in the table by TARGET. */ +void * +hsh_replace (struct hsh_table *h, void *target) +{ + void **entry = hsh_probe (h, target); + void *old = *entry; + *entry = target; + return old; +} + /* Locates an entry matching TARGET. Returns a pointer to the entry, or a null pointer on failure. */ static inline void ** locate_matching_entry (struct hsh_table *h, const void *target) { - void **entry = &h->entries[h->hash (target, h->param) & (h->size - 1)]; + void **entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)]; while (*entry) { - if (!h->compare (*entry, target, h->param)) + if (!h->compare (*entry, target, h->aux)) return entry; - if (--entry < h->entries) - entry = &h->entries[h->size - 1]; + if (++entry >= h->entries + h->size) + entry = h->entries; } return NULL; } @@ -295,19 +403,43 @@ hsh_find (struct hsh_table *h, const void *target) /* Deletes the entry in hash table H that matches TARGET. Returns nonzero if an entry was deleted. - Note: this function is very slow because it rehashes the - entire table. Don't use this hash table implementation if - deletion is a common operation. */ + Uses Knuth's Algorithm 6.4R (Deletion with linear probing). + Because our load factor is at most 1/2, the average number of + moves that this algorithm makes should be at most 2 - ln 2 ~= + 1.65. + + Not well tested. */ int hsh_delete (struct hsh_table *h, const void *target) { void **entry = locate_matching_entry (h, target); - if (h->free != NULL) + if (entry != NULL) { - h->free (*entry, h->param); + ptrdiff_t i; + + h->used--; + if (h->free != NULL) + h->free (*entry, h->aux); *entry = 0; - hsh_rehash (h, h->size); - return 1; + + i = entry - h->entries; + for (;;) + { + unsigned r; + ptrdiff_t j = i; + + do + { + if (--i < 0) + i = h->size - 1; + if (h->entries[i] == NULL) + return 1; + + r = h->hash (h->entries[i], h->aux) & (h->size - 1); + } + while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); + h->entries[i] = h->entries[j]; + } } else return 0; @@ -367,7 +499,7 @@ hsh_count (struct hsh_table *h) #if GLOBAL_DEBUGGING #undef NDEBUG -#include +#include "error.h" #include /* Displays contents of hash table H on stdout. */