X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fhash.c;h=bcf5244f049ca64be73130476386a7e95e6cf586;hb=92fb12eb06716d14c05b781f5d9dcde956d77c30;hp=f42901ea8e5479d42c6f0ee0ad0e3c603badc1a6;hpb=205ac3afa4c2b19c85819d8695abf3975bb11807;p=pspp diff --git a/src/hash.c b/src/hash.c index f42901ea8e..bcf5244f04 100644 --- a/src/hash.c +++ b/src/hash.c @@ -14,31 +14,24 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include #include "hash.h" #include "error.h" +#include +#include #include #include #include "algorithm.h" #include "alloc.h" +#include #include "misc.h" #include "str.h" -/* Hash table. */ -struct hsh_table - { - size_t used; /* Number of filled entries. */ - size_t size; /* Number of entries (a power of 2). */ - void **entries; /* Hash table proper. */ - - void *aux; /* Auxiliary data for comparison functions. */ - hsh_compare_func *compare; - hsh_hash_func *hash; - hsh_free_func *free; - }; +#include "gettext.h" +#define _(msgid) gettext (msgid) /* Note for constructing hash functions: @@ -84,7 +77,7 @@ next_power_of_2 (size_t x) unsigned hsh_hash_bytes (const void *buf_, size_t size) { - const unsigned char *buf = buf_; + const unsigned char *buf = (const unsigned char *) buf_; unsigned hash; assert (buf != NULL); @@ -100,7 +93,7 @@ hsh_hash_bytes (const void *buf_, size_t size) unsigned hsh_hash_string (const char *s_) { - const unsigned char *s = s_; + const unsigned char *s = (const unsigned char *) s_; unsigned hash; assert (s != NULL); @@ -112,6 +105,22 @@ hsh_hash_string (const char *s_) return hash; } +/* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */ +unsigned +hsh_hash_case_string (const char *s_) +{ + const unsigned char *s = (const unsigned char *) s_; + unsigned hash; + + assert (s != NULL); + + hash = FNV_32_BASIS; + while (*s != '\0') + hash = (hash * FNV_32_PRIME) ^ toupper (*s++); + + return hash; +} + /* Hash for ints. */ unsigned hsh_hash_int (int i) @@ -131,6 +140,25 @@ hsh_hash_double (double d) /* Hash tables. */ +/* Hash table. */ +struct hsh_table + { + size_t used; /* Number of filled entries. */ + size_t size; /* Number of entries (a power of 2). */ + void **entries; /* Hash table proper. */ + + void *aux; /* Auxiliary data for comparison functions. */ + hsh_compare_func *compare; + hsh_hash_func *hash; + hsh_free_func *free; + +#ifndef NDEBUG + /* Set to false if hsh_data() or hsh_sort() has been called, + so that most hsh_*() functions may no longer be called. */ + bool hash_ordered; +#endif + }; + /* Creates a hash table with at least M entries. COMPARE is a function that compares two entries and returns 0 if they are identical, nonzero otherwise; HASH returns a nonnegative hash value @@ -142,7 +170,6 @@ hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash, struct hsh_table *h; int i; - assert (size > 0); assert (compare != NULL); assert (hash != NULL); @@ -151,13 +178,16 @@ hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash, if (size < 4) size = 4; h->size = next_power_of_2 (size); - h->entries = xmalloc (sizeof *h->entries * h->size); + h->entries = xnmalloc (h->size, sizeof *h->entries); for (i = 0; i < h->size; i++) h->entries[i] = NULL; h->aux = aux; h->compare = compare; h->hash = hash; h->free = free; +#ifndef NDEBUG + h->hash_ordered = true; +#endif return h; } @@ -175,6 +205,12 @@ hsh_clear (struct hsh_table *h) for (i = 0; i < h->size; i++) h->entries[i] = NULL; + + h->used = 0; + +#ifndef NDEBUG + h->hash_ordered = true; +#endif } /* Destroys table H and all its contents. */ @@ -194,9 +230,30 @@ hsh_destroy (struct hsh_table *h) } } -/* Changes the capacity of H to NEW_SIZE. */ +/* Locates an entry matching TARGET. Returns a pointer to the + entry, or a null pointer on failure. */ +static inline unsigned +locate_matching_entry (struct hsh_table *h, const void *target) +{ + unsigned i = h->hash (target, h->aux); + + assert (h->hash_ordered); + for (;;) + { + void *entry; + i &= h->size - 1; + entry = h->entries[i]; + if (entry == NULL || !h->compare (entry, target, h->aux)) + return i; + i--; + } +} + +/* Changes the capacity of H to NEW_SIZE, which must be a + positive power of 2 at least as large as the number of + elements in H. */ static void -hsh_rehash (struct hsh_table *h, size_t new_size) +rehash (struct hsh_table *h, size_t new_size) { void **begin, **end, **table_p; int i; @@ -204,26 +261,27 @@ hsh_rehash (struct hsh_table *h, size_t new_size) assert (h != NULL); assert (new_size >= h->used); + /* Verify that NEW_SIZE is a positive power of 2. */ + assert (new_size > 0 && (new_size & (new_size - 1)) == 0); + begin = h->entries; end = begin + h->size; h->size = new_size; - h->entries = xmalloc (sizeof *h->entries * h->size); + h->entries = xnmalloc (h->size, sizeof *h->entries); for (i = 0; i < h->size; i++) h->entries[i] = NULL; - for (table_p = begin; table_p < end; table_p++) + for (table_p = begin; table_p < end; table_p++) { - void **entry; - - if (*table_p == NULL) - continue; - entry = &h->entries[h->hash (*table_p, h->aux) & (h->size - 1)]; - while (*entry) - if (--entry < h->entries) - entry = &h->entries[h->size - 1]; - *entry = *table_p; + void *entry = *table_p; + if (entry != NULL) + h->entries[locate_matching_entry (h, entry)] = entry; } free (begin); + +#ifndef NDEBUG + h->hash_ordered = true; +#endif } /* A "algo_predicate_func" that returns nonzero if DATA points @@ -238,18 +296,33 @@ not_null (const void *data_, void *aux UNUSED) /* Compacts hash table H and returns a pointer to its data. The returned data consists of hsh_count(H) non-null pointers, in - no particular order, followed by a null pointer. After - calling this function, only hsh_destroy() and hsh_count() may - be applied to H. */ -void ** + no particular order, followed by a null pointer. + + After calling this function, only hsh_destroy() and + hsh_count() should be applied to H. hsh_first() and + hsh_next() could also be used, but you're better off just + iterating through the returned array. + + This function is intended for use in situations where data + processing occurs in two phases. In the first phase, data is + added, removed, and searched for within a hash table. In the + second phase, the contents of the hash table are output and + the hash property itself is no longer of interest. + + Use hsh_sort() instead, if the second phase wants data in + sorted order. Use hsh_data_copy() or hsh_sort_copy() instead, + if the second phase still needs to search the hash table. */ +void *const * hsh_data (struct hsh_table *h) { size_t n; assert (h != NULL); - n = partition (h->entries, h->size, sizeof *h->entries, - not_null, NULL); + n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL); assert (n == h->used); +#ifndef NDEBUG + h->hash_ordered = false; +#endif return h->entries; } @@ -262,15 +335,33 @@ comparison_helper (const void *a_, const void *b_, void *h_) void *const *b = b_; struct hsh_table *h = h_; + assert(a); + assert(b); + return h->compare (*a, *b, h->aux); } /* Sorts hash table H based on hash comparison function. The returned data consists of hsh_count(H) non-null pointers, sorted in order of the hash comparison function, followed by a - null pointer. After calling this function, only hsh_destroy() - and hsh_count() may be applied to H. */ -void ** + null pointer. + + After calling this function, only hsh_destroy() and + hsh_count() should be applied to H. hsh_first() and + hsh_next() could also be used, but you're better off just + iterating through the returned array. + + This function is intended for use in situations where data + processing occurs in two phases. In the first phase, data is + added, removed, and searched for within a hash table. In the + second phase, the contents of the hash table are output and + the hash property itself is no longer of interest. + + Use hsh_data() instead, if the second phase doesn't need the + data in any particular order. Use hsh_data_copy() or + hsh_sort_copy() instead, if the second phase still needs to + search the hash table. */ +void *const * hsh_sort (struct hsh_table *h) { assert (h != NULL); @@ -284,16 +375,18 @@ hsh_sort (struct hsh_table *h) The returned data consists of hsh_count(H) non-null pointers, in no particular order, followed by a null pointer. The hash table is not modified. The caller is responsible for freeing - the allocated data. */ + the allocated data. + + If you don't need to search or modify the hash table, then + hsh_data() is a more efficient choice. */ void ** hsh_data_copy (struct hsh_table *h) { void **copy; assert (h != NULL); - copy = xmalloc ((h->used + 1) * sizeof *copy); - copy_if (h->entries, h->size, sizeof *h->entries, copy, - not_null, NULL); + copy = xnmalloc ((h->used + 1), sizeof *copy); + copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL); copy[h->used] = NULL; return copy; } @@ -302,7 +395,10 @@ hsh_data_copy (struct hsh_table *h) The returned data consists of hsh_count(H) non-null pointers, sorted in order of the hash comparison function, followed by a null pointer. The hash table is not modified. The caller is - responsible for freeing the allocated data. */ + responsible for freeing the allocated data. + + If you don't need to search or modify the hash table, then + hsh_sort() is a more efficient choice. */ void ** hsh_sort_copy (struct hsh_table *h) { @@ -323,25 +419,18 @@ hsh_sort_copy (struct hsh_table *h) inline void ** hsh_probe (struct hsh_table *h, const void *target) { - void **entry; - + unsigned i; + assert (h != NULL); assert (target != NULL); + assert (h->hash_ordered); - /* Order of these statements is important! */ if (h->used > h->size / 2) - hsh_rehash (h, h->size * 2); - entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)]; - - while (*entry) - { - if (!h->compare (*entry, target, h->aux)) - return entry; - if (--entry < h->entries) - entry = &h->entries[h->size - 1]; - } - h->used++; - return entry; + rehash (h, h->size * 2); + i = locate_matching_entry (h, target); + if (h->entries[i] == NULL) + h->used++; + return &h->entries[i]; } /* Searches hash table H for TARGET. If not found, inserts @@ -377,49 +466,49 @@ hsh_replace (struct hsh_table *h, void *target) return old; } -/* Locates an entry matching TARGET. Returns a pointer to the - entry, or a null pointer on failure. */ -static inline void ** -locate_matching_entry (struct hsh_table *h, const void *target) -{ - void **entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)]; - - while (*entry) - { - if (!h->compare (*entry, target, h->aux)) - return entry; - if (--entry < h->entries) - entry = &h->entries[h->size - 1]; - } - return NULL; -} - /* Returns the entry in hash table H that matches TARGET, or NULL if there is none. */ void * hsh_find (struct hsh_table *h, const void *target) { - void **entry = locate_matching_entry (h, target); - return entry != NULL ? *entry : NULL; + return h->entries[locate_matching_entry (h, target)]; } /* Deletes the entry in hash table H that matches TARGET. Returns nonzero if an entry was deleted. - Note: this function is very slow because it rehashes the - entire table. Don't use this hash table implementation if - deletion is a common operation. */ + Uses Knuth's Algorithm 6.4R (Deletion with linear probing). + Because our load factor is at most 1/2, the average number of + moves that this algorithm makes should be at most 2 - ln 2 ~= + 1.65. */ int hsh_delete (struct hsh_table *h, const void *target) { - void **entry = locate_matching_entry (h, target); - if (entry != NULL) + unsigned i = locate_matching_entry (h, target); + if (h->entries[i] != NULL) { + h->used--; if (h->free != NULL) - h->free (*entry, h->aux); - *entry = 0; - hsh_rehash (h, h->size); - return 1; + h->free (h->entries[i], h->aux); + + for (;;) + { + unsigned r; + ptrdiff_t j; + + h->entries[i] = NULL; + j = i; + do + { + i = (i - 1) & (h->size - 1); + if (h->entries[i] == NULL) + return 1; + + r = h->hash (h->entries[i], h->aux) & (h->size - 1); + } + while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); + h->entries[j] = h->entries[i]; + } } else return 0;