1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include <gsl/gsl_math.h>
36 #define _(msgid) gettext (msgid)
38 /* Note for constructing hash functions:
40 You can store the hash values in the records, then compare hash
41 values (in the compare function) before bothering to compare keys.
42 Hash values can simply be returned from the records instead of
43 recalculating when rehashing. */
47 Since hash_probe and hash_find take void * pointers, it's easy to
48 pass a void ** to your data by accidentally inserting an `&'
49 reference operator where one shouldn't go. It took me an hour to
50 hunt down a bug like that once. */
52 /* Prime numbers and hash functions. */
54 /* Returns smallest power of 2 greater than X. */
56 next_power_of_2 (size_t x)
62 /* Turn off rightmost 1-bit in x. */
63 size_t y = x & (x - 1);
65 /* If y is 0 then x only had a single 1-bit. */
69 /* Otherwise turn off the next. */
74 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
75 #define FNV_32_PRIME 16777619u
76 #define FNV_32_BASIS 2166136261u
78 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
80 hsh_hash_bytes (const void *buf_, size_t size)
82 const unsigned char *buf = (const unsigned char *) buf_;
89 hash = (hash * FNV_32_PRIME) ^ *buf++;
94 /* Fowler-Noll-Vo 32-bit hash, for strings. */
96 hsh_hash_string (const char *s_)
98 const unsigned char *s = (const unsigned char *) s_;
105 hash = (hash * FNV_32_PRIME) ^ *s++;
110 /* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
112 hsh_hash_case_string (const char *s_)
114 const unsigned char *s = (const unsigned char *) s_;
121 hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
130 return hsh_hash_bytes (&i, sizeof i);
133 /* Hash for double. */
135 hsh_hash_double (double d)
138 return hsh_hash_bytes (&d, sizeof d);
148 size_t used; /* Number of filled entries. */
149 size_t size; /* Number of entries (a power of 2). */
150 void **entries; /* Hash table proper. */
152 const void *aux; /* Auxiliary data for comparison functions. */
153 hsh_compare_func *compare;
158 /* Set to false if hsh_data() or hsh_sort() has been called,
159 so that most hsh_*() functions may no longer be called. */
163 struct pool *pool; /* The pool used for this hash table */
167 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
168 hsh_free_func *free, const void *aux)
170 return hsh_create_pool (NULL, size, compare, hash, free, aux);
175 /* Creates a hash table with at least M entries. COMPARE is a
176 function that compares two entries and returns 0 if they are
177 identical, nonzero otherwise; HASH returns a nonnegative hash value
178 for an entry; FREE destroys an entry. */
180 hsh_create_pool (struct pool *pool, int size,
181 hsh_compare_func *compare, hsh_hash_func *hash,
182 hsh_free_func *free, const void *aux)
187 assert (compare != NULL);
188 assert (hash != NULL);
190 h = pool_malloc (pool, sizeof *h);
195 h->size = next_power_of_2 (size);
196 h->entries = pool_nmalloc (pool, h->size, sizeof *h->entries);
197 for (i = 0; i < h->size; i++)
198 h->entries[i] = NULL;
200 h->compare = compare;
204 h->hash_ordered = true;
209 /* Destroys the contents of table H. */
211 hsh_clear (struct hsh_table *h)
217 for (i = 0; i < h->size; i++)
218 if (h->entries[i] != NULL)
219 h->free (h->entries[i], h->aux);
221 for (i = 0; i < h->size; i++)
222 h->entries[i] = NULL;
227 h->hash_ordered = true;
231 /* Destroys table H and all its contents. */
233 hsh_destroy (struct hsh_table *h)
240 for (i = 0; i < h->size; i++)
241 if (h->entries[i] != NULL)
242 h->free (h->entries[i], h->aux);
243 pool_free (h->pool, h->entries);
244 pool_free (h->pool, h);
248 /* Locates an entry matching TARGET. Returns the index for the
249 entry, if found, or the index of an empty entry that indicates
250 where TARGET should go, otherwise. */
251 static inline unsigned
252 locate_matching_entry (struct hsh_table *h, const void *target)
254 unsigned i = h->hash (target, h->aux);
256 assert (h->hash_ordered);
261 entry = h->entries[i];
262 if (entry == NULL || !h->compare (entry, target, h->aux))
268 /* Returns the index of an empty entry that indicates
269 where TARGET should go, assuming that TARGET is not equal to
270 any item already in the hash table. */
271 static inline unsigned
272 locate_empty_entry (struct hsh_table *h, const void *target)
274 unsigned i = h->hash (target, h->aux);
276 assert (h->hash_ordered);
280 if (h->entries[i] == NULL)
286 /* Changes the capacity of H to NEW_SIZE, which must be a
287 positive power of 2 at least as large as the number of
290 rehash (struct hsh_table *h, size_t new_size)
292 void **begin, **end, **table_p;
296 assert (new_size >= h->used);
298 /* Verify that NEW_SIZE is a positive power of 2. */
299 assert (new_size > 0 && (new_size & (new_size - 1)) == 0);
302 end = begin + h->size;
305 h->entries = pool_nmalloc (h->pool, h->size, sizeof *h->entries);
306 for (i = 0; i < h->size; i++)
307 h->entries[i] = NULL;
308 for (table_p = begin; table_p < end; table_p++)
310 void *entry = *table_p;
312 h->entries[locate_empty_entry (h, entry)] = entry;
314 pool_free (h->pool, begin);
317 h->hash_ordered = true;
321 /* A "algo_predicate_func" that returns true if DATA points
322 to a non-null void. */
324 not_null (const void *data_, const void *aux UNUSED)
326 void *const *data = data_;
328 return *data != NULL;
331 /* Compacts hash table H and returns a pointer to its data. The
332 returned data consists of hsh_count(H) non-null pointers, in
333 no particular order, followed by a null pointer.
335 After calling this function, only hsh_destroy() and
336 hsh_count() should be applied to H. hsh_first() and
337 hsh_next() could also be used, but you're better off just
338 iterating through the returned array.
340 This function is intended for use in situations where data
341 processing occurs in two phases. In the first phase, data is
342 added, removed, and searched for within a hash table. In the
343 second phase, the contents of the hash table are output and
344 the hash property itself is no longer of interest.
346 Use hsh_sort() instead, if the second phase wants data in
347 sorted order. Use hsh_data_copy() or hsh_sort_copy() instead,
348 if the second phase still needs to search the hash table. */
350 hsh_data (struct hsh_table *h)
355 n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL);
356 assert (n == h->used);
358 h->hash_ordered = false;
363 /* Dereferences void ** pointers and passes them to the hash
364 comparison function. */
366 comparison_helper (const void *a_, const void *b_, const void *h_)
370 const struct hsh_table *h = h_;
375 return h->compare (*a, *b, h->aux);
378 /* Sorts hash table H based on hash comparison function. The
379 returned data consists of hsh_count(H) non-null pointers,
380 sorted in order of the hash comparison function, followed by a
383 After calling this function, only hsh_destroy() and
384 hsh_count() should be applied to H. hsh_first() and
385 hsh_next() could also be used, but you're better off just
386 iterating through the returned array.
388 This function is intended for use in situations where data
389 processing occurs in two phases. In the first phase, data is
390 added, removed, and searched for within a hash table. In the
391 second phase, the contents of the hash table are output and
392 the hash property itself is no longer of interest.
394 Use hsh_data() instead, if the second phase doesn't need the
395 data in any particular order. Use hsh_data_copy() or
396 hsh_sort_copy() instead, if the second phase still needs to
397 search the hash table. */
399 hsh_sort (struct hsh_table *h)
404 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
408 /* Makes and returns a copy of the pointers to the data in H.
409 The returned data consists of hsh_count(H) non-null pointers,
410 in no particular order, followed by a null pointer. The hash
411 table is not modified. The caller is responsible for freeing
414 If you don't need to search or modify the hash table, then
415 hsh_data() is a more efficient choice. */
417 hsh_data_copy (struct hsh_table *h)
422 copy = pool_nmalloc (h->pool, (h->used + 1), sizeof *copy);
423 copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL);
424 copy[h->used] = NULL;
428 /* Makes and returns a copy of the pointers to the data in H.
429 The returned data consists of hsh_count(H) non-null pointers,
430 sorted in order of the hash comparison function, followed by a
431 null pointer. The hash table is not modified. The caller is
432 responsible for freeing the allocated data.
434 If you don't need to search or modify the hash table, then
435 hsh_sort() is a more efficient choice. */
437 hsh_sort_copy (struct hsh_table *h)
442 copy = hsh_data_copy (h);
443 sort (copy, h->used, sizeof *copy, comparison_helper, h);
449 /* Searches hash table H for TARGET. If found, returns a pointer
450 to a pointer to that entry; otherwise returns a pointer to a
451 NULL entry which *must* be used to insert a new entry having
452 the same key data. */
454 hsh_probe (struct hsh_table *h, const void *target)
459 assert (target != NULL);
460 assert (h->hash_ordered);
462 if (h->used > h->size / 2)
463 rehash (h, h->size * 2);
464 i = locate_matching_entry (h, target);
465 if (h->entries[i] == NULL)
467 return &h->entries[i];
470 /* Searches hash table H for TARGET. If not found, inserts
471 TARGET and returns a null pointer. If found, returns the
472 match, without replacing it in the table. */
474 hsh_insert (struct hsh_table *h, void *target)
479 assert (target != NULL);
481 entry = hsh_probe (h, target);
491 /* Searches hash table H for TARGET. If not found, inserts
492 TARGET and returns a null pointer. If found, returns the
493 match, after replacing it in the table by TARGET. */
495 hsh_replace (struct hsh_table *h, void *target)
497 void **entry = hsh_probe (h, target);
503 /* Returns the entry in hash table H that matches TARGET, or NULL
506 hsh_find (struct hsh_table *h, const void *target)
508 return h->entries[locate_matching_entry (h, target)];
511 /* Deletes the entry in hash table H that matches TARGET.
512 Returns true if an entry was deleted.
514 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
515 Because our load factor is at most 1/2, the average number of
516 moves that this algorithm makes should be at most 2 - ln 2 ~=
519 hsh_delete (struct hsh_table *h, const void *target)
521 unsigned i = locate_matching_entry (h, target);
522 if (h->entries[i] != NULL)
526 h->free (h->entries[i], h->aux);
533 h->entries[i] = NULL;
537 i = (i - 1) & (h->size - 1);
538 if (h->entries[i] == NULL)
541 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
543 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
544 h->entries[j] = h->entries[i];
553 /* Finds and returns an entry in TABLE, and initializes ITER for
554 use with hsh_next(). If TABLE is empty, returns a null
557 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
560 assert (iter != NULL);
563 return hsh_next (h, iter);
566 /* Iterates through TABLE with iterator ITER. Returns the next
567 entry in TABLE, or a null pointer after the last entry.
569 Entries are returned in an undefined order. Modifying TABLE
570 during iteration may cause some entries to be returned
571 multiple times or not at all. */
573 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
578 assert (iter != NULL);
579 assert (iter->next <= h->size);
581 for (i = iter->next; i < h->size; i++)
585 return h->entries[i];
588 iter->next = h->size;
592 /* Returns the number of items in H. */
594 hsh_count (struct hsh_table *h)
608 /* Displays contents of hash table H on stdout. */
610 hsh_dump (struct hsh_table *h)
612 void **entry = h->entries;
615 printf (_("hash table:"));
616 for (i = 0; i < h->size; i++)
617 printf (" %p", *entry++);
621 /* This wrapper around hsh_probe() assures that it returns a pointer
622 to a NULL pointer. This function is used when it is known that the
623 entry to be inserted does not already exist in the table. */
625 hsh_force_insert (struct hsh_table *h, void *p)
627 void **pp = hsh_probe (h, p);
628 assert (*pp == NULL);
632 /* This wrapper around hsh_find() assures that it returns non-NULL.
633 This function is for use when it is known that the entry being
634 searched for must exist in the table. */
636 hsh_force_find (struct hsh_table *h, const void *target)
638 void *found = hsh_find (h, target);
639 assert (found != NULL);
643 /* This wrapper for hsh_delete() verifies that an item really was
646 hsh_force_delete (struct hsh_table *h, const void *target)
648 int found = hsh_delete (h, target);