1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
35 #define _(msgid) gettext (msgid)
37 /* Note for constructing hash functions:
39 You can store the hash values in the records, then compare hash
40 values (in the compare function) before bothering to compare keys.
41 Hash values can simply be returned from the records instead of
42 recalculating when rehashing. */
46 Since hash_probe and hash_find take void * pointers, it's easy to
47 pass a void ** to your data by accidentally inserting an `&'
48 reference operator where one shouldn't go. It took me an hour to
49 hunt down a bug like that once. */
51 /* Prime numbers and hash functions. */
53 /* Returns smallest power of 2 greater than X. */
55 next_power_of_2 (size_t x)
61 /* Turn off rightmost 1-bit in x. */
62 size_t y = x & (x - 1);
64 /* If y is 0 then x only had a single 1-bit. */
68 /* Otherwise turn off the next. */
73 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
74 #define FNV_32_PRIME 16777619u
75 #define FNV_32_BASIS 2166136261u
77 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
79 hsh_hash_bytes (const void *buf_, size_t size)
81 const unsigned char *buf = (const unsigned char *) buf_;
88 hash = (hash * FNV_32_PRIME) ^ *buf++;
93 /* Fowler-Noll-Vo 32-bit hash, for strings. */
95 hsh_hash_string (const char *s_)
97 const unsigned char *s = (const unsigned char *) s_;
104 hash = (hash * FNV_32_PRIME) ^ *s++;
109 /* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
111 hsh_hash_case_string (const char *s_)
113 const unsigned char *s = (const unsigned char *) s_;
120 hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
129 return hsh_hash_bytes (&i, sizeof i);
132 /* Hash for double. */
134 hsh_hash_double (double d)
137 return hsh_hash_bytes (&d, sizeof d);
147 size_t used; /* Number of filled entries. */
148 size_t size; /* Number of entries (a power of 2). */
149 void **entries; /* Hash table proper. */
151 const void *aux; /* Auxiliary data for comparison functions. */
152 hsh_compare_func *compare;
157 /* Set to false if hsh_data() or hsh_sort() has been called,
158 so that most hsh_*() functions may no longer be called. */
162 struct pool *pool; /* The pool used for this hash table */
166 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
167 hsh_free_func *free, const void *aux)
169 return hsh_create_pool (NULL, size, compare, hash, free, aux);
174 /* Creates a hash table with at least M entries. COMPARE is a
175 function that compares two entries and returns 0 if they are
176 identical, nonzero otherwise; HASH returns a nonnegative hash value
177 for an entry; FREE destroys an entry. */
179 hsh_create_pool (struct pool *pool, int size,
180 hsh_compare_func *compare, hsh_hash_func *hash,
181 hsh_free_func *free, const void *aux)
186 assert (compare != NULL);
187 assert (hash != NULL);
189 h = pool_malloc (pool, sizeof *h);
194 h->size = next_power_of_2 (size);
195 h->entries = pool_nmalloc (pool, h->size, sizeof *h->entries);
196 for (i = 0; i < h->size; i++)
197 h->entries[i] = NULL;
199 h->compare = compare;
203 h->hash_ordered = true;
208 /* Destroys the contents of table H. */
210 hsh_clear (struct hsh_table *h)
216 for (i = 0; i < h->size; i++)
217 if (h->entries[i] != NULL)
218 h->free (h->entries[i], h->aux);
220 for (i = 0; i < h->size; i++)
221 h->entries[i] = NULL;
226 h->hash_ordered = true;
230 /* Destroys table H and all its contents. */
232 hsh_destroy (struct hsh_table *h)
239 for (i = 0; i < h->size; i++)
240 if (h->entries[i] != NULL)
241 h->free (h->entries[i], h->aux);
242 pool_free (h->pool, h->entries);
243 pool_free (h->pool, h);
247 /* Locates an entry matching TARGET. Returns the index for the
248 entry, if found, or the index of an empty entry that indicates
249 where TARGET should go, otherwise. */
250 static inline unsigned
251 locate_matching_entry (struct hsh_table *h, const void *target)
253 unsigned i = h->hash (target, h->aux);
255 assert (h->hash_ordered);
260 entry = h->entries[i];
261 if (entry == NULL || !h->compare (entry, target, h->aux))
267 /* Returns the index of an empty entry that indicates
268 where TARGET should go, assuming that TARGET is not equal to
269 any item already in the hash table. */
270 static inline unsigned
271 locate_empty_entry (struct hsh_table *h, const void *target)
273 unsigned i = h->hash (target, h->aux);
275 assert (h->hash_ordered);
279 if (h->entries[i] == NULL)
285 /* Changes the capacity of H to NEW_SIZE, which must be a
286 positive power of 2 at least as large as the number of
289 rehash (struct hsh_table *h, size_t new_size)
291 void **begin, **end, **table_p;
295 assert (new_size >= h->used);
297 /* Verify that NEW_SIZE is a positive power of 2. */
298 assert (new_size > 0 && (new_size & (new_size - 1)) == 0);
301 end = begin + h->size;
304 h->entries = pool_nmalloc (h->pool, h->size, sizeof *h->entries);
305 for (i = 0; i < h->size; i++)
306 h->entries[i] = NULL;
307 for (table_p = begin; table_p < end; table_p++)
309 void *entry = *table_p;
311 h->entries[locate_empty_entry (h, entry)] = entry;
313 pool_free (h->pool, begin);
316 h->hash_ordered = true;
320 /* A "algo_predicate_func" that returns true if DATA points
321 to a non-null void. */
323 not_null (const void *data_, const void *aux UNUSED)
325 void *const *data = data_;
327 return *data != NULL;
330 /* Compacts hash table H and returns a pointer to its data. The
331 returned data consists of hsh_count(H) non-null pointers, in
332 no particular order, followed by a null pointer.
334 After calling this function, only hsh_destroy() and
335 hsh_count() should be applied to H. hsh_first() and
336 hsh_next() could also be used, but you're better off just
337 iterating through the returned array.
339 This function is intended for use in situations where data
340 processing occurs in two phases. In the first phase, data is
341 added, removed, and searched for within a hash table. In the
342 second phase, the contents of the hash table are output and
343 the hash property itself is no longer of interest.
345 Use hsh_sort() instead, if the second phase wants data in
346 sorted order. Use hsh_data_copy() or hsh_sort_copy() instead,
347 if the second phase still needs to search the hash table. */
349 hsh_data (struct hsh_table *h)
354 n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL);
355 assert (n == h->used);
357 h->hash_ordered = false;
362 /* Dereferences void ** pointers and passes them to the hash
363 comparison function. */
365 comparison_helper (const void *a_, const void *b_, const void *h_)
369 const struct hsh_table *h = h_;
374 return h->compare (*a, *b, h->aux);
377 /* Sorts hash table H based on hash comparison function. The
378 returned data consists of hsh_count(H) non-null pointers,
379 sorted in order of the hash comparison function, followed by a
382 After calling this function, only hsh_destroy() and
383 hsh_count() should be applied to H. hsh_first() and
384 hsh_next() could also be used, but you're better off just
385 iterating through the returned array.
387 This function is intended for use in situations where data
388 processing occurs in two phases. In the first phase, data is
389 added, removed, and searched for within a hash table. In the
390 second phase, the contents of the hash table are output and
391 the hash property itself is no longer of interest.
393 Use hsh_data() instead, if the second phase doesn't need the
394 data in any particular order. Use hsh_data_copy() or
395 hsh_sort_copy() instead, if the second phase still needs to
396 search the hash table. */
398 hsh_sort (struct hsh_table *h)
403 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
407 /* Makes and returns a copy of the pointers to the data in H.
408 The returned data consists of hsh_count(H) non-null pointers,
409 in no particular order, followed by a null pointer. The hash
410 table is not modified. The caller is responsible for freeing
413 If you don't need to search or modify the hash table, then
414 hsh_data() is a more efficient choice. */
416 hsh_data_copy (struct hsh_table *h)
421 copy = pool_nmalloc (h->pool, (h->used + 1), sizeof *copy);
422 copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL);
423 copy[h->used] = NULL;
427 /* Makes and returns a copy of the pointers to the data in H.
428 The returned data consists of hsh_count(H) non-null pointers,
429 sorted in order of the hash comparison function, followed by a
430 null pointer. The hash table is not modified. The caller is
431 responsible for freeing the allocated data.
433 If you don't need to search or modify the hash table, then
434 hsh_sort() is a more efficient choice. */
436 hsh_sort_copy (struct hsh_table *h)
441 copy = hsh_data_copy (h);
442 sort (copy, h->used, sizeof *copy, comparison_helper, h);
448 /* Searches hash table H for TARGET. If found, returns a pointer
449 to a pointer to that entry; otherwise returns a pointer to a
450 NULL entry which *must* be used to insert a new entry having
451 the same key data. */
453 hsh_probe (struct hsh_table *h, const void *target)
458 assert (target != NULL);
459 assert (h->hash_ordered);
461 if (h->used > h->size / 2)
462 rehash (h, h->size * 2);
463 i = locate_matching_entry (h, target);
464 if (h->entries[i] == NULL)
466 return &h->entries[i];
469 /* Searches hash table H for TARGET. If not found, inserts
470 TARGET and returns a null pointer. If found, returns the
471 match, without replacing it in the table. */
473 hsh_insert (struct hsh_table *h, void *target)
478 assert (target != NULL);
480 entry = hsh_probe (h, target);
490 /* Searches hash table H for TARGET. If not found, inserts
491 TARGET and returns a null pointer. If found, returns the
492 match, after replacing it in the table by TARGET. */
494 hsh_replace (struct hsh_table *h, void *target)
496 void **entry = hsh_probe (h, target);
502 /* Returns the entry in hash table H that matches TARGET, or NULL
505 hsh_find (struct hsh_table *h, const void *target)
507 return h->entries[locate_matching_entry (h, target)];
510 /* Deletes the entry in hash table H that matches TARGET.
511 Returns true if an entry was deleted.
513 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
514 Because our load factor is at most 1/2, the average number of
515 moves that this algorithm makes should be at most 2 - ln 2 ~=
518 hsh_delete (struct hsh_table *h, const void *target)
520 unsigned i = locate_matching_entry (h, target);
521 if (h->entries[i] != NULL)
525 h->free (h->entries[i], h->aux);
532 h->entries[i] = NULL;
536 i = (i - 1) & (h->size - 1);
537 if (h->entries[i] == NULL)
540 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
542 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
543 h->entries[j] = h->entries[i];
552 /* Finds and returns an entry in TABLE, and initializes ITER for
553 use with hsh_next(). If TABLE is empty, returns a null
556 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
559 assert (iter != NULL);
562 return hsh_next (h, iter);
565 /* Iterates through TABLE with iterator ITER. Returns the next
566 entry in TABLE, or a null pointer after the last entry.
568 Entries are returned in an undefined order. Modifying TABLE
569 during iteration may cause some entries to be returned
570 multiple times or not at all. */
572 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
577 assert (iter != NULL);
578 assert (iter->next <= h->size);
580 for (i = iter->next; i < h->size; i++)
584 return h->entries[i];
587 iter->next = h->size;
591 /* Returns the number of items in H. */
593 hsh_count (struct hsh_table *h)
607 /* Displays contents of hash table H on stdout. */
609 hsh_dump (struct hsh_table *h)
611 void **entry = h->entries;
614 printf (_("hash table:"));
615 for (i = 0; i < h->size; i++)
616 printf (" %p", *entry++);
620 /* This wrapper around hsh_probe() assures that it returns a pointer
621 to a NULL pointer. This function is used when it is known that the
622 entry to be inserted does not already exist in the table. */
624 hsh_force_insert (struct hsh_table *h, void *p)
626 void **pp = hsh_probe (h, p);
627 assert (*pp == NULL);
631 /* This wrapper around hsh_find() assures that it returns non-NULL.
632 This function is for use when it is known that the entry being
633 searched for must exist in the table. */
635 hsh_force_find (struct hsh_table *h, const void *target)
637 void *found = hsh_find (h, target);
638 assert (found != NULL);
642 /* This wrapper for hsh_delete() verifies that an item really was
645 hsh_force_delete (struct hsh_table *h, const void *target)
647 int found = hsh_delete (h, target);