1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
37 #define _(msgid) gettext (msgid)
39 /* Note for constructing hash functions:
41 You can store the hash values in the records, then compare hash
42 values (in the compare function) before bothering to compare keys.
43 Hash values can simply be returned from the records instead of
44 recalculating when rehashing. */
48 Since hash_probe and hash_find take void * pointers, it's easy to
49 pass a void ** to your data by accidentally inserting an `&'
50 reference operator where one shouldn't go. It took me an hour to
51 hunt down a bug like that once. */
53 /* Prime numbers and hash functions. */
55 /* Returns smallest power of 2 greater than X. */
57 next_power_of_2 (size_t x)
63 /* Turn off rightmost 1-bit in x. */
64 size_t y = x & (x - 1);
66 /* If y is 0 then x only had a single 1-bit. */
70 /* Otherwise turn off the next. */
75 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
76 #define FNV_32_PRIME 16777619u
77 #define FNV_32_BASIS 2166136261u
79 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
81 hsh_hash_bytes (const void *buf_, size_t size)
83 const unsigned char *buf = (const unsigned char *) buf_;
90 hash = (hash * FNV_32_PRIME) ^ *buf++;
95 /* Fowler-Noll-Vo 32-bit hash, for strings. */
97 hsh_hash_string (const char *s_)
99 const unsigned char *s = (const unsigned char *) s_;
106 hash = (hash * FNV_32_PRIME) ^ *s++;
111 /* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
113 hsh_hash_case_string (const char *s_)
115 const unsigned char *s = (const unsigned char *) s_;
122 hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
131 return hsh_hash_bytes (&i, sizeof i);
134 /* Hash for double. */
136 hsh_hash_double (double d)
139 return hsh_hash_bytes (&d, sizeof d);
149 size_t used; /* Number of filled entries. */
150 size_t size; /* Number of entries (a power of 2). */
151 void **entries; /* Hash table proper. */
153 const void *aux; /* Auxiliary data for comparison functions. */
154 hsh_compare_func *compare;
159 /* Set to false if hsh_data() or hsh_sort() has been called,
160 so that most hsh_*() functions may no longer be called. */
164 struct pool *pool; /* The pool used for this hash table */
168 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
169 hsh_free_func *free, const void *aux)
171 return hsh_create_pool (NULL, size, compare, hash, free, aux);
176 /* Creates a hash table with at least M entries. COMPARE is a
177 function that compares two entries and returns 0 if they are
178 identical, nonzero otherwise; HASH returns a nonnegative hash value
179 for an entry; FREE destroys an entry. */
181 hsh_create_pool (struct pool *pool, int size,
182 hsh_compare_func *compare, hsh_hash_func *hash,
183 hsh_free_func *free, const void *aux)
188 assert (compare != NULL);
189 assert (hash != NULL);
191 h = pool_malloc (pool, sizeof *h);
196 h->size = next_power_of_2 (size);
197 h->entries = pool_nmalloc (pool, h->size, sizeof *h->entries);
198 for (i = 0; i < h->size; i++)
199 h->entries[i] = NULL;
201 h->compare = compare;
205 h->hash_ordered = true;
210 /* Destroys the contents of table H. */
212 hsh_clear (struct hsh_table *h)
218 for (i = 0; i < h->size; i++)
219 if (h->entries[i] != NULL)
220 h->free (h->entries[i], h->aux);
222 for (i = 0; i < h->size; i++)
223 h->entries[i] = NULL;
228 h->hash_ordered = true;
232 /* Destroys table H and all its contents. */
234 hsh_destroy (struct hsh_table *h)
241 for (i = 0; i < h->size; i++)
242 if (h->entries[i] != NULL)
243 h->free (h->entries[i], h->aux);
244 pool_free (h->pool, h->entries);
245 pool_free (h->pool, h);
249 /* Locates an entry matching TARGET. Returns the index for the
250 entry, if found, or the index of an empty entry that indicates
251 where TARGET should go, otherwise. */
252 static inline unsigned
253 locate_matching_entry (struct hsh_table *h, const void *target)
255 unsigned i = h->hash (target, h->aux);
257 assert (h->hash_ordered);
262 entry = h->entries[i];
263 if (entry == NULL || !h->compare (entry, target, h->aux))
269 /* Returns the index of an empty entry that indicates
270 where TARGET should go, assuming that TARGET is not equal to
271 any item already in the hash table. */
272 static inline unsigned
273 locate_empty_entry (struct hsh_table *h, const void *target)
275 unsigned i = h->hash (target, h->aux);
277 assert (h->hash_ordered);
281 if (h->entries[i] == NULL)
287 /* Changes the capacity of H to NEW_SIZE, which must be a
288 positive power of 2 at least as large as the number of
291 rehash (struct hsh_table *h, size_t new_size)
293 void **begin, **end, **table_p;
297 assert (new_size >= h->used);
299 /* Verify that NEW_SIZE is a positive power of 2. */
300 assert (new_size > 0 && (new_size & (new_size - 1)) == 0);
303 end = begin + h->size;
306 h->entries = pool_nmalloc (h->pool, h->size, sizeof *h->entries);
307 for (i = 0; i < h->size; i++)
308 h->entries[i] = NULL;
309 for (table_p = begin; table_p < end; table_p++)
311 void *entry = *table_p;
313 h->entries[locate_empty_entry (h, entry)] = entry;
315 pool_free (h->pool, begin);
318 h->hash_ordered = true;
322 /* A "algo_predicate_func" that returns true if DATA points
323 to a non-null void. */
325 not_null (const void *data_, const void *aux UNUSED)
327 void *const *data = data_;
329 return *data != NULL;
332 /* Compacts hash table H and returns a pointer to its data. The
333 returned data consists of hsh_count(H) non-null pointers, in
334 no particular order, followed by a null pointer.
336 After calling this function, only hsh_destroy() and
337 hsh_count() should be applied to H. hsh_first() and
338 hsh_next() could also be used, but you're better off just
339 iterating through the returned array.
341 This function is intended for use in situations where data
342 processing occurs in two phases. In the first phase, data is
343 added, removed, and searched for within a hash table. In the
344 second phase, the contents of the hash table are output and
345 the hash property itself is no longer of interest.
347 Use hsh_sort() instead, if the second phase wants data in
348 sorted order. Use hsh_data_copy() or hsh_sort_copy() instead,
349 if the second phase still needs to search the hash table. */
351 hsh_data (struct hsh_table *h)
356 n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL);
357 assert (n == h->used);
359 h->hash_ordered = false;
364 /* Dereferences void ** pointers and passes them to the hash
365 comparison function. */
367 comparison_helper (const void *a_, const void *b_, const void *h_)
371 const struct hsh_table *h = h_;
376 return h->compare (*a, *b, h->aux);
379 /* Sorts hash table H based on hash comparison function. The
380 returned data consists of hsh_count(H) non-null pointers,
381 sorted in order of the hash comparison function, followed by a
384 After calling this function, only hsh_destroy() and
385 hsh_count() should be applied to H. hsh_first() and
386 hsh_next() could also be used, but you're better off just
387 iterating through the returned array.
389 This function is intended for use in situations where data
390 processing occurs in two phases. In the first phase, data is
391 added, removed, and searched for within a hash table. In the
392 second phase, the contents of the hash table are output and
393 the hash property itself is no longer of interest.
395 Use hsh_data() instead, if the second phase doesn't need the
396 data in any particular order. Use hsh_data_copy() or
397 hsh_sort_copy() instead, if the second phase still needs to
398 search the hash table. */
400 hsh_sort (struct hsh_table *h)
405 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
409 /* Makes and returns a copy of the pointers to the data in H.
410 The returned data consists of hsh_count(H) non-null pointers,
411 in no particular order, followed by a null pointer. The hash
412 table is not modified. The caller is responsible for freeing
415 If you don't need to search or modify the hash table, then
416 hsh_data() is a more efficient choice. */
418 hsh_data_copy (struct hsh_table *h)
423 copy = pool_nmalloc (h->pool, (h->used + 1), sizeof *copy);
424 copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL);
425 copy[h->used] = NULL;
429 /* Makes and returns a copy of the pointers to the data in H.
430 The returned data consists of hsh_count(H) non-null pointers,
431 sorted in order of the hash comparison function, followed by a
432 null pointer. The hash table is not modified. The caller is
433 responsible for freeing the allocated data.
435 If you don't need to search or modify the hash table, then
436 hsh_sort() is a more efficient choice. */
438 hsh_sort_copy (struct hsh_table *h)
443 copy = hsh_data_copy (h);
444 sort (copy, h->used, sizeof *copy, comparison_helper, h);
450 /* Searches hash table H for TARGET. If found, returns a pointer
451 to a pointer to that entry; otherwise returns a pointer to a
452 NULL entry which *must* be used to insert a new entry having
453 the same key data. */
455 hsh_probe (struct hsh_table *h, const void *target)
460 assert (target != NULL);
461 assert (h->hash_ordered);
463 if (h->used > h->size / 2)
464 rehash (h, h->size * 2);
465 i = locate_matching_entry (h, target);
466 if (h->entries[i] == NULL)
468 return &h->entries[i];
471 /* Searches hash table H for TARGET. If not found, inserts
472 TARGET and returns a null pointer. If found, returns the
473 match, without replacing it in the table. */
475 hsh_insert (struct hsh_table *h, void *target)
480 assert (target != NULL);
482 entry = hsh_probe (h, target);
492 /* Searches hash table H for TARGET. If not found, inserts
493 TARGET and returns a null pointer. If found, returns the
494 match, after replacing it in the table by TARGET. */
496 hsh_replace (struct hsh_table *h, void *target)
498 void **entry = hsh_probe (h, target);
504 /* Returns the entry in hash table H that matches TARGET, or NULL
507 hsh_find (struct hsh_table *h, const void *target)
509 return h->entries[locate_matching_entry (h, target)];
512 /* Deletes the entry in hash table H that matches TARGET.
513 Returns true if an entry was deleted.
515 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
516 Because our load factor is at most 1/2, the average number of
517 moves that this algorithm makes should be at most 2 - ln 2 ~=
520 hsh_delete (struct hsh_table *h, const void *target)
522 unsigned i = locate_matching_entry (h, target);
523 if (h->entries[i] != NULL)
527 h->free (h->entries[i], h->aux);
534 h->entries[i] = NULL;
538 i = (i - 1) & (h->size - 1);
539 if (h->entries[i] == NULL)
542 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
544 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
545 h->entries[j] = h->entries[i];
554 /* Finds and returns an entry in TABLE, and initializes ITER for
555 use with hsh_next(). If TABLE is empty, returns a null
558 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
561 assert (iter != NULL);
564 return hsh_next (h, iter);
567 /* Iterates through TABLE with iterator ITER. Returns the next
568 entry in TABLE, or a null pointer after the last entry.
570 Entries are returned in an undefined order. Modifying TABLE
571 during iteration may cause some entries to be returned
572 multiple times or not at all. */
574 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
579 assert (iter != NULL);
580 assert (iter->next <= h->size);
582 for (i = iter->next; i < h->size; i++)
586 return h->entries[i];
589 iter->next = h->size;
593 /* Returns the number of items in H. */
595 hsh_count (struct hsh_table *h)
609 /* Displays contents of hash table H on stdout. */
611 hsh_dump (struct hsh_table *h)
613 void **entry = h->entries;
616 printf (_("hash table:"));
617 for (i = 0; i < h->size; i++)
618 printf (" %p", *entry++);
622 /* This wrapper around hsh_probe() assures that it returns a pointer
623 to a NULL pointer. This function is used when it is known that the
624 entry to be inserted does not already exist in the table. */
626 hsh_force_insert (struct hsh_table *h, void *p)
628 void **pp = hsh_probe (h, p);
629 assert (*pp == NULL);
633 /* This wrapper around hsh_find() assures that it returns non-NULL.
634 This function is for use when it is known that the entry being
635 searched for must exist in the table. */
637 hsh_force_find (struct hsh_table *h, const void *target)
639 void *found = hsh_find (h, target);
640 assert (found != NULL);
644 /* This wrapper for hsh_delete() verifies that an item really was
647 hsh_force_delete (struct hsh_table *h, const void *target)
649 int found = hsh_delete (h, target);