1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
38 #define _(msgid) gettext (msgid)
40 /* Note for constructing hash functions:
42 You can store the hash values in the records, then compare hash
43 values (in the compare function) before bothering to compare keys.
44 Hash values can simply be returned from the records instead of
45 recalculating when rehashing. */
49 Since hash_probe and hash_find take void * pointers, it's easy to
50 pass a void ** to your data by accidentally inserting an `&'
51 reference operator where one shouldn't go. It took me an hour to
52 hunt down a bug like that once. */
54 /* Prime numbers and hash functions. */
56 /* Returns smallest power of 2 greater than X. */
58 next_power_of_2 (size_t x)
64 /* Turn off rightmost 1-bit in x. */
65 size_t y = x & (x - 1);
67 /* If y is 0 then x only had a single 1-bit. */
71 /* Otherwise turn off the next. */
76 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
77 #define FNV_32_PRIME 16777619u
78 #define FNV_32_BASIS 2166136261u
80 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
82 hsh_hash_bytes (const void *buf_, size_t size)
84 const unsigned char *buf = (const unsigned char *) buf_;
91 hash = (hash * FNV_32_PRIME) ^ *buf++;
96 /* Fowler-Noll-Vo 32-bit hash, for strings. */
98 hsh_hash_string (const char *s_)
100 const unsigned char *s = (const unsigned char *) s_;
107 hash = (hash * FNV_32_PRIME) ^ *s++;
112 /* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
114 hsh_hash_case_string (const char *s_)
116 const unsigned char *s = (const unsigned char *) s_;
123 hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
132 return hsh_hash_bytes (&i, sizeof i);
135 /* Hash for double. */
137 hsh_hash_double (double d)
140 return hsh_hash_bytes (&d, sizeof d);
150 size_t used; /* Number of filled entries. */
151 size_t size; /* Number of entries (a power of 2). */
152 void **entries; /* Hash table proper. */
154 const void *aux; /* Auxiliary data for comparison functions. */
155 hsh_compare_func *compare;
160 /* Set to false if hsh_data() or hsh_sort() has been called,
161 so that most hsh_*() functions may no longer be called. */
165 struct pool *pool; /* The pool used for this hash table */
169 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
170 hsh_free_func *free, const void *aux)
172 return hsh_create_pool (NULL, size, compare, hash, free, aux);
177 /* Creates a hash table with at least M entries. COMPARE is a
178 function that compares two entries and returns 0 if they are
179 identical, nonzero otherwise; HASH returns a nonnegative hash value
180 for an entry; FREE destroys an entry. */
182 hsh_create_pool (struct pool *pool, int size,
183 hsh_compare_func *compare, hsh_hash_func *hash,
184 hsh_free_func *free, const void *aux)
189 assert (compare != NULL);
190 assert (hash != NULL);
192 h = pool_malloc (pool, sizeof *h);
197 h->size = next_power_of_2 (size);
198 h->entries = pool_nmalloc (pool, h->size, sizeof *h->entries);
199 for (i = 0; i < h->size; i++)
200 h->entries[i] = NULL;
202 h->compare = compare;
206 h->hash_ordered = true;
211 /* Destroys the contents of table H. */
213 hsh_clear (struct hsh_table *h)
219 for (i = 0; i < h->size; i++)
220 if (h->entries[i] != NULL)
221 h->free (h->entries[i], h->aux);
223 for (i = 0; i < h->size; i++)
224 h->entries[i] = NULL;
229 h->hash_ordered = true;
233 /* Destroys table H and all its contents. */
235 hsh_destroy (struct hsh_table *h)
242 for (i = 0; i < h->size; i++)
243 if (h->entries[i] != NULL)
244 h->free (h->entries[i], h->aux);
245 pool_free (h->pool, h->entries);
246 pool_free (h->pool, h);
250 /* Locates an entry matching TARGET. Returns the index for the
251 entry, if found, or the index of an empty entry that indicates
252 where TARGET should go, otherwise. */
253 static inline unsigned
254 locate_matching_entry (struct hsh_table *h, const void *target)
256 unsigned i = h->hash (target, h->aux);
258 assert (h->hash_ordered);
263 entry = h->entries[i];
264 if (entry == NULL || !h->compare (entry, target, h->aux))
270 /* Returns the index of an empty entry that indicates
271 where TARGET should go, assuming that TARGET is not equal to
272 any item already in the hash table. */
273 static inline unsigned
274 locate_empty_entry (struct hsh_table *h, const void *target)
276 unsigned i = h->hash (target, h->aux);
278 assert (h->hash_ordered);
282 if (h->entries[i] == NULL)
288 /* Changes the capacity of H to NEW_SIZE, which must be a
289 positive power of 2 at least as large as the number of
292 rehash (struct hsh_table *h, size_t new_size)
294 void **begin, **end, **table_p;
298 assert (new_size >= h->used);
300 /* Verify that NEW_SIZE is a positive power of 2. */
301 assert (new_size > 0 && (new_size & (new_size - 1)) == 0);
304 end = begin + h->size;
307 h->entries = pool_nmalloc (h->pool, h->size, sizeof *h->entries);
308 for (i = 0; i < h->size; i++)
309 h->entries[i] = NULL;
310 for (table_p = begin; table_p < end; table_p++)
312 void *entry = *table_p;
314 h->entries[locate_empty_entry (h, entry)] = entry;
316 pool_free (h->pool, begin);
319 h->hash_ordered = true;
323 /* A "algo_predicate_func" that returns true if DATA points
324 to a non-null void. */
326 not_null (const void *data_, const void *aux UNUSED)
328 void *const *data = data_;
330 return *data != NULL;
333 /* Compacts hash table H and returns a pointer to its data. The
334 returned data consists of hsh_count(H) non-null pointers, in
335 no particular order, followed by a null pointer.
337 After calling this function, only hsh_destroy() and
338 hsh_count() should be applied to H. hsh_first() and
339 hsh_next() could also be used, but you're better off just
340 iterating through the returned array.
342 This function is intended for use in situations where data
343 processing occurs in two phases. In the first phase, data is
344 added, removed, and searched for within a hash table. In the
345 second phase, the contents of the hash table are output and
346 the hash property itself is no longer of interest.
348 Use hsh_sort() instead, if the second phase wants data in
349 sorted order. Use hsh_data_copy() or hsh_sort_copy() instead,
350 if the second phase still needs to search the hash table. */
352 hsh_data (struct hsh_table *h)
357 n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL);
358 assert (n == h->used);
360 h->hash_ordered = false;
365 /* Dereferences void ** pointers and passes them to the hash
366 comparison function. */
368 comparison_helper (const void *a_, const void *b_, const void *h_)
372 const struct hsh_table *h = h_;
377 return h->compare (*a, *b, h->aux);
380 /* Sorts hash table H based on hash comparison function. The
381 returned data consists of hsh_count(H) non-null pointers,
382 sorted in order of the hash comparison function, followed by a
385 After calling this function, only hsh_destroy() and
386 hsh_count() should be applied to H. hsh_first() and
387 hsh_next() could also be used, but you're better off just
388 iterating through the returned array.
390 This function is intended for use in situations where data
391 processing occurs in two phases. In the first phase, data is
392 added, removed, and searched for within a hash table. In the
393 second phase, the contents of the hash table are output and
394 the hash property itself is no longer of interest.
396 Use hsh_data() instead, if the second phase doesn't need the
397 data in any particular order. Use hsh_data_copy() or
398 hsh_sort_copy() instead, if the second phase still needs to
399 search the hash table. */
401 hsh_sort (struct hsh_table *h)
406 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
410 /* Makes and returns a copy of the pointers to the data in H.
411 The returned data consists of hsh_count(H) non-null pointers,
412 in no particular order, followed by a null pointer. The hash
413 table is not modified. The caller is responsible for freeing
416 If you don't need to search or modify the hash table, then
417 hsh_data() is a more efficient choice. */
419 hsh_data_copy (struct hsh_table *h)
424 copy = pool_nmalloc (h->pool, (h->used + 1), sizeof *copy);
425 copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL);
426 copy[h->used] = NULL;
430 /* Makes and returns a copy of the pointers to the data in H.
431 The returned data consists of hsh_count(H) non-null pointers,
432 sorted in order of the hash comparison function, followed by a
433 null pointer. The hash table is not modified. The caller is
434 responsible for freeing the allocated data.
436 If you don't need to search or modify the hash table, then
437 hsh_sort() is a more efficient choice. */
439 hsh_sort_copy (struct hsh_table *h)
444 copy = hsh_data_copy (h);
445 sort (copy, h->used, sizeof *copy, comparison_helper, h);
451 /* Searches hash table H for TARGET. If found, returns a pointer
452 to a pointer to that entry; otherwise returns a pointer to a
453 NULL entry which *must* be used to insert a new entry having
454 the same key data. */
456 hsh_probe (struct hsh_table *h, const void *target)
461 assert (target != NULL);
462 assert (h->hash_ordered);
464 if (h->used > h->size / 2)
465 rehash (h, h->size * 2);
466 i = locate_matching_entry (h, target);
467 if (h->entries[i] == NULL)
469 return &h->entries[i];
472 /* Searches hash table H for TARGET. If not found, inserts
473 TARGET and returns a null pointer. If found, returns the
474 match, without replacing it in the table. */
476 hsh_insert (struct hsh_table *h, void *target)
481 assert (target != NULL);
483 entry = hsh_probe (h, target);
493 /* Searches hash table H for TARGET. If not found, inserts
494 TARGET and returns a null pointer. If found, returns the
495 match, after replacing it in the table by TARGET. */
497 hsh_replace (struct hsh_table *h, void *target)
499 void **entry = hsh_probe (h, target);
505 /* Returns the entry in hash table H that matches TARGET, or NULL
508 hsh_find (struct hsh_table *h, const void *target)
510 return h->entries[locate_matching_entry (h, target)];
513 /* Deletes the entry in hash table H that matches TARGET.
514 Returns true if an entry was deleted.
516 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
517 Because our load factor is at most 1/2, the average number of
518 moves that this algorithm makes should be at most 2 - ln 2 ~=
521 hsh_delete (struct hsh_table *h, const void *target)
523 unsigned i = locate_matching_entry (h, target);
524 if (h->entries[i] != NULL)
528 h->free (h->entries[i], h->aux);
535 h->entries[i] = NULL;
539 i = (i - 1) & (h->size - 1);
540 if (h->entries[i] == NULL)
543 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
545 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
546 h->entries[j] = h->entries[i];
555 /* Finds and returns an entry in TABLE, and initializes ITER for
556 use with hsh_next(). If TABLE is empty, returns a null
559 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
562 assert (iter != NULL);
565 return hsh_next (h, iter);
568 /* Iterates through TABLE with iterator ITER. Returns the next
569 entry in TABLE, or a null pointer after the last entry.
571 Entries are returned in an undefined order. Modifying TABLE
572 during iteration may cause some entries to be returned
573 multiple times or not at all. */
575 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
580 assert (iter != NULL);
581 assert (iter->next <= h->size);
583 for (i = iter->next; i < h->size; i++)
587 return h->entries[i];
590 iter->next = h->size;
594 /* Returns the number of items in H. */
596 hsh_count (struct hsh_table *h)
610 /* Displays contents of hash table H on stdout. */
612 hsh_dump (struct hsh_table *h)
614 void **entry = h->entries;
617 printf (_("hash table:"));
618 for (i = 0; i < h->size; i++)
619 printf (" %p", *entry++);
623 /* This wrapper around hsh_probe() assures that it returns a pointer
624 to a NULL pointer. This function is used when it is known that the
625 entry to be inserted does not already exist in the table. */
627 hsh_force_insert (struct hsh_table *h, void *p)
629 void **pp = hsh_probe (h, p);
630 assert (*pp == NULL);
634 /* This wrapper around hsh_find() assures that it returns non-NULL.
635 This function is for use when it is known that the entry being
636 searched for must exist in the table. */
638 hsh_force_find (struct hsh_table *h, const void *target)
640 void *found = hsh_find (h, target);
641 assert (found != NULL);
645 /* This wrapper for hsh_delete() verifies that an item really was
648 hsh_force_delete (struct hsh_table *h, const void *target)
650 int found = hsh_delete (h, target);