1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
36 #define _(msgid) gettext (msgid)
38 /* Note for constructing hash functions:
40 You can store the hash values in the records, then compare hash
41 values (in the compare function) before bothering to compare keys.
42 Hash values can simply be returned from the records instead of
43 recalculating when rehashing. */
47 Since hash_probe and hash_find take void * pointers, it's easy to
48 pass a void ** to your data by accidentally inserting an `&'
49 reference operator where one shouldn't go. It took me an hour to
50 hunt down a bug like that once. */
52 /* Prime numbers and hash functions. */
54 /* Returns smallest power of 2 greater than X. */
56 next_power_of_2 (size_t x)
62 /* Turn off rightmost 1-bit in x. */
63 size_t y = x & (x - 1);
65 /* If y is 0 then x only had a single 1-bit. */
69 /* Otherwise turn off the next. */
74 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
75 #define FNV_32_PRIME 16777619u
76 #define FNV_32_BASIS 2166136261u
78 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
80 hsh_hash_bytes (const void *buf_, size_t size)
82 const unsigned char *buf = (const unsigned char *) buf_;
89 hash = (hash * FNV_32_PRIME) ^ *buf++;
94 /* Fowler-Noll-Vo 32-bit hash, for strings. */
96 hsh_hash_string (const char *s_)
98 const unsigned char *s = (const unsigned char *) s_;
105 hash = (hash * FNV_32_PRIME) ^ *s++;
110 /* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
112 hsh_hash_case_string (const char *s_)
114 const unsigned char *s = (const unsigned char *) s_;
121 hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
130 return hsh_hash_bytes (&i, sizeof i);
133 /* Hash for double. */
135 hsh_hash_double (double d)
138 return hsh_hash_bytes (&d, sizeof d);
148 size_t used; /* Number of filled entries. */
149 size_t size; /* Number of entries (a power of 2). */
150 void **entries; /* Hash table proper. */
152 void *aux; /* Auxiliary data for comparison functions. */
153 hsh_compare_func *compare;
158 /* Set to false if hsh_data() or hsh_sort() has been called,
159 so that most hsh_*() functions may no longer be called. */
164 /* Creates a hash table with at least M entries. COMPARE is a
165 function that compares two entries and returns 0 if they are
166 identical, nonzero otherwise; HASH returns a nonnegative hash value
167 for an entry; FREE destroys an entry. */
169 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
170 hsh_free_func *free, void *aux)
175 assert (compare != NULL);
176 assert (hash != NULL);
178 h = xmalloc (sizeof *h);
182 h->size = next_power_of_2 (size);
183 h->entries = xnmalloc (h->size, sizeof *h->entries);
184 for (i = 0; i < h->size; i++)
185 h->entries[i] = NULL;
187 h->compare = compare;
191 h->hash_ordered = true;
196 /* Destroys the contents of table H. */
198 hsh_clear (struct hsh_table *h)
204 for (i = 0; i < h->size; i++)
205 if (h->entries[i] != NULL)
206 h->free (h->entries[i], h->aux);
208 for (i = 0; i < h->size; i++)
209 h->entries[i] = NULL;
214 h->hash_ordered = true;
218 /* Destroys table H and all its contents. */
220 hsh_destroy (struct hsh_table *h)
227 for (i = 0; i < h->size; i++)
228 if (h->entries[i] != NULL)
229 h->free (h->entries[i], h->aux);
235 /* Locates an entry matching TARGET. Returns the index for the
236 entry, if found, or the index of an empty entry that indicates
237 where TARGET should go, otherwise. */
238 static inline unsigned
239 locate_matching_entry (struct hsh_table *h, const void *target)
241 unsigned i = h->hash (target, h->aux);
243 assert (h->hash_ordered);
248 entry = h->entries[i];
249 if (entry == NULL || !h->compare (entry, target, h->aux))
255 /* Returns the index of an empty entry that indicates
256 where TARGET should go, assuming that TARGET is not equal to
257 any item already in the hash table. */
258 static inline unsigned
259 locate_empty_entry (struct hsh_table *h, const void *target)
261 unsigned i = h->hash (target, h->aux);
263 assert (h->hash_ordered);
267 if (h->entries[i] == NULL)
273 /* Changes the capacity of H to NEW_SIZE, which must be a
274 positive power of 2 at least as large as the number of
277 rehash (struct hsh_table *h, size_t new_size)
279 void **begin, **end, **table_p;
283 assert (new_size >= h->used);
285 /* Verify that NEW_SIZE is a positive power of 2. */
286 assert (new_size > 0 && (new_size & (new_size - 1)) == 0);
289 end = begin + h->size;
292 h->entries = xnmalloc (h->size, sizeof *h->entries);
293 for (i = 0; i < h->size; i++)
294 h->entries[i] = NULL;
295 for (table_p = begin; table_p < end; table_p++)
297 void *entry = *table_p;
299 h->entries[locate_empty_entry (h, entry)] = entry;
304 h->hash_ordered = true;
308 /* A "algo_predicate_func" that returns true if DATA points
309 to a non-null void. */
311 not_null (const void *data_, void *aux UNUSED)
313 void *const *data = data_;
315 return *data != NULL;
318 /* Compacts hash table H and returns a pointer to its data. The
319 returned data consists of hsh_count(H) non-null pointers, in
320 no particular order, followed by a null pointer.
322 After calling this function, only hsh_destroy() and
323 hsh_count() should be applied to H. hsh_first() and
324 hsh_next() could also be used, but you're better off just
325 iterating through the returned array.
327 This function is intended for use in situations where data
328 processing occurs in two phases. In the first phase, data is
329 added, removed, and searched for within a hash table. In the
330 second phase, the contents of the hash table are output and
331 the hash property itself is no longer of interest.
333 Use hsh_sort() instead, if the second phase wants data in
334 sorted order. Use hsh_data_copy() or hsh_sort_copy() instead,
335 if the second phase still needs to search the hash table. */
337 hsh_data (struct hsh_table *h)
342 n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL);
343 assert (n == h->used);
345 h->hash_ordered = false;
350 /* Dereferences void ** pointers and passes them to the hash
351 comparison function. */
353 comparison_helper (const void *a_, const void *b_, void *h_)
357 struct hsh_table *h = h_;
362 return h->compare (*a, *b, h->aux);
365 /* Sorts hash table H based on hash comparison function. The
366 returned data consists of hsh_count(H) non-null pointers,
367 sorted in order of the hash comparison function, followed by a
370 After calling this function, only hsh_destroy() and
371 hsh_count() should be applied to H. hsh_first() and
372 hsh_next() could also be used, but you're better off just
373 iterating through the returned array.
375 This function is intended for use in situations where data
376 processing occurs in two phases. In the first phase, data is
377 added, removed, and searched for within a hash table. In the
378 second phase, the contents of the hash table are output and
379 the hash property itself is no longer of interest.
381 Use hsh_data() instead, if the second phase doesn't need the
382 data in any particular order. Use hsh_data_copy() or
383 hsh_sort_copy() instead, if the second phase still needs to
384 search the hash table. */
386 hsh_sort (struct hsh_table *h)
391 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
395 /* Makes and returns a copy of the pointers to the data in H.
396 The returned data consists of hsh_count(H) non-null pointers,
397 in no particular order, followed by a null pointer. The hash
398 table is not modified. The caller is responsible for freeing
401 If you don't need to search or modify the hash table, then
402 hsh_data() is a more efficient choice. */
404 hsh_data_copy (struct hsh_table *h)
409 copy = xnmalloc ((h->used + 1), sizeof *copy);
410 copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL);
411 copy[h->used] = NULL;
415 /* Makes and returns a copy of the pointers to the data in H.
416 The returned data consists of hsh_count(H) non-null pointers,
417 sorted in order of the hash comparison function, followed by a
418 null pointer. The hash table is not modified. The caller is
419 responsible for freeing the allocated data.
421 If you don't need to search or modify the hash table, then
422 hsh_sort() is a more efficient choice. */
424 hsh_sort_copy (struct hsh_table *h)
429 copy = hsh_data_copy (h);
430 sort (copy, h->used, sizeof *copy, comparison_helper, h);
436 /* Searches hash table H for TARGET. If found, returns a pointer
437 to a pointer to that entry; otherwise returns a pointer to a
438 NULL entry which *must* be used to insert a new entry having
439 the same key data. */
441 hsh_probe (struct hsh_table *h, const void *target)
446 assert (target != NULL);
447 assert (h->hash_ordered);
449 if (h->used > h->size / 2)
450 rehash (h, h->size * 2);
451 i = locate_matching_entry (h, target);
452 if (h->entries[i] == NULL)
454 return &h->entries[i];
457 /* Searches hash table H for TARGET. If not found, inserts
458 TARGET and returns a null pointer. If found, returns the
459 match, without replacing it in the table. */
461 hsh_insert (struct hsh_table *h, void *target)
466 assert (target != NULL);
468 entry = hsh_probe (h, target);
478 /* Searches hash table H for TARGET. If not found, inserts
479 TARGET and returns a null pointer. If found, returns the
480 match, after replacing it in the table by TARGET. */
482 hsh_replace (struct hsh_table *h, void *target)
484 void **entry = hsh_probe (h, target);
490 /* Returns the entry in hash table H that matches TARGET, or NULL
493 hsh_find (struct hsh_table *h, const void *target)
495 return h->entries[locate_matching_entry (h, target)];
498 /* Deletes the entry in hash table H that matches TARGET.
499 Returns true if an entry was deleted.
501 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
502 Because our load factor is at most 1/2, the average number of
503 moves that this algorithm makes should be at most 2 - ln 2 ~=
506 hsh_delete (struct hsh_table *h, const void *target)
508 unsigned i = locate_matching_entry (h, target);
509 if (h->entries[i] != NULL)
513 h->free (h->entries[i], h->aux);
520 h->entries[i] = NULL;
524 i = (i - 1) & (h->size - 1);
525 if (h->entries[i] == NULL)
528 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
530 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
531 h->entries[j] = h->entries[i];
540 /* Finds and returns an entry in TABLE, and initializes ITER for
541 use with hsh_next(). If TABLE is empty, returns a null
544 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
547 assert (iter != NULL);
550 return hsh_next (h, iter);
553 /* Iterates through TABLE with iterator ITER. Returns the next
554 entry in TABLE, or a null pointer after the last entry.
556 Entries are returned in an undefined order. Modifying TABLE
557 during iteration may cause some entries to be returned
558 multiple times or not at all. */
560 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
565 assert (iter != NULL);
566 assert (iter->next <= h->size);
568 for (i = iter->next; i < h->size; i++)
572 return h->entries[i];
575 iter->next = h->size;
579 /* Returns the number of items in H. */
581 hsh_count (struct hsh_table *h)
595 /* Displays contents of hash table H on stdout. */
597 hsh_dump (struct hsh_table *h)
599 void **entry = h->entries;
602 printf (_("hash table:"));
603 for (i = 0; i < h->size; i++)
604 printf (" %p", *entry++);
608 /* This wrapper around hsh_probe() assures that it returns a pointer
609 to a NULL pointer. This function is used when it is known that the
610 entry to be inserted does not already exist in the table. */
612 hsh_force_insert (struct hsh_table *h, void *p)
614 void **pp = hsh_probe (h, p);
615 assert (*pp == NULL);
619 /* This wrapper around hsh_find() assures that it returns non-NULL.
620 This function is for use when it is known that the entry being
621 searched for must exist in the table. */
623 hsh_force_find (struct hsh_table *h, const void *target)
625 void *found = hsh_find (h, target);
626 assert (found != NULL);
630 /* This wrapper for hsh_delete() verifies that an item really was
633 hsh_force_delete (struct hsh_table *h, const void *target)
635 int found = hsh_delete (h, target);