1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 #include "algorithm.h"
32 /* Note for constructing hash functions:
34 You can store the hash values in the records, then compare hash
35 values (in the compare function) before bothering to compare keys.
36 Hash values can simply be returned from the records instead of
37 recalculating when rehashing. */
41 Since hash_probe and hash_find take void * pointers, it's easy to
42 pass a void ** to your data by accidentally inserting an `&'
43 reference operator where one shouldn't go. It took me an hour to
44 hunt down a bug like that once. */
46 /* Prime numbers and hash functions. */
48 /* Returns smallest power of 2 greater than X. */
50 next_power_of_2 (size_t x)
56 /* Turn off rightmost 1-bit in x. */
57 size_t y = x & (x - 1);
59 /* If y is 0 then x only had a single 1-bit. */
63 /* Otherwise turn off the next. */
68 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
69 #define FNV_32_PRIME 16777619u
70 #define FNV_32_BASIS 2166136261u
72 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
74 hsh_hash_bytes (const void *buf_, size_t size)
76 const unsigned char *buf = buf_;
83 hash = (hash * FNV_32_PRIME) ^ *buf++;
88 /* Fowler-Noll-Vo 32-bit hash, for strings. */
90 hsh_hash_string (const char *s_)
92 const unsigned char *s = s_;
99 hash = (hash * FNV_32_PRIME) ^ *s++;
104 /* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
106 hsh_hash_case_string (const char *s_)
108 const unsigned char *s = s_;
115 hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
124 return hsh_hash_bytes (&i, sizeof i);
127 /* Hash for double. */
129 hsh_hash_double (double d)
132 return hsh_hash_bytes (&d, sizeof d);
142 size_t used; /* Number of filled entries. */
143 size_t size; /* Number of entries (a power of 2). */
144 void **entries; /* Hash table proper. */
146 void *aux; /* Auxiliary data for comparison functions. */
147 hsh_compare_func *compare;
152 /* Set to false if hsh_data() or hsh_sort() has been called,
153 so that most hsh_*() functions may no longer be called. */
158 /* Creates a hash table with at least M entries. COMPARE is a
159 function that compares two entries and returns 0 if they are
160 identical, nonzero otherwise; HASH returns a nonnegative hash value
161 for an entry; FREE destroys an entry. */
163 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
164 hsh_free_func *free, void *aux)
169 assert (compare != NULL);
170 assert (hash != NULL);
172 h = xmalloc (sizeof *h);
176 h->size = next_power_of_2 (size);
177 h->entries = xmalloc (sizeof *h->entries * h->size);
178 for (i = 0; i < h->size; i++)
179 h->entries[i] = NULL;
181 h->compare = compare;
185 h->hash_ordered = true;
190 /* Destroys the contents of table H. */
192 hsh_clear (struct hsh_table *h)
198 for (i = 0; i < h->size; i++)
199 if (h->entries[i] != NULL)
200 h->free (h->entries[i], h->aux);
202 for (i = 0; i < h->size; i++)
203 h->entries[i] = NULL;
208 h->hash_ordered = true;
212 /* Destroys table H and all its contents. */
214 hsh_destroy (struct hsh_table *h)
221 for (i = 0; i < h->size; i++)
222 if (h->entries[i] != NULL)
223 h->free (h->entries[i], h->aux);
229 /* Locates an entry matching TARGET. Returns a pointer to the
230 entry, or a null pointer on failure. */
231 static inline unsigned
232 locate_matching_entry (struct hsh_table *h, const void *target)
234 unsigned i = h->hash (target, h->aux);
236 assert (h->hash_ordered);
241 entry = h->entries[i];
242 if (entry == NULL || !h->compare (entry, target, h->aux))
248 /* Changes the capacity of H to NEW_SIZE, which must be a
249 positive power of 2 at least as large as the number of
252 rehash (struct hsh_table *h, size_t new_size)
254 void **begin, **end, **table_p;
258 assert (new_size >= h->used);
260 /* Verify that NEW_SIZE is a positive power of 2. */
261 assert (new_size > 0 && (new_size & (new_size - 1)) == 0);
264 end = begin + h->size;
267 h->entries = xmalloc (sizeof *h->entries * h->size);
268 for (i = 0; i < h->size; i++)
269 h->entries[i] = NULL;
270 for (table_p = begin; table_p < end; table_p++)
272 void *entry = *table_p;
274 h->entries[locate_matching_entry (h, entry)] = entry;
279 h->hash_ordered = true;
283 /* A "algo_predicate_func" that returns nonzero if DATA points
284 to a non-null void. */
286 not_null (const void *data_, void *aux UNUSED)
288 void *const *data = data_;
290 return *data != NULL;
293 /* Compacts hash table H and returns a pointer to its data. The
294 returned data consists of hsh_count(H) non-null pointers, in
295 no particular order, followed by a null pointer.
297 After calling this function, only hsh_destroy() and
298 hsh_count() should be applied to H. hsh_first() and
299 hsh_next() could also be used, but you're better off just
300 iterating through the returned array.
302 This function is intended for use in situations where data
303 processing occurs in two phases. In the first phase, data is
304 added, removed, and searched for within a hash table. In the
305 second phase, the contents of the hash table are output and
306 the hash property itself is no longer of interest.
308 Use hsh_sort() instead, if the second phase wants data in
309 sorted order. Use hsh_data_copy() or hsh_sort_copy() instead,
310 if the second phase still needs to search the hash table. */
312 hsh_data (struct hsh_table *h)
317 n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL);
318 assert (n == h->used);
320 h->hash_ordered = false;
325 /* Dereferences void ** pointers and passes them to the hash
326 comparison function. */
328 comparison_helper (const void *a_, const void *b_, void *h_)
332 struct hsh_table *h = h_;
337 return h->compare (*a, *b, h->aux);
340 /* Sorts hash table H based on hash comparison function. The
341 returned data consists of hsh_count(H) non-null pointers,
342 sorted in order of the hash comparison function, followed by a
345 After calling this function, only hsh_destroy() and
346 hsh_count() should be applied to H. hsh_first() and
347 hsh_next() could also be used, but you're better off just
348 iterating through the returned array.
350 This function is intended for use in situations where data
351 processing occurs in two phases. In the first phase, data is
352 added, removed, and searched for within a hash table. In the
353 second phase, the contents of the hash table are output and
354 the hash property itself is no longer of interest.
356 Use hsh_data() instead, if the second phase doesn't need the
357 data in any particular order. Use hsh_data_copy() or
358 hsh_sort_copy() instead, if the second phase still needs to
359 search the hash table. */
361 hsh_sort (struct hsh_table *h)
366 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
370 /* Makes and returns a copy of the pointers to the data in H.
371 The returned data consists of hsh_count(H) non-null pointers,
372 in no particular order, followed by a null pointer. The hash
373 table is not modified. The caller is responsible for freeing
376 If you don't need to search or modify the hash table, then
377 hsh_data() is a more efficient choice. */
379 hsh_data_copy (struct hsh_table *h)
384 copy = xmalloc ((h->used + 1) * sizeof *copy);
385 copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL);
386 copy[h->used] = NULL;
390 /* Makes and returns a copy of the pointers to the data in H.
391 The returned data consists of hsh_count(H) non-null pointers,
392 sorted in order of the hash comparison function, followed by a
393 null pointer. The hash table is not modified. The caller is
394 responsible for freeing the allocated data.
396 If you don't need to search or modify the hash table, then
397 hsh_sort() is a more efficient choice. */
399 hsh_sort_copy (struct hsh_table *h)
404 copy = hsh_data_copy (h);
405 sort (copy, h->used, sizeof *copy, comparison_helper, h);
411 /* Searches hash table H for TARGET. If found, returns a pointer
412 to a pointer to that entry; otherwise returns a pointer to a
413 NULL entry which *must* be used to insert a new entry having
414 the same key data. */
416 hsh_probe (struct hsh_table *h, const void *target)
421 assert (target != NULL);
422 assert (h->hash_ordered);
424 if (h->used > h->size / 2)
425 rehash (h, h->size * 2);
426 i = locate_matching_entry (h, target);
427 if (h->entries[i] == NULL)
429 return &h->entries[i];
432 /* Searches hash table H for TARGET. If not found, inserts
433 TARGET and returns a null pointer. If found, returns the
434 match, without replacing it in the table. */
436 hsh_insert (struct hsh_table *h, void *target)
441 assert (target != NULL);
443 entry = hsh_probe (h, target);
453 /* Searches hash table H for TARGET. If not found, inserts
454 TARGET and returns a null pointer. If found, returns the
455 match, after replacing it in the table by TARGET. */
457 hsh_replace (struct hsh_table *h, void *target)
459 void **entry = hsh_probe (h, target);
465 /* Returns the entry in hash table H that matches TARGET, or NULL
468 hsh_find (struct hsh_table *h, const void *target)
470 return h->entries[locate_matching_entry (h, target)];
473 /* Deletes the entry in hash table H that matches TARGET.
474 Returns nonzero if an entry was deleted.
476 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
477 Because our load factor is at most 1/2, the average number of
478 moves that this algorithm makes should be at most 2 - ln 2 ~=
481 hsh_delete (struct hsh_table *h, const void *target)
483 unsigned i = locate_matching_entry (h, target);
484 if (h->entries[i] != NULL)
488 h->free (h->entries[i], h->aux);
495 h->entries[i] = NULL;
499 i = (i - 1) & (h->size - 1);
500 if (h->entries[i] == NULL)
503 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
505 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
506 h->entries[j] = h->entries[i];
515 /* Finds and returns an entry in TABLE, and initializes ITER for
516 use with hsh_next(). If TABLE is empty, returns a null
519 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
522 assert (iter != NULL);
525 return hsh_next (h, iter);
528 /* Iterates through TABLE with iterator ITER. Returns the next
529 entry in TABLE, or a null pointer after the last entry.
531 Entries are returned in an undefined order. Modifying TABLE
532 during iteration may cause some entries to be returned
533 multiple times or not at all. */
535 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
540 assert (iter != NULL);
541 assert (iter->next <= h->size);
543 for (i = iter->next; i < h->size; i++)
547 return h->entries[i];
550 iter->next = h->size;
554 /* Returns the number of items in H. */
556 hsh_count (struct hsh_table *h)
570 /* Displays contents of hash table H on stdout. */
572 hsh_dump (struct hsh_table *h)
574 void **entry = h->entries;
577 printf (_("hash table:"));
578 for (i = 0; i < h->size; i++)
579 printf (" %p", *entry++);
583 /* This wrapper around hsh_probe() assures that it returns a pointer
584 to a NULL pointer. This function is used when it is known that the
585 entry to be inserted does not already exist in the table. */
587 hsh_force_insert (struct hsh_table *h, void *p)
589 void **pp = hsh_probe (h, p);
590 assert (*pp == NULL);
594 /* This wrapper around hsh_find() assures that it returns non-NULL.
595 This function is for use when it is known that the entry being
596 searched for must exist in the table. */
598 hsh_force_find (struct hsh_table *h, const void *target)
600 void *found = hsh_find (h, target);
601 assert (found != NULL);
605 /* This wrapper for hsh_delete() verifies that an item really was
608 hsh_force_delete (struct hsh_table *h, const void *target)
610 int found = hsh_delete (h, target);