1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include "algorithm.h"
33 /* Note for constructing hash functions:
35 You can store the hash values in the records, then compare hash
36 values (in the compare function) before bothering to compare keys.
37 Hash values can simply be returned from the records instead of
38 recalculating when rehashing. */
42 Since hash_probe and hash_find take void * pointers, it's easy to
43 pass a void ** to your data by accidentally inserting an `&'
44 reference operator where one shouldn't go. It took me an hour to
45 hunt down a bug like that once. */
47 /* Prime numbers and hash functions. */
49 /* Returns smallest power of 2 greater than X. */
51 next_power_of_2 (size_t x)
57 /* Turn off rightmost 1-bit in x. */
58 size_t y = x & (x - 1);
60 /* If y is 0 then x only had a single 1-bit. */
64 /* Otherwise turn off the next. */
69 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
70 #define FNV_32_PRIME 16777619u
71 #define FNV_32_BASIS 2166136261u
73 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
75 hsh_hash_bytes (const void *buf_, size_t size)
77 const unsigned char *buf = buf_;
84 hash = (hash * FNV_32_PRIME) ^ *buf++;
89 /* Fowler-Noll-Vo 32-bit hash, for strings. */
91 hsh_hash_string (const char *s_)
93 const unsigned char *s = s_;
100 hash = (hash * FNV_32_PRIME) ^ *s++;
105 /* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
107 hsh_hash_case_string (const char *s_)
109 const unsigned char *s = s_;
116 hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
125 return hsh_hash_bytes (&i, sizeof i);
128 /* Hash for double. */
130 hsh_hash_double (double d)
133 return hsh_hash_bytes (&d, sizeof d);
143 size_t used; /* Number of filled entries. */
144 size_t size; /* Number of entries (a power of 2). */
145 void **entries; /* Hash table proper. */
147 void *aux; /* Auxiliary data for comparison functions. */
148 hsh_compare_func *compare;
153 /* Set to false if hsh_data() or hsh_sort() has been called,
154 so that most hsh_*() functions may no longer be called. */
159 /* Creates a hash table with at least M entries. COMPARE is a
160 function that compares two entries and returns 0 if they are
161 identical, nonzero otherwise; HASH returns a nonnegative hash value
162 for an entry; FREE destroys an entry. */
164 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
165 hsh_free_func *free, void *aux)
170 assert (compare != NULL);
171 assert (hash != NULL);
173 h = xmalloc (sizeof *h);
177 h->size = next_power_of_2 (size);
178 h->entries = xmalloc (sizeof *h->entries * h->size);
179 for (i = 0; i < h->size; i++)
180 h->entries[i] = NULL;
182 h->compare = compare;
186 h->hash_ordered = true;
191 /* Destroys the contents of table H. */
193 hsh_clear (struct hsh_table *h)
199 for (i = 0; i < h->size; i++)
200 if (h->entries[i] != NULL)
201 h->free (h->entries[i], h->aux);
203 for (i = 0; i < h->size; i++)
204 h->entries[i] = NULL;
209 h->hash_ordered = true;
213 /* Destroys table H and all its contents. */
215 hsh_destroy (struct hsh_table *h)
222 for (i = 0; i < h->size; i++)
223 if (h->entries[i] != NULL)
224 h->free (h->entries[i], h->aux);
230 /* Locates an entry matching TARGET. Returns a pointer to the
231 entry, or a null pointer on failure. */
232 static inline unsigned
233 locate_matching_entry (struct hsh_table *h, const void *target)
235 unsigned i = h->hash (target, h->aux);
237 assert (h->hash_ordered);
242 entry = h->entries[i];
243 if (entry == NULL || !h->compare (entry, target, h->aux))
249 /* Changes the capacity of H to NEW_SIZE, which must be a
250 positive power of 2 at least as large as the number of
253 rehash (struct hsh_table *h, size_t new_size)
255 void **begin, **end, **table_p;
259 assert (new_size >= h->used);
261 /* Verify that NEW_SIZE is a positive power of 2. */
262 assert (new_size > 0 && (new_size & (new_size - 1)) == 0);
265 end = begin + h->size;
268 h->entries = xmalloc (sizeof *h->entries * h->size);
269 for (i = 0; i < h->size; i++)
270 h->entries[i] = NULL;
271 for (table_p = begin; table_p < end; table_p++)
273 void *entry = *table_p;
275 h->entries[locate_matching_entry (h, entry)] = entry;
280 h->hash_ordered = true;
284 /* A "algo_predicate_func" that returns nonzero if DATA points
285 to a non-null void. */
287 not_null (const void *data_, void *aux UNUSED)
289 void *const *data = data_;
291 return *data != NULL;
294 /* Compacts hash table H and returns a pointer to its data. The
295 returned data consists of hsh_count(H) non-null pointers, in
296 no particular order, followed by a null pointer.
298 After calling this function, only hsh_destroy() and
299 hsh_count() should be applied to H. hsh_first() and
300 hsh_next() could also be used, but you're better off just
301 iterating through the returned array.
303 This function is intended for use in situations where data
304 processing occurs in two phases. In the first phase, data is
305 added, removed, and searched for within a hash table. In the
306 second phase, the contents of the hash table are output and
307 the hash property itself is no longer of interest.
309 Use hsh_sort() instead, if the second phase wants data in
310 sorted order. Use hsh_data_copy() or hsh_sort_copy() instead,
311 if the second phase still needs to search the hash table. */
313 hsh_data (struct hsh_table *h)
318 n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL);
319 assert (n == h->used);
321 h->hash_ordered = false;
326 /* Dereferences void ** pointers and passes them to the hash
327 comparison function. */
329 comparison_helper (const void *a_, const void *b_, void *h_)
333 struct hsh_table *h = h_;
338 return h->compare (*a, *b, h->aux);
341 /* Sorts hash table H based on hash comparison function. The
342 returned data consists of hsh_count(H) non-null pointers,
343 sorted in order of the hash comparison function, followed by a
346 After calling this function, only hsh_destroy() and
347 hsh_count() should be applied to H. hsh_first() and
348 hsh_next() could also be used, but you're better off just
349 iterating through the returned array.
351 This function is intended for use in situations where data
352 processing occurs in two phases. In the first phase, data is
353 added, removed, and searched for within a hash table. In the
354 second phase, the contents of the hash table are output and
355 the hash property itself is no longer of interest.
357 Use hsh_data() instead, if the second phase doesn't need the
358 data in any particular order. Use hsh_data_copy() or
359 hsh_sort_copy() instead, if the second phase still needs to
360 search the hash table. */
362 hsh_sort (struct hsh_table *h)
367 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
371 /* Makes and returns a copy of the pointers to the data in H.
372 The returned data consists of hsh_count(H) non-null pointers,
373 in no particular order, followed by a null pointer. The hash
374 table is not modified. The caller is responsible for freeing
377 If you don't need to search or modify the hash table, then
378 hsh_data() is a more efficient choice. */
380 hsh_data_copy (struct hsh_table *h)
385 copy = xmalloc ((h->used + 1) * sizeof *copy);
386 copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL);
387 copy[h->used] = NULL;
391 /* Makes and returns a copy of the pointers to the data in H.
392 The returned data consists of hsh_count(H) non-null pointers,
393 sorted in order of the hash comparison function, followed by a
394 null pointer. The hash table is not modified. The caller is
395 responsible for freeing the allocated data.
397 If you don't need to search or modify the hash table, then
398 hsh_sort() is a more efficient choice. */
400 hsh_sort_copy (struct hsh_table *h)
405 copy = hsh_data_copy (h);
406 sort (copy, h->used, sizeof *copy, comparison_helper, h);
412 /* Searches hash table H for TARGET. If found, returns a pointer
413 to a pointer to that entry; otherwise returns a pointer to a
414 NULL entry which *must* be used to insert a new entry having
415 the same key data. */
417 hsh_probe (struct hsh_table *h, const void *target)
422 assert (target != NULL);
423 assert (h->hash_ordered);
425 if (h->used > h->size / 2)
426 rehash (h, h->size * 2);
427 i = locate_matching_entry (h, target);
428 if (h->entries[i] == NULL)
430 return &h->entries[i];
433 /* Searches hash table H for TARGET. If not found, inserts
434 TARGET and returns a null pointer. If found, returns the
435 match, without replacing it in the table. */
437 hsh_insert (struct hsh_table *h, void *target)
442 assert (target != NULL);
444 entry = hsh_probe (h, target);
454 /* Searches hash table H for TARGET. If not found, inserts
455 TARGET and returns a null pointer. If found, returns the
456 match, after replacing it in the table by TARGET. */
458 hsh_replace (struct hsh_table *h, void *target)
460 void **entry = hsh_probe (h, target);
466 /* Returns the entry in hash table H that matches TARGET, or NULL
469 hsh_find (struct hsh_table *h, const void *target)
471 return h->entries[locate_matching_entry (h, target)];
474 /* Deletes the entry in hash table H that matches TARGET.
475 Returns nonzero if an entry was deleted.
477 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
478 Because our load factor is at most 1/2, the average number of
479 moves that this algorithm makes should be at most 2 - ln 2 ~=
482 hsh_delete (struct hsh_table *h, const void *target)
484 unsigned i = locate_matching_entry (h, target);
485 if (h->entries[i] != NULL)
489 h->free (h->entries[i], h->aux);
496 h->entries[i] = NULL;
500 i = (i - 1) & (h->size - 1);
501 if (h->entries[i] == NULL)
504 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
506 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
507 h->entries[j] = h->entries[i];
516 /* Finds and returns an entry in TABLE, and initializes ITER for
517 use with hsh_next(). If TABLE is empty, returns a null
520 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
523 assert (iter != NULL);
526 return hsh_next (h, iter);
529 /* Iterates through TABLE with iterator ITER. Returns the next
530 entry in TABLE, or a null pointer after the last entry.
532 Entries are returned in an undefined order. Modifying TABLE
533 during iteration may cause some entries to be returned
534 multiple times or not at all. */
536 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
541 assert (iter != NULL);
542 assert (iter->next <= h->size);
544 for (i = iter->next; i < h->size; i++)
548 return h->entries[i];
551 iter->next = h->size;
555 /* Returns the number of items in H. */
557 hsh_count (struct hsh_table *h)
571 /* Displays contents of hash table H on stdout. */
573 hsh_dump (struct hsh_table *h)
575 void **entry = h->entries;
578 printf (_("hash table:"));
579 for (i = 0; i < h->size; i++)
580 printf (" %p", *entry++);
584 /* This wrapper around hsh_probe() assures that it returns a pointer
585 to a NULL pointer. This function is used when it is known that the
586 entry to be inserted does not already exist in the table. */
588 hsh_force_insert (struct hsh_table *h, void *p)
590 void **pp = hsh_probe (h, p);
591 assert (*pp == NULL);
595 /* This wrapper around hsh_find() assures that it returns non-NULL.
596 This function is for use when it is known that the entry being
597 searched for must exist in the table. */
599 hsh_force_find (struct hsh_table *h, const void *target)
601 void *found = hsh_find (h, target);
602 assert (found != NULL);
606 /* This wrapper for hsh_delete() verifies that an item really was
609 hsh_force_delete (struct hsh_table *h, const void *target)
611 int found = hsh_delete (h, target);