1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
35 #define _(msgid) gettext (msgid)
37 /* Note for constructing hash functions:
39 You can store the hash values in the records, then compare hash
40 values (in the compare function) before bothering to compare keys.
41 Hash values can simply be returned from the records instead of
42 recalculating when rehashing. */
46 Since hash_probe and hash_find take void * pointers, it's easy to
47 pass a void ** to your data by accidentally inserting an `&'
48 reference operator where one shouldn't go. It took me an hour to
49 hunt down a bug like that once. */
51 /* Prime numbers and hash functions. */
53 /* Returns smallest power of 2 greater than X. */
55 next_power_of_2 (size_t x)
61 /* Turn off rightmost 1-bit in x. */
62 size_t y = x & (x - 1);
64 /* If y is 0 then x only had a single 1-bit. */
68 /* Otherwise turn off the next. */
73 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
74 #define FNV_32_PRIME 16777619u
75 #define FNV_32_BASIS 2166136261u
77 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
79 hsh_hash_bytes (const void *buf_, size_t size)
81 const unsigned char *buf = (const unsigned char *) buf_;
88 hash = (hash * FNV_32_PRIME) ^ *buf++;
93 /* Fowler-Noll-Vo 32-bit hash, for strings. */
95 hsh_hash_string (const char *s_)
97 const unsigned char *s = (const unsigned char *) s_;
104 hash = (hash * FNV_32_PRIME) ^ *s++;
109 /* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
111 hsh_hash_case_string (const char *s_)
113 const unsigned char *s = (const unsigned char *) s_;
120 hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
129 return hsh_hash_bytes (&i, sizeof i);
132 /* Hash for double. */
134 hsh_hash_double (double d)
137 return hsh_hash_bytes (&d, sizeof d);
147 size_t used; /* Number of filled entries. */
148 size_t size; /* Number of entries (a power of 2). */
149 void **entries; /* Hash table proper. */
151 void *aux; /* Auxiliary data for comparison functions. */
152 hsh_compare_func *compare;
157 /* Set to false if hsh_data() or hsh_sort() has been called,
158 so that most hsh_*() functions may no longer be called. */
163 /* Creates a hash table with at least M entries. COMPARE is a
164 function that compares two entries and returns 0 if they are
165 identical, nonzero otherwise; HASH returns a nonnegative hash value
166 for an entry; FREE destroys an entry. */
168 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
169 hsh_free_func *free, void *aux)
174 assert (compare != NULL);
175 assert (hash != NULL);
177 h = xmalloc (sizeof *h);
181 h->size = next_power_of_2 (size);
182 h->entries = xnmalloc (h->size, sizeof *h->entries);
183 for (i = 0; i < h->size; i++)
184 h->entries[i] = NULL;
186 h->compare = compare;
190 h->hash_ordered = true;
195 /* Destroys the contents of table H. */
197 hsh_clear (struct hsh_table *h)
203 for (i = 0; i < h->size; i++)
204 if (h->entries[i] != NULL)
205 h->free (h->entries[i], h->aux);
207 for (i = 0; i < h->size; i++)
208 h->entries[i] = NULL;
213 h->hash_ordered = true;
217 /* Destroys table H and all its contents. */
219 hsh_destroy (struct hsh_table *h)
226 for (i = 0; i < h->size; i++)
227 if (h->entries[i] != NULL)
228 h->free (h->entries[i], h->aux);
234 /* Locates an entry matching TARGET. Returns the index for the
235 entry, if found, or the index of an empty entry that indicates
236 where TARGET should go, otherwise. */
237 static inline unsigned
238 locate_matching_entry (struct hsh_table *h, const void *target)
240 unsigned i = h->hash (target, h->aux);
242 assert (h->hash_ordered);
247 entry = h->entries[i];
248 if (entry == NULL || !h->compare (entry, target, h->aux))
254 /* Returns the index of an empty entry that indicates
255 where TARGET should go, assuming that TARGET is not equal to
256 any item already in the hash table. */
257 static inline unsigned
258 locate_empty_entry (struct hsh_table *h, const void *target)
260 unsigned i = h->hash (target, h->aux);
262 assert (h->hash_ordered);
266 if (h->entries[i] == NULL)
272 /* Changes the capacity of H to NEW_SIZE, which must be a
273 positive power of 2 at least as large as the number of
276 rehash (struct hsh_table *h, size_t new_size)
278 void **begin, **end, **table_p;
282 assert (new_size >= h->used);
284 /* Verify that NEW_SIZE is a positive power of 2. */
285 assert (new_size > 0 && (new_size & (new_size - 1)) == 0);
288 end = begin + h->size;
291 h->entries = xnmalloc (h->size, sizeof *h->entries);
292 for (i = 0; i < h->size; i++)
293 h->entries[i] = NULL;
294 for (table_p = begin; table_p < end; table_p++)
296 void *entry = *table_p;
298 h->entries[locate_empty_entry (h, entry)] = entry;
303 h->hash_ordered = true;
307 /* A "algo_predicate_func" that returns nonzero if DATA points
308 to a non-null void. */
310 not_null (const void *data_, void *aux UNUSED)
312 void *const *data = data_;
314 return *data != NULL;
317 /* Compacts hash table H and returns a pointer to its data. The
318 returned data consists of hsh_count(H) non-null pointers, in
319 no particular order, followed by a null pointer.
321 After calling this function, only hsh_destroy() and
322 hsh_count() should be applied to H. hsh_first() and
323 hsh_next() could also be used, but you're better off just
324 iterating through the returned array.
326 This function is intended for use in situations where data
327 processing occurs in two phases. In the first phase, data is
328 added, removed, and searched for within a hash table. In the
329 second phase, the contents of the hash table are output and
330 the hash property itself is no longer of interest.
332 Use hsh_sort() instead, if the second phase wants data in
333 sorted order. Use hsh_data_copy() or hsh_sort_copy() instead,
334 if the second phase still needs to search the hash table. */
336 hsh_data (struct hsh_table *h)
341 n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL);
342 assert (n == h->used);
344 h->hash_ordered = false;
349 /* Dereferences void ** pointers and passes them to the hash
350 comparison function. */
352 comparison_helper (const void *a_, const void *b_, void *h_)
356 struct hsh_table *h = h_;
361 return h->compare (*a, *b, h->aux);
364 /* Sorts hash table H based on hash comparison function. The
365 returned data consists of hsh_count(H) non-null pointers,
366 sorted in order of the hash comparison function, followed by a
369 After calling this function, only hsh_destroy() and
370 hsh_count() should be applied to H. hsh_first() and
371 hsh_next() could also be used, but you're better off just
372 iterating through the returned array.
374 This function is intended for use in situations where data
375 processing occurs in two phases. In the first phase, data is
376 added, removed, and searched for within a hash table. In the
377 second phase, the contents of the hash table are output and
378 the hash property itself is no longer of interest.
380 Use hsh_data() instead, if the second phase doesn't need the
381 data in any particular order. Use hsh_data_copy() or
382 hsh_sort_copy() instead, if the second phase still needs to
383 search the hash table. */
385 hsh_sort (struct hsh_table *h)
390 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
394 /* Makes and returns a copy of the pointers to the data in H.
395 The returned data consists of hsh_count(H) non-null pointers,
396 in no particular order, followed by a null pointer. The hash
397 table is not modified. The caller is responsible for freeing
400 If you don't need to search or modify the hash table, then
401 hsh_data() is a more efficient choice. */
403 hsh_data_copy (struct hsh_table *h)
408 copy = xnmalloc ((h->used + 1), sizeof *copy);
409 copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL);
410 copy[h->used] = NULL;
414 /* Makes and returns a copy of the pointers to the data in H.
415 The returned data consists of hsh_count(H) non-null pointers,
416 sorted in order of the hash comparison function, followed by a
417 null pointer. The hash table is not modified. The caller is
418 responsible for freeing the allocated data.
420 If you don't need to search or modify the hash table, then
421 hsh_sort() is a more efficient choice. */
423 hsh_sort_copy (struct hsh_table *h)
428 copy = hsh_data_copy (h);
429 sort (copy, h->used, sizeof *copy, comparison_helper, h);
435 /* Searches hash table H for TARGET. If found, returns a pointer
436 to a pointer to that entry; otherwise returns a pointer to a
437 NULL entry which *must* be used to insert a new entry having
438 the same key data. */
440 hsh_probe (struct hsh_table *h, const void *target)
445 assert (target != NULL);
446 assert (h->hash_ordered);
448 if (h->used > h->size / 2)
449 rehash (h, h->size * 2);
450 i = locate_matching_entry (h, target);
451 if (h->entries[i] == NULL)
453 return &h->entries[i];
456 /* Searches hash table H for TARGET. If not found, inserts
457 TARGET and returns a null pointer. If found, returns the
458 match, without replacing it in the table. */
460 hsh_insert (struct hsh_table *h, void *target)
465 assert (target != NULL);
467 entry = hsh_probe (h, target);
477 /* Searches hash table H for TARGET. If not found, inserts
478 TARGET and returns a null pointer. If found, returns the
479 match, after replacing it in the table by TARGET. */
481 hsh_replace (struct hsh_table *h, void *target)
483 void **entry = hsh_probe (h, target);
489 /* Returns the entry in hash table H that matches TARGET, or NULL
492 hsh_find (struct hsh_table *h, const void *target)
494 return h->entries[locate_matching_entry (h, target)];
497 /* Deletes the entry in hash table H that matches TARGET.
498 Returns nonzero if an entry was deleted.
500 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
501 Because our load factor is at most 1/2, the average number of
502 moves that this algorithm makes should be at most 2 - ln 2 ~=
505 hsh_delete (struct hsh_table *h, const void *target)
507 unsigned i = locate_matching_entry (h, target);
508 if (h->entries[i] != NULL)
512 h->free (h->entries[i], h->aux);
519 h->entries[i] = NULL;
523 i = (i - 1) & (h->size - 1);
524 if (h->entries[i] == NULL)
527 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
529 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
530 h->entries[j] = h->entries[i];
539 /* Finds and returns an entry in TABLE, and initializes ITER for
540 use with hsh_next(). If TABLE is empty, returns a null
543 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
546 assert (iter != NULL);
549 return hsh_next (h, iter);
552 /* Iterates through TABLE with iterator ITER. Returns the next
553 entry in TABLE, or a null pointer after the last entry.
555 Entries are returned in an undefined order. Modifying TABLE
556 during iteration may cause some entries to be returned
557 multiple times or not at all. */
559 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
564 assert (iter != NULL);
565 assert (iter->next <= h->size);
567 for (i = iter->next; i < h->size; i++)
571 return h->entries[i];
574 iter->next = h->size;
578 /* Returns the number of items in H. */
580 hsh_count (struct hsh_table *h)
594 /* Displays contents of hash table H on stdout. */
596 hsh_dump (struct hsh_table *h)
598 void **entry = h->entries;
601 printf (_("hash table:"));
602 for (i = 0; i < h->size; i++)
603 printf (" %p", *entry++);
607 /* This wrapper around hsh_probe() assures that it returns a pointer
608 to a NULL pointer. This function is used when it is known that the
609 entry to be inserted does not already exist in the table. */
611 hsh_force_insert (struct hsh_table *h, void *p)
613 void **pp = hsh_probe (h, p);
614 assert (*pp == NULL);
618 /* This wrapper around hsh_find() assures that it returns non-NULL.
619 This function is for use when it is known that the entry being
620 searched for must exist in the table. */
622 hsh_force_find (struct hsh_table *h, const void *target)
624 void *found = hsh_find (h, target);
625 assert (found != NULL);
629 /* This wrapper for hsh_delete() verifies that an item really was
632 hsh_force_delete (struct hsh_table *h, const void *target)
634 int found = hsh_delete (h, target);