1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include "algorithm.h"
34 #define _(msgid) gettext (msgid)
36 /* Note for constructing hash functions:
38 You can store the hash values in the records, then compare hash
39 values (in the compare function) before bothering to compare keys.
40 Hash values can simply be returned from the records instead of
41 recalculating when rehashing. */
45 Since hash_probe and hash_find take void * pointers, it's easy to
46 pass a void ** to your data by accidentally inserting an `&'
47 reference operator where one shouldn't go. It took me an hour to
48 hunt down a bug like that once. */
50 /* Prime numbers and hash functions. */
52 /* Returns smallest power of 2 greater than X. */
54 next_power_of_2 (size_t x)
60 /* Turn off rightmost 1-bit in x. */
61 size_t y = x & (x - 1);
63 /* If y is 0 then x only had a single 1-bit. */
67 /* Otherwise turn off the next. */
72 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
73 #define FNV_32_PRIME 16777619u
74 #define FNV_32_BASIS 2166136261u
76 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
78 hsh_hash_bytes (const void *buf_, size_t size)
80 const unsigned char *buf = buf_;
87 hash = (hash * FNV_32_PRIME) ^ *buf++;
92 /* Fowler-Noll-Vo 32-bit hash, for strings. */
94 hsh_hash_string (const char *s_)
96 const unsigned char *s = s_;
103 hash = (hash * FNV_32_PRIME) ^ *s++;
108 /* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
110 hsh_hash_case_string (const char *s_)
112 const unsigned char *s = s_;
119 hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
128 return hsh_hash_bytes (&i, sizeof i);
131 /* Hash for double. */
133 hsh_hash_double (double d)
136 return hsh_hash_bytes (&d, sizeof d);
146 size_t used; /* Number of filled entries. */
147 size_t size; /* Number of entries (a power of 2). */
148 void **entries; /* Hash table proper. */
150 void *aux; /* Auxiliary data for comparison functions. */
151 hsh_compare_func *compare;
156 /* Set to false if hsh_data() or hsh_sort() has been called,
157 so that most hsh_*() functions may no longer be called. */
162 /* Creates a hash table with at least M entries. COMPARE is a
163 function that compares two entries and returns 0 if they are
164 identical, nonzero otherwise; HASH returns a nonnegative hash value
165 for an entry; FREE destroys an entry. */
167 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
168 hsh_free_func *free, void *aux)
173 assert (compare != NULL);
174 assert (hash != NULL);
176 h = xmalloc (sizeof *h);
180 h->size = next_power_of_2 (size);
181 h->entries = xmalloc (sizeof *h->entries * h->size);
182 for (i = 0; i < h->size; i++)
183 h->entries[i] = NULL;
185 h->compare = compare;
189 h->hash_ordered = true;
194 /* Destroys the contents of table H. */
196 hsh_clear (struct hsh_table *h)
202 for (i = 0; i < h->size; i++)
203 if (h->entries[i] != NULL)
204 h->free (h->entries[i], h->aux);
206 for (i = 0; i < h->size; i++)
207 h->entries[i] = NULL;
212 h->hash_ordered = true;
216 /* Destroys table H and all its contents. */
218 hsh_destroy (struct hsh_table *h)
225 for (i = 0; i < h->size; i++)
226 if (h->entries[i] != NULL)
227 h->free (h->entries[i], h->aux);
233 /* Locates an entry matching TARGET. Returns a pointer to the
234 entry, or a null pointer on failure. */
235 static inline unsigned
236 locate_matching_entry (struct hsh_table *h, const void *target)
238 unsigned i = h->hash (target, h->aux);
240 assert (h->hash_ordered);
245 entry = h->entries[i];
246 if (entry == NULL || !h->compare (entry, target, h->aux))
252 /* Changes the capacity of H to NEW_SIZE, which must be a
253 positive power of 2 at least as large as the number of
256 rehash (struct hsh_table *h, size_t new_size)
258 void **begin, **end, **table_p;
262 assert (new_size >= h->used);
264 /* Verify that NEW_SIZE is a positive power of 2. */
265 assert (new_size > 0 && (new_size & (new_size - 1)) == 0);
268 end = begin + h->size;
271 h->entries = xmalloc (sizeof *h->entries * h->size);
272 for (i = 0; i < h->size; i++)
273 h->entries[i] = NULL;
274 for (table_p = begin; table_p < end; table_p++)
276 void *entry = *table_p;
278 h->entries[locate_matching_entry (h, entry)] = entry;
283 h->hash_ordered = true;
287 /* A "algo_predicate_func" that returns nonzero if DATA points
288 to a non-null void. */
290 not_null (const void *data_, void *aux UNUSED)
292 void *const *data = data_;
294 return *data != NULL;
297 /* Compacts hash table H and returns a pointer to its data. The
298 returned data consists of hsh_count(H) non-null pointers, in
299 no particular order, followed by a null pointer.
301 After calling this function, only hsh_destroy() and
302 hsh_count() should be applied to H. hsh_first() and
303 hsh_next() could also be used, but you're better off just
304 iterating through the returned array.
306 This function is intended for use in situations where data
307 processing occurs in two phases. In the first phase, data is
308 added, removed, and searched for within a hash table. In the
309 second phase, the contents of the hash table are output and
310 the hash property itself is no longer of interest.
312 Use hsh_sort() instead, if the second phase wants data in
313 sorted order. Use hsh_data_copy() or hsh_sort_copy() instead,
314 if the second phase still needs to search the hash table. */
316 hsh_data (struct hsh_table *h)
321 n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL);
322 assert (n == h->used);
324 h->hash_ordered = false;
329 /* Dereferences void ** pointers and passes them to the hash
330 comparison function. */
332 comparison_helper (const void *a_, const void *b_, void *h_)
336 struct hsh_table *h = h_;
341 return h->compare (*a, *b, h->aux);
344 /* Sorts hash table H based on hash comparison function. The
345 returned data consists of hsh_count(H) non-null pointers,
346 sorted in order of the hash comparison function, followed by a
349 After calling this function, only hsh_destroy() and
350 hsh_count() should be applied to H. hsh_first() and
351 hsh_next() could also be used, but you're better off just
352 iterating through the returned array.
354 This function is intended for use in situations where data
355 processing occurs in two phases. In the first phase, data is
356 added, removed, and searched for within a hash table. In the
357 second phase, the contents of the hash table are output and
358 the hash property itself is no longer of interest.
360 Use hsh_data() instead, if the second phase doesn't need the
361 data in any particular order. Use hsh_data_copy() or
362 hsh_sort_copy() instead, if the second phase still needs to
363 search the hash table. */
365 hsh_sort (struct hsh_table *h)
370 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
374 /* Makes and returns a copy of the pointers to the data in H.
375 The returned data consists of hsh_count(H) non-null pointers,
376 in no particular order, followed by a null pointer. The hash
377 table is not modified. The caller is responsible for freeing
380 If you don't need to search or modify the hash table, then
381 hsh_data() is a more efficient choice. */
383 hsh_data_copy (struct hsh_table *h)
388 copy = xmalloc ((h->used + 1) * sizeof *copy);
389 copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL);
390 copy[h->used] = NULL;
394 /* Makes and returns a copy of the pointers to the data in H.
395 The returned data consists of hsh_count(H) non-null pointers,
396 sorted in order of the hash comparison function, followed by a
397 null pointer. The hash table is not modified. The caller is
398 responsible for freeing the allocated data.
400 If you don't need to search or modify the hash table, then
401 hsh_sort() is a more efficient choice. */
403 hsh_sort_copy (struct hsh_table *h)
408 copy = hsh_data_copy (h);
409 sort (copy, h->used, sizeof *copy, comparison_helper, h);
415 /* Searches hash table H for TARGET. If found, returns a pointer
416 to a pointer to that entry; otherwise returns a pointer to a
417 NULL entry which *must* be used to insert a new entry having
418 the same key data. */
420 hsh_probe (struct hsh_table *h, const void *target)
425 assert (target != NULL);
426 assert (h->hash_ordered);
428 if (h->used > h->size / 2)
429 rehash (h, h->size * 2);
430 i = locate_matching_entry (h, target);
431 if (h->entries[i] == NULL)
433 return &h->entries[i];
436 /* Searches hash table H for TARGET. If not found, inserts
437 TARGET and returns a null pointer. If found, returns the
438 match, without replacing it in the table. */
440 hsh_insert (struct hsh_table *h, void *target)
445 assert (target != NULL);
447 entry = hsh_probe (h, target);
457 /* Searches hash table H for TARGET. If not found, inserts
458 TARGET and returns a null pointer. If found, returns the
459 match, after replacing it in the table by TARGET. */
461 hsh_replace (struct hsh_table *h, void *target)
463 void **entry = hsh_probe (h, target);
469 /* Returns the entry in hash table H that matches TARGET, or NULL
472 hsh_find (struct hsh_table *h, const void *target)
474 return h->entries[locate_matching_entry (h, target)];
477 /* Deletes the entry in hash table H that matches TARGET.
478 Returns nonzero if an entry was deleted.
480 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
481 Because our load factor is at most 1/2, the average number of
482 moves that this algorithm makes should be at most 2 - ln 2 ~=
485 hsh_delete (struct hsh_table *h, const void *target)
487 unsigned i = locate_matching_entry (h, target);
488 if (h->entries[i] != NULL)
492 h->free (h->entries[i], h->aux);
499 h->entries[i] = NULL;
503 i = (i - 1) & (h->size - 1);
504 if (h->entries[i] == NULL)
507 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
509 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
510 h->entries[j] = h->entries[i];
519 /* Finds and returns an entry in TABLE, and initializes ITER for
520 use with hsh_next(). If TABLE is empty, returns a null
523 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
526 assert (iter != NULL);
529 return hsh_next (h, iter);
532 /* Iterates through TABLE with iterator ITER. Returns the next
533 entry in TABLE, or a null pointer after the last entry.
535 Entries are returned in an undefined order. Modifying TABLE
536 during iteration may cause some entries to be returned
537 multiple times or not at all. */
539 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
544 assert (iter != NULL);
545 assert (iter->next <= h->size);
547 for (i = iter->next; i < h->size; i++)
551 return h->entries[i];
554 iter->next = h->size;
558 /* Returns the number of items in H. */
560 hsh_count (struct hsh_table *h)
574 /* Displays contents of hash table H on stdout. */
576 hsh_dump (struct hsh_table *h)
578 void **entry = h->entries;
581 printf (_("hash table:"));
582 for (i = 0; i < h->size; i++)
583 printf (" %p", *entry++);
587 /* This wrapper around hsh_probe() assures that it returns a pointer
588 to a NULL pointer. This function is used when it is known that the
589 entry to be inserted does not already exist in the table. */
591 hsh_force_insert (struct hsh_table *h, void *p)
593 void **pp = hsh_probe (h, p);
594 assert (*pp == NULL);
598 /* This wrapper around hsh_find() assures that it returns non-NULL.
599 This function is for use when it is known that the entry being
600 searched for must exist in the table. */
602 hsh_force_find (struct hsh_table *h, const void *target)
604 void *found = hsh_find (h, target);
605 assert (found != NULL);
609 /* This wrapper for hsh_delete() verifies that an item really was
612 hsh_force_delete (struct hsh_table *h, const void *target)
614 int found = hsh_delete (h, target);