1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 #include "algorithm.h"
33 size_t used; /* Number of filled entries. */
34 size_t size; /* Number of entries (a power of 2). */
35 void **entries; /* Hash table proper. */
37 void *aux; /* Auxiliary data for comparison functions. */
38 hsh_compare_func *compare;
43 /* Note for constructing hash functions:
45 You can store the hash values in the records, then compare hash
46 values (in the compare function) before bothering to compare keys.
47 Hash values can simply be returned from the records instead of
48 recalculating when rehashing. */
52 Since hash_probe and hash_find take void * pointers, it's easy to
53 pass a void ** to your data by accidentally inserting an `&'
54 reference operator where one shouldn't go. It took me an hour to
55 hunt down a bug like that once. */
57 /* Prime numbers and hash functions. */
59 /* Returns smallest power of 2 greater than X. */
61 next_power_of_2 (size_t x)
67 /* Turn off rightmost 1-bit in x. */
68 size_t y = x & (x - 1);
70 /* If y is 0 then x only had a single 1-bit. */
74 /* Otherwise turn off the next. */
79 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
80 #define FNV_32_PRIME 16777619u
81 #define FNV_32_BASIS 2166136261u
83 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
85 hsh_hash_bytes (const void *buf_, size_t size)
87 const unsigned char *buf = buf_;
94 hash = (hash * FNV_32_PRIME) ^ *buf++;
99 /* Fowler-Noll-Vo 32-bit hash, for strings. */
101 hsh_hash_string (const char *s_)
103 const unsigned char *s = s_;
110 hash = (hash * FNV_32_PRIME) ^ *s++;
119 return hsh_hash_bytes (&i, sizeof i);
122 /* Hash for double. */
124 hsh_hash_double (double d)
127 return hsh_hash_bytes (&d, sizeof d);
134 /* Creates a hash table with at least M entries. COMPARE is a
135 function that compares two entries and returns 0 if they are
136 identical, nonzero otherwise; HASH returns a nonnegative hash value
137 for an entry; FREE destroys an entry. */
139 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
140 hsh_free_func *free, void *aux)
148 assert (compare != NULL);
149 assert (hash != NULL);
151 h = xmalloc (sizeof *h);
155 h->size = next_power_of_2 (size);
156 h->entries = xmalloc (sizeof *h->entries * h->size);
157 for (i = 0; i < h->size; i++)
158 h->entries[i] = NULL;
160 h->compare = compare;
166 /* Destroys the contents of table H. */
168 hsh_clear (struct hsh_table *h)
174 for (i = 0; i < h->size; i++)
175 if (h->entries[i] != NULL)
176 h->free (h->entries[i], h->aux);
178 for (i = 0; i < h->size; i++)
179 h->entries[i] = NULL;
184 /* Destroys table H and all its contents. */
186 hsh_destroy (struct hsh_table *h)
193 for (i = 0; i < h->size; i++)
194 if (h->entries[i] != NULL)
195 h->free (h->entries[i], h->aux);
201 /* Changes the capacity of H to NEW_SIZE. */
203 hsh_rehash (struct hsh_table *h, size_t new_size)
205 void **begin, **end, **table_p;
209 assert (new_size >= h->used);
212 end = begin + h->size;
215 h->entries = xmalloc (sizeof *h->entries * h->size);
216 for (i = 0; i < h->size; i++)
217 h->entries[i] = NULL;
218 for (table_p = begin; table_p < end; table_p++)
219 if (*table_p != NULL)
221 void **entry = &h->entries[h->hash (*table_p, h->aux) & (h->size - 1)];
222 while (*entry != NULL)
223 if (++entry >= h->entries + h->size)
230 /* A "algo_predicate_func" that returns nonzero if DATA points
231 to a non-null void. */
233 not_null (const void *data_, void *aux UNUSED)
235 void *const *data = data_;
237 return *data != NULL;
240 /* Compacts hash table H and returns a pointer to its data. The
241 returned data consists of hsh_count(H) non-null pointers, in
242 no particular order, followed by a null pointer. After
243 calling this function, only hsh_destroy() and hsh_count() may
246 hsh_data (struct hsh_table *h)
251 n = partition (h->entries, h->size, sizeof *h->entries,
253 assert (n == h->used);
257 /* Dereferences void ** pointers and passes them to the hash
258 comparison function. */
260 comparison_helper (const void *a_, const void *b_, void *h_)
264 struct hsh_table *h = h_;
269 return h->compare (*a, *b, h->aux);
272 /* Sorts hash table H based on hash comparison function. The
273 returned data consists of hsh_count(H) non-null pointers,
274 sorted in order of the hash comparison function, followed by a
275 null pointer. After calling this function, only hsh_destroy()
276 and hsh_count() may be applied to H. */
278 hsh_sort (struct hsh_table *h)
283 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
287 /* Makes and returns a copy of the pointers to the data in H.
288 The returned data consists of hsh_count(H) non-null pointers,
289 in no particular order, followed by a null pointer. The hash
290 table is not modified. The caller is responsible for freeing
291 the allocated data. */
293 hsh_data_copy (struct hsh_table *h)
298 copy = xmalloc ((h->used + 1) * sizeof *copy);
299 copy_if (h->entries, h->size, sizeof *h->entries, copy,
301 copy[h->used] = NULL;
305 /* Makes and returns a copy of the pointers to the data in H.
306 The returned data consists of hsh_count(H) non-null pointers,
307 sorted in order of the hash comparison function, followed by a
308 null pointer. The hash table is not modified. The caller is
309 responsible for freeing the allocated data. */
311 hsh_sort_copy (struct hsh_table *h)
316 copy = hsh_data_copy (h);
317 sort (copy, h->used, sizeof *copy, comparison_helper, h);
323 /* Searches hash table H for TARGET. If found, returns a pointer
324 to a pointer to that entry; otherwise returns a pointer to a
325 NULL entry which *must* be used to insert a new entry having
326 the same key data. */
328 hsh_probe (struct hsh_table *h, const void *target)
333 assert (target != NULL);
335 /* Order of these statements is important! */
336 if (h->used > h->size / 2)
337 hsh_rehash (h, h->size * 2);
338 entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)];
342 if (!h->compare (*entry, target, h->aux))
344 if (++entry >= h->entries + h->size)
351 /* Searches hash table H for TARGET. If not found, inserts
352 TARGET and returns a null pointer. If found, returns the
353 match, without replacing it in the table. */
355 hsh_insert (struct hsh_table *h, void *target)
360 assert (target != NULL);
362 entry = hsh_probe (h, target);
372 /* Searches hash table H for TARGET. If not found, inserts
373 TARGET and returns a null pointer. If found, returns the
374 match, after replacing it in the table by TARGET. */
376 hsh_replace (struct hsh_table *h, void *target)
378 void **entry = hsh_probe (h, target);
384 /* Locates an entry matching TARGET. Returns a pointer to the
385 entry, or a null pointer on failure. */
386 static inline void **
387 locate_matching_entry (struct hsh_table *h, const void *target)
389 void **entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)];
393 if (!h->compare (*entry, target, h->aux))
395 if (++entry >= h->entries + h->size)
401 /* Returns the entry in hash table H that matches TARGET, or NULL
404 hsh_find (struct hsh_table *h, const void *target)
406 void **entry = locate_matching_entry (h, target);
407 return entry != NULL ? *entry : NULL;
410 /* Deletes the entry in hash table H that matches TARGET.
411 Returns nonzero if an entry was deleted.
413 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
414 Because our load factor is at most 1/2, the average number of
415 moves that this algorithm makes should be at most 2 - ln 2 ~=
420 hsh_delete (struct hsh_table *h, const void *target)
422 void **entry = locate_matching_entry (h, target);
429 h->free (*entry, h->aux);
432 i = entry - h->entries;
442 if (h->entries[i] == NULL)
445 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
447 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
448 h->entries[i] = h->entries[j];
457 /* Finds and returns an entry in TABLE, and initializes ITER for
458 use with hsh_next(). If TABLE is empty, returns a null
461 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
464 assert (iter != NULL);
467 return hsh_next (h, iter);
470 /* Iterates through TABLE with iterator ITER. Returns the next
471 entry in TABLE, or a null pointer after the last entry.
473 Entries are returned in an undefined order. Modifying TABLE
474 during iteration may cause some entries to be returned
475 multiple times or not at all. */
477 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
482 assert (iter != NULL);
483 assert (iter->next <= h->size);
485 for (i = iter->next; i < h->size; i++)
489 return h->entries[i];
492 iter->next = h->size;
496 /* Returns the number of items in H. */
498 hsh_count (struct hsh_table *h)
512 /* Displays contents of hash table H on stdout. */
514 hsh_dump (struct hsh_table *h)
516 void **entry = h->entries;
519 printf (_("hash table:"));
520 for (i = 0; i < h->size; i++)
521 printf (" %p", *entry++);
525 /* This wrapper around hsh_probe() assures that it returns a pointer
526 to a NULL pointer. This function is used when it is known that the
527 entry to be inserted does not already exist in the table. */
529 hsh_force_insert (struct hsh_table *h, void *p)
531 void **pp = hsh_probe (h, p);
532 assert (*pp == NULL);
536 /* This wrapper around hsh_find() assures that it returns non-NULL.
537 This function is for use when it is known that the entry being
538 searched for must exist in the table. */
540 hsh_force_find (struct hsh_table *h, const void *target)
542 void *found = hsh_find (h, target);
543 assert (found != NULL);
547 /* This wrapper for hsh_delete() verifies that an item really was
550 hsh_force_delete (struct hsh_table *h, const void *target)
552 int found = hsh_delete (h, target);