1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 #include "algorithm.h"
33 size_t used; /* Number of filled entries. */
34 size_t size; /* Number of entries (a power of 2). */
35 void **entries; /* Hash table proper. */
37 void *aux; /* Auxiliary data for comparison functions. */
38 hsh_compare_func *compare;
43 /* Note for constructing hash functions:
45 You can store the hash values in the records, then compare hash
46 values (in the compare function) before bothering to compare keys.
47 Hash values can simply be returned from the records instead of
48 recalculating when rehashing. */
52 Since hash_probe and hash_find take void * pointers, it's easy to
53 pass a void ** to your data by accidentally inserting an `&'
54 reference operator where one shouldn't go. It took me an hour to
55 hunt down a bug like that once. */
57 /* Prime numbers and hash functions. */
59 /* Returns smallest power of 2 greater than X. */
61 next_power_of_2 (size_t x)
67 /* Turn off rightmost 1-bit in x. */
68 size_t y = x & (x - 1);
70 /* If y is 0 then x only had a single 1-bit. */
74 /* Otherwise turn off the next. */
79 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
80 #define FNV_32_PRIME 16777619u
81 #define FNV_32_BASIS 2166136261u
83 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
85 hsh_hash_bytes (const void *buf_, size_t size)
87 const unsigned char *buf = buf_;
94 hash = (hash * FNV_32_PRIME) ^ *buf++;
99 /* Fowler-Noll-Vo 32-bit hash, for strings. */
101 hsh_hash_string (const char *s_)
103 const unsigned char *s = s_;
110 hash = (hash * FNV_32_PRIME) ^ *s++;
119 return hsh_hash_bytes (&i, sizeof i);
122 /* Hash for double. */
124 hsh_hash_double (double d)
127 return hsh_hash_bytes (&d, sizeof d);
134 /* Creates a hash table with at least M entries. COMPARE is a
135 function that compares two entries and returns 0 if they are
136 identical, nonzero otherwise; HASH returns a nonnegative hash value
137 for an entry; FREE destroys an entry. */
139 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
140 hsh_free_func *free, void *aux)
146 assert (compare != NULL);
147 assert (hash != NULL);
149 h = xmalloc (sizeof *h);
153 h->size = next_power_of_2 (size);
154 h->entries = xmalloc (sizeof *h->entries * h->size);
155 for (i = 0; i < h->size; i++)
156 h->entries[i] = NULL;
158 h->compare = compare;
164 /* Destroys the contents of table H. */
166 hsh_clear (struct hsh_table *h)
172 for (i = 0; i < h->size; i++)
173 if (h->entries[i] != NULL)
174 h->free (h->entries[i], h->aux);
176 for (i = 0; i < h->size; i++)
177 h->entries[i] = NULL;
182 /* Destroys table H and all its contents. */
184 hsh_destroy (struct hsh_table *h)
191 for (i = 0; i < h->size; i++)
192 if (h->entries[i] != NULL)
193 h->free (h->entries[i], h->aux);
199 /* Changes the capacity of H to NEW_SIZE. */
201 hsh_rehash (struct hsh_table *h, size_t new_size)
203 void **begin, **end, **table_p;
207 assert (new_size >= h->used);
210 end = begin + h->size;
213 h->entries = xmalloc (sizeof *h->entries * h->size);
214 for (i = 0; i < h->size; i++)
215 h->entries[i] = NULL;
216 for (table_p = begin; table_p < end; table_p++)
217 if (*table_p != NULL)
219 void **entry = &h->entries[h->hash (*table_p, h->aux) & (h->size - 1)];
220 while (*entry != NULL)
221 if (++entry >= h->entries + h->size)
228 /* A "algo_predicate_func" that returns nonzero if DATA points
229 to a non-null void. */
231 not_null (const void *data_, void *aux UNUSED)
233 void *const *data = data_;
235 return *data != NULL;
238 /* Compacts hash table H and returns a pointer to its data. The
239 returned data consists of hsh_count(H) non-null pointers, in
240 no particular order, followed by a null pointer. After
241 calling this function, only hsh_destroy() and hsh_count() may
244 hsh_data (struct hsh_table *h)
249 n = partition (h->entries, h->size, sizeof *h->entries,
251 assert (n == h->used);
255 /* Dereferences void ** pointers and passes them to the hash
256 comparison function. */
258 comparison_helper (const void *a_, const void *b_, void *h_)
262 struct hsh_table *h = h_;
267 return h->compare (*a, *b, h->aux);
270 /* Sorts hash table H based on hash comparison function. The
271 returned data consists of hsh_count(H) non-null pointers,
272 sorted in order of the hash comparison function, followed by a
273 null pointer. After calling this function, only hsh_destroy()
274 and hsh_count() may be applied to H. */
276 hsh_sort (struct hsh_table *h)
281 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
285 /* Makes and returns a copy of the pointers to the data in H.
286 The returned data consists of hsh_count(H) non-null pointers,
287 in no particular order, followed by a null pointer. The hash
288 table is not modified. The caller is responsible for freeing
289 the allocated data. */
291 hsh_data_copy (struct hsh_table *h)
296 copy = xmalloc ((h->used + 1) * sizeof *copy);
297 copy_if (h->entries, h->size, sizeof *h->entries, copy,
299 copy[h->used] = NULL;
303 /* Makes and returns a copy of the pointers to the data in H.
304 The returned data consists of hsh_count(H) non-null pointers,
305 sorted in order of the hash comparison function, followed by a
306 null pointer. The hash table is not modified. The caller is
307 responsible for freeing the allocated data. */
309 hsh_sort_copy (struct hsh_table *h)
314 copy = hsh_data_copy (h);
315 sort (copy, h->used, sizeof *copy, comparison_helper, h);
321 /* Searches hash table H for TARGET. If found, returns a pointer
322 to a pointer to that entry; otherwise returns a pointer to a
323 NULL entry which *must* be used to insert a new entry having
324 the same key data. */
326 hsh_probe (struct hsh_table *h, const void *target)
331 assert (target != NULL);
333 /* Order of these statements is important! */
334 if (h->used > h->size / 2)
335 hsh_rehash (h, h->size * 2);
336 entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)];
340 if (!h->compare (*entry, target, h->aux))
342 if (++entry >= h->entries + h->size)
349 /* Searches hash table H for TARGET. If not found, inserts
350 TARGET and returns a null pointer. If found, returns the
351 match, without replacing it in the table. */
353 hsh_insert (struct hsh_table *h, void *target)
358 assert (target != NULL);
360 entry = hsh_probe (h, target);
370 /* Searches hash table H for TARGET. If not found, inserts
371 TARGET and returns a null pointer. If found, returns the
372 match, after replacing it in the table by TARGET. */
374 hsh_replace (struct hsh_table *h, void *target)
376 void **entry = hsh_probe (h, target);
382 /* Locates an entry matching TARGET. Returns a pointer to the
383 entry, or a null pointer on failure. */
384 static inline void **
385 locate_matching_entry (struct hsh_table *h, const void *target)
387 void **entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)];
391 if (!h->compare (*entry, target, h->aux))
393 if (++entry >= h->entries + h->size)
399 /* Returns the entry in hash table H that matches TARGET, or NULL
402 hsh_find (struct hsh_table *h, const void *target)
404 void **entry = locate_matching_entry (h, target);
405 return entry != NULL ? *entry : NULL;
408 /* Deletes the entry in hash table H that matches TARGET.
409 Returns nonzero if an entry was deleted.
411 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
412 Because our load factor is at most 1/2, the average number of
413 moves that this algorithm makes should be at most 2 - ln 2 ~=
418 hsh_delete (struct hsh_table *h, const void *target)
420 void **entry = locate_matching_entry (h, target);
427 h->free (*entry, h->aux);
430 i = entry - h->entries;
440 if (h->entries[i] == NULL)
443 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
445 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
446 h->entries[i] = h->entries[j];
455 /* Finds and returns an entry in TABLE, and initializes ITER for
456 use with hsh_next(). If TABLE is empty, returns a null
459 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
462 assert (iter != NULL);
465 return hsh_next (h, iter);
468 /* Iterates through TABLE with iterator ITER. Returns the next
469 entry in TABLE, or a null pointer after the last entry.
471 Entries are returned in an undefined order. Modifying TABLE
472 during iteration may cause some entries to be returned
473 multiple times or not at all. */
475 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
480 assert (iter != NULL);
481 assert (iter->next <= h->size);
483 for (i = iter->next; i < h->size; i++)
487 return h->entries[i];
490 iter->next = h->size;
494 /* Returns the number of items in H. */
496 hsh_count (struct hsh_table *h)
510 /* Displays contents of hash table H on stdout. */
512 hsh_dump (struct hsh_table *h)
514 void **entry = h->entries;
517 printf (_("hash table:"));
518 for (i = 0; i < h->size; i++)
519 printf (" %p", *entry++);
523 /* This wrapper around hsh_probe() assures that it returns a pointer
524 to a NULL pointer. This function is used when it is known that the
525 entry to be inserted does not already exist in the table. */
527 hsh_force_insert (struct hsh_table *h, void *p)
529 void **pp = hsh_probe (h, p);
530 assert (*pp == NULL);
534 /* This wrapper around hsh_find() assures that it returns non-NULL.
535 This function is for use when it is known that the entry being
536 searched for must exist in the table. */
538 hsh_force_find (struct hsh_table *h, const void *target)
540 void *found = hsh_find (h, target);
541 assert (found != NULL);
545 /* This wrapper for hsh_delete() verifies that an item really was
548 hsh_force_delete (struct hsh_table *h, const void *target)
550 int found = hsh_delete (h, target);