1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 #include "algorithm.h"
33 size_t used; /* Number of filled entries. */
34 size_t size; /* Number of entries (a power of 2). */
35 void **entries; /* Hash table proper. */
37 void *aux; /* Auxiliary data for comparison functions. */
38 hsh_compare_func *compare;
43 /* Note for constructing hash functions:
45 You can store the hash values in the records, then compare hash
46 values (in the compare function) before bothering to compare keys.
47 Hash values can simply be returned from the records instead of
48 recalculating when rehashing. */
52 Since hash_probe and hash_find take void * pointers, it's easy to
53 pass a void ** to your data by accidentally inserting an `&'
54 reference operator where one shouldn't go. It took me an hour to
55 hunt down a bug like that once. */
57 /* Prime numbers and hash functions. */
59 /* Returns smallest power of 2 greater than X. */
61 next_power_of_2 (size_t x)
67 /* Turn off rightmost 1-bit in x. */
68 size_t y = x & (x - 1);
70 /* If y is 0 then x only had a single 1-bit. */
74 /* Otherwise turn off the next. */
79 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
80 #define FNV_32_PRIME 16777619u
81 #define FNV_32_BASIS 2166136261u
83 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
85 hsh_hash_bytes (const void *buf_, size_t size)
87 const unsigned char *buf = buf_;
94 hash = (hash * FNV_32_PRIME) ^ *buf++;
99 /* Fowler-Noll-Vo 32-bit hash, for strings. */
101 hsh_hash_string (const char *s_)
103 const unsigned char *s = s_;
110 hash = (hash * FNV_32_PRIME) ^ *s++;
119 return hsh_hash_bytes (&i, sizeof i);
122 /* Hash for double. */
124 hsh_hash_double (double d)
127 return hsh_hash_bytes (&d, sizeof d);
134 /* Creates a hash table with at least M entries. COMPARE is a
135 function that compares two entries and returns 0 if they are
136 identical, nonzero otherwise; HASH returns a nonnegative hash value
137 for an entry; FREE destroys an entry. */
139 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
140 hsh_free_func *free, void *aux)
146 assert (compare != NULL);
147 assert (hash != NULL);
149 h = xmalloc (sizeof *h);
153 h->size = next_power_of_2 (size);
154 h->entries = xmalloc (sizeof *h->entries * h->size);
155 for (i = 0; i < h->size; i++)
156 h->entries[i] = NULL;
158 h->compare = compare;
164 /* Destroys the contents of table H. */
166 hsh_clear (struct hsh_table *h)
172 for (i = 0; i < h->size; i++)
173 if (h->entries[i] != NULL)
174 h->free (h->entries[i], h->aux);
176 for (i = 0; i < h->size; i++)
177 h->entries[i] = NULL;
180 /* Destroys table H and all its contents. */
182 hsh_destroy (struct hsh_table *h)
189 for (i = 0; i < h->size; i++)
190 if (h->entries[i] != NULL)
191 h->free (h->entries[i], h->aux);
197 /* Changes the capacity of H to NEW_SIZE. */
199 hsh_rehash (struct hsh_table *h, size_t new_size)
201 void **begin, **end, **table_p;
205 assert (new_size >= h->used);
208 end = begin + h->size;
211 h->entries = xmalloc (sizeof *h->entries * h->size);
212 for (i = 0; i < h->size; i++)
213 h->entries[i] = NULL;
214 for (table_p = begin; table_p < end; table_p++)
215 if (*table_p != NULL)
217 void **entry = &h->entries[h->hash (*table_p, h->aux) & (h->size - 1)];
218 while (*entry != NULL)
219 if (++entry >= h->entries + h->size)
226 /* A "algo_predicate_func" that returns nonzero if DATA points
227 to a non-null void. */
229 not_null (const void *data_, void *aux UNUSED)
231 void *const *data = data_;
233 return *data != NULL;
236 /* Compacts hash table H and returns a pointer to its data. The
237 returned data consists of hsh_count(H) non-null pointers, in
238 no particular order, followed by a null pointer. After
239 calling this function, only hsh_destroy() and hsh_count() may
242 hsh_data (struct hsh_table *h)
247 n = partition (h->entries, h->size, sizeof *h->entries,
249 assert (n == h->used);
253 /* Dereferences void ** pointers and passes them to the hash
254 comparison function. */
256 comparison_helper (const void *a_, const void *b_, void *h_)
260 struct hsh_table *h = h_;
262 return h->compare (*a, *b, h->aux);
265 /* Sorts hash table H based on hash comparison function. The
266 returned data consists of hsh_count(H) non-null pointers,
267 sorted in order of the hash comparison function, followed by a
268 null pointer. After calling this function, only hsh_destroy()
269 and hsh_count() may be applied to H. */
271 hsh_sort (struct hsh_table *h)
276 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
280 /* Makes and returns a copy of the pointers to the data in H.
281 The returned data consists of hsh_count(H) non-null pointers,
282 in no particular order, followed by a null pointer. The hash
283 table is not modified. The caller is responsible for freeing
284 the allocated data. */
286 hsh_data_copy (struct hsh_table *h)
291 copy = xmalloc ((h->used + 1) * sizeof *copy);
292 copy_if (h->entries, h->size, sizeof *h->entries, copy,
294 copy[h->used] = NULL;
298 /* Makes and returns a copy of the pointers to the data in H.
299 The returned data consists of hsh_count(H) non-null pointers,
300 sorted in order of the hash comparison function, followed by a
301 null pointer. The hash table is not modified. The caller is
302 responsible for freeing the allocated data. */
304 hsh_sort_copy (struct hsh_table *h)
309 copy = hsh_data_copy (h);
310 sort (copy, h->used, sizeof *copy, comparison_helper, h);
316 /* Searches hash table H for TARGET. If found, returns a pointer
317 to a pointer to that entry; otherwise returns a pointer to a
318 NULL entry which *must* be used to insert a new entry having
319 the same key data. */
321 hsh_probe (struct hsh_table *h, const void *target)
326 assert (target != NULL);
328 /* Order of these statements is important! */
329 if (h->used > h->size / 2)
330 hsh_rehash (h, h->size * 2);
331 entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)];
335 if (!h->compare (*entry, target, h->aux))
337 if (++entry >= h->entries + h->size)
344 /* Searches hash table H for TARGET. If not found, inserts
345 TARGET and returns a null pointer. If found, returns the
346 match, without replacing it in the table. */
348 hsh_insert (struct hsh_table *h, void *target)
353 assert (target != NULL);
355 entry = hsh_probe (h, target);
365 /* Searches hash table H for TARGET. If not found, inserts
366 TARGET and returns a null pointer. If found, returns the
367 match, after replacing it in the table by TARGET. */
369 hsh_replace (struct hsh_table *h, void *target)
371 void **entry = hsh_probe (h, target);
377 /* Locates an entry matching TARGET. Returns a pointer to the
378 entry, or a null pointer on failure. */
379 static inline void **
380 locate_matching_entry (struct hsh_table *h, const void *target)
382 void **entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)];
386 if (!h->compare (*entry, target, h->aux))
388 if (++entry >= h->entries + h->size)
394 /* Returns the entry in hash table H that matches TARGET, or NULL
397 hsh_find (struct hsh_table *h, const void *target)
399 void **entry = locate_matching_entry (h, target);
400 return entry != NULL ? *entry : NULL;
403 /* Deletes the entry in hash table H that matches TARGET.
404 Returns nonzero if an entry was deleted.
406 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
407 Because our load factor is at most 1/2, the average number of
408 moves that this algorithm makes should be at most 2 - ln 2 ~=
413 hsh_delete (struct hsh_table *h, const void *target)
415 void **entry = locate_matching_entry (h, target);
422 h->free (*entry, h->aux);
425 i = entry - h->entries;
435 if (h->entries[i] == NULL)
438 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
440 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
441 h->entries[i] = h->entries[j];
450 /* Finds and returns an entry in TABLE, and initializes ITER for
451 use with hsh_next(). If TABLE is empty, returns a null
454 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
457 assert (iter != NULL);
460 return hsh_next (h, iter);
463 /* Iterates through TABLE with iterator ITER. Returns the next
464 entry in TABLE, or a null pointer after the last entry.
466 Entries are returned in an undefined order. Modifying TABLE
467 during iteration may cause some entries to be returned
468 multiple times or not at all. */
470 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
475 assert (iter != NULL);
476 assert (iter->next <= h->size);
478 for (i = iter->next; i < h->size; i++)
482 return h->entries[i];
485 iter->next = h->size;
489 /* Returns the number of items in H. */
491 hsh_count (struct hsh_table *h)
505 /* Displays contents of hash table H on stdout. */
507 hsh_dump (struct hsh_table *h)
509 void **entry = h->entries;
512 printf (_("hash table:"));
513 for (i = 0; i < h->size; i++)
514 printf (" %p", *entry++);
518 /* This wrapper around hsh_probe() assures that it returns a pointer
519 to a NULL pointer. This function is used when it is known that the
520 entry to be inserted does not already exist in the table. */
522 hsh_force_insert (struct hsh_table *h, void *p)
524 void **pp = hsh_probe (h, p);
525 assert (*pp == NULL);
529 /* This wrapper around hsh_find() assures that it returns non-NULL.
530 This function is for use when it is known that the entry being
531 searched for must exist in the table. */
533 hsh_force_find (struct hsh_table *h, const void *target)
535 void *found = hsh_find (h, target);
536 assert (found != NULL);
540 /* This wrapper for hsh_delete() verifies that an item really was
543 hsh_force_delete (struct hsh_table *h, const void *target)
545 int found = hsh_delete (h, target);