1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 #include "algorithm.h"
31 /* Note for constructing hash functions:
33 You can store the hash values in the records, then compare hash
34 values (in the compare function) before bothering to compare keys.
35 Hash values can simply be returned from the records instead of
36 recalculating when rehashing. */
40 Since hash_probe and hash_find take void * pointers, it's easy to
41 pass a void ** to your data by accidentally inserting an `&'
42 reference operator where one shouldn't go. It took me an hour to
43 hunt down a bug like that once. */
45 /* Prime numbers and hash functions. */
47 /* Returns smallest power of 2 greater than X. */
49 next_power_of_2 (size_t x)
55 /* Turn off rightmost 1-bit in x. */
56 size_t y = x & (x - 1);
58 /* If y is 0 then x only had a single 1-bit. */
62 /* Otherwise turn off the next. */
67 /* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
68 #define FNV_32_PRIME 16777619u
69 #define FNV_32_BASIS 2166136261u
71 /* Fowler-Noll-Vo 32-bit hash, for bytes. */
73 hsh_hash_bytes (const void *buf_, size_t size)
75 const unsigned char *buf = buf_;
82 hash = (hash * FNV_32_PRIME) ^ *buf++;
87 /* Fowler-Noll-Vo 32-bit hash, for strings. */
89 hsh_hash_string (const char *s_)
91 const unsigned char *s = s_;
98 hash = (hash * FNV_32_PRIME) ^ *s++;
103 /* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
105 hsh_hash_case_string (const char *s_)
107 const unsigned char *s = s_;
114 hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
123 return hsh_hash_bytes (&i, sizeof i);
126 /* Hash for double. */
128 hsh_hash_double (double d)
131 return hsh_hash_bytes (&d, sizeof d);
141 size_t used; /* Number of filled entries. */
142 size_t size; /* Number of entries (a power of 2). */
143 void **entries; /* Hash table proper. */
145 void *aux; /* Auxiliary data for comparison functions. */
146 hsh_compare_func *compare;
151 /* Creates a hash table with at least M entries. COMPARE is a
152 function that compares two entries and returns 0 if they are
153 identical, nonzero otherwise; HASH returns a nonnegative hash value
154 for an entry; FREE destroys an entry. */
156 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
157 hsh_free_func *free, void *aux)
165 assert (compare != NULL);
166 assert (hash != NULL);
168 h = xmalloc (sizeof *h);
172 h->size = next_power_of_2 (size);
173 h->entries = xmalloc (sizeof *h->entries * h->size);
174 for (i = 0; i < h->size; i++)
175 h->entries[i] = NULL;
177 h->compare = compare;
183 /* Destroys the contents of table H. */
185 hsh_clear (struct hsh_table *h)
191 for (i = 0; i < h->size; i++)
192 if (h->entries[i] != NULL)
193 h->free (h->entries[i], h->aux);
195 for (i = 0; i < h->size; i++)
196 h->entries[i] = NULL;
201 /* Destroys table H and all its contents. */
203 hsh_destroy (struct hsh_table *h)
210 for (i = 0; i < h->size; i++)
211 if (h->entries[i] != NULL)
212 h->free (h->entries[i], h->aux);
218 /* Locates an entry matching TARGET. Returns a pointer to the
219 entry, or a null pointer on failure. */
220 static inline unsigned
221 locate_matching_entry (struct hsh_table *h, const void *target)
223 unsigned i = h->hash (target, h->aux);
229 entry = h->entries[i];
230 if (entry == NULL || !h->compare (entry, target, h->aux))
236 /* Changes the capacity of H to NEW_SIZE. */
238 hsh_rehash (struct hsh_table *h, size_t new_size)
240 void **begin, **end, **table_p;
244 assert (new_size >= h->used);
247 end = begin + h->size;
250 h->entries = xmalloc (sizeof *h->entries * h->size);
251 for (i = 0; i < h->size; i++)
252 h->entries[i] = NULL;
253 for (table_p = begin; table_p < end; table_p++)
255 void *entry = *table_p;
257 h->entries[locate_matching_entry (h, entry)] = entry;
262 /* A "algo_predicate_func" that returns nonzero if DATA points
263 to a non-null void. */
265 not_null (const void *data_, void *aux UNUSED)
267 void *const *data = data_;
269 return *data != NULL;
272 /* Compacts hash table H and returns a pointer to its data. The
273 returned data consists of hsh_count(H) non-null pointers, in
274 no particular order, followed by a null pointer. After
275 calling this function, only hsh_destroy() and hsh_count() may
278 hsh_data (struct hsh_table *h)
283 n = partition (h->entries, h->size, sizeof *h->entries,
285 assert (n == h->used);
289 /* Dereferences void ** pointers and passes them to the hash
290 comparison function. */
292 comparison_helper (const void *a_, const void *b_, void *h_)
296 struct hsh_table *h = h_;
301 return h->compare (*a, *b, h->aux);
304 /* Sorts hash table H based on hash comparison function. The
305 returned data consists of hsh_count(H) non-null pointers,
306 sorted in order of the hash comparison function, followed by a
307 null pointer. After calling this function, only hsh_destroy()
308 and hsh_count() may be applied to H. */
310 hsh_sort (struct hsh_table *h)
315 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
319 /* Makes and returns a copy of the pointers to the data in H.
320 The returned data consists of hsh_count(H) non-null pointers,
321 in no particular order, followed by a null pointer. The hash
322 table is not modified. The caller is responsible for freeing
323 the allocated data. */
325 hsh_data_copy (struct hsh_table *h)
330 copy = xmalloc ((h->used + 1) * sizeof *copy);
331 copy_if (h->entries, h->size, sizeof *h->entries, copy,
333 copy[h->used] = NULL;
337 /* Makes and returns a copy of the pointers to the data in H.
338 The returned data consists of hsh_count(H) non-null pointers,
339 sorted in order of the hash comparison function, followed by a
340 null pointer. The hash table is not modified. The caller is
341 responsible for freeing the allocated data. */
343 hsh_sort_copy (struct hsh_table *h)
348 copy = hsh_data_copy (h);
349 sort (copy, h->used, sizeof *copy, comparison_helper, h);
355 /* Searches hash table H for TARGET. If found, returns a pointer
356 to a pointer to that entry; otherwise returns a pointer to a
357 NULL entry which *must* be used to insert a new entry having
358 the same key data. */
360 hsh_probe (struct hsh_table *h, const void *target)
365 assert (target != NULL);
367 if (h->used > h->size / 2)
368 hsh_rehash (h, h->size * 2);
369 i = locate_matching_entry (h, target);
370 if (h->entries[i] == NULL)
372 return &h->entries[i];
375 /* Searches hash table H for TARGET. If not found, inserts
376 TARGET and returns a null pointer. If found, returns the
377 match, without replacing it in the table. */
379 hsh_insert (struct hsh_table *h, void *target)
384 assert (target != NULL);
386 entry = hsh_probe (h, target);
396 /* Searches hash table H for TARGET. If not found, inserts
397 TARGET and returns a null pointer. If found, returns the
398 match, after replacing it in the table by TARGET. */
400 hsh_replace (struct hsh_table *h, void *target)
402 void **entry = hsh_probe (h, target);
408 /* Returns the entry in hash table H that matches TARGET, or NULL
411 hsh_find (struct hsh_table *h, const void *target)
413 return h->entries[locate_matching_entry (h, target)];
416 /* Deletes the entry in hash table H that matches TARGET.
417 Returns nonzero if an entry was deleted.
419 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
420 Because our load factor is at most 1/2, the average number of
421 moves that this algorithm makes should be at most 2 - ln 2 ~=
424 hsh_delete (struct hsh_table *h, const void *target)
426 unsigned i = locate_matching_entry (h, target);
427 if (h->entries[i] != NULL)
431 h->free (h->entries[i], h->aux);
438 h->entries[i] = NULL;
442 i = (i - 1) & (h->size - 1);
443 if (h->entries[i] == NULL)
446 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
448 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
449 h->entries[j] = h->entries[i];
458 /* Finds and returns an entry in TABLE, and initializes ITER for
459 use with hsh_next(). If TABLE is empty, returns a null
462 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
465 assert (iter != NULL);
468 return hsh_next (h, iter);
471 /* Iterates through TABLE with iterator ITER. Returns the next
472 entry in TABLE, or a null pointer after the last entry.
474 Entries are returned in an undefined order. Modifying TABLE
475 during iteration may cause some entries to be returned
476 multiple times or not at all. */
478 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
483 assert (iter != NULL);
484 assert (iter->next <= h->size);
486 for (i = iter->next; i < h->size; i++)
490 return h->entries[i];
493 iter->next = h->size;
497 /* Returns the number of items in H. */
499 hsh_count (struct hsh_table *h)
513 /* Displays contents of hash table H on stdout. */
515 hsh_dump (struct hsh_table *h)
517 void **entry = h->entries;
520 printf (_("hash table:"));
521 for (i = 0; i < h->size; i++)
522 printf (" %p", *entry++);
526 /* This wrapper around hsh_probe() assures that it returns a pointer
527 to a NULL pointer. This function is used when it is known that the
528 entry to be inserted does not already exist in the table. */
530 hsh_force_insert (struct hsh_table *h, void *p)
532 void **pp = hsh_probe (h, p);
533 assert (*pp == NULL);
537 /* This wrapper around hsh_find() assures that it returns non-NULL.
538 This function is for use when it is known that the entry being
539 searched for must exist in the table. */
541 hsh_force_find (struct hsh_table *h, const void *target)
543 void *found = hsh_find (h, target);
544 assert (found != NULL);
548 /* This wrapper for hsh_delete() verifies that an item really was
551 hsh_force_delete (struct hsh_table *h, const void *target)
553 int found = hsh_delete (h, target);