1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 #include "algorithm.h"
33 size_t used; /* Number of filled entries. */
34 size_t size; /* Number of entries (a power of 2). */
35 void **entries; /* Hash table proper. */
37 void *aux; /* Auxiliary data for comparison functions. */
38 hsh_compare_func *compare;
43 /* Note for constructing hash functions:
45 You can store the hash values in the records, then compare hash
46 values (in the compare function) before bothering to compare keys.
47 Hash values can simply be returned from the records instead of
48 recalculating when rehashing. */
52 Since hash_probe and hash_find take void * pointers, it's easy to
53 pass a void ** to your data by accidentally inserting an `&'
54 reference operator where one shouldn't go. It took me an hour to
55 hunt down a bug like that once. */
57 /* Prime numbers and hash functions. */
59 /* Returns smallest power of 2 greater than X. */
61 next_power_of_2 (size_t x)
67 /* Turn off rightmost 1-bit in x. */
68 size_t y = x & (x - 1);
70 /* If y is 0 then x only had a single 1-bit. */
74 /* Otherwise turn off the next. */
79 /* Colin Plumb's "one-at-a-time" hash, for bytes. */
81 hsh_hash_bytes (const void *buf_, size_t size)
83 const unsigned char *buf = buf_;
99 /* Colin Plumb's "one-at-a-time" hash, for strings. */
101 hsh_hash_string (const char *s_)
103 const unsigned char *s = s_;
110 hash += (hash << 10);
114 hash ^= (hash >> 11);
115 hash += (hash << 15);
123 return hsh_hash_bytes (&i, sizeof i);
126 /* Hash for double. */
128 hsh_hash_double (double d)
131 return hsh_hash_bytes (&d, sizeof d);
138 /* Creates a hash table with at least M entries. COMPARE is a
139 function that compares two entries and returns 0 if they are
140 identical, nonzero otherwise; HASH returns a nonnegative hash value
141 for an entry; FREE destroys an entry. */
143 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
144 hsh_free_func *free, void *aux)
150 assert (compare != NULL);
151 assert (hash != NULL);
153 h = xmalloc (sizeof *h);
157 h->size = next_power_of_2 (size);
158 h->entries = xmalloc (sizeof *h->entries * h->size);
159 for (i = 0; i < h->size; i++)
160 h->entries[i] = NULL;
162 h->compare = compare;
168 /* Destroys the contents of table H. */
170 hsh_clear (struct hsh_table *h)
176 for (i = 0; i < h->size; i++)
177 if (h->entries[i] != NULL)
178 h->free (h->entries[i], h->aux);
180 for (i = 0; i < h->size; i++)
181 h->entries[i] = NULL;
184 /* Destroys table H and all its contents. */
186 hsh_destroy (struct hsh_table *h)
193 for (i = 0; i < h->size; i++)
194 if (h->entries[i] != NULL)
195 h->free (h->entries[i], h->aux);
201 /* Changes the capacity of H to NEW_SIZE. */
203 hsh_rehash (struct hsh_table *h, size_t new_size)
205 void **begin, **end, **table_p;
209 assert (new_size >= h->used);
212 end = begin + h->size;
215 h->entries = xmalloc (sizeof *h->entries * h->size);
216 for (i = 0; i < h->size; i++)
217 h->entries[i] = NULL;
218 for (table_p = begin; table_p < end; table_p++)
222 if (*table_p == NULL)
224 entry = &h->entries[h->hash (*table_p, h->aux) & (h->size - 1)];
226 if (--entry < h->entries)
227 entry = &h->entries[h->size - 1];
233 /* A "algo_predicate_func" that returns nonzero if DATA points
234 to a non-null void. */
236 not_null (const void *data_, void *aux unused)
238 void *const *data = data_;
240 return *data != NULL;
243 /* Compacts hash table H and returns a pointer to its data. The
244 returned data consists of hsh_count(H) non-null pointers, in
245 no particular order, followed by a null pointer. After
246 calling this function, only hsh_destroy() and hsh_count() may
249 hsh_data (struct hsh_table *h)
254 n = partition (h->entries, h->size, sizeof *h->entries,
256 assert (n == h->used);
260 /* Dereferences void ** pointers and passes them to the hash
261 comparison function. */
263 comparison_helper (const void *a_, const void *b_, void *h_)
267 struct hsh_table *h = h_;
269 return h->compare (*a, *b, h->aux);
272 /* Sorts hash table H based on hash comparison function. The
273 returned data consists of hsh_count(H) non-null pointers,
274 sorted in order of the hash comparison function, followed by a
275 null pointer. After calling this function, only hsh_destroy()
276 and hsh_count() may be applied to H. */
278 hsh_sort (struct hsh_table *h)
283 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
287 /* Makes and returns a copy of the pointers to the data in H.
288 The returned data consists of hsh_count(H) non-null pointers,
289 in no particular order, followed by a null pointer. The hash
290 table is not modified. The caller is responsible for freeing
291 the allocated data. */
293 hsh_data_copy (struct hsh_table *h)
298 copy = xmalloc ((h->used + 1) * sizeof *copy);
299 copy_if (h->entries, h->size, sizeof *h->entries, copy,
301 copy[h->used] = NULL;
305 /* Makes and returns a copy of the pointers to the data in H.
306 The returned data consists of hsh_count(H) non-null pointers,
307 sorted in order of the hash comparison function, followed by a
308 null pointer. The hash table is not modified. The caller is
309 responsible for freeing the allocated data. */
311 hsh_sort_copy (struct hsh_table *h)
316 copy = hsh_data_copy (h);
317 sort (copy, h->used, sizeof *copy, comparison_helper, h);
323 /* Searches hash table H for TARGET. If found, returns a pointer
324 to a pointer to that entry; otherwise returns a pointer to a
325 NULL entry which *must* be used to insert a new entry having
326 the same key data. */
328 hsh_probe (struct hsh_table *h, const void *target)
333 assert (target != NULL);
335 /* Order of these statements is important! */
336 if (h->used > h->size / 2)
337 hsh_rehash (h, h->size * 2);
338 entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)];
342 if (!h->compare (*entry, target, h->aux))
344 if (--entry < h->entries)
345 entry = &h->entries[h->size - 1];
351 /* Searches hash table H for TARGET. If not found, inserts
352 TARGET and returns a null pointer. If found, returns the
353 match, without replacing it in the table. */
355 hsh_insert (struct hsh_table *h, void *target)
360 assert (target != NULL);
362 entry = hsh_probe (h, target);
372 /* Searches hash table H for TARGET. If not found, inserts
373 TARGET and returns a null pointer. If found, returns the
374 match, after replacing it in the table by TARGET. */
376 hsh_replace (struct hsh_table *h, void *target)
378 void **entry = hsh_probe (h, target);
384 /* Locates an entry matching TARGET. Returns a pointer to the
385 entry, or a null pointer on failure. */
386 static inline void **
387 locate_matching_entry (struct hsh_table *h, const void *target)
389 void **entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)];
393 if (!h->compare (*entry, target, h->aux))
395 if (--entry < h->entries)
396 entry = &h->entries[h->size - 1];
401 /* Returns the entry in hash table H that matches TARGET, or NULL
404 hsh_find (struct hsh_table *h, const void *target)
406 void **entry = locate_matching_entry (h, target);
407 return entry != NULL ? *entry : NULL;
410 /* Deletes the entry in hash table H that matches TARGET.
411 Returns nonzero if an entry was deleted.
413 Note: this function is very slow because it rehashes the
414 entire table. Don't use this hash table implementation if
415 deletion is a common operation. */
417 hsh_delete (struct hsh_table *h, const void *target)
419 void **entry = locate_matching_entry (h, target);
423 h->free (*entry, h->aux);
425 hsh_rehash (h, h->size);
434 /* Finds and returns an entry in TABLE, and initializes ITER for
435 use with hsh_next(). If TABLE is empty, returns a null
438 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
441 assert (iter != NULL);
444 return hsh_next (h, iter);
447 /* Iterates through TABLE with iterator ITER. Returns the next
448 entry in TABLE, or a null pointer after the last entry.
450 Entries are returned in an undefined order. Modifying TABLE
451 during iteration may cause some entries to be returned
452 multiple times or not at all. */
454 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
459 assert (iter != NULL);
460 assert (iter->next <= h->size);
462 for (i = iter->next; i < h->size; i++)
466 return h->entries[i];
469 iter->next = h->size;
473 /* Returns the number of items in H. */
475 hsh_count (struct hsh_table *h)
489 /* Displays contents of hash table H on stdout. */
491 hsh_dump (struct hsh_table *h)
493 void **entry = h->entries;
496 printf (_("hash table:"));
497 for (i = 0; i < h->size; i++)
498 printf (" %p", *entry++);
502 /* This wrapper around hsh_probe() assures that it returns a pointer
503 to a NULL pointer. This function is used when it is known that the
504 entry to be inserted does not already exist in the table. */
506 hsh_force_insert (struct hsh_table *h, void *p)
508 void **pp = hsh_probe (h, p);
509 assert (*pp == NULL);
513 /* This wrapper around hsh_find() assures that it returns non-NULL.
514 This function is for use when it is known that the entry being
515 searched for must exist in the table. */
517 hsh_force_find (struct hsh_table *h, const void *target)
519 void *found = hsh_find (h, target);
520 assert (found != NULL);
524 /* This wrapper for hsh_delete() verifies that an item really was
527 hsh_force_delete (struct hsh_table *h, const void *target)
529 int found = hsh_delete (h, target);