1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
34 #define _(msgid) gettext (msgid)
36 /* Note for constructing hash functions:
38 You can store the hash values in the records, then compare hash
39 values (in the compare function) before bothering to compare keys.
40 Hash values can simply be returned from the records instead of
41 recalculating when rehashing. */
45 Since hash_probe and hash_find take void * pointers, it's easy to
46 pass a void ** to your data by accidentally inserting an `&'
47 reference operator where one shouldn't go. It took me an hour to
48 hunt down a bug like that once. */
50 /* Prime numbers and hash functions. */
52 /* Returns smallest power of 2 greater than X. */
54 next_power_of_2 (size_t x)
60 /* Turn off rightmost 1-bit in x. */
61 size_t y = x & (x - 1);
63 /* If y is 0 then x only had a single 1-bit. */
67 /* Otherwise turn off the next. */
78 size_t used; /* Number of filled entries. */
79 size_t size; /* Number of entries (a power of 2). */
80 void **entries; /* Hash table proper. */
82 const void *aux; /* Auxiliary data for comparison functions. */
83 hsh_compare_func *compare;
88 /* Set to false if hsh_data() or hsh_sort() has been called,
89 so that most hsh_*() functions may no longer be called. */
93 struct pool *pool; /* The pool used for this hash table */
97 hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
98 hsh_free_func *free, const void *aux)
100 return hsh_create_pool (NULL, size, compare, hash, free, aux);
105 /* Creates a hash table with at least M entries. COMPARE is a
106 function that compares two entries and returns 0 if they are
107 identical, nonzero otherwise; HASH returns a nonnegative hash value
108 for an entry; FREE destroys an entry. */
110 hsh_create_pool (struct pool *pool, int size,
111 hsh_compare_func *compare, hsh_hash_func *hash,
112 hsh_free_func *free, const void *aux)
117 assert (compare != NULL);
118 assert (hash != NULL);
120 h = pool_malloc (pool, sizeof *h);
125 h->size = next_power_of_2 (size);
126 h->entries = pool_nmalloc (pool, h->size, sizeof *h->entries);
127 for (i = 0; i < h->size; i++)
128 h->entries[i] = NULL;
130 h->compare = compare;
134 h->hash_ordered = true;
139 /* Destroys the contents of table H. */
141 hsh_clear (struct hsh_table *h)
147 for (i = 0; i < h->size; i++)
148 if (h->entries[i] != NULL)
149 h->free (h->entries[i], h->aux);
151 for (i = 0; i < h->size; i++)
152 h->entries[i] = NULL;
157 h->hash_ordered = true;
161 /* Destroys table H and all its contents. */
163 hsh_destroy (struct hsh_table *h)
170 for (i = 0; i < h->size; i++)
171 if (h->entries[i] != NULL)
172 h->free (h->entries[i], h->aux);
173 pool_free (h->pool, h->entries);
174 pool_free (h->pool, h);
178 /* Locates an entry matching TARGET. Returns the index for the
179 entry, if found, or the index of an empty entry that indicates
180 where TARGET should go, otherwise. */
181 static inline unsigned
182 locate_matching_entry (struct hsh_table *h, const void *target)
184 unsigned i = h->hash (target, h->aux);
186 assert (h->hash_ordered);
191 entry = h->entries[i];
192 if (entry == NULL || !h->compare (entry, target, h->aux))
198 /* Returns the index of an empty entry that indicates
199 where TARGET should go, assuming that TARGET is not equal to
200 any item already in the hash table. */
201 static inline unsigned
202 locate_empty_entry (struct hsh_table *h, const void *target)
204 unsigned i = h->hash (target, h->aux);
206 assert (h->hash_ordered);
210 if (h->entries[i] == NULL)
216 /* Changes the capacity of H to NEW_SIZE, which must be a
217 positive power of 2 at least as large as the number of
220 rehash (struct hsh_table *h, size_t new_size)
222 void **begin, **end, **table_p;
226 assert (new_size >= h->used);
228 /* Verify that NEW_SIZE is a positive power of 2. */
229 assert (new_size > 0 && (new_size & (new_size - 1)) == 0);
232 end = begin + h->size;
235 h->entries = pool_nmalloc (h->pool, h->size, sizeof *h->entries);
236 for (i = 0; i < h->size; i++)
237 h->entries[i] = NULL;
238 for (table_p = begin; table_p < end; table_p++)
240 void *entry = *table_p;
242 h->entries[locate_empty_entry (h, entry)] = entry;
244 pool_free (h->pool, begin);
247 h->hash_ordered = true;
251 /* A "algo_predicate_func" that returns true if DATA points
252 to a non-null void. */
254 not_null (const void *data_, const void *aux UNUSED)
256 void *const *data = data_;
258 return *data != NULL;
261 /* Compacts hash table H and returns a pointer to its data. The
262 returned data consists of hsh_count(H) non-null pointers, in
263 no particular order, followed by a null pointer.
265 After calling this function, only hsh_destroy() and
266 hsh_count() should be applied to H. hsh_first() and
267 hsh_next() could also be used, but you're better off just
268 iterating through the returned array.
270 This function is intended for use in situations where data
271 processing occurs in two phases. In the first phase, data is
272 added, removed, and searched for within a hash table. In the
273 second phase, the contents of the hash table are output and
274 the hash property itself is no longer of interest.
276 Use hsh_sort() instead, if the second phase wants data in
277 sorted order. Use hsh_data_copy() or hsh_sort_copy() instead,
278 if the second phase still needs to search the hash table. */
280 hsh_data (struct hsh_table *h)
285 n = partition (h->entries, h->size, sizeof *h->entries, not_null, NULL);
286 assert (n == h->used);
288 h->hash_ordered = false;
293 /* Dereferences void ** pointers and passes them to the hash
294 comparison function. */
296 comparison_helper (const void *a_, const void *b_, const void *h_)
300 const struct hsh_table *h = h_;
305 return h->compare (*a, *b, h->aux);
308 /* Sorts hash table H based on hash comparison function. The
309 returned data consists of hsh_count(H) non-null pointers,
310 sorted in order of the hash comparison function, followed by a
313 After calling this function, only hsh_destroy() and
314 hsh_count() should be applied to H. hsh_first() and
315 hsh_next() could also be used, but you're better off just
316 iterating through the returned array.
318 This function is intended for use in situations where data
319 processing occurs in two phases. In the first phase, data is
320 added, removed, and searched for within a hash table. In the
321 second phase, the contents of the hash table are output and
322 the hash property itself is no longer of interest.
324 Use hsh_data() instead, if the second phase doesn't need the
325 data in any particular order. Use hsh_data_copy() or
326 hsh_sort_copy() instead, if the second phase still needs to
327 search the hash table. */
329 hsh_sort (struct hsh_table *h)
334 sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
338 /* Makes and returns a copy of the pointers to the data in H.
339 The returned data consists of hsh_count(H) non-null pointers,
340 in no particular order, followed by a null pointer. The hash
341 table is not modified. The caller is responsible for freeing
344 If you don't need to search or modify the hash table, then
345 hsh_data() is a more efficient choice. */
347 hsh_data_copy (struct hsh_table *h)
352 copy = pool_nmalloc (h->pool, (h->used + 1), sizeof *copy);
353 copy_if (h->entries, h->size, sizeof *h->entries, copy, not_null, NULL);
354 copy[h->used] = NULL;
358 /* Makes and returns a copy of the pointers to the data in H.
359 The returned data consists of hsh_count(H) non-null pointers,
360 sorted in order of the hash comparison function, followed by a
361 null pointer. The hash table is not modified. The caller is
362 responsible for freeing the allocated data.
364 If you don't need to search or modify the hash table, then
365 hsh_sort() is a more efficient choice. */
367 hsh_sort_copy (struct hsh_table *h)
372 copy = hsh_data_copy (h);
373 sort (copy, h->used, sizeof *copy, comparison_helper, h);
379 /* Searches hash table H for TARGET. If found, returns a pointer
380 to a pointer to that entry; otherwise returns a pointer to a
381 NULL entry which *must* be used to insert a new entry having
382 the same key data. */
384 hsh_probe (struct hsh_table *h, const void *target)
389 assert (target != NULL);
390 assert (h->hash_ordered);
392 if (h->used > h->size / 2)
393 rehash (h, h->size * 2);
394 i = locate_matching_entry (h, target);
395 if (h->entries[i] == NULL)
397 return &h->entries[i];
400 /* Searches hash table H for TARGET. If not found, inserts
401 TARGET and returns a null pointer. If found, returns the
402 match, without replacing it in the table. */
404 hsh_insert (struct hsh_table *h, void *target)
409 assert (target != NULL);
411 entry = hsh_probe (h, target);
421 /* Searches hash table H for TARGET. If not found, inserts
422 TARGET and returns a null pointer. If found, returns the
423 match, after replacing it in the table by TARGET. */
425 hsh_replace (struct hsh_table *h, void *target)
427 void **entry = hsh_probe (h, target);
433 /* Returns the entry in hash table H that matches TARGET, or NULL
436 hsh_find (struct hsh_table *h, const void *target)
438 return h->entries[locate_matching_entry (h, target)];
441 /* Deletes the entry in hash table H that matches TARGET.
442 Returns true if an entry was deleted.
444 Uses Knuth's Algorithm 6.4R (Deletion with linear probing).
445 Because our load factor is at most 1/2, the average number of
446 moves that this algorithm makes should be at most 2 - ln 2 ~=
449 hsh_delete (struct hsh_table *h, const void *target)
451 unsigned i = locate_matching_entry (h, target);
452 if (h->entries[i] != NULL)
456 h->free (h->entries[i], h->aux);
463 h->entries[i] = NULL;
467 i = (i - 1) & (h->size - 1);
468 if (h->entries[i] == NULL)
471 r = h->hash (h->entries[i], h->aux) & (h->size - 1);
473 while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
474 h->entries[j] = h->entries[i];
483 /* Finds and returns an entry in TABLE, and initializes ITER for
484 use with hsh_next(). If TABLE is empty, returns a null
487 hsh_first (struct hsh_table *h, struct hsh_iterator *iter)
490 assert (iter != NULL);
493 return hsh_next (h, iter);
496 /* Iterates through TABLE with iterator ITER. Returns the next
497 entry in TABLE, or a null pointer after the last entry.
499 Entries are returned in an undefined order. Modifying TABLE
500 during iteration may cause some entries to be returned
501 multiple times or not at all. */
503 hsh_next (struct hsh_table *h, struct hsh_iterator *iter)
508 assert (iter != NULL);
509 assert (iter->next <= h->size);
511 for (i = iter->next; i < h->size; i++)
515 return h->entries[i];
518 iter->next = h->size;
522 /* Returns the number of items in H. */
524 hsh_count (struct hsh_table *h)
538 /* Displays contents of hash table H on stdout. */
540 hsh_dump (struct hsh_table *h)
542 void **entry = h->entries;
545 printf ("hash table:");
546 for (i = 0; i < h->size; i++)
547 printf (" %p", *entry++);
551 /* This wrapper around hsh_probe() assures that it returns a pointer
552 to a NULL pointer. This function is used when it is known that the
553 entry to be inserted does not already exist in the table. */
555 hsh_force_insert (struct hsh_table *h, void *p)
557 void **pp = hsh_probe (h, p);
558 assert (*pp == NULL);
562 /* This wrapper around hsh_find() assures that it returns non-NULL.
563 This function is for use when it is known that the entry being
564 searched for must exist in the table. */
566 hsh_force_find (struct hsh_table *h, const void *target)
568 void *found = hsh_find (h, target);
569 assert (found != NULL);
573 /* This wrapper for hsh_delete() verifies that an item really was
576 hsh_force_delete (struct hsh_table *h, const void *target)
578 int found = hsh_delete (h, target);