pintos-os.org Git - pspp/blob - lib/hash.c

   1 /* hash - hashing table processing.
   2    Copyright (C) 1998, 1999 Free Software Foundation, Inc.
   3    Written by Jim Meyering, 1992.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 /* A generic hash table package.  */
  20
  21 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
  22    of malloc.  If you change USE_OBSTACK, you have to recompile!  */
  23
  24 #if HAVE_CONFIG_H
  25 # include <config.h>
  26 #endif
  27 #if HAVE_STDLIB_H
  28 # include <stdlib.h>
  29 #endif
  30 #if HAVE_STDBOOL_H
  31 # include <stdbool.h>
  32 #else
  33 typedef enum {false = 0, true = 1} bool;
  34 #endif
  35 #include <stdio.h>
  36 #include <assert.h>
  37
  38 #if !HAVE_DECL_FREE
  39 void free ();
  40 #endif
  41 #if !HAVE_DECL_MALLOC
  42 char *malloc ();
  43 #endif
  44
  45 #if USE_OBSTACK
  46 # include "obstack.h"
  47 # ifndef obstack_chunk_alloc
  48 #  define obstack_chunk_alloc malloc
  49 # endif
  50 # ifndef obstack_chunk_free
  51 #  define obstack_chunk_free free
  52 # endif
  53 #endif
  54
  55 #include "hash.h"
  56
  57 /* An hash table contains many internal entries, each holding a pointer to
  58    some user provided data (also called a user entry).  An entry indistinctly
  59    refers to both the internal entry and its associated user entry.  A user
  60    entry contents may be hashed by a randomisation function (the hashing
  61    function, or just `hasher' for short) into a number (or `slot') between 0
  62    and the current table size.  At each slot position in the hash table,
  63    starts a linked chain of entries for which the user data all hash to this
  64    slot.  A bucket is the collection of all entries hashing to the same slot.
  65
  66    A good `hasher' function will distribute entries rather evenly in buckets.
  67    In the ideal case, the length of each bucket is roughly the number of
  68    entries divided by the table size.  Finding the slot for a data is usually
  69    done at constant speed by the `hasher', and the later finding of a precise
  70    entry is linear in time with the size of the bucket.  Consequently, a
  71    bigger hash table size (that is, a bigger number of buckets) is prone to
  72    yielding shorter buckets, *given* the `hasher' function behaves properly.
  73
  74    Long buckets slow down the lookup algorithm.  One might use big hash table
  75    sizes in hope to reduce the average length of buckets, but this might
  76    become inordinate, as unused slots in the hash table take some space.  The
  77    best bet is to make sure you are using a good `hasher' function (beware
  78    that those are not that easy to write! :-), and to use a table size at
  79    least bigger than the actual number of entries.  */
  80
  81 /* If, while adding an to a bucket which was empty, the ratio of used buckets
  82    over table size goes over the growth threshold (a number between 0.0 and
  83    1.0), then reorganise the table size bigger by the growth factor (a number
  84    greater than 1.0).  The growth threshold defaults to 0.8, and the growth
  85    factor defaults to 1.414, meaning that the table will have doubled its size
  86    every second time 80% of the buckets get used.  */
  87 #define DEFAULT_GROWTH_THRESHOLD 0.8
  88 #define DEFAULT_GROWTH_FACTOR 1.414
  89
  90 /* If, while emptying a bucket, the ratio of used buckets over table size
  91    drops below the shrink threshold (a number between 0.0 and 1.0), then
  92    reorganise the table size smaller through the usage of a shrink factor (a
  93    number greater than the shrink threshold but smaller than 1.0).  The shrink
  94    threshold and factor default to 0.0 and 1.0, meaning that the table never
  95    shrinks.  */
  96 #define DEFAULT_SHRINK_THRESHOLD 0.0
  97 #define DEFAULT_SHRINK_FACTOR 1.0
  98
  99 /* Use this to initialize or reset a TUNING structure to
 100    some sensible values. */
 101 static const Hash_tuning default_tuning =
 102   {
 103     DEFAULT_SHRINK_THRESHOLD,
 104     DEFAULT_SHRINK_FACTOR,
 105     DEFAULT_GROWTH_THRESHOLD,
 106     DEFAULT_GROWTH_FACTOR,
 107     false
 108   };
 109
 110 /* Information and lookup.  */
 111
 112 /* The following few functions provide information about the overall hash
 113    table organisation: the number of entries, number of buckets and maximum
 114    length of buckets.  */
 115
 116 /* Return the number of buckets in the hash table.  The table size, the total
 117    number of buckets (used plus unused), or the maximum number of slots, are
 118    the same quantity.  */
 119
 120 unsigned
 121 hash_get_n_buckets (const Hash_table *table)
 122 {
 123   return table->n_buckets;
 124 }
 125
 126 /* Return the number of slots in use (non-empty buckets).  */
 127
 128 unsigned
 129 hash_get_n_buckets_used (const Hash_table *table)
 130 {
 131   return table->n_buckets_used;
 132 }
 133
 134 /* Return the number of active entries.  */
 135
 136 unsigned
 137 hash_get_n_entries (const Hash_table *table)
 138 {
 139   return table->n_entries;
 140 }
 141
 142 /* Return the length of the most lenghty chain (bucket).  */
 143
 144 unsigned
 145 hash_get_max_bucket_length (const Hash_table *table)
 146 {
 147   struct hash_entry *bucket;
 148   unsigned max_bucket_length = 0;
 149
 150   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 151     {
 152       if (bucket->data)
 153         {
 154           struct hash_entry *cursor = bucket;
 155           unsigned bucket_length = 1;
 156
 157           while (cursor = cursor->next, cursor)
 158             bucket_length++;
 159
 160           if (bucket_length > max_bucket_length)
 161             max_bucket_length = bucket_length;
 162         }
 163     }
 164
 165   return max_bucket_length;
 166 }
 167
 168 /* Do a mild validation of an hash table, by traversing it and checking two
 169    statistics.  */
 170
 171 bool
 172 hash_table_ok (const Hash_table *table)
 173 {
 174   struct hash_entry *bucket;
 175   unsigned n_buckets_used = 0;
 176   unsigned n_entries = 0;
 177
 178   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 179     {
 180       if (bucket->data)
 181         {
 182           struct hash_entry *cursor = bucket;
 183
 184           /* Count bucket head.  */
 185           n_buckets_used++;
 186           n_entries++;
 187
 188           /* Count bucket overflow.  */
 189           while (cursor = cursor->next, cursor)
 190             n_entries++;
 191         }
 192     }
 193
 194   if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
 195     return true;
 196
 197   return false;
 198 }
 199
 200 void
 201 hash_print_statistics (const Hash_table *table, FILE *stream)
 202 {
 203   unsigned n_entries = hash_get_n_entries (table);
 204   unsigned n_buckets = hash_get_n_buckets (table);
 205   unsigned n_buckets_used = hash_get_n_buckets_used (table);
 206   unsigned max_bucket_length = hash_get_max_bucket_length (table);
 207
 208   fprintf (stream, "# entries:         %u\n", n_entries);
 209   fprintf (stream, "# buckets:         %u\n", n_buckets);
 210   fprintf (stream, "# buckets used:    %u (%.2f%%)\n", n_buckets_used,
 211            (100.0 * n_buckets_used) / n_buckets);
 212   fprintf (stream, "max bucket length: %u\n", max_bucket_length);
 213 }
 214
 215 /* Return the user entry from the hash table, if some entry in the hash table
 216    compares equally with ENTRY, or NULL otherwise. */
 217
 218 void *
 219 hash_lookup (const Hash_table *table, const void *entry)
 220 {
 221   struct hash_entry *bucket
 222     = table->bucket + table->hasher (entry, table->n_buckets);
 223   struct hash_entry *cursor;
 224
 225   assert (bucket < table->bucket_limit);
 226
 227   if (bucket->data == NULL)
 228     return NULL;
 229
 230   for (cursor = bucket; cursor; cursor = cursor->next)
 231     if (table->comparator (entry, cursor->data))
 232       return cursor->data;
 233
 234   return NULL;
 235 }
 236
 237 /* Walking.  */
 238
 239 /* The functions in this page traverse the hash table and process the
 240    contained entries.  For the traversal to work properly, the hash table
 241    should not be resized nor modified while any particular entry is being
 242    processed.  In particular, entries should not be added or removed.  */
 243
 244 /* Return the first data in the table, or NULL if the table is empty.  */
 245
 246 void *
 247 hash_get_first (const Hash_table *table)
 248 {
 249   struct hash_entry *bucket;
 250
 251   if (table->n_entries == 0)
 252     return NULL;
 253
 254   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 255     if (bucket->data)
 256       return bucket->data;
 257
 258   abort ();
 259 }
 260
 261 /* Return the user data for the entry following ENTRY, where ENTRY has been
 262    returned by a previous call to either `hash_get_first' or `hash_get_next'.
 263    Return NULL if there is no more entries.  */
 264
 265 void *
 266 hash_get_next (const Hash_table *table, const void *entry)
 267 {
 268   struct hash_entry *bucket
 269     = table->bucket + table->hasher (entry, table->n_buckets);
 270   struct hash_entry *cursor;
 271
 272   assert (bucket < table->bucket_limit);
 273
 274   /* Find next entry in the same bucket.  */
 275   for (cursor = bucket; cursor; cursor = cursor->next)
 276     if (cursor->data == entry && cursor->next)
 277       return cursor->next->data;
 278
 279   /* Find first entry in any subsequent bucket.  */
 280   for (; bucket < table->bucket_limit; bucket++)
 281     if (bucket->data)
 282       return bucket->data;
 283
 284   /* None found.  */
 285   return NULL;
 286 }
 287
 288 /* Fill BUFFER with pointers to active user entries in the hash table, then
 289    return the number of pointers copied.  Do not copy more than BUFFER_SIZE
 290    pointers.  */
 291
 292 unsigned
 293 hash_get_entries (const Hash_table *table, void **buffer,
 294                   unsigned buffer_size)
 295 {
 296   unsigned counter = 0;
 297   struct hash_entry *bucket;
 298   struct hash_entry *cursor;
 299
 300   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 301     {
 302       if (bucket->data)
 303         {
 304           for (cursor = bucket; cursor; cursor = cursor->next)
 305             {
 306               if (counter >= buffer_size)
 307                 return counter;
 308               buffer[counter++] = cursor->data;
 309             }
 310         }
 311     }
 312
 313   return counter;
 314 }
 315
 316 /* Call a PROCESSOR function for each entry of an hash table, and return the
 317    number of entries for which the processor function returned success.  A
 318    pointer to some PROCESSOR_DATA which will be made available to each call to
 319    the processor function.  The PROCESSOR accepts two arguments: the first is
 320    the user entry being walked into, the second is the value of PROCESSOR_DATA
 321    as received.  The walking continue for as long as the PROCESSOR function
 322    returns nonzero.  When it returns zero, the walking is interrupted.  */
 323
 324 unsigned
 325 hash_do_for_each (const Hash_table *table, Hash_processor processor,
 326                   void *processor_data)
 327 {
 328   unsigned counter = 0;
 329   struct hash_entry *bucket;
 330   struct hash_entry *cursor;
 331
 332   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 333     {
 334       if (bucket->data)
 335         {
 336           for (cursor = bucket; cursor; cursor = cursor->next)
 337             {
 338               if (!(*processor) (cursor->data, processor_data))
 339                 return counter;
 340               counter++;
 341             }
 342         }
 343     }
 344
 345   return counter;
 346 }
 347
 348 /* Allocation and clean-up.  */
 349
 350 /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
 351    This is a convenience routine for constructing other hashing functions.  */
 352
 353 #if USE_DIFF_HASH
 354
 355 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
 356    B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
 357    Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
 358    algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
 359    may not be good for your application."  */
 360
 361 unsigned
 362 hash_string (const char *string, unsigned n_buckets)
 363 {
 364 # ifndef CHAR_BIT
 365 #  define CHAR_BIT 8
 366 # endif
 367 # define ROTATE_LEFT(Value, Shift) \
 368   ((Value) << (Shift) | (Value) >> ((sizeof (unsigned) * CHAR_BIT) - (Shift)))
 369 # define HASH_ONE_CHAR(Value, Byte) \
 370   ((Byte) + ROTATE_LEFT (Value, 7))
 371
 372   unsigned value = 0;
 373
 374   for (; *string; string++)
 375     value = HASH_ONE_CHAR (value, *(const unsigned char *) string);
 376   return value % n_buckets;
 377
 378 # undef ROTATE_LEFT
 379 # undef HASH_ONE_CHAR
 380 }
 381
 382 #else /* not USE_DIFF_HASH */
 383
 384 /* This one comes from `recode', and performs a bit better than the above as
 385    per a few experiments.  It is inspired from a hashing routine found in the
 386    very old Cyber `snoop', itself written in typical Greg Mansfield style.
 387    (By the way, what happened to this excellent man?  Is he still alive?)  */
 388
 389 unsigned
 390 hash_string (const char *string, unsigned n_buckets)
 391 {
 392   unsigned value = 0;
 393
 394   while (*string)
 395     value = ((value * 31 + (int) *(const unsigned char *) string++)
 396              % n_buckets);
 397   return value;
 398 }
 399
 400 #endif /* not USE_DIFF_HASH */
 401
 402 /* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
 403    number at least equal to 11.  */
 404
 405 static int
 406 is_prime (unsigned long candidate)
 407 {
 408   unsigned long divisor = 3;
 409   unsigned long square = divisor * divisor;
 410
 411   while (square < candidate && (candidate % divisor))
 412     {
 413       divisor++;
 414       square += 4 * divisor;
 415       divisor++;
 416     }
 417
 418   return candidate % divisor != 0;
 419 }
 420
 421 /* Round a given CANDIDATE number up to the nearest prime, and return that
 422    prime.  Primes lower than 10 are merely skipped.  */
 423
 424 static unsigned long
 425 next_prime (unsigned long candidate)
 426 {
 427   /* Skip small primes.  */
 428   if (candidate < 10)
 429     candidate = 10;
 430
 431   /* Make it definitely odd.  */
 432   candidate |= 1;
 433
 434   while (!is_prime (candidate))
 435     candidate += 2;
 436
 437   return candidate;
 438 }
 439
 440 void
 441 hash_reset_tuning (Hash_tuning *tuning)
 442 {
 443   *tuning = default_tuning;
 444 }
 445
 446 /* For the given hash TABLE, check the user supplied tuning structure for
 447    reasonable values, and return true if there is no gross error with it.
 448    Otherwise, definitvely reset the TUNING field to some acceptable default in
 449    the hash table (that is, the user looses the right of further modifying
 450    tuning arguments), and return false.  */
 451
 452 static bool
 453 check_tuning (Hash_table *table)
 454 {
 455   const Hash_tuning *tuning = table->tuning;
 456
 457   if (tuning->growth_threshold > 0.0
 458       && tuning->growth_threshold < 1.0
 459       && tuning->growth_factor > 1.0
 460       && tuning->shrink_threshold >= 0.0
 461       && tuning->shrink_threshold < 1.0
 462       && tuning->shrink_factor > tuning->shrink_threshold
 463       && tuning->shrink_factor <= 1.0
 464       && tuning->shrink_threshold < tuning->growth_threshold)
 465     return true;
 466
 467   table->tuning = &default_tuning;
 468   return false;
 469 }
 470
 471 /* Allocate and return a new hash table, or NULL if an error is met.  The
 472    initial number of buckets is automatically selected so to _guarantee_ that
 473    you may insert at least CANDIDATE different user entries before any growth
 474    of the hash table size occurs.  So, if you happen to know beforehand the
 475    number of entries you intend to insert in the hash table, you may save some
 476    table memory and insertion time, by specifying it here.  If the
 477    IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE argument
 478    has its meaning changed to the wanted number of buckets.
 479
 480    TUNING points to a structure of user-supplied values, in case some fine
 481    tuning is wanted over the default behaviour of the hasher.  If TUNING is
 482    NULL, proper defaults are used instead.
 483
 484    The user-supplied HASHER function should be provided.  It accepts two
 485    arguments ENTRY and TABLE_SIZE.  It computes, by hasing ENTRY contents, a
 486    slot number for that entry which should be in the range 0..TABLE_SIZE-1.
 487    This slot number is then returned.
 488
 489    The user-supplied COMPARATOR function should be provided.  It accepts two
 490    arguments pointing to user data, it then returns true for a pair of entries
 491    that compare equal, or false otherwise.  This function is internally called
 492    on entries which are already known to hash to the same bucket index.
 493
 494    The user-supplied DATA_FREER function, when not NULL, may be later called
 495    with the user data as an argument, just before the entry containing the
 496    data gets freed.  This happens from within `hash_free' or `hash_clear'.
 497    You should specify this function only if you want these functions to free
 498    all of your `data' data.  This is typically the case when your data is
 499    simply an auxiliary struct that you have malloc'd to aggregate several
 500    values.  */
 501
 502 Hash_table *
 503 hash_initialize (unsigned candidate, const Hash_tuning *tuning,
 504                  Hash_hasher hasher, Hash_comparator comparator,
 505                  Hash_data_freer data_freer)
 506 {
 507   Hash_table *table;
 508   struct hash_entry *bucket;
 509
 510   if (hasher == NULL || comparator == NULL)
 511     return NULL;
 512
 513   table = (Hash_table *) malloc (sizeof (Hash_table));
 514   if (table == NULL)
 515     return NULL;
 516
 517   if (!tuning)
 518     tuning = &default_tuning;
 519   table->tuning = tuning;
 520   if (!check_tuning (table))
 521     {
 522       /* Abort initialisation if tuning arguments are improper.  This is the
 523          only occasion when the user gets some feedback about it.  Later on,
 524          if the user modifies the tuning wrongly, it gets restored to some
 525          proper default, and the user looses the right of tuning further.  */
 526       free (table);
 527       return NULL;
 528     }
 529
 530   table->n_buckets
 531     = next_prime (tuning->is_n_buckets ? candidate
 532                   : (unsigned) (candidate / tuning->growth_threshold));
 533
 534   table->bucket = (struct hash_entry *)
 535     malloc (table->n_buckets * sizeof (struct hash_entry));
 536   if (table->bucket == NULL)
 537     {
 538       free (table);
 539       return NULL;
 540     }
 541   table->bucket_limit = table->bucket + table->n_buckets;
 542
 543   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 544     {
 545       bucket->data = NULL;
 546       bucket->next = NULL;
 547     }
 548   table->n_buckets_used = 0;
 549   table->n_entries = 0;
 550
 551   table->hasher = hasher;
 552   table->comparator = comparator;
 553   table->data_freer = data_freer;
 554
 555   table->free_entry_list = NULL;
 556 #if USE_OBSTACK
 557   obstack_init (&table->entry_stack);
 558 #endif
 559   return table;
 560 }
 561
 562 /* Make all buckets empty, placing any chained entries on the free list.
 563    Apply the user-specified function data_freer (if any) to the datas of any
 564    affected entries.  */
 565
 566 void
 567 hash_clear (Hash_table *table)
 568 {
 569   struct hash_entry *bucket;
 570   struct hash_entry *cursor;
 571
 572   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 573     {
 574       if (bucket->data)
 575         {
 576           /* Free the bucket overflow.  */
 577           for (cursor = bucket->next; cursor; cursor = cursor->next)
 578             {
 579               if (table->data_freer)
 580                 (*table->data_freer) (cursor->data);
 581               cursor->data = NULL;
 582
 583               /* Relinking is done one entry at a time, as it is to be expected
 584                  that overflows are either rare or short.  */
 585               cursor->next = table->free_entry_list;
 586               table->free_entry_list = cursor;
 587             }
 588
 589           /* Free the bucket head.  */
 590           if (table->data_freer)
 591             (*table->data_freer) (bucket->data);
 592           bucket->data = NULL;
 593           bucket->next = NULL;
 594         }
 595     }
 596
 597   table->n_buckets_used = 0;
 598   table->n_entries = 0;
 599 }
 600
 601 /* Reclaim all storage associated with an hash table.  If a data_freer
 602    function has been supplied by the user when the hash table was created,
 603    this function applies it to the data of each entry before freeing that
 604    entry.  */
 605
 606 void
 607 hash_free (Hash_table *table)
 608 {
 609   struct hash_entry *bucket;
 610   struct hash_entry *cursor;
 611   struct hash_entry *next;
 612
 613   /* Call the user data_freer function.  */
 614   if (table->data_freer && table->n_entries)
 615     {
 616       for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 617         {
 618           if (bucket->data)
 619             {
 620               for (cursor = bucket; cursor; cursor = cursor->next)
 621                 {
 622                   (*table->data_freer) (cursor->data);
 623                 }
 624             }
 625         }
 626     }
 627
 628 #if USE_OBSTACK
 629
 630   obstack_free (&table->entry_stack, NULL);
 631
 632 #else
 633
 634   /* Free all bucket overflowed entries.  */
 635   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 636     {
 637       for (cursor = bucket->next; cursor; cursor = next)
 638         {
 639           next = cursor->next;
 640           free (cursor);
 641         }
 642     }
 643
 644   /* Also reclaim the internal list of previously freed entries.  */
 645   for (cursor = table->free_entry_list; cursor; cursor = next)
 646     {
 647       next = cursor->next;
 648       free (cursor);
 649     }
 650
 651 #endif
 652
 653   /* Free the remainder of the hash table structure.  */
 654   free (table->bucket);
 655   free (table);
 656 }
 657
 658 /* Insertion and deletion.  */
 659
 660 /* Get a new hash entry for a bucket overflow, possibly by reclying a
 661    previously freed one.  If this is not possible, allocate a new one.  */
 662
 663 static struct hash_entry *
 664 allocate_entry (Hash_table *table)
 665 {
 666   struct hash_entry *new;
 667
 668   if (table->free_entry_list)
 669     {
 670       new = table->free_entry_list;
 671       table->free_entry_list = new->next;
 672     }
 673   else
 674     {
 675 #if USE_OBSTACK
 676       new = (struct hash_entry *)
 677         obstack_alloc (&table->entry_stack, sizeof (struct hash_entry));
 678 #else
 679       new = (struct hash_entry *) malloc (sizeof (struct hash_entry));
 680 #endif
 681     }
 682
 683   return new;
 684 }
 685
 686 /* Free a hash entry which was part of some bucket overflow,
 687    saving it for later recycling.  */
 688
 689 static void
 690 free_entry (Hash_table *table, struct hash_entry *entry)
 691 {
 692   entry->data = NULL;
 693   entry->next = table->free_entry_list;
 694   table->free_entry_list = entry;
 695 }
 696
 697 /* This private function is used to help with insertion and deletion.  When
 698    ENTRY matches an entry in the table, return a pointer to the corresponding
 699    user data and set *BUCKET_HEAD to the head of the selected bucket.
 700    Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
 701    the table, unlink the matching entry.  */
 702
 703 static void *
 704 hash_find_entry (Hash_table *table, const void *entry,
 705                  struct hash_entry **bucket_head, bool delete)
 706 {
 707   struct hash_entry *bucket
 708     = table->bucket + table->hasher (entry, table->n_buckets);
 709   struct hash_entry *cursor;
 710
 711   assert (bucket < table->bucket_limit);
 712   *bucket_head = bucket;
 713
 714   /* Test for empty bucket.  */
 715   if (bucket->data == NULL)
 716     return NULL;
 717
 718   /* Check if then entry is found as the bucket head.  */
 719   if ((*table->comparator) (entry, bucket->data))
 720     {
 721       void *data = bucket->data;
 722
 723       if (delete)
 724         {
 725           if (bucket->next)
 726             {
 727               struct hash_entry *next = bucket->next;
 728
 729               /* Bump the first overflow entry into the bucket head, then save
 730                  the previous first overflow entry for later recycling.  */
 731               *bucket = *next;
 732               free_entry (table, next);
 733             }
 734           else
 735             {
 736               bucket->data = NULL;
 737             }
 738         }
 739
 740       return data;
 741     }
 742
 743   /* Scan the bucket overflow.  */
 744   for (cursor = bucket; cursor->next; cursor = cursor->next)
 745     {
 746       if ((*table->comparator) (entry, cursor->next->data))
 747         {
 748           void *data = cursor->next->data;
 749
 750           if (delete)
 751             {
 752               struct hash_entry *next = cursor->next;
 753
 754               /* Unlink the entry to delete, then save the freed entry for later
 755                  recycling.  */
 756               cursor->next = next->next;
 757               free_entry (table, next);
 758             }
 759
 760           return data;
 761         }
 762     }
 763
 764   /* No entry found.  */
 765   return NULL;
 766 }
 767
 768 /* For an already existing hash table, change the number of buckets through
 769    specifying CANDIDATE.  The contents of the hash table are preserved.  The
 770    new number of buckets is automatically selected so to _guarantee_ that the
 771    table may receive at least CANDIDATE different user entries, including
 772    those already in the table, before any other growth of the hash table size
 773    occurs.  If the IS_N_BUCKETS field of the TUNING structure is true, the
 774    CANDIDATE argument has its meaning changed to the wanted number of buckets.
 775    */
 776
 777 bool
 778 hash_rehash (Hash_table *table, unsigned candidate)
 779 {
 780   Hash_table *new_table;
 781   struct hash_entry *bucket;
 782   struct hash_entry *cursor;
 783   struct hash_entry *next;
 784
 785   new_table = hash_initialize (candidate, table->tuning, table->hasher,
 786                                table->comparator, table->data_freer);
 787   if (new_table == NULL)
 788     return false;
 789
 790   /* Merely reuse the extra old space into the new table.  */
 791 #if USE_OBSTACK
 792   obstack_free (&new_table->entry_stack, NULL);
 793   new_table->entry_stack = table->entry_stack;
 794 #endif
 795   new_table->free_entry_list = table->free_entry_list;
 796
 797   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 798     if (bucket->data)
 799       for (cursor = bucket; cursor; cursor = next)
 800         {
 801           void *data = cursor->data;
 802           struct hash_entry *new_bucket
 803             = (new_table->bucket
 804                + new_table->hasher (data, new_table->n_buckets));
 805
 806           assert (new_bucket < new_table->bucket_limit);
 807           next = cursor->next;
 808
 809           if (new_bucket->data)
 810             if (cursor == bucket)
 811               {
 812                 /* Allocate or recycle an entry, when moving from a bucket
 813                    header into a bucket overflow.  */
 814                 struct hash_entry *new_entry = allocate_entry (new_table);
 815
 816                 if (new_entry == NULL)
 817                   return false;
 818
 819                 new_entry->data = data;
 820                 new_entry->next = new_bucket->next;
 821                 new_bucket->next = new_entry;
 822               }
 823             else
 824               {
 825                 /* Merely relink an existing entry, when moving from a
 826                    bucket overflow into a bucket overflow.  */
 827                 cursor->next = new_bucket->next;
 828                 new_bucket->next = cursor;
 829               }
 830           else
 831             {
 832               /* Free an existing entry, when moving from a bucket
 833                  overflow into a bucket header.  Also take care of the
 834                  simple case of moving from a bucket header into a bucket
 835                  header.  */
 836               new_bucket->data = data;
 837               new_table->n_buckets_used++;
 838               if (cursor != bucket)
 839                 free_entry (new_table, cursor);
 840             }
 841         }
 842
 843   free (table->bucket);
 844   table->bucket = new_table->bucket;
 845   table->bucket_limit = new_table->bucket_limit;
 846   table->n_buckets = new_table->n_buckets;
 847   table->n_buckets_used = new_table->n_buckets_used;
 848   /* table->n_entries already holds its value.  */
 849 #if USE_OBSTACK
 850   table->entry_stack = new_table->entry_stack;
 851 #endif
 852   free (new_table);
 853
 854   return true;
 855 }
 856
 857 /* If ENTRY matches an entry already in the hash table, return the pointer
 858    to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
 859    Return NULL if the storage required for insertion cannot be allocated.  */
 860
 861 void *
 862 hash_insert (Hash_table *table, const void *entry)
 863 {
 864   void *data;
 865   struct hash_entry *bucket;
 866
 867   assert (entry);               /* cannot insert a NULL entry */
 868
 869   /* If there's a matching entry already in the table, return that.  */
 870   if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
 871     return data;
 872
 873   /* ENTRY is not matched, it should be inserted.  */
 874
 875   if (bucket->data)
 876     {
 877       struct hash_entry *new_entry = allocate_entry (table);
 878
 879       if (new_entry == NULL)
 880         return NULL;
 881
 882       /* Add ENTRY in the overflow of the bucket.  */
 883
 884       new_entry->data = (void *) entry;
 885       new_entry->next = bucket->next;
 886       bucket->next = new_entry;
 887       table->n_entries++;
 888       return (void *) entry;
 889     }
 890
 891   /* Add ENTRY right in the bucket head.  */
 892
 893   bucket->data = (void *) entry;
 894   table->n_entries++;
 895   table->n_buckets_used++;
 896
 897   /* If the growth threshold of the buckets in use has been reached, rehash
 898      the table bigger.  It's no real use checking the number of entries, as
 899      if the hashing function is ill-conditioned, rehashing is not likely to
 900      improve it.  */
 901
 902   if (table->n_buckets_used
 903       > table->tuning->growth_threshold * table->n_buckets)
 904     {
 905       /* Check more fully, before starting real work.  If tuning arguments got
 906          improper, the second check will rely on proper defaults.  */
 907       check_tuning (table);
 908       if (table->n_buckets_used
 909           > table->tuning->growth_threshold * table->n_buckets)
 910         {
 911           const Hash_tuning *tuning = table->tuning;
 912           unsigned candidate
 913             = (unsigned) (tuning->is_n_buckets
 914                           ? (table->n_buckets * tuning->growth_factor)
 915                           : (table->n_buckets * tuning->growth_factor
 916                              * tuning->growth_threshold));
 917
 918           /* If the rehash fails, arrange to return NULL.  */
 919           if (!hash_rehash (table, candidate))
 920             entry = NULL;
 921         }
 922     }
 923
 924   return (void *) entry;
 925 }
 926
 927 /* If ENTRY is already in the table, remove it and return the just-deleted
 928    data (the user may want to deallocate its storage).  If ENTRY is not in the
 929    table, don't modify the table and return NULL.  */
 930
 931 void *
 932 hash_delete (Hash_table *table, const void *entry)
 933 {
 934   void *data;
 935   struct hash_entry *bucket;
 936
 937   if (data = hash_find_entry (table, entry, &bucket, true), !data)
 938     return NULL;
 939
 940   table->n_entries--;
 941   if (!bucket->data)
 942     {
 943       table->n_buckets_used--;
 944
 945       /* If the shrink threshold of the buckets in use has been reached,
 946          rehash the table smaller.  */
 947
 948       if (table->n_buckets_used
 949           < table->tuning->shrink_threshold * table->n_buckets)
 950         {
 951           /* Check more fully, before starting real work.  If tuning arguments
 952              got improper, the second check will rely on proper defaults.  */
 953           check_tuning (table);
 954           if (table->n_buckets_used
 955               < table->tuning->shrink_threshold * table->n_buckets)
 956             {
 957               const Hash_tuning *tuning = table->tuning;
 958               unsigned candidate
 959                 = (unsigned) (tuning->is_n_buckets
 960                               ? table->n_buckets * tuning->shrink_factor
 961                               : (table->n_buckets * tuning->shrink_factor
 962                                  * tuning->growth_threshold));
 963
 964               hash_rehash (table, candidate);
 965             }
 966         }
 967     }
 968
 969   return data;
 970 }
 971
 972 /* Testing.  */
 973
 974 #if TESTING
 975
 976 void
 977 hash_print (const Hash_table *table)
 978 {
 979   struct hash_entry *bucket;
 980
 981   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 982     {
 983       struct hash_entry *cursor;
 984
 985       if (bucket)
 986         printf ("%d:\n", slot);
 987
 988       for (cursor = bucket; cursor; cursor = cursor->next)
 989         {
 990           char *s = (char *) cursor->data;
 991           /* FIXME */
 992           printf ("  %s\n", s);
 993         }
 994     }
 995 }
 996
 997 #endif /* TESTING */