#include "gl/xalloc.h"
-static size_t capacity_to_mask (size_t capacity);
+static int capacity_to_shift (size_t capacity);
/* Initializes MAP as a new hash map that is initially empty. */
void
hmap_init (struct hmap *map)
{
+ map->buckets = map->two;
+ map->two[0] = map->two[1] = NULL;
map->count = 0;
- map->mask = 0;
- map->buckets = &map->one;
- map->one = NULL;
+ map->shift = HMAP_MAX_SHIFT;
}
/* Exchanges the contents of hash maps A and B. */
struct hmap tmp = *a;
*a = *b;
*b = tmp;
- if (!a->mask)
- a->buckets = &a->one;
- if (!b->mask)
- b->buckets = &b->one;
+ if (a->buckets == b->two)
+ a->buckets = a->two;
+ if (b->buckets == a->two)
+ b->buckets = b->two;
}
/* Removes all of the elements from MAP, without destroying MAP itself and
{
size_t i;
- for (i = 0; i <= map->mask; i++)
+ for (i = 0; i <= SIZE_MAX >> map->shift; i++)
map->buckets[i] = NULL;
map->count = 0;
}
void
hmap_destroy (struct hmap *map)
{
- if (map != NULL && map->buckets != &map->one)
+ if (map != NULL && map->buckets != map->two)
free (map->buckets);
}
-/* Reallocates MAP's hash buckets so that NEW_MASK becomes the
- hash value bit-mask used to choose a hash bucket, then
+/* Inserts NODE into MAP with hash value HASH. If the insertion causes MAP's
+ current capacity, as reported by hmap_capacity(), to be exceeded, rehashes
+ MAP with an increased number of hash buckets.
+
+ This function runs in constant time amortized over all the insertions into
+ MAP.
+
+ This function does not verify that MAP does not already contain a data item
+ with the same value as NODE. If duplicates should be disallowed (which is
+ the usual case), then the client must check for duplicates itself before
+ inserting the new node. */
+void
+hmap_insert (struct hmap *map, struct hmap_node *node, size_t hash)
+{
+ hmap_insert_fast (map, node, hash);
+ if (map->count > hmap_capacity (map))
+ hmap_reserve (map, map->count);
+}
+
+/* Inserts NODE into MAP with hash value HASH. Does not check whether this
+ causes MAP's current capacity to be exceeded. The caller must take
+ responsibility for that (or use hmap_insert() instead).
+
+ This function runs in time linear in the number of nodes already in the
+ bucket. Given a good hash function, this yields constant time on average.
+
+ This function does not verify that MAP does not already contain a data item
+ with the same value as NODE. If duplicates should be disallowed (which is
+ the usual case), then the client must check for duplicates itself before
+ inserting the new node. */
+void
+hmap_insert_fast (struct hmap *map, struct hmap_node *node, size_t hash)
+{
+ struct hmap_node **bucket;
+
+ for (bucket = &map->buckets[hash >> map->shift]; *bucket != NULL;
+ bucket = &(*bucket)->next)
+ if ((*bucket)->hash > hash)
+ break;
+
+ node->hash = hash;
+ node->next = *bucket;
+ *bucket = node;
+ map->count++;
+}
+
+/* Reallocates MAP's hash buckets so that NEW_SHIFT becomes MAP->shift and
rehashes any data elements in MAP into the new hash buckets.
- NEW_MASK must be a power of 2 minus 1 (including 0), that is,
- its value in binary must be all 1-bits. */
+ NEW_SHIFT must be between 2 and HMAP_MAX_SHIFT, inclusive. */
static void
-hmap_rehash (struct hmap *map, size_t new_mask)
+hmap_rehash (struct hmap *map, size_t new_shift)
{
struct hmap_node **new_buckets;
struct hmap_node *node, *next;
- assert ((new_mask & (new_mask + 1)) == 0);
- if (new_mask)
- new_buckets = xcalloc (new_mask + 1, sizeof *new_buckets);
- else
+ assert (new_shift != map->shift);
+
+ if (new_shift < HMAP_MAX_SHIFT)
+ new_buckets = xcalloc ((SIZE_MAX >> new_shift) + 1, sizeof *new_buckets);
+ else
{
- new_buckets = &map->one;
- new_buckets[0] = NULL;
+ new_buckets = map->two;
+ new_buckets[0] = new_buckets[1] = NULL;
}
-
+
if (map->count > 0)
{
for (node = hmap_first (map); node != NULL; node = next)
{
- size_t new_idx = node->hash & new_mask;
- struct hmap_node **new_bucket = &new_buckets[new_idx];
+ struct hmap_node **bucket;
+
next = hmap_next (map, node);
- node->next = *new_bucket;
- *new_bucket = node;
- }
+ for (bucket = &new_buckets[node->hash >> new_shift]; *bucket != NULL;
+ bucket = &(*bucket)->next)
+ continue;
+ *bucket = node;
+ node->next = NULL;
+ }
}
- if (map->buckets != &map->one)
+ if (map->buckets != map->two)
free (map->buckets);
map->buckets = new_buckets;
- map->mask = new_mask;
+ map->shift = new_shift;
}
/* Ensures that MAP has sufficient space to store at least
hmap_reserve (struct hmap *map, size_t capacity)
{
if (capacity > hmap_capacity (map))
- hmap_rehash (map, capacity_to_mask (capacity));
+ hmap_rehash (map, capacity_to_shift (capacity));
}
/* Shrinks MAP's set of buckets to the minimum number needed to
void
hmap_shrink (struct hmap *map)
{
- size_t new_mask = capacity_to_mask (map->count);
- if (new_mask < map->mask)
- hmap_rehash (map, new_mask);
+ size_t new_shift = capacity_to_shift (map->count);
+ if (new_shift > map->shift)
+ hmap_rehash (map, new_shift);
}
/* Moves NODE around in MAP to compensate for its hash value
void
hmap_changed (struct hmap *map, struct hmap_node *node, size_t new_hash)
{
- if ((new_hash ^ node->hash) & map->mask)
+ if (new_hash != node->hash)
{
hmap_delete (map, node);
hmap_insert_fast (map, node, new_hash);
}
- else
- node->hash = new_hash;
}
/* Hash map nodes may be moved around in memory as necessary,
hmap_moved (struct hmap *map,
struct hmap_node *node, const struct hmap_node *old)
{
- struct hmap_node **p = &map->buckets[node->hash & map->mask];
+ struct hmap_node **p = &map->buckets[node->hash >> map->shift];
while (*p != old)
p = &(*p)->next;
*p = node;
}
\f
-/* Returns the minimum-value mask required to allow for a hash
- table capacity of at least CAPACITY. The return value will be
- a bit-mask suitable for use as the "mask" member of struct
- hmap, that is, a power of 2 minus 1 (including 0). */
-static size_t
-capacity_to_mask (size_t capacity)
+/* Returns the highest "shift" value that allow for a hash table capacity of at
+ least CAPACITY. The return value will be between 2 and HMAP_MAX_SHIFT,
+ inclusive. */
+static int
+capacity_to_shift (size_t capacity)
{
- /* Calculate the minimum mask necesary to support the given
- capacity. */
- size_t mask = 0;
- while (hmap_mask_to_capacity__ (mask) < capacity)
- mask = (mask << 1) | 1;
+ int shift;
- /* If the mask is nonzero, make it at least 3, because there is
- little point in allocating an array of just 2 pointers. */
- mask |= (mask & 1) << 1;
+ for (shift = HMAP_MAX_SHIFT; shift > 2; shift--)
+ if (hmap_shift_to_capacity__ (shift) >= capacity)
+ break;
- return mask;
+ return shift;
}
/* PSPP - a program for statistical analysis.
- Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <stdbool.h>
#include <stddef.h>
+#include <stdint.h>
#include "libpspp/cast.h"
/* Returns the data structure corresponding to the given NODE,
/* Hash table. */
struct hmap
{
- size_t count; /* Number of inserted nodes. */
- size_t mask; /* Number of buckets (power of 2), minus 1. */
struct hmap_node **buckets; /* Array of buckets. */
- struct hmap_node *one; /* One bucket, to eliminate corner cases. */
+ struct hmap_node *two[2]; /* Two buckets, to eliminate corner cases. */
+ size_t count; /* Number of inserted nodes. */
+ int shift; /* Bits to shift hash to get bucket index. */
};
+/* Maximum number of bits that a "size_t" value can be shifted in a single
+ operation without yielding undefined behavior. */
+#if SIZE_MAX == UINT32_MAX
+#define HMAP_MAX_SHIFT 31
+#elif SIZE_MAX == UINT64_MAX
+#define HMAP_MAX_SHIFT 63
+#else
+#error "size_t is not 32 or 64 bits"
+#endif
+
/* Suitable for use as the initializer for a struct hmap named
MAP. Typical usage:
struct hmap map = HMAP_INITIALIZER (map);
HMAP_INITIALIZER() is an alternative to hmap_init(). */
-#define HMAP_INITIALIZER(MAP) { 0, 0, &(MAP).one, NULL }
+#define HMAP_INITIALIZER(MAP) { (MAP).two, { NULL, NULL }, 0, HMAP_MAX_SHIFT }
/* Creation and destruction. */
void hmap_init (struct hmap *);
static inline struct hmap_node *hmap_next_with_hash (const struct hmap_node *);
/* Insertion and deletion. */
-static inline void hmap_insert (struct hmap *, struct hmap_node *,
- size_t hash);
-static inline void hmap_insert_fast (struct hmap *, struct hmap_node *,
- size_t hash);
+void hmap_insert (struct hmap *, struct hmap_node *, size_t hash);
+void hmap_insert_fast (struct hmap *, struct hmap_node *, size_t hash);
static inline void hmap_delete (struct hmap *, struct hmap_node *);
/* Iteration. */
static inline struct hmap_node *hmap_find_hash__ (struct hmap_node *, size_t);
static inline struct hmap_node *hmap_first_nonempty_bucket__ (
const struct hmap *, size_t start);
-static inline size_t hmap_mask_to_capacity__ (size_t mask);
+static inline size_t hmap_shift_to_capacity__ (int shift);
/* Returns the hash value associated with NODE. */
static inline size_t
constant value, its runtime degenerates to linear in the
length of NODE's hash chain.)
- Nodes are returned in arbitrary order that may change whenever
- the hash table's current capacity changes, as reported by
- hmap_capacity(). Calls to hmap_insert(), hmap_reserve(), and
- hmap_shrink() can change the capacity of a hash map.
- Inserting a node with hmap_insert_fast() or deleting one with
- hmap_delete() will not change the relative ordering of nodes.
+ Nodes are returned in arbitrary but consistent order.
The HMAP_FOR_EACH_WITH_HASH and HMAP_FOR_EACH_WITH_HASH_SAFE
macros provide convenient ways to iterate over all the nodes
static inline struct hmap_node *
hmap_first_with_hash (const struct hmap *map, size_t hash)
{
- return hmap_find_hash__ (map->buckets[hash & map->mask], hash);
+ return hmap_find_hash__ (map->buckets[hash >> map->shift], hash);
}
/* Returns the next node in MAP after NODE that has the same hash
constant value, its runtime degenerates to linear in the
length of NODE's hash chain.)
- Nodes are returned in arbitrary order that may change whenever
- the hash table's current capacity changes, as reported by
- hmap_capacity(). Calls to hmap_insert(), hmap_reserve(), and
- hmap_shrink() can change the capacity of a hash map.
- Inserting a node with hmap_insert_fast() or deleting one with
- hmap_delete() will not change the relative ordering of nodes.
+ Nodes are returned in arbitrary but consistent order.
The HMAP_FOR_EACH_WITH_HASH and HMAP_FOR_EACH_WITH_HASH_SAFE
macros provide convenient ways to iterate over all the nodes
return hmap_find_hash__ (node->next, node->hash);
}
-/* Inserts NODE into MAP with hash value HASH. If the insertion
- causes MAP's current capacity, as reported by hmap_capacity(),
- to be exceeded, rehashes MAP with an increased number of hash
- buckets.
-
- This function runs in constant time amortized over all the
- insertions into MAP.
-
- This function does not verify that MAP does not already
- contain a data item with the same value as NODE. If
- duplicates should be disallowed (which is the usual case),
- then the client must check for duplicates itself before
- inserting the new node. */
-static inline void
-hmap_insert (struct hmap *map, struct hmap_node *node, size_t hash)
-{
- hmap_insert_fast (map, node, hash);
- if (map->count > hmap_capacity (map))
- hmap_reserve (map, map->count);
-}
-
-/* Inserts NODE into MAP with hash value HASH. Does not check
- whether this causes MAP's current capacity to be exceeded.
- The caller must take responsibility for that (or use
- hmap_insert() instead).
-
- This function runs in constant time.
-
- This function does not verify that MAP does not already
- contain a data item with the same value as NODE. If
- duplicates should be disallowed (which is the usual case),
- then the client must check for duplicates itself before
- inserting the new node. */
-static inline void
-hmap_insert_fast (struct hmap *map, struct hmap_node *node, size_t hash)
-{
- struct hmap_node **bucket = &map->buckets[hash & map->mask];
- node->hash = hash;
- node->next = *bucket;
- *bucket = node;
- map->count++;
-}
-
/* Returns the first node in MAP in the bucket for HASH, or a null pointer if
that bucket in HASH is empty.
This function runs in constant time.
- Nodes are returned in arbitrary order that may change whenever the hash
- table's current capacity changes, as reported by hmap_capacity(). Calls to
- hmap_insert(), hmap_reserve(), and hmap_shrink() can change the capacity of
- a hash map. Inserting a node with hmap_insert_fast() or deleting one with
- hmap_delete() will not change the relative ordering of nodes.
+ Nodes in a bucket are sorted in increasing order of hash value. Nodes with
+ equal hashes are sorted in an arbitrary but consistent order.
The HMAP_FOR_EACH_IN_BUCKET and HMAP_FOR_EACH_IN_BUCKET_SAFE macros provide
convenient ways to iterate over all the nodes with a given hash. The
static inline struct hmap_node *
hmap_first_in_bucket (const struct hmap *map, size_t hash)
{
- return map->buckets[hash & map->mask];
+ return map->buckets[hash >> map->shift];
}
/* Returns the next node following NODE within the same bucket, or a null
This function runs in constant time.
- Nodes are returned in arbitrary order that may change whenever the hash
- table's current capacity changes, as reported by hmap_capacity(). Calls to
- hmap_insert(), hmap_reserve(), and hmap_shrink() can change the capacity of
- a hash map. Inserting a node with hmap_insert_fast() or deleting one with
- hmap_delete() will not change the relative ordering of nodes.
+ Nodes in a bucket are sorted in increasing order of hash value. Nodes with
+ equal hashes are sorted in an arbitrary but consistent order.
The HMAP_FOR_EACH_IN_BUCKET and HMAP_FOR_EACH_IN_BUCKET_SAFE macros provide
convenient ways to iterate over all the nodes with a given hash. The
static inline void
hmap_delete (struct hmap *map, struct hmap_node *node)
{
- struct hmap_node **bucket = &map->buckets[node->hash & map->mask];
+ struct hmap_node **bucket = &map->buckets[node->hash >> map->shift];
while (*bucket != node)
bucket = &(*bucket)->next;
*bucket = (*bucket)->next;
static inline struct hmap_node *
hmap_next (const struct hmap *map, const struct hmap_node *node)
{
- return (node->next != NULL
- ? node->next
- : hmap_first_nonempty_bucket__ (map, (node->hash & map->mask) + 1));
+ if (node->next != NULL)
+ return node->next;
+ else
+ {
+ size_t bucket_idx = node->hash >> map->shift;
+ return hmap_first_nonempty_bucket__ (map, bucket_idx + 1);
+ }
}
/* Returns true if MAP currently contains no data items, false
static inline size_t
hmap_capacity (const struct hmap *map)
{
- return hmap_mask_to_capacity__ (map->mask);
+ return hmap_shift_to_capacity__ (map->shift);
+}
+
+/* Returns the number of buckets in MAP. */
+static inline size_t
+hmap_n_buckets (const struct hmap *map)
+{
+ return (SIZE_MAX >> map->shift) + 1;
}
\f
/* Implementation details. */
/* Returns the first node at or after NODE in NODE's chain that
has hash value HASH. */
static inline struct hmap_node *
-hmap_find_hash__ (struct hmap_node *node, size_t hash)
+hmap_find_hash__ (struct hmap_node *node, size_t hash)
{
for (; node != NULL; node = node->next)
if (node->hash == hash)
{
size_t i;
- for (i = start; i <= map->mask; i++)
+ for (i = start; i <= SIZE_MAX >> map->shift; i++)
if (map->buckets[i] != NULL)
return map->buckets[i];
return NULL;
}
-/* Returns the hash table capacity associated with a given MASK,
- which should be a value for the "mask" member of struct hmap.
- MASK must be a power of 2 minus 1 (including 0), that is, its
- value in binary must be all 1-bits. */
+/* Returns the hash table capacity associated with a given SHIFT, which should
+ be a value for the "shift" member of struct hmap. SHIFT must be between 2
+ and HMAP_MAX_SHIFT, inclusive. */
static inline size_t
-hmap_mask_to_capacity__ (size_t mask)
+hmap_shift_to_capacity__ (int shift)
{
- return (mask + 1) * 2;
+ return ((size_t) 1 << HMAP_MAX_SHIFT) >> (shift - 2);
}
/* Helper for HMAP_NULLABLE_DATA (to avoid evaluating its NODE