/* PSPP - a program for statistical analysis.
- Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <stdbool.h>
#include <stddef.h>
+#include <stdint.h>
#include "libpspp/cast.h"
/* Returns the data structure corresponding to the given NODE,
/* Hash table. */
struct hmap
{
- size_t count; /* Number of inserted nodes. */
- size_t mask; /* Number of buckets (power of 2), minus 1. */
struct hmap_node **buckets; /* Array of buckets. */
- struct hmap_node *one; /* One bucket, to eliminate corner cases. */
+ struct hmap_node *two[2]; /* Two buckets, to eliminate corner cases. */
+ size_t count; /* Number of inserted nodes. */
+ int shift; /* Bits to shift hash to get bucket index. */
};
+/* Maximum number of bits that a "size_t" value can be shifted in a single
+ operation without yielding undefined behavior. */
+#if SIZE_MAX == UINT32_MAX
+#define HMAP_MAX_SHIFT 31
+#elif SIZE_MAX == UINT64_MAX
+#define HMAP_MAX_SHIFT 63
+#else
+#error "size_t is not 32 or 64 bits"
+#endif
+
/* Suitable for use as the initializer for a struct hmap named
MAP. Typical usage:
struct hmap map = HMAP_INITIALIZER (map);
HMAP_INITIALIZER() is an alternative to hmap_init(). */
-#define HMAP_INITIALIZER(MAP) { 0, 0, &(MAP).one, NULL }
+#define HMAP_INITIALIZER(MAP) { (MAP).two, { NULL, NULL }, 0, HMAP_MAX_SHIFT }
/* Creation and destruction. */
void hmap_init (struct hmap *);
static inline struct hmap_node *hmap_next_with_hash (const struct hmap_node *);
/* Insertion and deletion. */
-static inline void hmap_insert (struct hmap *, struct hmap_node *,
- size_t hash);
-static inline void hmap_insert_fast (struct hmap *, struct hmap_node *,
- size_t hash);
+void hmap_insert (struct hmap *, struct hmap_node *, size_t hash);
+void hmap_insert_fast (struct hmap *, struct hmap_node *, size_t hash);
static inline void hmap_delete (struct hmap *, struct hmap_node *);
/* Iteration. */
static inline struct hmap_node *hmap_find_hash__ (struct hmap_node *, size_t);
static inline struct hmap_node *hmap_first_nonempty_bucket__ (
const struct hmap *, size_t start);
-static inline size_t hmap_mask_to_capacity__ (size_t mask);
+static inline size_t hmap_shift_to_capacity__ (int shift);
/* Returns the hash value associated with NODE. */
static inline size_t
constant value, its runtime degenerates to linear in the
length of NODE's hash chain.)
- Nodes are returned in arbitrary order that may change whenever
- the hash table's current capacity changes, as reported by
- hmap_capacity(). Calls to hmap_insert(), hmap_reserve(), and
- hmap_shrink() can change the capacity of a hash map.
- Inserting a node with hmap_insert_fast() or deleting one with
- hmap_delete() will not change the relative ordering of nodes.
+ Nodes are returned in arbitrary but consistent order.
The HMAP_FOR_EACH_WITH_HASH and HMAP_FOR_EACH_WITH_HASH_SAFE
macros provide convenient ways to iterate over all the nodes
static inline struct hmap_node *
hmap_first_with_hash (const struct hmap *map, size_t hash)
{
- return hmap_find_hash__ (map->buckets[hash & map->mask], hash);
+ return hmap_find_hash__ (map->buckets[hash >> map->shift], hash);
}
/* Returns the next node in MAP after NODE that has the same hash
constant value, its runtime degenerates to linear in the
length of NODE's hash chain.)
- Nodes are returned in arbitrary order that may change whenever
- the hash table's current capacity changes, as reported by
- hmap_capacity(). Calls to hmap_insert(), hmap_reserve(), and
- hmap_shrink() can change the capacity of a hash map.
- Inserting a node with hmap_insert_fast() or deleting one with
- hmap_delete() will not change the relative ordering of nodes.
+ Nodes are returned in arbitrary but consistent order.
The HMAP_FOR_EACH_WITH_HASH and HMAP_FOR_EACH_WITH_HASH_SAFE
macros provide convenient ways to iterate over all the nodes
return hmap_find_hash__ (node->next, node->hash);
}
-/* Inserts NODE into MAP with hash value HASH. If the insertion
- causes MAP's current capacity, as reported by hmap_capacity(),
- to be exceeded, rehashes MAP with an increased number of hash
- buckets.
-
- This function runs in constant time amortized over all the
- insertions into MAP.
-
- This function does not verify that MAP does not already
- contain a data item with the same value as NODE. If
- duplicates should be disallowed (which is the usual case),
- then the client must check for duplicates itself before
- inserting the new node. */
-static inline void
-hmap_insert (struct hmap *map, struct hmap_node *node, size_t hash)
-{
- hmap_insert_fast (map, node, hash);
- if (map->count > hmap_capacity (map))
- hmap_reserve (map, map->count);
-}
-
-/* Inserts NODE into MAP with hash value HASH. Does not check
- whether this causes MAP's current capacity to be exceeded.
- The caller must take responsibility for that (or use
- hmap_insert() instead).
-
- This function runs in constant time.
-
- This function does not verify that MAP does not already
- contain a data item with the same value as NODE. If
- duplicates should be disallowed (which is the usual case),
- then the client must check for duplicates itself before
- inserting the new node. */
-static inline void
-hmap_insert_fast (struct hmap *map, struct hmap_node *node, size_t hash)
-{
- struct hmap_node **bucket = &map->buckets[hash & map->mask];
- node->hash = hash;
- node->next = *bucket;
- *bucket = node;
- map->count++;
-}
-
/* Returns the first node in MAP in the bucket for HASH, or a null pointer if
that bucket in HASH is empty.
This function runs in constant time.
- Nodes are returned in arbitrary order that may change whenever the hash
- table's current capacity changes, as reported by hmap_capacity(). Calls to
- hmap_insert(), hmap_reserve(), and hmap_shrink() can change the capacity of
- a hash map. Inserting a node with hmap_insert_fast() or deleting one with
- hmap_delete() will not change the relative ordering of nodes.
+ Nodes in a bucket are sorted in increasing order of hash value. Nodes with
+ equal hashes are sorted in an arbitrary but consistent order.
The HMAP_FOR_EACH_IN_BUCKET and HMAP_FOR_EACH_IN_BUCKET_SAFE macros provide
convenient ways to iterate over all the nodes with a given hash. The
static inline struct hmap_node *
hmap_first_in_bucket (const struct hmap *map, size_t hash)
{
- return map->buckets[hash & map->mask];
+ return map->buckets[hash >> map->shift];
}
/* Returns the next node following NODE within the same bucket, or a null
This function runs in constant time.
- Nodes are returned in arbitrary order that may change whenever the hash
- table's current capacity changes, as reported by hmap_capacity(). Calls to
- hmap_insert(), hmap_reserve(), and hmap_shrink() can change the capacity of
- a hash map. Inserting a node with hmap_insert_fast() or deleting one with
- hmap_delete() will not change the relative ordering of nodes.
+ Nodes in a bucket are sorted in increasing order of hash value. Nodes with
+ equal hashes are sorted in an arbitrary but consistent order.
The HMAP_FOR_EACH_IN_BUCKET and HMAP_FOR_EACH_IN_BUCKET_SAFE macros provide
convenient ways to iterate over all the nodes with a given hash. The
static inline void
hmap_delete (struct hmap *map, struct hmap_node *node)
{
- struct hmap_node **bucket = &map->buckets[node->hash & map->mask];
+ struct hmap_node **bucket = &map->buckets[node->hash >> map->shift];
while (*bucket != node)
bucket = &(*bucket)->next;
*bucket = (*bucket)->next;
static inline struct hmap_node *
hmap_next (const struct hmap *map, const struct hmap_node *node)
{
- return (node->next != NULL
- ? node->next
- : hmap_first_nonempty_bucket__ (map, (node->hash & map->mask) + 1));
+ if (node->next != NULL)
+ return node->next;
+ else
+ {
+ size_t bucket_idx = node->hash >> map->shift;
+ return hmap_first_nonempty_bucket__ (map, bucket_idx + 1);
+ }
}
/* Returns true if MAP currently contains no data items, false
static inline size_t
hmap_capacity (const struct hmap *map)
{
- return hmap_mask_to_capacity__ (map->mask);
+ return hmap_shift_to_capacity__ (map->shift);
+}
+
+/* Returns the number of buckets in MAP. */
+static inline size_t
+hmap_n_buckets (const struct hmap *map)
+{
+ return (SIZE_MAX >> map->shift) + 1;
}
\f
/* Implementation details. */
/* Returns the first node at or after NODE in NODE's chain that
has hash value HASH. */
static inline struct hmap_node *
-hmap_find_hash__ (struct hmap_node *node, size_t hash)
+hmap_find_hash__ (struct hmap_node *node, size_t hash)
{
for (; node != NULL; node = node->next)
if (node->hash == hash)
{
size_t i;
- for (i = start; i <= map->mask; i++)
+ for (i = start; i <= SIZE_MAX >> map->shift; i++)
if (map->buckets[i] != NULL)
return map->buckets[i];
return NULL;
}
-/* Returns the hash table capacity associated with a given MASK,
- which should be a value for the "mask" member of struct hmap.
- MASK must be a power of 2 minus 1 (including 0), that is, its
- value in binary must be all 1-bits. */
+/* Returns the hash table capacity associated with a given SHIFT, which should
+ be a value for the "shift" member of struct hmap. SHIFT must be between 2
+ and HMAP_MAX_SHIFT, inclusive. */
static inline size_t
-hmap_mask_to_capacity__ (size_t mask)
+hmap_shift_to_capacity__ (int shift)
{
- return (mask + 1) * 2;
+ return ((size_t) 1 << HMAP_MAX_SHIFT) >> (shift - 2);
}
/* Helper for HMAP_NULLABLE_DATA (to avoid evaluating its NODE