X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fhmap.h;fp=src%2Flibpspp%2Fhmap.h;h=89cb7d6827316afb24bde0eb2f8b83d3394a1935;hb=38492a3d6a1b4400357fb18aa17ecef9d9066cbb;hp=3f2858a8a8ca9f2482629f3991c0728167f12e69;hpb=b9a86e50c9e9631c98fea4b56732de92044e880b;p=pspp diff --git a/src/libpspp/hmap.h b/src/libpspp/hmap.h index 3f2858a8a8..89cb7d6827 100644 --- a/src/libpspp/hmap.h +++ b/src/libpspp/hmap.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -117,6 +117,7 @@ #include #include +#include #include "libpspp/cast.h" /* Returns the data structure corresponding to the given NODE, @@ -143,17 +144,27 @@ static inline size_t hmap_node_hash (const struct hmap_node *); /* Hash table. */ struct hmap { - size_t count; /* Number of inserted nodes. */ - size_t mask; /* Number of buckets (power of 2), minus 1. */ struct hmap_node **buckets; /* Array of buckets. */ - struct hmap_node *one; /* One bucket, to eliminate corner cases. */ + struct hmap_node *two[2]; /* Two buckets, to eliminate corner cases. */ + size_t count; /* Number of inserted nodes. */ + int shift; /* Bits to shift hash to get bucket index. */ }; +/* Maximum number of bits that a "size_t" value can be shifted in a single + operation without yielding undefined behavior. */ +#if SIZE_MAX == UINT32_MAX +#define HMAP_MAX_SHIFT 31 +#elif SIZE_MAX == UINT64_MAX +#define HMAP_MAX_SHIFT 63 +#else +#error "size_t is not 32 or 64 bits" +#endif + /* Suitable for use as the initializer for a struct hmap named MAP. Typical usage: struct hmap map = HMAP_INITIALIZER (map); HMAP_INITIALIZER() is an alternative to hmap_init(). */ -#define HMAP_INITIALIZER(MAP) { 0, 0, &(MAP).one, NULL } +#define HMAP_INITIALIZER(MAP) { (MAP).two, { NULL, NULL }, 0, HMAP_MAX_SHIFT } /* Creation and destruction. */ void hmap_init (struct hmap *); @@ -172,10 +183,8 @@ static inline struct hmap_node *hmap_first_with_hash (const struct hmap *, static inline struct hmap_node *hmap_next_with_hash (const struct hmap_node *); /* Insertion and deletion. */ -static inline void hmap_insert (struct hmap *, struct hmap_node *, - size_t hash); -static inline void hmap_insert_fast (struct hmap *, struct hmap_node *, - size_t hash); +void hmap_insert (struct hmap *, struct hmap_node *, size_t hash); +void hmap_insert_fast (struct hmap *, struct hmap_node *, size_t hash); static inline void hmap_delete (struct hmap *, struct hmap_node *); /* Iteration. */ @@ -269,7 +278,7 @@ void hmap_moved (struct hmap *, static inline struct hmap_node *hmap_find_hash__ (struct hmap_node *, size_t); static inline struct hmap_node *hmap_first_nonempty_bucket__ ( const struct hmap *, size_t start); -static inline size_t hmap_mask_to_capacity__ (size_t mask); +static inline size_t hmap_shift_to_capacity__ (int shift); /* Returns the hash value associated with NODE. */ static inline size_t @@ -290,12 +299,7 @@ hmap_node_hash (const struct hmap_node *node) constant value, its runtime degenerates to linear in the length of NODE's hash chain.) - Nodes are returned in arbitrary order that may change whenever - the hash table's current capacity changes, as reported by - hmap_capacity(). Calls to hmap_insert(), hmap_reserve(), and - hmap_shrink() can change the capacity of a hash map. - Inserting a node with hmap_insert_fast() or deleting one with - hmap_delete() will not change the relative ordering of nodes. + Nodes are returned in arbitrary but consistent order. The HMAP_FOR_EACH_WITH_HASH and HMAP_FOR_EACH_WITH_HASH_SAFE macros provide convenient ways to iterate over all the nodes @@ -305,7 +309,7 @@ hmap_node_hash (const struct hmap_node *node) static inline struct hmap_node * hmap_first_with_hash (const struct hmap *map, size_t hash) { - return hmap_find_hash__ (map->buckets[hash & map->mask], hash); + return hmap_find_hash__ (map->buckets[hash >> map->shift], hash); } /* Returns the next node in MAP after NODE that has the same hash @@ -320,12 +324,7 @@ hmap_first_with_hash (const struct hmap *map, size_t hash) constant value, its runtime degenerates to linear in the length of NODE's hash chain.) - Nodes are returned in arbitrary order that may change whenever - the hash table's current capacity changes, as reported by - hmap_capacity(). Calls to hmap_insert(), hmap_reserve(), and - hmap_shrink() can change the capacity of a hash map. - Inserting a node with hmap_insert_fast() or deleting one with - hmap_delete() will not change the relative ordering of nodes. + Nodes are returned in arbitrary but consistent order. The HMAP_FOR_EACH_WITH_HASH and HMAP_FOR_EACH_WITH_HASH_SAFE macros provide convenient ways to iterate over all the nodes @@ -338,59 +337,13 @@ hmap_next_with_hash (const struct hmap_node *node) return hmap_find_hash__ (node->next, node->hash); } -/* Inserts NODE into MAP with hash value HASH. If the insertion - causes MAP's current capacity, as reported by hmap_capacity(), - to be exceeded, rehashes MAP with an increased number of hash - buckets. - - This function runs in constant time amortized over all the - insertions into MAP. - - This function does not verify that MAP does not already - contain a data item with the same value as NODE. If - duplicates should be disallowed (which is the usual case), - then the client must check for duplicates itself before - inserting the new node. */ -static inline void -hmap_insert (struct hmap *map, struct hmap_node *node, size_t hash) -{ - hmap_insert_fast (map, node, hash); - if (map->count > hmap_capacity (map)) - hmap_reserve (map, map->count); -} - -/* Inserts NODE into MAP with hash value HASH. Does not check - whether this causes MAP's current capacity to be exceeded. - The caller must take responsibility for that (or use - hmap_insert() instead). - - This function runs in constant time. - - This function does not verify that MAP does not already - contain a data item with the same value as NODE. If - duplicates should be disallowed (which is the usual case), - then the client must check for duplicates itself before - inserting the new node. */ -static inline void -hmap_insert_fast (struct hmap *map, struct hmap_node *node, size_t hash) -{ - struct hmap_node **bucket = &map->buckets[hash & map->mask]; - node->hash = hash; - node->next = *bucket; - *bucket = node; - map->count++; -} - /* Returns the first node in MAP in the bucket for HASH, or a null pointer if that bucket in HASH is empty. This function runs in constant time. - Nodes are returned in arbitrary order that may change whenever the hash - table's current capacity changes, as reported by hmap_capacity(). Calls to - hmap_insert(), hmap_reserve(), and hmap_shrink() can change the capacity of - a hash map. Inserting a node with hmap_insert_fast() or deleting one with - hmap_delete() will not change the relative ordering of nodes. + Nodes in a bucket are sorted in increasing order of hash value. Nodes with + equal hashes are sorted in an arbitrary but consistent order. The HMAP_FOR_EACH_IN_BUCKET and HMAP_FOR_EACH_IN_BUCKET_SAFE macros provide convenient ways to iterate over all the nodes with a given hash. The @@ -399,7 +352,7 @@ hmap_insert_fast (struct hmap *map, struct hmap_node *node, size_t hash) static inline struct hmap_node * hmap_first_in_bucket (const struct hmap *map, size_t hash) { - return map->buckets[hash & map->mask]; + return map->buckets[hash >> map->shift]; } /* Returns the next node following NODE within the same bucket, or a null @@ -407,11 +360,8 @@ hmap_first_in_bucket (const struct hmap *map, size_t hash) This function runs in constant time. - Nodes are returned in arbitrary order that may change whenever the hash - table's current capacity changes, as reported by hmap_capacity(). Calls to - hmap_insert(), hmap_reserve(), and hmap_shrink() can change the capacity of - a hash map. Inserting a node with hmap_insert_fast() or deleting one with - hmap_delete() will not change the relative ordering of nodes. + Nodes in a bucket are sorted in increasing order of hash value. Nodes with + equal hashes are sorted in an arbitrary but consistent order. The HMAP_FOR_EACH_IN_BUCKET and HMAP_FOR_EACH_IN_BUCKET_SAFE macros provide convenient ways to iterate over all the nodes with a given hash. The @@ -446,7 +396,7 @@ hmap_next_in_bucket (const struct hmap_node *node) static inline void hmap_delete (struct hmap *map, struct hmap_node *node) { - struct hmap_node **bucket = &map->buckets[node->hash & map->mask]; + struct hmap_node **bucket = &map->buckets[node->hash >> map->shift]; while (*bucket != node) bucket = &(*bucket)->next; *bucket = (*bucket)->next; @@ -506,9 +456,13 @@ hmap_first (const struct hmap *map) static inline struct hmap_node * hmap_next (const struct hmap *map, const struct hmap_node *node) { - return (node->next != NULL - ? node->next - : hmap_first_nonempty_bucket__ (map, (node->hash & map->mask) + 1)); + if (node->next != NULL) + return node->next; + else + { + size_t bucket_idx = node->hash >> map->shift; + return hmap_first_nonempty_bucket__ (map, bucket_idx + 1); + } } /* Returns true if MAP currently contains no data items, false @@ -537,7 +491,14 @@ hmap_count (const struct hmap *map) static inline size_t hmap_capacity (const struct hmap *map) { - return hmap_mask_to_capacity__ (map->mask); + return hmap_shift_to_capacity__ (map->shift); +} + +/* Returns the number of buckets in MAP. */ +static inline size_t +hmap_n_buckets (const struct hmap *map) +{ + return (SIZE_MAX >> map->shift) + 1; } /* Implementation details. */ @@ -545,7 +506,7 @@ hmap_capacity (const struct hmap *map) /* Returns the first node at or after NODE in NODE's chain that has hash value HASH. */ static inline struct hmap_node * -hmap_find_hash__ (struct hmap_node *node, size_t hash) +hmap_find_hash__ (struct hmap_node *node, size_t hash) { for (; node != NULL; node = node->next) if (node->hash == hash) @@ -561,20 +522,19 @@ hmap_first_nonempty_bucket__ (const struct hmap *map, size_t start) { size_t i; - for (i = start; i <= map->mask; i++) + for (i = start; i <= SIZE_MAX >> map->shift; i++) if (map->buckets[i] != NULL) return map->buckets[i]; return NULL; } -/* Returns the hash table capacity associated with a given MASK, - which should be a value for the "mask" member of struct hmap. - MASK must be a power of 2 minus 1 (including 0), that is, its - value in binary must be all 1-bits. */ +/* Returns the hash table capacity associated with a given SHIFT, which should + be a value for the "shift" member of struct hmap. SHIFT must be between 2 + and HMAP_MAX_SHIFT, inclusive. */ static inline size_t -hmap_mask_to_capacity__ (size_t mask) +hmap_shift_to_capacity__ (int shift) { - return (mask + 1) * 2; + return ((size_t) 1 << HMAP_MAX_SHIFT) >> (shift - 2); } /* Helper for HMAP_NULLABLE_DATA (to avoid evaluating its NODE