1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "libpspp/hmap.h"
26 #include "gl/xalloc.h"
28 static int capacity_to_shift (size_t capacity);
30 /* Initializes MAP as a new hash map that is initially empty. */
32 hmap_init (struct hmap *map)
34 map->buckets = map->two;
35 map->two[0] = map->two[1] = NULL;
37 map->shift = HMAP_MAX_SHIFT;
40 /* Exchanges the contents of hash maps A and B. */
42 hmap_swap (struct hmap *a, struct hmap *b)
47 if (a->buckets == b->two)
49 if (b->buckets == a->two)
53 /* Removes all of the elements from MAP, without destroying MAP itself and
54 without accessing the existing elements (if any). */
56 hmap_clear (struct hmap *map)
60 for (i = 0; i <= SIZE_MAX >> map->shift; i++)
61 map->buckets[i] = NULL;
65 /* Frees the memory, if any, allocated by hash map MAP. This has
66 no effect on the actual data items in MAP, if any, because the
67 client is responsible for allocating and freeing them. It
68 could, however, render them inaccessible if the only pointers
69 to them were from MAP itself, so in such a situation one
70 should iterate through the map and free the data items before
73 hmap_destroy (struct hmap *map)
75 if (map != NULL && map->buckets != map->two)
79 /* Inserts NODE into MAP with hash value HASH. If the insertion causes MAP's
80 current capacity, as reported by hmap_capacity(), to be exceeded, rehashes
81 MAP with an increased number of hash buckets.
83 This function runs in constant time amortized over all the insertions into
86 This function does not verify that MAP does not already contain a data item
87 with the same value as NODE. If duplicates should be disallowed (which is
88 the usual case), then the client must check for duplicates itself before
89 inserting the new node. */
91 hmap_insert (struct hmap *map, struct hmap_node *node, size_t hash)
93 hmap_insert_fast (map, node, hash);
94 if (map->count > hmap_capacity (map))
95 hmap_reserve (map, map->count);
98 /* Inserts NODE into MAP with hash value HASH. Does not check whether this
99 causes MAP's current capacity to be exceeded. The caller must take
100 responsibility for that (or use hmap_insert() instead).
102 This function runs in time linear in the number of nodes already in the
103 bucket. Given a good hash function, this yields constant time on average.
105 This function does not verify that MAP does not already contain a data item
106 with the same value as NODE. If duplicates should be disallowed (which is
107 the usual case), then the client must check for duplicates itself before
108 inserting the new node. */
110 hmap_insert_fast (struct hmap *map, struct hmap_node *node, size_t hash)
112 struct hmap_node **bucket;
114 for (bucket = &map->buckets[hash >> map->shift]; *bucket != NULL;
115 bucket = &(*bucket)->next)
116 if ((*bucket)->hash > hash)
120 node->next = *bucket;
125 /* Reallocates MAP's hash buckets so that NEW_SHIFT becomes MAP->shift and
126 rehashes any data elements in MAP into the new hash buckets.
128 NEW_SHIFT must be between 2 and HMAP_MAX_SHIFT, inclusive. */
130 hmap_rehash (struct hmap *map, size_t new_shift)
132 struct hmap_node **new_buckets;
133 struct hmap_node *node, *next;
135 assert (new_shift != map->shift);
137 if (new_shift < HMAP_MAX_SHIFT)
138 new_buckets = xcalloc ((SIZE_MAX >> new_shift) + 1, sizeof *new_buckets);
141 new_buckets = map->two;
142 new_buckets[0] = new_buckets[1] = NULL;
147 for (node = hmap_first (map); node != NULL; node = next)
149 struct hmap_node **bucket;
151 next = hmap_next (map, node);
152 for (bucket = &new_buckets[node->hash >> new_shift]; *bucket != NULL;
153 bucket = &(*bucket)->next)
159 if (map->buckets != map->two)
161 map->buckets = new_buckets;
162 map->shift = new_shift;
165 /* Ensures that MAP has sufficient space to store at least
166 CAPACITY data elements, allocating a new set of buckets and
167 rehashing if necessary. */
169 hmap_reserve (struct hmap *map, size_t capacity)
171 if (capacity > hmap_capacity (map))
172 hmap_rehash (map, capacity_to_shift (capacity));
175 /* Shrinks MAP's set of buckets to the minimum number needed to
176 store its current number of elements, allocating a new set of
177 buckets and rehashing if that would save space. */
179 hmap_shrink (struct hmap *map)
181 size_t new_shift = capacity_to_shift (map->count);
182 if (new_shift > map->shift)
183 hmap_rehash (map, new_shift);
186 /* Moves NODE around in MAP to compensate for its hash value
187 having changed to NEW_HASH.
189 This function does not verify that MAP does not already
190 contain a data item that duplicates NODE's new value. If
191 duplicates should be disallowed (which is the usual case),
192 then the client must check for duplicates before changing
195 hmap_changed (struct hmap *map, struct hmap_node *node, size_t new_hash)
197 if (new_hash != node->hash)
199 hmap_delete (map, node);
200 hmap_insert_fast (map, node, new_hash);
204 /* Hash map nodes may be moved around in memory as necessary,
205 e.g. as the result of an realloc operation on a block that
206 contains a node. Once this is done, call this function
207 passing NODE that was moved, its former location in memory
208 OLD, and its hash map MAP before attempting any other
209 operation on MAP, NODE, or any other node in MAP.
211 It is not safe to move more than one node, then to call this
212 function for each node. Instead, move a single node, call
213 this function, move another node, and so on. Alternatively,
214 remove all affected nodes from the hash map, move them, then
215 re-insert all of them.
217 Assuming uniform hashing and no duplicate data items in MAP,
218 this function runs in constant time. */
220 hmap_moved (struct hmap *map,
221 struct hmap_node *node, const struct hmap_node *old)
223 struct hmap_node **p = &map->buckets[node->hash >> map->shift];
229 /* Returns the highest "shift" value that allow for a hash table capacity of at
230 least CAPACITY. The return value will be between 2 and HMAP_MAX_SHIFT,
233 capacity_to_shift (size_t capacity)
237 for (shift = HMAP_MAX_SHIFT; shift > 2; shift--)
238 if (hmap_shift_to_capacity__ (shift) >= capacity)