1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2008, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Hash table with separate chaining.
19 This header (hmapx.h) supplies an "external" implementation of
20 a hash table that uses linked lists to resolve collisions
21 ("separate chaining"). Its companion header (hmap.h) supplies
22 a "embedded" implementation that is otherwise similar. The
23 two variants are described briefly here. The external
24 variant, for which this is the header, is described in
25 slightly more detail below. Each function also has a detailed
26 usage comment at its point of definition. (Many of those
27 definitions are inline in this file, because they are so
28 simple. Others are in hmapx.c.)
30 The "hmap" embedded hash table implementation puts the hash
31 table node (which includes the linked list used for resolving
32 collisions) within the data structure that the hash table
33 contains. This makes allocation efficient, in space and time,
34 because no additional call into an allocator is needed to
35 obtain memory for the hash table node. It also makes it easy
36 to find the hash table node associated with a given object.
37 However, it's difficult to include a given object in an
38 arbitrary number of hash tables.
40 The "hmapx" external hash table implementation allocates hash
41 table nodes separately from the objects in the hash table.
42 Inserting and removing hash table elements requires dynamic
43 allocation, so it is normally slower and takes more memory
44 than the embedded implementation. It also requires searching
45 the table to find the node associated with a given object.
46 However, it's easy to include a given object in an arbitrary
47 number of hash tables. It's also possible to create an
48 external hash table without adding a member to the data
49 structure that the hash table contains. */
51 #ifndef LIBPSPP_HMAPX_H
52 #define LIBPSPP_HMAPX_H 1
54 /* External hash table with separate chaining.
56 To create an external hash table, declare an instance of
57 struct hmapx, then initialize it with hmapx_init():
61 struct hmapx map = HMAPX_INITIALIZER (map);
63 An hmapx data structure contains data represented as void *.
64 The hmapx_insert() function inserts such a datum and returns
65 the address of a newly created struct hmapx_node that
66 represents the new element:
71 struct foo foo = {"key", "value"};
72 struct hmapx_node *node;
73 node = hmapx_insert (&map, &foo, hsh_hash_string (foo.key));
74 The element's hash value must be passed as one of
75 hmapx_insert()'s arguments. The hash table saves this hash
76 value for use later to speed searches and to rehash as the
79 hmapx_insert() does not check whether the newly inserted
80 element duplicates an element already in the hash table. The
81 client is responsible for doing so, if this is desirable.
83 Use hmapx_delete() to delete an element from the hash table,
84 passing in its hmapx_node:
85 hmapx_delete (&map, node);
86 Deleting an element frees its node.
88 The hash table does not provide a direct way to search for an
89 existing element. Instead, it provides the means to iterate
90 over all the elements in the hash table with a given hash
91 value. It is easy to compose a search function from such a
92 building block. For example:
94 find_node (const struct hmapx *map, const char *target)
96 struct hmapx_node *node;
98 HMAPX_FOR_EACH_WITH_HASH (foo, node, hsh_hash_string (target), map)
99 if (!strcmp (foo->key, target))
103 This function's client can extract the data item from the
104 returned hmapx_node using the hmapx_node_data() function. The
105 hmapx_node can also be useful directly as an argument to other
106 hmapx functions, such as hmapx_delete().
108 Here is how to iterate through the elements currently in the
110 struct hmapx_node *node;
112 HMAPX_FOR_EACH (data, node, &map)
114 ...do something with string...
118 #include <libpspp/hmap.h>
121 /* Hash table node. */
124 struct hmap_node hmap_node; /* Underlying hash node. */
125 void *data; /* User data. */
128 static inline void *hmapx_node_data (const struct hmapx_node *);
129 static inline size_t hmapx_node_hash (const struct hmapx_node *);
137 /* Suitable for use as the initializer for a struct hmapx named
139 struct hmap map = HMAPX_INITIALIZER (map);
140 HMAPX_INITIALIZER() is an alternative to hmapx_init(). */
141 #define HMAPX_INITIALIZER(MAP) { HMAP_INITIALIZER (MAP.hmap) }
143 /* Creation and destruction. */
144 static inline void hmapx_init (struct hmapx *);
145 static inline void hmapx_swap (struct hmapx *, struct hmapx *);
146 void hmapx_destroy (struct hmapx *);
148 /* Storage management. */
149 static inline void hmapx_reserve (struct hmapx *, size_t capacity);
150 static inline void hmapx_shrink (struct hmapx *);
153 static inline struct hmapx_node *hmapx_first_with_hash (struct hmapx *,
155 static inline struct hmapx_node *hmapx_next_with_hash (struct hmapx_node *);
157 /* Insertion and deletion. */
158 struct hmapx_node *hmapx_insert (struct hmapx *, void *, size_t hash);
159 struct hmapx_node *hmapx_insert_fast (struct hmapx *, void *, size_t hash);
160 static inline void hmapx_delete (struct hmapx *, struct hmapx_node *);
163 static inline struct hmapx_node *hmapx_first (const struct hmapx *);
164 static inline struct hmapx_node *hmapx_next (const struct hmapx *,
165 const struct hmapx_node *);
168 static inline bool hmapx_is_empty (const struct hmapx *);
169 static inline size_t hmapx_count (const struct hmapx *);
170 static inline size_t hmapx_capacity (const struct hmapx *);
172 /* Updating data elements. */
173 static inline void hmapx_change (struct hmapx *,
174 struct hmapx_node *, void *, size_t new_hash);
175 static inline void hmapx_changed (struct hmapx *, struct hmapx_node *,
177 static inline void hmapx_move (struct hmapx_node *, void *);
179 /* Convenience macros for search.
181 These macros automatically use hmapx_node_data() to obtain the
182 data elements that encapsulate hmap nodes, which often saves
183 typing and can make code easier to read. Refer to the large
184 comment near the top of this file for an example.
186 These macros evaluate HASH only once. They evaluate their
187 other arguments many times. */
188 #define HMAPX_FOR_EACH_WITH_HASH(DATA, NODE, HASH, HMAPX) \
189 for ((NODE) = hmapx_first_with_hash (HMAPX, HASH); \
190 (NODE) != NULL ? ((DATA) = hmapx_node_data (NODE), 1) : 0; \
191 (NODE) = hmapx_next_with_hash (NODE))
192 #define HMAPX_FOR_EACH_WITH_HASH_SAFE(DATA, NODE, NEXT, HASH, HMAPX) \
193 for ((NODE) = hmapx_first_with_hash (HMAPX, HASH); \
195 ? ((DATA) = hmapx_node_data (NODE), \
196 (NEXT) = hmapx_next_with_hash (NODE), \
201 /* Convenience macros for iteration.
203 These macros automatically use hmapx_node_data() to obtain the
204 data elements that encapsulate hmap nodes, which often saves
205 typing and can make code easier to read. Refer to the large
206 comment near the top of this file for an example.
208 These macros evaluate their arguments many times. */
209 #define HMAPX_FOR_EACH(DATA, NODE, HMAPX) \
210 for ((NODE) = hmapx_first (HMAPX); \
211 (NODE) != NULL ? ((DATA) = hmapx_node_data (NODE), 1) : 0; \
212 (NODE) = hmapx_next (HMAPX, NODE))
213 #define HMAPX_FOR_EACH_SAFE(DATA, NODE, NEXT, HMAPX) \
214 for ((NODE) = hmapx_first (HMAPX); \
216 ? ((DATA) = hmapx_node_data (NODE), \
217 (NEXT) = hmapx_next (HMAPX, NODE), \
222 /* Inline definitions. */
224 /* Returns the data stored in NODE. */
226 hmapx_node_data (const struct hmapx_node *node)
231 /* Returns the hash value stored in NODE */
233 hmapx_node_hash (const struct hmapx_node *node)
235 return hmap_node_hash (&node->hmap_node);
238 /* Initializes MAP as a new hash map that is initially empty. */
240 hmapx_init (struct hmapx *map)
242 hmap_init (&map->hmap);
245 /* Exchanges the contents of hash maps A and B. */
247 hmapx_swap (struct hmapx *a, struct hmapx *b)
249 hmap_swap (&a->hmap, &b->hmap);
252 /* Ensures that MAP has sufficient space to store at least
253 CAPACITY data elements, allocating a new set of buckets and
254 rehashing if necessary. */
256 hmapx_reserve (struct hmapx *map, size_t capacity)
258 hmap_reserve (&map->hmap, capacity);
261 /* Shrinks MAP's set of buckets to the minimum number needed to
262 store its current number of elements, allocating a new set of
263 buckets and rehashing if that would save space. */
265 hmapx_shrink (struct hmapx *map)
267 hmap_shrink (&map->hmap);
270 /* Returns the first node in MAP that has hash value HASH, or a
271 null pointer if MAP does not contain any node with that hash
274 Assuming uniform hashing and no duplicate data items in MAP,
275 this function runs in constant time. (Amortized over an
276 iteration over all data items with a given HASH, its runtime
277 is proportional to the length of the hash chain for HASH, so
278 given a pathological hash function, e.g. one that returns a
279 constant value, its runtime degenerates to linear in the
280 length of NODE's hash chain.)
282 Nodes are returned in arbitrary order that may change whenever
283 the hash table's current capacity changes, as reported by
284 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
285 and hmapx_shrink() can change the capacity of a hash map.
286 Inserting a node with hmapx_insert_fast() or deleting one with
287 hmapx_delete() will not change the relative ordering of nodes.
289 The HMAPX_FOR_EACH_WITH_HASH and HMAPX_FOR_EACH_WITH_HASH_SAFE
290 macros provide convenient ways to iterate over all the nodes
291 with a given hash. */
292 static inline struct hmapx_node *
293 hmapx_first_with_hash (struct hmapx *map, size_t hash)
295 return HMAP_FIRST_WITH_HASH (struct hmapx_node, hmap_node, &map->hmap, hash);
298 /* Returns the next node in MAP after NODE that has the same hash
299 value as NODE, or a null pointer if MAP does not contain any
300 more nodes with that hash value.
302 Assuming uniform hashing and no duplicate data items in MAP,
303 this function runs in constant time. (Amortized over an
304 iteration over all data items with a given HASH, its runtime
305 is proportional to the length of the hash chain for HASH, so
306 given a pathological hash function, e.g. one that returns a
307 constant value, its runtime degenerates to linear in the
308 length of NODE's hash chain.)
310 Nodes are returned in arbitrary order that may change whenever
311 the hash table's current capacity changes, as reported by
312 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
313 and hmapx_shrink() can change the capacity of a hash map.
314 Inserting a node with hmapx_insert_fast() or deleting one with
315 hmapx_delete() will not change the relative ordering of nodes.
317 The HMAPX_FOR_EACH_WITH_HASH and HMAPX_FOR_EACH_WITH_HASH_SAFE
318 macros provide convenient ways to iterate over all the nodes
319 with a given hash. */
320 static inline struct hmapx_node *
321 hmapx_next_with_hash (struct hmapx_node *node)
323 return HMAP_NEXT_WITH_HASH (node, struct hmapx_node, hmap_node);
326 /* Removes NODE from MAP and frees NODE. The client is
327 responsible for freeing the user data associated with NODE, if
330 Assuming uniform hashing, this function runs in constant time.
331 (Its runtime is proportional to the position of NODE in its
332 hash chain, so given a pathological hash function, e.g. one
333 that returns a constant value, its runtime degenerates to
334 linear in the length of NODE's hash chain.)
336 This function never reduces the number of buckets in MAP.
337 When one deletes a large number of nodes from a hash table,
338 calling hmapx_shrink() afterward may therefore save a small
339 amount of memory. It is also more expensive to iterate
340 through a very sparse hash table than a denser one, so
341 shrinking the hash table could also save some time. However,
342 rehashing has an immediate cost that must be weighed against
345 hmapx_delete() does not change NODE's hash value reported by
346 hmapx_node_hash(). */
348 hmapx_delete (struct hmapx *map, struct hmapx_node *node)
350 hmap_delete (&map->hmap, &node->hmap_node);
354 /* Returns the first node in MAP, or a null pointer if MAP is
357 Amortized over iterating through every data element in MAP,
358 this function runs in constant time. However, this assumes
359 that MAP is not excessively sparse, that is, that
360 hmapx_capacity(MAP) is at most a constant factor greater than
361 hmapx_count(MAP). This will always be true unless many nodes
362 have been inserted into MAP and then most or all of them
363 deleted; in such a case, calling hmapx_shrink() is advised.
365 Nodes are returned in arbitrary order that may change whenever
366 the hash table's current capacity changes, as reported by
367 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
368 and hmapx_shrink() can change the capacity of a hash map.
369 Inserting a node with hmapx_insert_fast() or deleting one with
370 hmapx_delete() will not change the relative ordering of nodes.
372 The HMAPX_FOR_EACH and HMAPX_FOR_EACH_SAFE macros provide
373 convenient ways to iterate over all the nodes in a hash
375 static inline struct hmapx_node *
376 hmapx_first (const struct hmapx *map)
378 return HMAP_FIRST (struct hmapx_node, hmap_node, &map->hmap);
381 /* Returns the next node in MAP following NODE, or a null pointer
382 if NODE is the last node in MAP.
384 Amortized over iterating through every data element in MAP,
385 this function runs in constant time. However, this assumes
386 that MAP is not excessively sparse, that is, that
387 hmapx_capacity(MAP) is at most a constant factor greater than
388 hmapx_count(MAP). This will always be true unless many nodes
389 have been inserted into MAP and then most or all of them
390 deleted; in such a case, calling hmapx_shrink() is advised.
392 Nodes are returned in arbitrary order that may change whenever
393 the hash table's current capacity changes, as reported by
394 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
395 and hmapx_shrink() can change the capacity of a hash map.
396 Inserting a node with hmapx_insert_fast() or deleting one with
397 hmapx_delete() will not change the relative ordering of nodes.
399 The HMAPX_FOR_EACH and HMAPX_FOR_EACH_SAFE macros provide
400 convenient ways to iterate over all the nodes in a hash
402 static inline struct hmapx_node *
403 hmapx_next (const struct hmapx *map, const struct hmapx_node *node)
405 return HMAP_NEXT (node, struct hmapx_node, hmap_node, &map->hmap);
408 /* Returns true if MAP currently contains no data items, false
411 hmapx_is_empty (const struct hmapx *map)
413 return hmap_is_empty (&map->hmap);
416 /* Returns the number of data items currently in MAP. */
418 hmapx_count (const struct hmapx *map)
420 return hmap_count (&map->hmap);
423 /* Returns the current capacity of MAP, that is, the maximum
424 number of data elements that MAP may hold before it becomes
427 The capacity is advisory only: it is possible to insert any
428 number of data elements into a hash map regardless of its
429 capacity. However, inserting many more elements than the
430 map's capacity will degrade search performance. */
432 hmapx_capacity (const struct hmapx *map)
434 return hmap_capacity (&map->hmap);
437 /* Changes NODE's data to DATA and its hash value to NEW_HASH.
438 NODE must reside in MAP.
440 This function does not verify that MAP does not already
441 contain a data item that duplicates DATA. If duplicates
442 should be disallowed (which is the usual case), then the
443 client must check for duplicates before changing NODE's
446 hmapx_change (struct hmapx *map,
447 struct hmapx_node *node, void *data, size_t new_hash)
449 hmapx_move (node, data);
450 hmapx_changed (map, node, new_hash);
453 /* Moves NODE around in MAP to compensate for its hash value
454 having changed to NEW_HASH.
456 This function does not verify that MAP does not already
457 contain a data item that duplicates the new value of NODE's
458 data. If duplicates should be disallowed (which is the usual
459 case), then the client must check for duplicates before
460 changing NODE's value. */
462 hmapx_changed (struct hmapx *map, struct hmapx_node *node, size_t new_hash)
464 hmap_changed (&map->hmap, &node->hmap_node, new_hash);
467 /* Updates NODE to compensate for its data item having moved
468 around in memory to new location DATA. The data item's value
469 and hash value should not have changed. (If they have
470 changed, call hmapx_change() instead.) */
472 hmapx_move (struct hmapx_node *node, void *data)
477 #endif /* libpspp/hmapx.h */