1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Hash table with separate chaining.
19 This header (hmapx.h) supplies an "external" implementation of
20 a hash table that uses linked lists to resolve collisions
21 ("separate chaining"). Its companion header (hmap.h) supplies
22 a "embedded" implementation that is otherwise similar. The
23 two variants are described briefly here. The external
24 variant, for which this is the header, is described in
25 slightly more detail below. Each function also has a detailed
26 usage comment at its point of definition. (Many of those
27 definitions are inline in this file, because they are so
28 simple. Others are in hmapx.c.)
30 The "hmap" embedded hash table implementation puts the hash
31 table node (which includes the linked list used for resolving
32 collisions) within the data structure that the hash table
33 contains. This makes allocation efficient, in space and time,
34 because no additional call into an allocator is needed to
35 obtain memory for the hash table node. It also makes it easy
36 to find the hash table node associated with a given object.
37 However, it's difficult to include a given object in an
38 arbitrary number of hash tables.
40 The "hmapx" external hash table implementation allocates hash
41 table nodes separately from the objects in the hash table.
42 Inserting and removing hash table elements requires dynamic
43 allocation, so it is normally slower and takes more memory
44 than the embedded implementation. It also requires searching
45 the table to find the node associated with a given object.
46 However, it's easy to include a given object in an arbitrary
47 number of hash tables. It's also possible to create an
48 external hash table without adding a member to the data
49 structure that the hash table contains. */
51 #ifndef LIBPSPP_HMAPX_H
52 #define LIBPSPP_HMAPX_H 1
54 /* External hash table with separate chaining.
56 To create an external hash table, declare an instance of
57 struct hmapx, then initialize it with hmapx_init():
61 struct hmapx map = HMAPX_INITIALIZER (map);
63 An hmapx data structure contains data represented as void *.
64 The hmapx_insert() function inserts such a datum and returns
65 the address of a newly created struct hmapx_node that
66 represents the new element:
71 struct foo foo = {"key", "value"};
72 struct hmapx_node *node;
73 node = hmapx_insert (&map, &foo, hsh_hash_string (foo.key));
74 The element's hash value must be passed as one of
75 hmapx_insert()'s arguments. The hash table saves this hash
76 value for use later to speed searches and to rehash as the
79 hmapx_insert() does not check whether the newly inserted
80 element duplicates an element already in the hash table. The
81 client is responsible for doing so, if this is desirable.
83 Use hmapx_delete() to delete an element from the hash table,
84 passing in its hmapx_node:
85 hmapx_delete (&map, node);
86 Deleting an element frees its node.
88 The hash table does not provide a direct way to search for an
89 existing element. Instead, it provides the means to iterate
90 over all the elements in the hash table with a given hash
91 value. It is easy to compose a search function from such a
92 building block. For example:
94 find_node (const struct hmapx *map, const char *target)
96 struct hmapx_node *node;
98 HMAPX_FOR_EACH_WITH_HASH (foo, node, hsh_hash_string (target), map)
99 if (!strcmp (foo->key, target))
103 This function's client can extract the data item from the
104 returned hmapx_node using the hmapx_node_data() function. The
105 hmapx_node can also be useful directly as an argument to other
106 hmapx functions, such as hmapx_delete().
108 Here is how to iterate through the elements currently in the
110 struct hmapx_node *node;
112 HMAPX_FOR_EACH (data, node, &map)
114 ...do something with string...
118 #include <libpspp/hmap.h>
121 /* Hash table node. */
124 struct hmap_node hmap_node; /* Underlying hash node. */
125 void *data; /* User data. */
128 static inline void *hmapx_node_data (const struct hmapx_node *);
129 static inline size_t hmapx_node_hash (const struct hmapx_node *);
137 /* Suitable for use as the initializer for a struct hmapx named
139 struct hmap map = HMAPX_INITIALIZER (map);
140 HMAPX_INITIALIZER() is an alternative to hmapx_init(). */
141 #define HMAPX_INITIALIZER(MAP) { HMAP_INITIALIZER (MAP.hmap) }
143 /* Creation and destruction. */
144 static inline void hmapx_init (struct hmapx *);
145 static inline void hmapx_swap (struct hmapx *, struct hmapx *);
146 void hmapx_clear (struct hmapx *);
147 void hmapx_destroy (struct hmapx *);
149 /* Storage management. */
150 static inline void hmapx_reserve (struct hmapx *, size_t capacity);
151 static inline void hmapx_shrink (struct hmapx *);
154 static inline struct hmapx_node *hmapx_first_with_hash (struct hmapx *,
156 static inline struct hmapx_node *hmapx_next_with_hash (struct hmapx_node *);
158 /* Insertion and deletion. */
159 struct hmapx_node *hmapx_insert (struct hmapx *, void *, size_t hash);
160 struct hmapx_node *hmapx_insert_fast (struct hmapx *, void *, size_t hash);
161 static inline void hmapx_delete (struct hmapx *, struct hmapx_node *);
164 static inline struct hmapx_node *hmapx_first (const struct hmapx *);
165 static inline struct hmapx_node *hmapx_next (const struct hmapx *,
166 const struct hmapx_node *);
169 static inline bool hmapx_is_empty (const struct hmapx *);
170 static inline size_t hmapx_count (const struct hmapx *);
171 static inline size_t hmapx_capacity (const struct hmapx *);
173 /* Updating data elements. */
174 static inline void hmapx_change (struct hmapx *,
175 struct hmapx_node *, void *, size_t new_hash);
176 static inline void hmapx_changed (struct hmapx *, struct hmapx_node *,
178 static inline void hmapx_move (struct hmapx_node *, void *);
180 /* Convenience macros for search.
182 These macros automatically use hmapx_node_data() to obtain the
183 data elements that encapsulate hmap nodes, which often saves
184 typing and can make code easier to read. Refer to the large
185 comment near the top of this file for an example.
187 These macros evaluate HASH only once. They evaluate their
188 other arguments many times. */
189 #define HMAPX_FOR_EACH_WITH_HASH(DATA, NODE, HASH, HMAPX) \
190 for ((NODE) = hmapx_first_with_hash (HMAPX, HASH); \
191 (NODE) != NULL ? ((DATA) = hmapx_node_data (NODE), 1) : 0; \
192 (NODE) = hmapx_next_with_hash (NODE))
193 #define HMAPX_FOR_EACH_WITH_HASH_SAFE(DATA, NODE, NEXT, HASH, HMAPX) \
194 for ((NODE) = hmapx_first_with_hash (HMAPX, HASH); \
196 ? ((DATA) = hmapx_node_data (NODE), \
197 (NEXT) = hmapx_next_with_hash (NODE), \
202 /* Convenience macros for iteration.
204 These macros automatically use hmapx_node_data() to obtain the
205 data elements that encapsulate hmap nodes, which often saves
206 typing and can make code easier to read. Refer to the large
207 comment near the top of this file for an example.
209 These macros evaluate their arguments many times. */
210 #define HMAPX_FOR_EACH(DATA, NODE, HMAPX) \
211 for ((NODE) = hmapx_first (HMAPX); \
212 (NODE) != NULL ? ((DATA) = hmapx_node_data (NODE), 1) : 0; \
213 (NODE) = hmapx_next (HMAPX, NODE))
214 #define HMAPX_FOR_EACH_SAFE(DATA, NODE, NEXT, HMAPX) \
215 for ((NODE) = hmapx_first (HMAPX); \
217 ? ((DATA) = hmapx_node_data (NODE), \
218 (NEXT) = hmapx_next (HMAPX, NODE), \
223 /* Inline definitions. */
225 /* Returns the data stored in NODE. */
227 hmapx_node_data (const struct hmapx_node *node)
232 /* Returns the hash value stored in NODE */
234 hmapx_node_hash (const struct hmapx_node *node)
236 return hmap_node_hash (&node->hmap_node);
239 /* Initializes MAP as a new hash map that is initially empty. */
241 hmapx_init (struct hmapx *map)
243 hmap_init (&map->hmap);
246 /* Exchanges the contents of hash maps A and B. */
248 hmapx_swap (struct hmapx *a, struct hmapx *b)
250 hmap_swap (&a->hmap, &b->hmap);
253 /* Ensures that MAP has sufficient space to store at least
254 CAPACITY data elements, allocating a new set of buckets and
255 rehashing if necessary. */
257 hmapx_reserve (struct hmapx *map, size_t capacity)
259 hmap_reserve (&map->hmap, capacity);
262 /* Shrinks MAP's set of buckets to the minimum number needed to
263 store its current number of elements, allocating a new set of
264 buckets and rehashing if that would save space. */
266 hmapx_shrink (struct hmapx *map)
268 hmap_shrink (&map->hmap);
271 /* Returns the first node in MAP that has hash value HASH, or a
272 null pointer if MAP does not contain any node with that hash
275 Assuming uniform hashing and no duplicate data items in MAP,
276 this function runs in constant time. (Amortized over an
277 iteration over all data items with a given HASH, its runtime
278 is proportional to the length of the hash chain for HASH, so
279 given a pathological hash function, e.g. one that returns a
280 constant value, its runtime degenerates to linear in the
281 length of NODE's hash chain.)
283 Nodes are returned in arbitrary order that may change whenever
284 the hash table's current capacity changes, as reported by
285 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
286 and hmapx_shrink() can change the capacity of a hash map.
287 Inserting a node with hmapx_insert_fast() or deleting one with
288 hmapx_delete() will not change the relative ordering of nodes.
290 The HMAPX_FOR_EACH_WITH_HASH and HMAPX_FOR_EACH_WITH_HASH_SAFE
291 macros provide convenient ways to iterate over all the nodes
292 with a given hash. */
293 static inline struct hmapx_node *
294 hmapx_first_with_hash (struct hmapx *map, size_t hash)
296 return HMAP_FIRST_WITH_HASH (struct hmapx_node, hmap_node, &map->hmap, hash);
299 /* Returns the next node in MAP after NODE that has the same hash
300 value as NODE, or a null pointer if MAP does not contain any
301 more nodes with that hash value.
303 Assuming uniform hashing and no duplicate data items in MAP,
304 this function runs in constant time. (Amortized over an
305 iteration over all data items with a given HASH, its runtime
306 is proportional to the length of the hash chain for HASH, so
307 given a pathological hash function, e.g. one that returns a
308 constant value, its runtime degenerates to linear in the
309 length of NODE's hash chain.)
311 Nodes are returned in arbitrary order that may change whenever
312 the hash table's current capacity changes, as reported by
313 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
314 and hmapx_shrink() can change the capacity of a hash map.
315 Inserting a node with hmapx_insert_fast() or deleting one with
316 hmapx_delete() will not change the relative ordering of nodes.
318 The HMAPX_FOR_EACH_WITH_HASH and HMAPX_FOR_EACH_WITH_HASH_SAFE
319 macros provide convenient ways to iterate over all the nodes
320 with a given hash. */
321 static inline struct hmapx_node *
322 hmapx_next_with_hash (struct hmapx_node *node)
324 return HMAP_NEXT_WITH_HASH (node, struct hmapx_node, hmap_node);
327 /* Removes NODE from MAP and frees NODE. The client is
328 responsible for freeing the user data associated with NODE, if
331 Assuming uniform hashing, this function runs in constant time.
332 (Its runtime is proportional to the position of NODE in its
333 hash chain, so given a pathological hash function, e.g. one
334 that returns a constant value, its runtime degenerates to
335 linear in the length of NODE's hash chain.)
337 This function never reduces the number of buckets in MAP.
338 When one deletes a large number of nodes from a hash table,
339 calling hmapx_shrink() afterward may therefore save a small
340 amount of memory. It is also more expensive to iterate
341 through a very sparse hash table than a denser one, so
342 shrinking the hash table could also save some time. However,
343 rehashing has an immediate cost that must be weighed against
346 hmapx_delete() does not change NODE's hash value reported by
347 hmapx_node_hash(). */
349 hmapx_delete (struct hmapx *map, struct hmapx_node *node)
351 hmap_delete (&map->hmap, &node->hmap_node);
355 /* Returns the first node in MAP, or a null pointer if MAP is
358 Amortized over iterating through every data element in MAP,
359 this function runs in constant time. However, this assumes
360 that MAP is not excessively sparse, that is, that
361 hmapx_capacity(MAP) is at most a constant factor greater than
362 hmapx_count(MAP). This will always be true unless many nodes
363 have been inserted into MAP and then most or all of them
364 deleted; in such a case, calling hmapx_shrink() is advised.
366 Nodes are returned in arbitrary order that may change whenever
367 the hash table's current capacity changes, as reported by
368 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
369 and hmapx_shrink() can change the capacity of a hash map.
370 Inserting a node with hmapx_insert_fast() or deleting one with
371 hmapx_delete() will not change the relative ordering of nodes.
373 The HMAPX_FOR_EACH and HMAPX_FOR_EACH_SAFE macros provide
374 convenient ways to iterate over all the nodes in a hash
376 static inline struct hmapx_node *
377 hmapx_first (const struct hmapx *map)
379 return HMAP_FIRST (struct hmapx_node, hmap_node, &map->hmap);
382 /* Returns the next node in MAP following NODE, or a null pointer
383 if NODE is the last node in MAP.
385 Amortized over iterating through every data element in MAP,
386 this function runs in constant time. However, this assumes
387 that MAP is not excessively sparse, that is, that
388 hmapx_capacity(MAP) is at most a constant factor greater than
389 hmapx_count(MAP). This will always be true unless many nodes
390 have been inserted into MAP and then most or all of them
391 deleted; in such a case, calling hmapx_shrink() is advised.
393 Nodes are returned in arbitrary order that may change whenever
394 the hash table's current capacity changes, as reported by
395 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
396 and hmapx_shrink() can change the capacity of a hash map.
397 Inserting a node with hmapx_insert_fast() or deleting one with
398 hmapx_delete() will not change the relative ordering of nodes.
400 The HMAPX_FOR_EACH and HMAPX_FOR_EACH_SAFE macros provide
401 convenient ways to iterate over all the nodes in a hash
403 static inline struct hmapx_node *
404 hmapx_next (const struct hmapx *map, const struct hmapx_node *node)
406 return HMAP_NEXT (node, struct hmapx_node, hmap_node, &map->hmap);
409 /* Returns true if MAP currently contains no data items, false
412 hmapx_is_empty (const struct hmapx *map)
414 return hmap_is_empty (&map->hmap);
417 /* Returns the number of data items currently in MAP. */
419 hmapx_count (const struct hmapx *map)
421 return hmap_count (&map->hmap);
424 /* Returns the current capacity of MAP, that is, the maximum
425 number of data elements that MAP may hold before it becomes
428 The capacity is advisory only: it is possible to insert any
429 number of data elements into a hash map regardless of its
430 capacity. However, inserting many more elements than the
431 map's capacity will degrade search performance. */
433 hmapx_capacity (const struct hmapx *map)
435 return hmap_capacity (&map->hmap);
438 /* Changes NODE's data to DATA and its hash value to NEW_HASH.
439 NODE must reside in MAP.
441 This function does not verify that MAP does not already
442 contain a data item that duplicates DATA. If duplicates
443 should be disallowed (which is the usual case), then the
444 client must check for duplicates before changing NODE's
447 hmapx_change (struct hmapx *map,
448 struct hmapx_node *node, void *data, size_t new_hash)
450 hmapx_move (node, data);
451 hmapx_changed (map, node, new_hash);
454 /* Moves NODE around in MAP to compensate for its hash value
455 having changed to NEW_HASH.
457 This function does not verify that MAP does not already
458 contain a data item that duplicates the new value of NODE's
459 data. If duplicates should be disallowed (which is the usual
460 case), then the client must check for duplicates before
461 changing NODE's value. */
463 hmapx_changed (struct hmapx *map, struct hmapx_node *node, size_t new_hash)
465 hmap_changed (&map->hmap, &node->hmap_node, new_hash);
468 /* Updates NODE to compensate for its data item having moved
469 around in memory to new location DATA. The data item's value
470 and hash value should not have changed. (If they have
471 changed, call hmapx_change() instead.) */
473 hmapx_move (struct hmapx_node *node, void *data)
478 #endif /* libpspp/hmapx.h */