1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Hash table with separate chaining.
19 This header (hmapx.h) supplies an "external" implementation of
20 a hash table that uses linked lists to resolve collisions
21 ("separate chaining"). Its companion header (hmap.h) supplies
22 a "embedded" implementation that is otherwise similar. The
23 two variants are described briefly here. The external
24 variant, for which this is the header, is described in
25 slightly more detail below. Each function also has a detailed
26 usage comment at its point of definition. (Many of those
27 definitions are inline in this file, because they are so
28 simple. Others are in hmapx.c.)
30 The "hmap" embedded hash table implementation puts the hash
31 table node (which includes the linked list used for resolving
32 collisions) within the data structure that the hash table
33 contains. This makes allocation efficient, in space and time,
34 because no additional call into an allocator is needed to
35 obtain memory for the hash table node. It also makes it easy
36 to find the hash table node associated with a given object.
37 However, it's difficult to include a given object in an
38 arbitrary number of hash tables.
40 The "hmapx" external hash table implementation allocates hash
41 table nodes separately from the objects in the hash table.
42 Inserting and removing hash table elements requires dynamic
43 allocation, so it is normally slower and takes more memory
44 than the embedded implementation. It also requires searching
45 the table to find the node associated with a given object.
46 However, it's easy to include a given object in an arbitrary
47 number of hash tables. It's also possible to create an
48 external hash table without adding a member to the data
49 structure that the hash table contains. */
51 #ifndef LIBPSPP_HMAPX_H
52 #define LIBPSPP_HMAPX_H 1
54 /* External hash table with separate chaining.
56 To create an external hash table, declare an instance of
57 struct hmapx, then initialize it with hmapx_init():
61 struct hmapx map = HMAPX_INITIALIZER (map);
63 An hmapx data structure contains data represented as void *.
64 The hmapx_insert() function inserts such a datum and returns
65 the address of a newly created struct hmapx_node that
66 represents the new element:
71 struct foo foo = {"key", "value"};
72 struct hmapx_node *node;
73 node = hmapx_insert (&map, &foo, hsh_hash_string (foo.key));
74 The element's hash value must be passed as one of
75 hmapx_insert()'s arguments. The hash table saves this hash
76 value for use later to speed searches and to rehash as the
79 hmapx_insert() does not check whether the newly inserted
80 element duplicates an element already in the hash table. The
81 client is responsible for doing so, if this is desirable.
83 Use hmapx_delete() to delete an element from the hash table,
84 passing in its hmapx_node:
85 hmapx_delete (&map, node);
86 Deleting an element frees its node.
88 The hash table does not provide a direct way to search for an
89 existing element. Instead, it provides the means to iterate
90 over all the elements in the hash table with a given hash
91 value. It is easy to compose a search function from such a
92 building block. For example:
94 find_node (const struct hmapx *map, const char *target)
96 struct hmapx_node *node;
98 HMAPX_FOR_EACH_WITH_HASH (foo, node, hsh_hash_string (target), map)
99 if (!strcmp (foo->key, target))
103 This function's client can extract the data item from the
104 returned hmapx_node using the hmapx_node_data() function. The
105 hmapx_node can also be useful directly as an argument to other
106 hmapx functions, such as hmapx_delete().
108 Here is how to iterate through the elements currently in the
110 struct hmapx_node *node;
112 HMAPX_FOR_EACH (data, node, &map)
114 ...do something with string...
118 #include <libpspp/hmap.h>
121 /* Hash table node. */
124 struct hmap_node hmap_node; /* Underlying hash node. */
125 void *data; /* User data. */
128 static inline void *hmapx_node_data (const struct hmapx_node *);
129 static inline size_t hmapx_node_hash (const struct hmapx_node *);
137 /* Suitable for use as the initializer for a struct hmapx named
139 struct hmap map = HMAPX_INITIALIZER (map);
140 HMAPX_INITIALIZER() is an alternative to hmapx_init(). */
141 #define HMAPX_INITIALIZER(MAP) { HMAP_INITIALIZER (MAP.hmap) }
143 /* Creation and destruction. */
144 static inline void hmapx_init (struct hmapx *);
145 static inline void hmapx_swap (struct hmapx *, struct hmapx *);
146 void hmapx_destroy (struct hmapx *);
148 /* Storage management. */
149 static inline void hmapx_reserve (struct hmapx *, size_t capacity);
150 static inline void hmapx_shrink (struct hmapx *);
153 static inline struct hmapx_node *hmapx_first_with_hash (struct hmapx *,
155 static inline struct hmapx_node *hmapx_next_with_hash (struct hmapx_node *);
157 /* Insertion and deletion. */
158 struct hmapx_node *hmapx_insert (struct hmapx *, void *, size_t hash);
159 struct hmapx_node *hmapx_insert_fast (struct hmapx *, void *, size_t hash);
160 static inline void hmapx_delete (struct hmapx *, struct hmapx_node *);
163 static inline struct hmapx_node *hmapx_first (const struct hmapx *);
164 static inline struct hmapx_node *hmapx_next (const struct hmapx *,
165 const struct hmapx_node *);
168 static inline size_t hmapx_count (const struct hmapx *);
169 static inline size_t hmapx_capacity (const struct hmapx *);
171 /* Updating data elements. */
172 static inline void hmapx_change (struct hmapx *,
173 struct hmapx_node *, void *, size_t new_hash);
174 static inline void hmapx_changed (struct hmapx *, struct hmapx_node *,
176 static inline void hmapx_move (struct hmapx_node *, void *);
178 /* Convenience macros for search.
180 These macros automatically use hmapx_node_data() to obtain the
181 data elements that encapsulate hmap nodes, which often saves
182 typing and can make code easier to read. Refer to the large
183 comment near the top of this file for an example.
185 These macros evaluate HASH only once. They evaluate their
186 other arguments many times. */
187 #define HMAPX_FOR_EACH_WITH_HASH(DATA, NODE, HASH, HMAPX) \
188 for ((NODE) = hmapx_first_with_hash (HMAPX, HASH); \
189 (NODE) != NULL ? ((DATA) = hmapx_node_data (NODE), 1) : 0; \
190 (NODE) = hmapx_next_with_hash (NODE))
191 #define HMAPX_FOR_EACH_WITH_HASH_SAFE(DATA, NODE, NEXT, HASH, HMAPX) \
192 for ((NODE) = hmapx_first_with_hash (HMAPX, HASH); \
194 ? ((DATA) = hmapx_node_data (NODE), \
195 (NEXT) = hmapx_next_with_hash (NODE), \
200 /* Convenience macros for iteration.
202 These macros automatically use hmapx_node_data() to obtain the
203 data elements that encapsulate hmap nodes, which often saves
204 typing and can make code easier to read. Refer to the large
205 comment near the top of this file for an example.
207 These macros evaluate their arguments many times. */
208 #define HMAPX_FOR_EACH(DATA, NODE, HMAPX) \
209 for ((NODE) = hmapx_first (HMAPX); \
210 (NODE) != NULL ? ((DATA) = hmapx_node_data (NODE), 1) : 0; \
211 (NODE) = hmapx_next (HMAPX, NODE))
212 #define HMAPX_FOR_EACH_SAFE(DATA, NODE, NEXT, HMAPX) \
213 for ((NODE) = hmapx_first (HMAPX); \
215 ? ((DATA) = hmapx_node_data (NODE), \
216 (NEXT) = hmapx_next (HMAPX, NODE), \
221 /* Inline definitions. */
223 /* Returns the data stored in NODE. */
225 hmapx_node_data (const struct hmapx_node *node)
230 /* Returns the hash value stored in NODE */
232 hmapx_node_hash (const struct hmapx_node *node)
234 return hmap_node_hash (&node->hmap_node);
237 /* Initializes MAP as a new hash map that is initially empty. */
239 hmapx_init (struct hmapx *map)
241 hmap_init (&map->hmap);
244 /* Exchanges the contents of hash maps A and B. */
246 hmapx_swap (struct hmapx *a, struct hmapx *b)
248 hmap_swap (&a->hmap, &b->hmap);
251 /* Ensures that MAP has sufficient space to store at least
252 CAPACITY data elements, allocating a new set of buckets and
253 rehashing if necessary. */
255 hmapx_reserve (struct hmapx *map, size_t capacity)
257 hmap_reserve (&map->hmap, capacity);
260 /* Shrinks MAP's set of buckets to the minimum number needed to
261 store its current number of elements, allocating a new set of
262 buckets and rehashing if that would save space. */
264 hmapx_shrink (struct hmapx *map)
266 hmap_shrink (&map->hmap);
269 /* Returns the first node in MAP that has hash value HASH, or a
270 null pointer if MAP does not contain any node with that hash
273 Assuming uniform hashing and no duplicate data items in MAP,
274 this function runs in constant time. (Amortized over an
275 iteration over all data items with a given HASH, its runtime
276 is proportional to the length of the hash chain for HASH, so
277 given a pathological hash function, e.g. one that returns a
278 constant value, its runtime degenerates to linear in the
279 length of NODE's hash chain.)
281 Nodes are returned in arbitrary order that may change whenever
282 the hash table's current capacity changes, as reported by
283 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
284 and hmapx_shrink() can change the capacity of a hash map.
285 Inserting a node with hmapx_insert_fast() or deleting one with
286 hmapx_delete() will not change the relative ordering of nodes.
288 The HMAPX_FOR_EACH_WITH_HASH and HMAPX_FOR_EACH_WITH_HASH_SAFE
289 macros provide convenient ways to iterate over all the nodes
290 with a given hash. */
291 static inline struct hmapx_node *
292 hmapx_first_with_hash (struct hmapx *map, size_t hash)
294 return HMAP_FIRST_WITH_HASH (struct hmapx_node, hmap_node, &map->hmap, hash);
297 /* Returns the next node in MAP after NODE that has the same hash
298 value as NODE, or a null pointer if MAP does not contain any
299 more nodes with that hash value.
301 Assuming uniform hashing and no duplicate data items in MAP,
302 this function runs in constant time. (Amortized over an
303 iteration over all data items with a given HASH, its runtime
304 is proportional to the length of the hash chain for HASH, so
305 given a pathological hash function, e.g. one that returns a
306 constant value, its runtime degenerates to linear in the
307 length of NODE's hash chain.)
309 Nodes are returned in arbitrary order that may change whenever
310 the hash table's current capacity changes, as reported by
311 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
312 and hmapx_shrink() can change the capacity of a hash map.
313 Inserting a node with hmapx_insert_fast() or deleting one with
314 hmapx_delete() will not change the relative ordering of nodes.
316 The HMAPX_FOR_EACH_WITH_HASH and HMAPX_FOR_EACH_WITH_HASH_SAFE
317 macros provide convenient ways to iterate over all the nodes
318 with a given hash. */
319 static inline struct hmapx_node *
320 hmapx_next_with_hash (struct hmapx_node *node)
322 return HMAP_NEXT_WITH_HASH (node, struct hmapx_node, hmap_node);
325 /* Removes NODE from MAP and frees NODE. The client is
326 responsible for freeing the user data associated with NODE, if
329 Assuming uniform hashing, this function runs in constant time.
330 (Its runtime is proportional to the position of NODE in its
331 hash chain, so given a pathological hash function, e.g. one
332 that returns a constant value, its runtime degenerates to
333 linear in the length of NODE's hash chain.)
335 This function never reduces the number of buckets in MAP.
336 When one deletes a large number of nodes from a hash table,
337 calling hmapx_shrink() afterward may therefore save a small
338 amount of memory. It is also more expensive to iterate
339 through a very sparse hash table than a denser one, so
340 shrinking the hash table could also save some time. However,
341 rehashing has an immediate cost that must be weighed against
344 hmapx_delete() does not change NODE's hash value reported by
345 hmapx_node_hash(). */
347 hmapx_delete (struct hmapx *map, struct hmapx_node *node)
349 hmap_delete (&map->hmap, &node->hmap_node);
353 /* Returns the first node in MAP, or a null pointer if MAP is
356 Amortized over iterating through every data element in MAP,
357 this function runs in constant time. However, this assumes
358 that MAP is not excessively sparse, that is, that
359 hmapx_capacity(MAP) is at most a constant factor greater than
360 hmapx_count(MAP). This will always be true unless many nodes
361 have been inserted into MAP and then most or all of them
362 deleted; in such a case, calling hmapx_shrink() is advised.
364 Nodes are returned in arbitrary order that may change whenever
365 the hash table's current capacity changes, as reported by
366 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
367 and hmapx_shrink() can change the capacity of a hash map.
368 Inserting a node with hmapx_insert_fast() or deleting one with
369 hmapx_delete() will not change the relative ordering of nodes.
371 The HMAPX_FOR_EACH and HMAPX_FOR_EACH_SAFE macros provide
372 convenient ways to iterate over all the nodes in a hash
374 static inline struct hmapx_node *
375 hmapx_first (const struct hmapx *map)
377 return HMAP_FIRST (struct hmapx_node, hmap_node, &map->hmap);
380 /* Returns the next node in MAP following NODE, or a null pointer
381 if NODE is the last node in MAP.
383 Amortized over iterating through every data element in MAP,
384 this function runs in constant time. However, this assumes
385 that MAP is not excessively sparse, that is, that
386 hmapx_capacity(MAP) is at most a constant factor greater than
387 hmapx_count(MAP). This will always be true unless many nodes
388 have been inserted into MAP and then most or all of them
389 deleted; in such a case, calling hmapx_shrink() is advised.
391 Nodes are returned in arbitrary order that may change whenever
392 the hash table's current capacity changes, as reported by
393 hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(),
394 and hmapx_shrink() can change the capacity of a hash map.
395 Inserting a node with hmapx_insert_fast() or deleting one with
396 hmapx_delete() will not change the relative ordering of nodes.
398 The HMAPX_FOR_EACH and HMAPX_FOR_EACH_SAFE macros provide
399 convenient ways to iterate over all the nodes in a hash
401 static inline struct hmapx_node *
402 hmapx_next (const struct hmapx *map, const struct hmapx_node *node)
404 return HMAP_NEXT (node, struct hmapx_node, hmap_node, &map->hmap);
407 /* Returns the number of data items currently in MAP. */
409 hmapx_count (const struct hmapx *map)
411 return hmap_count (&map->hmap);
414 /* Returns the current capacity of MAP, that is, the maximum
415 number of data elements that MAP may hold before it becomes
418 The capacity is advisory only: it is possible to insert any
419 number of data elements into a hash map regardless of its
420 capacity. However, inserting many more elements than the
421 map's capacity will degrade search performance. */
423 hmapx_capacity (const struct hmapx *map)
425 return hmap_capacity (&map->hmap);
428 /* Changes NODE's data to DATA and its hash value to NEW_HASH.
429 NODE must reside in MAP.
431 This function does not verify that MAP does not already
432 contain a data item that duplicates DATA. If duplicates
433 should be disallowed (which is the usual case), then the
434 client must check for duplicates before changing NODE's
437 hmapx_change (struct hmapx *map,
438 struct hmapx_node *node, void *data, size_t new_hash)
440 hmapx_move (node, data);
441 hmapx_changed (map, node, new_hash);
444 /* Moves NODE around in MAP to compensate for its hash value
445 having changed to NEW_HASH.
447 This function does not verify that MAP does not already
448 contain a data item that duplicates the new value of NODE's
449 data. If duplicates should be disallowed (which is the usual
450 case), then the client must check for duplicates before
451 changing NODE's value. */
453 hmapx_changed (struct hmapx *map, struct hmapx_node *node, size_t new_hash)
455 hmap_changed (&map->hmap, &node->hmap_node, new_hash);
458 /* Updates NODE to compensate for its data item having moved
459 around in memory to new location DATA. The data item's value
460 and hash value should not have changed. (If they have
461 changed, call hmapx_change() instead.) */
463 hmapx_move (struct hmapx_node *node, void *data)
468 #endif /* libpspp/hmapx.h */