2 * Copyright (c) 2009, 2010, 2011 Nicira Networks.
3 * Distributed under the terms of the GNU GPL version 2.
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
13 #include <linux/genetlink.h>
14 #include <linux/gfp.h>
15 #include <linux/slab.h>
17 #include <asm/pgtable.h>
20 * struct tbl_bucket - single bucket within a hash table
21 * @rcu: RCU callback structure
22 * @n_objs: number of objects in @objs[] array
23 * @objs: array of @n_objs pointers to table nodes contained inside objects
25 * The expected number of objects per bucket is 1, but this allows for an
26 * arbitrary number of collisions.
31 struct tbl_node *objs[];
34 static struct tbl_bucket *get_bucket(struct tbl_bucket __rcu *bucket)
36 return rcu_dereference_check(bucket, rcu_read_lock_held() ||
37 lockdep_genl_is_held());
40 static struct tbl_bucket *get_bucket_protected(struct tbl_bucket __rcu *bucket)
42 return rcu_dereference_protected(bucket, lockdep_genl_is_held());
45 static inline int bucket_size(int n_objs)
47 return sizeof(struct tbl_bucket) + sizeof(struct tbl_node *) * n_objs;
50 static struct tbl_bucket *bucket_alloc(int n_objs)
52 return kmalloc(bucket_size(n_objs), GFP_KERNEL);
55 static void free_buckets(struct tbl_bucket __rcu ***l1,
56 unsigned int n_buckets,
57 void (*free_obj)(struct tbl_node *))
61 for (i = 0; i < n_buckets >> TBL_L1_SHIFT; i++) {
62 struct tbl_bucket __rcu **l2 = l1[i];
65 for (j = 0; j < TBL_L2_SIZE; j++) {
66 struct tbl_bucket *bucket = (struct tbl_bucket __force *)l2[j];
72 for (k = 0; k < bucket->n_objs; k++)
73 free_obj(bucket->objs[k]);
77 free_page((unsigned long)l2);
82 static struct tbl_bucket __rcu ***alloc_buckets(unsigned int n_buckets)
84 struct tbl_bucket __rcu ***l1;
87 l1 = kmalloc((n_buckets >> TBL_L1_SHIFT) * sizeof(struct tbl_bucket **),
91 for (i = 0; i < n_buckets >> TBL_L1_SHIFT; i++) {
92 l1[i] = (struct tbl_bucket __rcu **)get_zeroed_page(GFP_KERNEL);
94 free_buckets(l1, i << TBL_L1_SHIFT, NULL);
102 * tbl_create - create and return a new hash table
103 * @n_buckets: number of buckets in the new table
105 * Creates and returns a new hash table, or %NULL if memory cannot be
106 * allocated. @n_buckets must be a power of 2 in the range %TBL_MIN_BUCKETS to
109 struct tbl *tbl_create(unsigned int n_buckets)
113 table = kzalloc(sizeof(*table), GFP_KERNEL);
117 table->n_buckets = n_buckets;
118 table->buckets = alloc_buckets(n_buckets);
131 * tbl_destroy - destroy hash table and optionally the objects it contains
132 * @table: table to destroy
133 * @destructor: function to be called on objects at destruction time
135 * If a destructor is null, then the buckets in @table are destroyed
136 * but not the objects within those buckets. This behavior is useful when a
137 * table is being replaced by a larger or smaller one without destroying the
140 * If a destructor is not null, then it is called on the objects in @table
141 * before destroying the buckets.
143 void tbl_destroy(struct tbl *table, void (*destructor)(struct tbl_node *))
148 free_buckets(table->buckets, table->n_buckets, destructor);
152 static void destroy_table_rcu(struct rcu_head *rcu)
154 struct tbl *table = container_of(rcu, struct tbl, rcu);
155 tbl_destroy(table, table->obj_destructor);
159 * tbl_deferred_destroy - destroy table after a RCU grace period
160 * @table: table to destroy
161 * @destructor: function to be called on objects at destruction time
163 * Calls tbl_destroy() on @table after an RCU grace period. If @destructor is
164 * not null it is called on every element before the table is destroyed. */
165 void tbl_deferred_destroy(struct tbl *table, void (*destructor)(struct tbl_node *))
170 table->obj_destructor = destructor;
171 call_rcu(&table->rcu, destroy_table_rcu);
174 static struct tbl_bucket __rcu **find_bucket(struct tbl *table, u32 hash)
176 unsigned int l1 = (hash & (table->n_buckets - 1)) >> TBL_L1_SHIFT;
177 unsigned int l2 = hash & ((1 << TBL_L2_BITS) - 1);
178 return &table->buckets[l1][l2];
181 static int search_bucket(const struct tbl_bucket *bucket, void *target, int len, u32 hash,
182 int (*cmp)(const struct tbl_node *, void *, int len))
186 for (i = 0; i < bucket->n_objs; i++) {
187 struct tbl_node *obj = bucket->objs[i];
188 if (obj->hash == hash && likely(cmp(obj, target, len)))
196 * tbl_lookup - searches hash table for a matching object
197 * @table: hash table to search
198 * @target: identifier for the object that is being searched for, will be
199 * provided as an argument to @cmp when making comparisions
200 * @len: length of @target in bytes, will be provided as an argument to @cmp
201 * when making comparisons
202 * @hash: hash of @target
203 * @cmp: comparision function to match objects with the given hash, returns
204 * nonzero if the objects match, zero otherwise
206 * Searches @table for an object identified by @target. Returns the tbl_node
207 * contained in the object if successful, otherwise %NULL.
209 struct tbl_node *tbl_lookup(struct tbl *table, void *target, int len, u32 hash,
210 int (*cmp)(const struct tbl_node *, void *, int))
212 struct tbl_bucket __rcu **bucketp = find_bucket(table, hash);
213 struct tbl_bucket *bucket = get_bucket(*bucketp);
219 index = search_bucket(bucket, target, len, hash, cmp);
223 return bucket->objs[index];
227 * tbl_foreach - iterate through hash table
228 * @table: table to iterate
229 * @callback: function to call for each entry
230 * @aux: Extra data to pass to @callback
232 * Iterates through all of the objects in @table in hash order, passing each of
233 * them in turn to @callback. If @callback returns nonzero, this terminates
234 * the iteration and tbl_foreach() returns the same value. Returns 0 if
235 * @callback never returns nonzero.
237 * This function does not try to intelligently handle the case where @callback
238 * adds or removes flows in @table.
240 int tbl_foreach(struct tbl *table,
241 int (*callback)(struct tbl_node *, void *aux), void *aux)
243 unsigned int n_l1 = table->n_buckets >> TBL_L1_SHIFT;
246 for (l1_idx = 0; l1_idx < n_l1; l1_idx++) {
247 struct tbl_bucket __rcu **l2 = table->buckets[l1_idx];
250 for (l2_idx = 0; l2_idx < TBL_L2_SIZE; l2_idx++) {
251 struct tbl_bucket *bucket;
254 bucket = get_bucket(l2[l2_idx]);
258 for (i = 0; i < bucket->n_objs; i++) {
259 int error = (*callback)(bucket->objs[i], aux);
269 * tbl_next - find next node in hash table
270 * @table: table to iterate
271 * @bucketp: On entry, hash value of bucket to start from. On exit, updated
272 * to bucket to start from on next call.
273 * @objp: On entry, index to start from within first bucket. On exit, updated
274 * to index to start from on next call.
276 * Returns the next node in @table in hash order, or %NULL when no nodes remain
279 * On entry, uses the values that @bucketp and @objp reference to determine
280 * where to begin iteration. Use 0 for both values to begin a new iteration.
281 * On exit, stores the values to pass on the next iteration into @bucketp and
284 struct tbl_node *tbl_next(struct tbl *table, u32 *bucketp, u32 *objp)
286 unsigned int n_l1 = table->n_buckets >> TBL_L1_SHIFT;
287 u32 s_l1_idx = *bucketp >> TBL_L1_SHIFT;
288 u32 s_l2_idx = *bucketp & (TBL_L2_SIZE - 1);
292 for (l1_idx = s_l1_idx; l1_idx < n_l1; l1_idx++) {
293 struct tbl_bucket __rcu **l2 = table->buckets[l1_idx];
296 for (l2_idx = s_l2_idx; l2_idx < TBL_L2_SIZE; l2_idx++) {
297 struct tbl_bucket *bucket;
299 bucket = get_bucket_protected(l2[l2_idx]);
300 if (bucket && s_obj < bucket->n_objs) {
301 *bucketp = (l1_idx << TBL_L1_SHIFT) + l2_idx;
303 return bucket->objs[s_obj];
315 static int insert_table_flow(struct tbl_node *node, void *new_table_)
317 struct tbl *new_table = new_table_;
318 return tbl_insert(new_table, node, node->hash);
322 * tbl_expand - create a hash table with more buckets
323 * @table: table to expand
325 * Creates a new table containing the same objects as @table but with twice
326 * as many buckets. Returns 0 if successful, otherwise a negative error. The
327 * caller should free @table upon success (probably using
328 * tbl_deferred_destroy()).
330 struct tbl *tbl_expand(struct tbl *table)
333 int n_buckets = table->n_buckets * 2;
334 struct tbl *new_table;
336 if (n_buckets >= TBL_MAX_BUCKETS) {
342 new_table = tbl_create(n_buckets);
346 if (tbl_foreach(table, insert_table_flow, new_table))
347 goto error_free_new_table;
351 error_free_new_table:
352 tbl_destroy(new_table, NULL);
358 * tbl_n_buckets - returns the number of buckets
359 * @table: table to examine
361 * Returns the number of buckets currently allocated in @table, useful when
362 * deciding whether to expand.
364 int tbl_n_buckets(struct tbl *table)
366 return table->n_buckets;
369 static void free_bucket_rcu(struct rcu_head *rcu)
371 struct tbl_bucket *bucket = container_of(rcu, struct tbl_bucket, rcu);
376 * tbl_insert - insert object into table
377 * @table: table in which to insert object
378 * @target: tbl_node contained in object to insert
379 * @hash: hash of object to insert
381 * The caller must ensure that no object considered to be identical to @target
382 * already exists in @table. Returns 0 or a negative error (currently just
385 int tbl_insert(struct tbl *table, struct tbl_node *target, u32 hash)
387 struct tbl_bucket __rcu **oldp = find_bucket(table, hash);
388 struct tbl_bucket *old = get_bucket_protected(*oldp);
389 unsigned int n = old ? old->n_objs : 0;
390 struct tbl_bucket *new = bucket_alloc(n + 1);
399 memcpy(new->objs, old->objs, n * sizeof(struct tbl_node *));
400 new->objs[n] = target;
402 rcu_assign_pointer(*oldp, new);
404 call_rcu(&old->rcu, free_bucket_rcu);
412 * tbl_remove - remove object from table
413 * @table: table from which to remove object
414 * @target: tbl_node inside of object to remove
416 * The caller must ensure that @target itself is in @table. (It is not
417 * good enough for @table to contain a different object considered identical
420 * Returns 0 or a negative error (currently just -ENOMEM). Yes, it *is*
421 * possible for object deletion to fail due to lack of memory.
423 int tbl_remove(struct tbl *table, struct tbl_node *target)
425 struct tbl_bucket __rcu **oldp = find_bucket(table, target->hash);
426 struct tbl_bucket *old = get_bucket_protected(*oldp);
427 unsigned int n = old->n_objs;
428 struct tbl_bucket *new;
433 new = bucket_alloc(n - 1);
438 for (i = 0; i < n; i++) {
439 struct tbl_node *obj = old->objs[i];
441 new->objs[new->n_objs++] = obj;
443 WARN_ON_ONCE(new->n_objs != n - 1);
448 rcu_assign_pointer(*oldp, new);
449 call_rcu(&old->rcu, free_bucket_rcu);
457 * tbl_count - retrieves the number of stored objects
458 * @table: table to count
460 * Returns the number of objects that have been inserted into the hash table.
462 unsigned int tbl_count(struct tbl *table)