Get rid of unnecessary synchronization in tables.
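
Readers and writers on these tables now coordinate through RCU rather than per-table locking: bucket updates are published with rcu_assign_pointer(), removed flows are retired through flow_deferred_free(), and the expiration path serializes under dp_mutex.  A minimal reader-side sketch, assuming lookups run inside an RCU read-side critical section and using a hypothetical use_flow() consumer (the calling code is not part of this file):

    static void lookup_example(struct sw_table *swt, const struct sw_flow_key *key)
    {
            struct sw_flow *flow;

            rcu_read_lock();
            flow = swt->lookup(swt, key);   /* e.g. table_hash_lookup() */
            if (flow)
                    use_flow(flow);         /* hypothetical consumer; 'flow' remains
                                             * valid until rcu_read_unlock() because
                                             * deletion goes through
                                             * flow_deferred_free() */
            rcu_read_unlock();
    }
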
[openvswitch] / datapath / table-hash.c
/*
 * Distributed under the terms of the GNU GPL version 2.
 * Copyright (c) 2007, 2008 The Board of Trustees of The Leland
 * Stanford Junior University
 */

#include "table.h"
#include "crc32.h"
#include "flow.h"
#include "datapath.h"

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <asm/pgtable.h>

static void *kmem_alloc(size_t);
static void *kmem_zalloc(size_t);
static void kmem_free(void *, size_t);

struct sw_table_hash {
        struct sw_table swt;
        struct crc32 crc32;
        atomic_t n_flows;
        unsigned int bucket_mask; /* Number of buckets minus 1. */
        struct sw_flow **buckets;
};

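/* Returns a pointer to the bucket that 'key' hashes into. */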
static struct sw_flow **find_bucket(struct sw_table *swt,
                                    const struct sw_flow_key *key)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        unsigned int crc = crc32_calculate(&th->crc32, key, sizeof *key);
        return &th->buckets[crc & th->bucket_mask];
}

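/* Returns the flow whose key is identical to 'key', or NULL if there is
 * none.  (Each bucket holds at most one flow.) */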
static struct sw_flow *table_hash_lookup(struct sw_table *swt,
                                         const struct sw_flow_key *key)
{
        struct sw_flow *flow = *find_bucket(swt, key);
        return flow && !memcmp(&flow->key, key, sizeof *key) ? flow : NULL;
}

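/* Inserts 'flow', which must be an exact-match entry, replacing any existing
 * flow with an identical key.  Returns 1 if the flow was inserted, 0 if it
 * was not (wildcarded key, a hash collision with a different flow, or an
 * existing flow that could not be deleted). */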
static int table_hash_insert(struct sw_table *swt, struct sw_flow *flow)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        struct sw_flow **bucket;
        int retval;

        if (flow->key.wildcards != 0)
                return 0;

        bucket = find_bucket(swt, &flow->key);
        if (*bucket == NULL) {
                atomic_inc(&th->n_flows);
                rcu_assign_pointer(*bucket, flow);
                retval = 1;
        } else {
                struct sw_flow *old_flow = *bucket;
                if (!memcmp(&old_flow->key, &flow->key, sizeof flow->key)
                    && flow_del(old_flow)) {
                        rcu_assign_pointer(*bucket, flow);
                        flow_deferred_free(old_flow);
                        retval = 1;
                } else {
                        retval = 0;
                }
        }
        return retval;
}

/* Caller must update n_flows. */
static int do_delete(struct sw_flow **bucket, struct sw_flow *flow)
{
        if (flow_del(flow)) {
                rcu_assign_pointer(*bucket, NULL);
                flow_deferred_free(flow);
                return 1;
        }
        return 0;
}

/* Returns number of deleted flows.  We can ignore the priority
 * argument, since all exact-match entries are the same (highest)
 * priority. */
static int table_hash_delete(struct sw_table *swt,
                             const struct sw_flow_key *key,
                             uint16_t priority, int strict)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        unsigned int count = 0;

        if (key->wildcards == 0) {
                struct sw_flow **bucket = find_bucket(swt, key);
                struct sw_flow *flow = *bucket;
                if (flow && !memcmp(&flow->key, key, sizeof *key))
                        count = do_delete(bucket, flow);
        } else {
                unsigned int i;

                for (i = 0; i <= th->bucket_mask; i++) {
                        struct sw_flow **bucket = &th->buckets[i];
                        struct sw_flow *flow = *bucket;
                        if (flow && flow_del_matches(&flow->key, key, strict))
                                count += do_delete(bucket, flow);
                }
        }
        if (count)
                atomic_sub(count, &th->n_flows);
        return count;
}

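/* Expires flows that have timed out, notifying the datapath for each expired
 * flow when OFPC_SEND_FLOW_EXP is set.  Returns the number of flows
 * deleted. */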
static int table_hash_timeout(struct datapath *dp, struct sw_table *swt)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        unsigned int i;
        int count = 0;

        mutex_lock(&dp_mutex);
        for (i = 0; i <= th->bucket_mask; i++) {
                struct sw_flow **bucket = &th->buckets[i];
                struct sw_flow *flow = *bucket;
                if (flow && flow_timeout(flow)) {
                        count += do_delete(bucket, flow);
                        if (dp->flags & OFPC_SEND_FLOW_EXP)
                                dp_send_flow_expired(dp, flow);
                }
        }
        mutex_unlock(&dp_mutex);

        if (count)
                atomic_sub(count, &th->n_flows);
        return count;
}

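/* Frees every flow in the table, the bucket array, and the table itself. */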
static void table_hash_destroy(struct sw_table *swt)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        unsigned int i;
        for (i = 0; i <= th->bucket_mask; i++)
                if (th->buckets[i])
                        flow_free(th->buckets[i]);
        kmem_free(th->buckets, (th->bucket_mask + 1) * sizeof *th->buckets);
        kfree(th);
}

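/* Invokes 'callback' on each flow that matches 'key', resuming from
 * 'position'.  Returns 0 after all matching flows have been visited, or the
 * first nonzero value returned by 'callback'. */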
static int table_hash_iterate(struct sw_table *swt,
                              const struct sw_flow_key *key,
                              struct sw_table_position *position,
                              int (*callback)(struct sw_flow *, void *private),
                              void *private)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;

        if (position->private[0] > th->bucket_mask)
                return 0;

        if (key->wildcards == 0) {
                struct sw_flow *flow;
                int error;

                flow = table_hash_lookup(swt, key);
                if (!flow)
                        return 0;

                error = callback(flow, private);
                if (!error)
                        position->private[0] = -1;
                return error;
        } else {
                int i;

                for (i = position->private[0]; i <= th->bucket_mask; i++) {
                        struct sw_flow *flow = th->buckets[i];
                        if (flow && flow_matches(key, &flow->key)) {
                                int error = callback(flow, private);
                                if (error) {
                                        position->private[0] = i;
                                        return error;
                                }
                        }
                }
                return 0;
        }
}

static void table_hash_stats(struct sw_table *swt,
                             struct sw_table_stats *stats)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        stats->name = "hash";
        stats->n_flows = atomic_read(&th->n_flows);
        stats->max_flows = th->bucket_mask + 1;
}

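/* Creates a hash table with 'n_buckets' buckets (which must be a power of 2)
 * hashed by CRC-32 using 'polynomial'.  Returns NULL if memory cannot be
 * allocated. */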
struct sw_table *table_hash_create(unsigned int polynomial,
                                   unsigned int n_buckets)
{
        struct sw_table_hash *th;
        struct sw_table *swt;

        th = kmalloc(sizeof *th, GFP_KERNEL);
        if (th == NULL)
                return NULL;

        BUG_ON(n_buckets & (n_buckets - 1));
        th->buckets = kmem_zalloc(n_buckets * sizeof *th->buckets);
        if (th->buckets == NULL) {
                printk("failed to allocate %u buckets\n", n_buckets);
                kfree(th);
                return NULL;
        }
        th->bucket_mask = n_buckets - 1;

        swt = &th->swt;
        swt->lookup = table_hash_lookup;
        swt->insert = table_hash_insert;
        swt->delete = table_hash_delete;
        swt->timeout = table_hash_timeout;
        swt->destroy = table_hash_destroy;
        swt->iterate = table_hash_iterate;
        swt->stats = table_hash_stats;

        crc32_init(&th->crc32, polynomial);
        atomic_set(&th->n_flows, 0);

        return swt;
}

/* Double-hashing table. */

struct sw_table_hash2 {
        struct sw_table swt;
        struct sw_table *subtable[2];
};

static struct sw_flow *table_hash2_lookup(struct sw_table *swt,
                                          const struct sw_flow_key *key)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        int i;

        for (i = 0; i < 2; i++) {
                struct sw_flow *flow = *find_bucket(t2->subtable[i], key);
                if (flow && !memcmp(&flow->key, key, sizeof *key))
                        return flow;
        }
        return NULL;
}

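/* Inserts into the first subtable, falling back to the second if the first
 * rejects the flow. */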
static int table_hash2_insert(struct sw_table *swt, struct sw_flow *flow)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;

        if (table_hash_insert(t2->subtable[0], flow))
                return 1;
        return table_hash_insert(t2->subtable[1], flow);
}

static int table_hash2_delete(struct sw_table *swt,
                              const struct sw_flow_key *key,
                              uint16_t priority, int strict)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        return (table_hash_delete(t2->subtable[0], key, priority, strict)
                + table_hash_delete(t2->subtable[1], key, priority, strict));
}

static int table_hash2_timeout(struct datapath *dp, struct sw_table *swt)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        return (table_hash_timeout(dp, t2->subtable[0])
                + table_hash_timeout(dp, t2->subtable[1]));
}

static void table_hash2_destroy(struct sw_table *swt)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        table_hash_destroy(t2->subtable[0]);
        table_hash_destroy(t2->subtable[1]);
        kfree(t2);
}

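/* Iterates over both subtables in turn; position->private[1] records which
 * subtable to resume from and position->private[0] the bucket within it. */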
static int table_hash2_iterate(struct sw_table *swt,
                               const struct sw_flow_key *key,
                               struct sw_table_position *position,
                               int (*callback)(struct sw_flow *, void *),
                               void *private)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        int i;

        for (i = position->private[1]; i < 2; i++) {
                int error = table_hash_iterate(t2->subtable[i], key, position,
                                               callback, private);
                if (error) {
                        return error;
                }
                position->private[0] = 0;
                position->private[1]++;
        }
        return 0;
}

static void table_hash2_stats(struct sw_table *swt,
                              struct sw_table_stats *stats)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        struct sw_table_stats substats[2];
        int i;

        for (i = 0; i < 2; i++)
                table_hash_stats(t2->subtable[i], &substats[i]);
        stats->name = "hash2";
        stats->n_flows = substats[0].n_flows + substats[1].n_flows;
        stats->max_flows = substats[0].max_flows + substats[1].max_flows;
}

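/* Creates a two-way hash table whose subtables hash with 'poly0' and 'poly1'
 * and have 'buckets0' and 'buckets1' buckets, respectively (each a power
 * of 2). */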
struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0,
                                    unsigned int poly1, unsigned int buckets1)
{
        struct sw_table_hash2 *t2;
        struct sw_table *swt;

        t2 = kmalloc(sizeof *t2, GFP_KERNEL);
        if (t2 == NULL)
                return NULL;

        t2->subtable[0] = table_hash_create(poly0, buckets0);
        if (t2->subtable[0] == NULL)
                goto out_free_t2;

        t2->subtable[1] = table_hash_create(poly1, buckets1);
        if (t2->subtable[1] == NULL)
                goto out_free_subtable0;

        swt = &t2->swt;
        swt->lookup = table_hash2_lookup;
        swt->insert = table_hash2_insert;
        swt->delete = table_hash2_delete;
        swt->timeout = table_hash2_timeout;
        swt->destroy = table_hash2_destroy;
        swt->iterate = table_hash2_iterate;
        swt->stats = table_hash2_stats;

        return swt;

out_free_subtable0:
        table_hash_destroy(t2->subtable[0]);
out_free_t2:
        kfree(t2);
        return NULL;
}

/* From fs/xfs/linux-2.4/kmem.c. */

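/* Allocates 'size' bytes, trying kmalloc() first and falling back to
 * vmalloc() if that fails.  Refuses requests larger than KMALLOC_MAX_SIZE
 * where that limit is defined. */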
static void *
kmem_alloc(size_t size)
{
        void *ptr;

#ifdef KMALLOC_MAX_SIZE
        if (size > KMALLOC_MAX_SIZE)
                return NULL;
#endif
        ptr = kmalloc(size, GFP_KERNEL);
        if (!ptr) {
                ptr = vmalloc(size);
                if (ptr)
                        printk("openflow: used vmalloc for %lu bytes\n",
                               (unsigned long)size);
        }
        return ptr;
}

static void *
kmem_zalloc(size_t size)
{
        void *ptr = kmem_alloc(size);
        if (ptr)
                memset(ptr, 0, size);
        return ptr;
}

static void
kmem_free(void *ptr, size_t size)
{
        if (((unsigned long)ptr < VMALLOC_START) ||
            ((unsigned long)ptr >= VMALLOC_END)) {
                kfree(ptr);
        } else {
                vfree(ptr);
        }
}