Simplify use of dp_mutex.
datapath/table-hash.c
/*
 * Distributed under the terms of the GNU GPL version 2.
 * Copyright (c) 2007, 2008 The Board of Trustees of The Leland
 * Stanford Junior University
 */

#include "table.h"
#include "crc32.h"
#include "flow.h"
#include "datapath.h"

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <asm/pgtable.h>

static void *kmem_alloc(size_t);
static void *kmem_zalloc(size_t);
static void kmem_free(void *, size_t);

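/* A single open-addressed hash table that stores at most one flow per
 * bucket; an insertion that collides with a flow holding a different key
 * simply fails.  table_hash2 below layers two of these tables to reduce
 * the collision rate. */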
struct sw_table_hash {
        struct sw_table swt;
        spinlock_t lock;
        struct crc32 crc32;
        atomic_t n_flows;
        unsigned int bucket_mask; /* Number of buckets minus 1. */
        struct sw_flow **buckets;
};

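/* Returns the bucket that 'key' hashes to, using the table's CRC-32
 * polynomial over the entire key. */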
static struct sw_flow **find_bucket(struct sw_table *swt,
                                    const struct sw_flow_key *key)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        unsigned int crc = crc32_calculate(&th->crc32, key, sizeof *key);
        return &th->buckets[crc & th->bucket_mask];
}

static struct sw_flow *table_hash_lookup(struct sw_table *swt,
                                         const struct sw_flow_key *key)
{
        struct sw_flow *flow = *find_bucket(swt, key);
        return flow && !memcmp(&flow->key, key, sizeof *key) ? flow : NULL;
}

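/* Inserts 'flow' into the table, replacing an existing entry with an
 * identical key.  Fails for wildcarded flows, which an exact-match table
 * cannot store, and for collisions with a flow that has a different key. */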
static int table_hash_insert(struct sw_table *swt, struct sw_flow *flow)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        struct sw_flow **bucket;
        unsigned long int flags;
        int retval;

        if (flow->key.wildcards != 0)
                return 0;

        spin_lock_irqsave(&th->lock, flags);
        bucket = find_bucket(swt, &flow->key);
        if (*bucket == NULL) {
                atomic_inc(&th->n_flows);
                rcu_assign_pointer(*bucket, flow);
                retval = 1;
        } else {
                struct sw_flow *old_flow = *bucket;
                if (!memcmp(&old_flow->key, &flow->key, sizeof flow->key)
                    && flow_del(old_flow)) {
                        rcu_assign_pointer(*bucket, flow);
                        flow_deferred_free(old_flow);
                        retval = 1;
                } else {
                        retval = 0;
                }
        }
        spin_unlock_irqrestore(&th->lock, flags);
        return retval;
}

/* Removes 'flow' from 'bucket' and schedules it to be freed after an RCU
 * grace period.  Caller must update n_flows. */
static int do_delete(struct sw_flow **bucket, struct sw_flow *flow)
{
        if (flow_del(flow)) {
                rcu_assign_pointer(*bucket, NULL);
                flow_deferred_free(flow);
                return 1;
        }
        return 0;
}

/* Returns the number of deleted flows.  We can ignore the priority
 * argument, since all exact-match entries have the same (highest)
 * priority. */
static int table_hash_delete(struct sw_table *swt,
                             const struct sw_flow_key *key,
                             uint16_t priority, int strict)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        unsigned int count = 0;

        if (key->wildcards == 0) {
                struct sw_flow **bucket = find_bucket(swt, key);
                struct sw_flow *flow = *bucket;
                if (flow && !memcmp(&flow->key, key, sizeof *key))
                        count = do_delete(bucket, flow);
        } else {
                unsigned int i;

                for (i = 0; i <= th->bucket_mask; i++) {
                        struct sw_flow **bucket = &th->buckets[i];
                        struct sw_flow *flow = *bucket;
                        if (flow && flow_del_matches(&flow->key, key, strict))
                                count += do_delete(bucket, flow);
                }
        }
        if (count)
                atomic_sub(count, &th->n_flows);
        return count;
}

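/* Expires timed-out flows in every bucket.  dp_mutex is held across the
 * scan so that deletion and the expiry notification are serialized with
 * other datapath operations. */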
static int table_hash_timeout(struct datapath *dp, struct sw_table *swt)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        unsigned int i;
        int count = 0;

        mutex_lock(&dp_mutex);
        for (i = 0; i <= th->bucket_mask; i++) {
                struct sw_flow **bucket = &th->buckets[i];
                struct sw_flow *flow = *bucket;
                if (flow && flow_timeout(flow)) {
                        count += do_delete(bucket, flow);
                        if (dp->flags & OFPC_SEND_FLOW_EXP)
                                dp_send_flow_expired(dp, flow);
                }
        }
        mutex_unlock(&dp_mutex);

        if (count)
                atomic_sub(count, &th->n_flows);
        return count;
}

static void table_hash_destroy(struct sw_table *swt)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        unsigned int i;
        for (i = 0; i <= th->bucket_mask; i++)
                if (th->buckets[i])
                        flow_free(th->buckets[i]);
        kmem_free(th->buckets, (th->bucket_mask + 1) * sizeof *th->buckets);
        kfree(th);
}

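/* Visits flows matching 'key', resuming from 'position': private[0] is the
 * next bucket index to examine.  For an exact-match key it is set to -1
 * after the flow has been visited, so that a later call returns nothing. */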
static int table_hash_iterate(struct sw_table *swt,
                              const struct sw_flow_key *key,
                              struct sw_table_position *position,
                              int (*callback)(struct sw_flow *, void *private),
                              void *private)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;

        if (position->private[0] > th->bucket_mask)
                return 0;

        if (key->wildcards == 0) {
                struct sw_flow *flow;
                int error;

                flow = table_hash_lookup(swt, key);
                if (!flow)
                        return 0;

                error = callback(flow, private);
                if (!error)
                        position->private[0] = -1;
                return error;
        } else {
                unsigned int i;

                for (i = position->private[0]; i <= th->bucket_mask; i++) {
                        struct sw_flow *flow = th->buckets[i];
                        if (flow && flow_matches(key, &flow->key)) {
                                int error = callback(flow, private);
                                if (error) {
                                        position->private[0] = i;
                                        return error;
                                }
                        }
                }
                return 0;
        }
}

static void table_hash_stats(struct sw_table *swt,
                             struct sw_table_stats *stats)
{
        struct sw_table_hash *th = (struct sw_table_hash *) swt;
        stats->name = "hash";
        stats->n_flows = atomic_read(&th->n_flows);
        stats->max_flows = th->bucket_mask + 1;
}

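/* Creates a hash table that hashes with the CRC-32 'polynomial' into
 * 'n_buckets' buckets.  'n_buckets' must be a power of 2, since the bucket
 * index is computed by masking the hash with bucket_mask. */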
struct sw_table *table_hash_create(unsigned int polynomial,
                                   unsigned int n_buckets)
{
        struct sw_table_hash *th;
        struct sw_table *swt;

        th = kmalloc(sizeof *th, GFP_KERNEL);
        if (th == NULL)
                return NULL;

        BUG_ON(n_buckets & (n_buckets - 1));
        th->buckets = kmem_zalloc(n_buckets * sizeof *th->buckets);
        if (th->buckets == NULL) {
                printk("failed to allocate %u buckets\n", n_buckets);
                kfree(th);
                return NULL;
        }
        th->bucket_mask = n_buckets - 1;

        swt = &th->swt;
        swt->lookup = table_hash_lookup;
        swt->insert = table_hash_insert;
        swt->delete = table_hash_delete;
        swt->timeout = table_hash_timeout;
        swt->destroy = table_hash_destroy;
        swt->iterate = table_hash_iterate;
        swt->stats = table_hash_stats;

        spin_lock_init(&th->lock);
        crc32_init(&th->crc32, polynomial);
        atomic_set(&th->n_flows, 0);

        return swt;
}

/* Double-hashing table: two hash tables with independent CRC polynomials.
 * An insertion that collides in the first subtable is retried in the
 * second, which cuts the failure rate of a single open-addressed table. */

struct sw_table_hash2 {
        struct sw_table swt;
        struct sw_table *subtable[2];
};

static struct sw_flow *table_hash2_lookup(struct sw_table *swt,
                                          const struct sw_flow_key *key)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        int i;

        for (i = 0; i < 2; i++) {
                struct sw_flow *flow = *find_bucket(t2->subtable[i], key);
                if (flow && !memcmp(&flow->key, key, sizeof *key))
                        return flow;
        }
        return NULL;
}

static int table_hash2_insert(struct sw_table *swt, struct sw_flow *flow)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;

        if (table_hash_insert(t2->subtable[0], flow))
                return 1;
        return table_hash_insert(t2->subtable[1], flow);
}

static int table_hash2_delete(struct sw_table *swt,
                              const struct sw_flow_key *key,
                              uint16_t priority, int strict)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        return (table_hash_delete(t2->subtable[0], key, priority, strict)
                + table_hash_delete(t2->subtable[1], key, priority, strict));
}

static int table_hash2_timeout(struct datapath *dp, struct sw_table *swt)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        return (table_hash_timeout(dp, t2->subtable[0])
                + table_hash_timeout(dp, t2->subtable[1]));
}

static void table_hash2_destroy(struct sw_table *swt)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        table_hash_destroy(t2->subtable[0]);
        table_hash_destroy(t2->subtable[1]);
        kfree(t2);
}

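/* Iterates over both subtables in turn.  position->private[1] records the
 * subtable to resume in; private[0] is the bucket position within it and
 * is reset to 0 when moving on to the second subtable. */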
static int table_hash2_iterate(struct sw_table *swt,
                               const struct sw_flow_key *key,
                               struct sw_table_position *position,
                               int (*callback)(struct sw_flow *, void *),
                               void *private)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        int i;

        for (i = position->private[1]; i < 2; i++) {
                int error = table_hash_iterate(t2->subtable[i], key, position,
                                               callback, private);
                if (error)
                        return error;
                position->private[0] = 0;
                position->private[1]++;
        }
        return 0;
}

static void table_hash2_stats(struct sw_table *swt,
                              struct sw_table_stats *stats)
{
        struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
        struct sw_table_stats substats[2];
        int i;

        for (i = 0; i < 2; i++)
                table_hash_stats(t2->subtable[i], &substats[i]);
        stats->name = "hash2";
        stats->n_flows = substats[0].n_flows + substats[1].n_flows;
        stats->max_flows = substats[0].max_flows + substats[1].max_flows;
}

struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0,
                                    unsigned int poly1, unsigned int buckets1)
{
        struct sw_table_hash2 *t2;
        struct sw_table *swt;

        t2 = kmalloc(sizeof *t2, GFP_KERNEL);
        if (t2 == NULL)
                return NULL;

        t2->subtable[0] = table_hash_create(poly0, buckets0);
        if (t2->subtable[0] == NULL)
                goto out_free_t2;

        t2->subtable[1] = table_hash_create(poly1, buckets1);
        if (t2->subtable[1] == NULL)
                goto out_free_subtable0;

        swt = &t2->swt;
        swt->lookup = table_hash2_lookup;
        swt->insert = table_hash2_insert;
        swt->delete = table_hash2_delete;
        swt->timeout = table_hash2_timeout;
        swt->destroy = table_hash2_destroy;
        swt->iterate = table_hash2_iterate;
        swt->stats = table_hash2_stats;

        return swt;

out_free_subtable0:
        table_hash_destroy(t2->subtable[0]);
out_free_t2:
        kfree(t2);
        return NULL;
}

/* From fs/xfs/linux-2.4/kmem.c. */

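/* Allocates 'size' bytes with kmalloc, falling back to vmalloc for
 * requests too large for the slab allocator. */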
static void *
kmem_alloc(size_t size)
{
        void *ptr;

#ifdef KMALLOC_MAX_SIZE
        if (size > KMALLOC_MAX_SIZE)
                return NULL;
#endif
        ptr = kmalloc(size, GFP_KERNEL);
        if (!ptr) {
                ptr = vmalloc(size);
                if (ptr)
                        printk("openflow: used vmalloc for %lu bytes\n",
                               (unsigned long)size);
        }
        return ptr;
}

static void *
kmem_zalloc(size_t size)
{
        void *ptr = kmem_alloc(size);
        if (ptr)
                memset(ptr, 0, size);
        return ptr;
}

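/* Frees memory obtained from kmem_alloc(), choosing kfree() or vfree()
 * according to whether 'ptr' lies in the vmalloc address range. */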
static void
kmem_free(void *ptr, size_t size)
{
        if (((unsigned long)ptr < VMALLOC_START) ||
            ((unsigned long)ptr >= VMALLOC_END)) {
                kfree(ptr);
        } else {
                vfree(ptr);
        }
}