Allow timeout implementations to block, by using a kernel thread instead of a timer.
[openvswitch] / datapath / datapath.c
1 /*
2  * Distributed under the terms of the GNU GPL version 2.
3  * Copyright (c) 2007, 2008 The Board of Trustees of The Leland 
4  * Stanford Junior University
5  */
6
7 /* Functions for managing the dp interface/device. */
8
9 #include <linux/module.h>
10 #include <linux/if_arp.h>
11 #include <linux/if_bridge.h>
12 #include <linux/if_vlan.h>
13 #include <linux/in.h>
14 #include <net/genetlink.h>
15 #include <linux/ip.h>
16 #include <linux/delay.h>
17 #include <linux/etherdevice.h>
18 #include <linux/kernel.h>
19 #include <linux/kthread.h>
20 #include <linux/mutex.h>
21 #include <linux/rtnetlink.h>
22 #include <linux/rcupdate.h>
23 #include <linux/version.h>
24 #include <linux/ethtool.h>
25 #include <linux/random.h>
26 #include <asm/system.h>
27 #include <linux/netfilter_bridge.h>
28 #include <linux/inetdevice.h>
29 #include <linux/list.h>
30
31 #include "openflow-netlink.h"
32 #include "datapath.h"
33 #include "table.h"
34 #include "chain.h"
35 #include "forward.h"
36 #include "flow.h"
37 #include "datapath_t.h"
38
39 #include "compat.h"
40
41
42 /* Number of milliseconds between runs of the maintenance thread. */
43 #define MAINT_SLEEP_MSECS 1000
44
45 #define BRIDGE_PORT_NO_FLOOD    0x00000001 
46
47 #define UINT32_MAX                        4294967295U
48
/* A physical port attached to a datapath.  Created/destroyed by
 * add_switch_port()/del_switch_port(); readers access it under RCU. */
struct net_bridge_port {
        u16     port_no;        /* Port number, index into datapath.ports. */
        u32 flags;              /* BRIDGE_PORT_* flags (e.g. BRIDGE_PORT_NO_FLOOD). */
        struct datapath *dp;    /* Owning datapath. */
        struct net_device *dev; /* Underlying net device; reference held (see new_nbp). */
        struct list_head node; /* Element in datapath.ports. */
};
56
57 static struct genl_family dp_genl_family;
58 static struct genl_multicast_group mc_group;
59
60 int dp_dev_setup(struct net_device *dev);  
61
62 /* It's hard to imagine wanting more than one datapath, but... */
63 #define DP_MAX 32
64
65 /* datapaths.  Protected on the read side by rcu_read_lock, on the write side
66  * by dp_mutex.
67  *
68  * It is safe to access the datapath and net_bridge_port structures with just
69  * the dp_mutex, but to access the chain you need to take the rcu_read_lock
70  * also (because dp_mutex doesn't prevent flows from being destroyed).
71  */
72 static struct datapath *dps[DP_MAX];
73 static DEFINE_MUTEX(dp_mutex);
74
75 static int dp_maint_func(void *data);
76 static int send_port_status(struct net_bridge_port *p, uint8_t status);
77
78
79 /* nla_unreserve - reduce amount of space reserved by nla_reserve  
80  * @skb: socket buffer from which to recover room
81  * @nla: netlink attribute to adjust
82  * @len: amount by which to reduce attribute payload
83  *
84  * Reduces amount of space reserved by a call to nla_reserve.
85  *
86  * No other attributes may be added between calling nla_reserve and this
87  * function, since it will create a hole in the message.
88  */
void nla_unreserve(struct sk_buff *skb, struct nlattr *nla, int len)
{
        /* Give back 'len' bytes of room previously claimed via nla_reserve().
         * Only valid while 'nla' is still the last attribute in the buffer;
         * otherwise this would punch a hole in the message (see the header
         * comment above). */
        skb->tail -= len;
        skb->len  -= len;

        /* Shrink the attribute's own recorded payload length to match. */
        nla->nla_len -= len;
}
96
97 /* Generates a unique datapath id.  It incorporates the datapath index
98  * and a hardware address, if available.  If not, it generates a random
99  * one.
100  */
101 static 
102 uint64_t gen_datapath_id(uint16_t dp_idx)
103 {
104         uint64_t id;
105         int i;
106         struct net_device *dev;
107
108         /* The top 16 bits are used to identify the datapath.  The lower 48 bits
109          * use an interface address.  */
110         id = (uint64_t)dp_idx << 48;
111         if ((dev = dev_get_by_name(&init_net, "ctl0")) 
112                         || (dev = dev_get_by_name(&init_net, "eth0"))) {
113                 for (i=0; i<ETH_ALEN; i++) {
114                         id |= (uint64_t)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
115                 }
116                 dev_put(dev);
117         } else {
118                 /* Randomly choose the lower 48 bits if we cannot find an
119                  * address and mark the most significant bit to indicate that
120                  * this was randomly generated. */
121                 uint8_t rand[ETH_ALEN];
122                 get_random_bytes(rand, ETH_ALEN);
123                 id |= (uint64_t)1 << 63;
124                 for (i=0; i<ETH_ALEN; i++) {
125                         id |= (uint64_t)rand[i] << (8*(ETH_ALEN-1 - i));
126                 }
127         }
128
129         return id;
130 }
131
132 /* Creates a new datapath numbered 'dp_idx'.  Returns 0 for success or a
133  * negative error code.
134  *
135  * Not called with any locks. */
136 static int new_dp(int dp_idx)
137 {
138         struct datapath *dp;
139         int err;
140
141         if (dp_idx < 0 || dp_idx >= DP_MAX)
142                 return -EINVAL;
143
144         if (!try_module_get(THIS_MODULE))
145                 return -ENODEV;
146
147         mutex_lock(&dp_mutex);
148         dp = rcu_dereference(dps[dp_idx]);
149         if (dp != NULL) {
150                 err = -EEXIST;
151                 goto err_unlock;
152         }
153
154         err = -ENOMEM;
155         dp = kzalloc(sizeof *dp, GFP_KERNEL);
156         if (dp == NULL)
157                 goto err_unlock;
158
159         dp->dp_idx = dp_idx;
160         dp->id = gen_datapath_id(dp_idx);
161         dp->chain = chain_create(dp);
162         if (dp->chain == NULL)
163                 goto err_free_dp;
164         INIT_LIST_HEAD(&dp->port_list);
165
166 #if 0
167         /* Setup our "of" device */
168         dp->dev.priv = dp;
169         rtnl_lock();
170         err = dp_dev_setup(&dp->dev);
171         rtnl_unlock();
172         if (err != 0) 
173                 printk("datapath: problem setting up 'of' device\n");
174 #endif
175
176         dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
177
178         dp->dp_task = kthread_run(dp_maint_func, dp, "dp%d", dp_idx);
179         if (IS_ERR(dp->dp_task))
180                 goto err_free_dp;
181
182         rcu_assign_pointer(dps[dp_idx], dp);
183         mutex_unlock(&dp_mutex);
184
185         return 0;
186
187 err_free_dp:
188         kfree(dp);
189 err_unlock:
190         mutex_unlock(&dp_mutex);
191         module_put(THIS_MODULE);
192                 return err;
193 }
194
195 /* Find and return a free port number under 'dp'.  Called under dp_mutex. */
196 static int find_portno(struct datapath *dp)
197 {
198         int i;
199         for (i = 0; i < OFPP_MAX; i++)
200                 if (dp->ports[i] == NULL)
201                         return i;
202         return -EXFULL;
203 }
204
205 static struct net_bridge_port *new_nbp(struct datapath *dp,
206                                                                            struct net_device *dev)
207 {
208         struct net_bridge_port *p;
209         int port_no;
210
211         port_no = find_portno(dp);
212         if (port_no < 0)
213                 return ERR_PTR(port_no);
214
215         p = kzalloc(sizeof(*p), GFP_KERNEL);
216         if (p == NULL)
217                 return ERR_PTR(-ENOMEM);
218
219         p->dp = dp;
220         dev_hold(dev);
221         p->dev = dev;
222         p->port_no = port_no;
223
224         return p;
225 }
226
227 /* Called with dp_mutex. */
228 int add_switch_port(struct datapath *dp, struct net_device *dev)
229 {
230         struct net_bridge_port *p;
231
232         if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER)
233                 return -EINVAL;
234
235         if (dev->br_port != NULL)
236                 return -EBUSY;
237
238         p = new_nbp(dp, dev);
239         if (IS_ERR(p))
240                 return PTR_ERR(p);
241
242         dev_hold(dev);
243         rcu_assign_pointer(dev->br_port, p);
244         rtnl_lock();
245         dev_set_promiscuity(dev, 1);
246         rtnl_unlock();
247
248         rcu_assign_pointer(dp->ports[p->port_no], p);
249         list_add_rcu(&p->node, &dp->port_list);
250
251         /* Notify the ctlpath that this port has been added */
252         send_port_status(p, OFPPR_ADD);
253
254         return 0;
255 }
256
257 /* Delete 'p' from switch.
258  * Called with dp_mutex. */
static int del_switch_port(struct net_bridge_port *p)
{
        /* First drop references to device. */
        rtnl_lock();
        dev_set_promiscuity(p->dev, -1);
        rtnl_unlock();
        /* Unpublish the port everywhere RCU readers could find it. */
        list_del_rcu(&p->node);
        rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
        rcu_assign_pointer(p->dev->br_port, NULL);

        /* Then wait until no one is still using it, and destroy it. */
        synchronize_rcu();

        /* Notify the ctlpath that this port no longer exists */
        send_port_status(p, OFPPR_DELETE);

        dev_put(p->dev);        /* Balances the dev_hold() in new_nbp(). */
        kfree(p);

        return 0;
}
280
281 /* Called with dp_mutex. */
/* Tear down 'dp': stop its maintenance thread, detach every port,
 * unpublish the datapath, then free it once RCU readers are done. */
static void del_dp(struct datapath *dp)
{
        struct net_bridge_port *p, *n;

#if 0
        /* Unregister the "of" device of this dp */
        rtnl_lock();
        unregister_netdevice(&dp->dev);
        rtnl_unlock();
#endif

        /* Stop the maintenance kthread before the chain it touches is
         * destroyed below. */
        kthread_stop(dp->dp_task);

        /* Drop references to DP. */
        list_for_each_entry_safe (p, n, &dp->port_list, node)
                del_switch_port(p);
        rcu_assign_pointer(dps[dp->dp_idx], NULL);

        /* Wait until no longer in use, then destroy it. */
        synchronize_rcu();
        chain_destroy(dp->chain);
        kfree(dp);
        module_put(THIS_MODULE);        /* Balances try_module_get() in new_dp(). */
}
306
307 static int dp_maint_func(void *data)
308 {
309         struct datapath *dp = (struct datapath *) data;
310
311         while (!kthread_should_stop()) {
312 #if 1
313                 chain_timeout(dp->chain);
314 #else
315                 int count = chain_timeout(dp->chain);
316                 chain_print_stats(dp->chain);
317                 if (count)
318                         printk("%d flows timed out\n", count);
319 #endif
320                 msleep_interruptible(MAINT_SLEEP_MSECS);
321         }
322                 
323         return 0;
324 }
325
/*
 * Used as br_handle_frame_hook.  (Cannot run bridge at the same time, even on
 * different set of devices!)  Returns 0 if *pskb should be processed further,
 * 1 if *pskb is handled. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
/* Called with rcu_read_lock. */
static struct sk_buff *dp_frame_hook(struct net_bridge_port *p,
                                         struct sk_buff *skb)
{
        struct ethhdr *eh = eth_hdr(skb);
        struct sk_buff *skb_local = NULL;


        /* Frames addressed to this port's own MAC are returned to the
         * stack untouched. */
        if (compare_ether_addr(eh->h_dest, skb->dev->dev_addr) == 0) 
                return skb;

        /* Broadcast/multicast/local frames go both through the datapath and
         * up the local stack, so clone the skb for the local copy.
         * NOTE(review): a failed clone returns NULL and silently drops the
         * local delivery -- confirm this is acceptable. */
        if (is_broadcast_ether_addr(eh->h_dest)
                                || is_multicast_ether_addr(eh->h_dest)
                                || is_local_ether_addr(eh->h_dest)) 
                skb_local = skb_clone(skb, GFP_ATOMIC);

        /* Push the Ethernet header back on. */
        if (skb->protocol == htons(ETH_P_8021Q))
                skb_push(skb, VLAN_ETH_HLEN);
        else
                skb_push(skb, ETH_HLEN);

        /* Ownership of 'skb' passes to the forwarding path here. */
        fwd_port_input(p->dp->chain, skb, p->port_no);

        return skb_local;
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
{
        /* Push the Ethernet header back on. */
        if ((*pskb)->protocol == htons(ETH_P_8021Q))
                skb_push(*pskb, VLAN_ETH_HLEN);
        else
                skb_push(*pskb, ETH_HLEN);

        fwd_port_input(p->dp->chain, *pskb, p->port_no);
        return 1;       /* Frame consumed by the datapath. */
}
#else 
/* NB: This has only been tested on 2.4.35 */

/* Called without any locks (?) */
static void dp_frame_hook(struct sk_buff *skb)
{
        struct net_bridge_port *p = skb->dev->br_port;

        /* Push the Ethernet header back on. */
        if (skb->protocol == htons(ETH_P_8021Q))
                skb_push(skb, VLAN_ETH_HLEN);
        else
                skb_push(skb, ETH_HLEN);

        if (p) {
                rcu_read_lock();
                fwd_port_input(p->dp->chain, skb, p->port_no);
                rcu_read_unlock();
        } else
                kfree_skb(skb);         /* Not one of our ports: drop. */
}
#endif
391
392 /* Forwarding output path.
393  * Based on net/bridge/br_forward.c. */
394
395 /* Don't forward packets to originating port or with flooding disabled */
396 static inline int should_deliver(const struct net_bridge_port *p,
397                         const struct sk_buff *skb)
398 {
399         if ((skb->dev == p->dev) || (p->flags & BRIDGE_PORT_NO_FLOOD)) {
400                 return 0;
401         } 
402
403         return 1;
404 }
405
/* Length of 'skb' excluding the Ethernet header (and the VLAN tag, if any).
 * NOTE(review): the signed intermediate is returned as unsigned; if skb->len
 * were ever smaller than the subtracted header size the result would wrap to
 * a huge value -- confirm callers only pass complete Ethernet frames. */
static inline unsigned packet_length(const struct sk_buff *skb)
{
        int length = skb->len - ETH_HLEN;
        if (skb->protocol == htons(ETH_P_8021Q))
                length -= VLAN_HLEN;
        return length;
}
413
414 static int
415 flood(struct datapath *dp, struct sk_buff *skb)
416 {
417         struct net_bridge_port *p;
418         int prev_port;
419
420         prev_port = -1;
421         list_for_each_entry_rcu (p, &dp->port_list, node) {
422                 if (!should_deliver(p, skb))
423                         continue;
424                 if (prev_port != -1) {
425                         struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
426                         if (!clone) {
427                                 kfree_skb(skb);
428                                 return -ENOMEM;
429                         }
430                         dp_output_port(dp, clone, prev_port); 
431                 }
432                 prev_port = p->port_no;
433         }
434         if (prev_port != -1)
435                 dp_output_port(dp, skb, prev_port);
436         else
437                 kfree_skb(skb);
438
439         return 0;
440 }
441
442 /* Marks 'skb' as having originated from 'in_port' in 'dp'.
443    FIXME: how are devices reference counted? */
444 int dp_set_origin(struct datapath *dp, uint16_t in_port,
445                            struct sk_buff *skb)
446 {
447         if (in_port < OFPP_MAX && dp->ports[in_port]) {
448                 skb->dev = dp->ports[in_port]->dev;
449                 return 0;
450         }
451         return -ENOENT;
452 }
453
454 /* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'.
455  */
456 int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port)
457 {
458         struct net_bridge_port *p;
459         int len = skb->len;
460
461         BUG_ON(!skb);
462         if (out_port == OFPP_FLOOD)
463                 return flood(dp, skb);
464         else if (out_port == OFPP_CONTROLLER)
465                 return dp_output_control(dp, skb, fwd_save_skb(skb), 0,
466                                                   OFPR_ACTION);
467         else if (out_port >= OFPP_MAX)
468                 goto bad_port;
469
470         p = dp->ports[out_port];
471         if (p == NULL)
472                 goto bad_port;
473
474         skb->dev = p->dev;
475         if (packet_length(skb) > skb->dev->mtu) {
476                 printk("dropped over-mtu packet: %d > %d\n",
477                                         packet_length(skb), skb->dev->mtu);
478                 kfree_skb(skb);
479                 return -E2BIG;
480         }
481
482         dev_queue_xmit(skb);
483
484         return len;
485
486 bad_port:
487         kfree_skb(skb);
488         if (net_ratelimit())
489                 printk("can't forward to bad port %d\n", out_port);
490         return -ENOENT;
491 }
492
493 /* Takes ownership of 'skb' and transmits it to 'dp''s control path.  If
494  * 'buffer_id' != -1, then only the first 64 bytes of 'skb' are sent;
495  * otherwise, all of 'skb' is sent.  'reason' indicates why 'skb' is being
496  * sent. 'max_len' sets the maximum number of bytes that the caller
497  * wants to be sent; a value of 0 indicates the entire packet should be
498  * sent. */
int
dp_output_control(struct datapath *dp, struct sk_buff *skb,
                           uint32_t buffer_id, size_t max_len, int reason)
{
        /* FIXME? packet_rcv_spkt in net/packet/af_packet.c does some stuff
           that we should possibly be doing here too. */
        /* FIXME?  Can we avoid creating a new skbuff in the case where we
         * forward the whole packet? */
        struct sk_buff *f_skb;  /* genetlink message carrying the packet */
        struct nlattr *attr;
        struct ofp_packet_in *opi;
        size_t opi_len;
        size_t len, fwd_len;
        void *data;
        int err = -ENOMEM;

        /* Forward the whole packet unless it was buffered (buffer_id set)
         * and the caller capped the length; max_len == 0 means "no cap". */
        fwd_len = skb->len;
        if ((buffer_id != (uint32_t) -1) && max_len)
                fwd_len = min(fwd_len, max_len);

        /* Message size: ofp_packet_in attribute plus the dp index attribute. */
        len = nla_total_size(offsetof(struct ofp_packet_in, data) + fwd_len) 
                                + nla_total_size(sizeof(uint32_t));

        f_skb = genlmsg_new(len, GFP_ATOMIC); 
        if (!f_skb)
                goto error_free_skb;

        data = genlmsg_put(f_skb, 0, 0, &dp_genl_family, 0,
                                DP_GENL_C_OPENFLOW);
        if (data == NULL)
                goto error_free_f_skb;

        /* NB: this macro jumps to nla_put_failure if the skb lacks room. */
        NLA_PUT_U32(f_skb, DP_GENL_A_DP_IDX, dp->dp_idx);

        opi_len = offsetof(struct ofp_packet_in, data) + fwd_len;
        attr = nla_reserve(f_skb, DP_GENL_A_OPENFLOW, opi_len);
        if (!attr)
                goto error_free_f_skb;
        /* Build the OpenFlow packet-in header in place. */
        opi = nla_data(attr);
        opi->header.version = OFP_VERSION;
        opi->header.type    = OFPT_PACKET_IN;
        opi->header.length  = htons(opi_len);
        opi->header.xid     = htonl(0);

        opi->buffer_id      = htonl(buffer_id);
        opi->total_len      = htons(skb->len);
        opi->in_port        = htons(skb->dev->br_port->port_no);
        opi->reason         = reason;
        opi->pad            = 0;
        /* Packet bytes are copied straight out of the linear skb data. */
        SKB_LINEAR_ASSERT(skb);
        memcpy(opi->data, skb_mac_header(skb), fwd_len);

        err = genlmsg_end(f_skb, data);
        if (err < 0)
                goto error_free_f_skb;

        err = genlmsg_multicast(f_skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "dp_output_control: genlmsg_multicast failed: %d\n", err);

        /* This function owns 'skb'; free it whether or not the send worked. */
        kfree_skb(skb);  

        return err;

nla_put_failure:
error_free_f_skb:
        nlmsg_free(f_skb);
error_free_skb:
        kfree_skb(skb);
        if (net_ratelimit())
                printk(KERN_ERR "dp_output_control: failed to send: %d\n", err);
        return err;
}
572
/* Fill '*desc' with the OpenFlow description of port 'p'.  Multi-byte
 * fields are written in network byte order. */
static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc)
{
        desc->port_no = htons(p->port_no);
        strncpy(desc->name, p->dev->name, OFP_MAX_PORT_NAME_LEN);
        desc->name[OFP_MAX_PORT_NAME_LEN-1] = '\0';     /* force NUL termination */
        memcpy(desc->hw_addr, p->dev->dev_addr, ETH_ALEN);
        desc->flags = htonl(p->flags);
        /* features/speed stay zero unless ethtool can report them below. */
        desc->features = 0;
        desc->speed = 0;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,24)
        /* Translate the device's ethtool-advertised link modes into
         * OpenFlow port-feature flags. */
        if (p->dev->ethtool_ops && p->dev->ethtool_ops->get_settings) {
                struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };

                if (!p->dev->ethtool_ops->get_settings(p->dev, &ecmd)) {
                        if (ecmd.supported & SUPPORTED_10baseT_Half) 
                                desc->features |= OFPPF_10MB_HD;
                        if (ecmd.supported & SUPPORTED_10baseT_Full)
                                desc->features |= OFPPF_10MB_FD;
                        if (ecmd.supported & SUPPORTED_100baseT_Half) 
                                desc->features |= OFPPF_100MB_HD;
                        if (ecmd.supported & SUPPORTED_100baseT_Full)
                                desc->features |= OFPPF_100MB_FD;
                        if (ecmd.supported & SUPPORTED_1000baseT_Half)
                                desc->features |= OFPPF_1GB_HD;
                        if (ecmd.supported & SUPPORTED_1000baseT_Full)
                                desc->features |= OFPPF_1GB_FD;
                        /* 10Gbps half-duplex doesn't exist... */
                        if (ecmd.supported & SUPPORTED_10000baseT_Full)
                                desc->features |= OFPPF_10GB_FD;

                        desc->features = htonl(desc->features);
                        desc->speed = htonl(ecmd.speed);
                }
        }
#endif
}
610
611 static int 
612 fill_data_hello(struct datapath *dp, struct ofp_data_hello *odh)
613 {
614         struct net_bridge_port *p;
615         int port_count = 0;
616
617         odh->header.version = OFP_VERSION;
618         odh->header.type    = OFPT_DATA_HELLO;
619         odh->header.xid     = htonl(0);
620         odh->datapath_id    = cpu_to_be64(dp->id); 
621
622         odh->n_exact        = htonl(2 * TABLE_HASH_MAX_FLOWS);
623         odh->n_mac_only     = htonl(TABLE_MAC_MAX_FLOWS);
624         odh->n_compression  = 0;                                           /* Not supported */
625         odh->n_general      = htonl(TABLE_LINEAR_MAX_FLOWS);
626         odh->buffer_mb      = htonl(UINT32_MAX);
627         odh->n_buffers      = htonl(N_PKT_BUFFERS);
628         odh->capabilities   = htonl(OFP_SUPPORTED_CAPABILITIES);
629         odh->actions        = htonl(OFP_SUPPORTED_ACTIONS);
630         odh->miss_send_len  = htons(dp->miss_send_len); 
631
632         list_for_each_entry_rcu (p, &dp->port_list, node) {
633                 fill_port_desc(p, &odh->ports[port_count]);
634                 port_count++;
635         }
636
637         return port_count;
638 }
639
/* Build and multicast a DATA_HELLO message describing 'dp' and all of its
 * ports.  Returns 0 on success or a negative errno. */
int
dp_send_hello(struct datapath *dp)
{
        struct sk_buff *skb;
        struct nlattr *attr;
        struct ofp_data_hello *odh;
        size_t odh_max_len, odh_len, port_max_len, len;
        void *data;
        int err = -ENOMEM;
        int port_count;


        /* Overallocate, since we can't reliably determine the number of
         * ports a priori. */
        port_max_len = sizeof(struct ofp_phy_port) * OFPP_MAX;

        len = nla_total_size(sizeof(*odh) + port_max_len) 
                                + nla_total_size(sizeof(uint32_t));

        skb = genlmsg_new(len, GFP_ATOMIC);
        if (!skb) {
                if (net_ratelimit())
                        printk("dp_send_hello: genlmsg_new failed\n");
                goto error;
        }

        data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
                           DP_GENL_C_OPENFLOW);
        if (data == NULL) {
                if (net_ratelimit())
                        printk("dp_send_hello: genlmsg_put failed\n");
                goto error;
        }

        /* NB: jumps to nla_put_failure if the skb runs out of room. */
        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp->dp_idx);

        odh_max_len = sizeof(*odh) + port_max_len;
        attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, odh_max_len);
        if (!attr) {
                if (net_ratelimit())
                        printk("dp_send_hello: nla_reserve failed\n");
                goto error;
        }
        odh = nla_data(attr);
        port_count = fill_data_hello(dp, odh);

        /* Only now that we know how many ports we've added can we say
         * say something about the length. */
        odh_len = sizeof(*odh) + (sizeof(struct ofp_phy_port) * port_count);
        odh->header.length = htons(odh_len);

        /* Take back the unused part that was reserved */
        nla_unreserve(skb, attr, (odh_max_len - odh_len));

        err = genlmsg_end(skb, data);
        if (err < 0) {
                if (net_ratelimit())
                        printk("dp_send_hello: genlmsg_end failed\n");
                goto error;
        }

        err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "dp_send_hello: genlmsg_multicast failed: %d\n", err);

        return err;

nla_put_failure:
error:
        /* kfree_skb() tolerates NULL, so this is safe even when
         * genlmsg_new() itself failed. */
        kfree_skb(skb);
        if (net_ratelimit())
                printk(KERN_ERR "dp_send_hello: failed to send: %d\n", err);
        return err;
}
714
715 int
716 dp_update_port_flags(struct datapath *dp, const struct ofp_phy_port *opp)
717 {
718         struct net_bridge_port *p;
719
720         p = dp->ports[htons(opp->port_no)];
721
722         /* Make sure the port id hasn't changed since this was sent */
723         if (!p || memcmp(opp->hw_addr, p->dev->dev_addr, ETH_ALEN) != 0) 
724                 return -1;
725         
726         p->flags = htonl(opp->flags);
727
728         return 0;
729 }
730
731
/* Multicast an OFPT_PORT_STATUS message announcing that port 'p' changed
 * ('status' is an OFPPR_* reason code).  Returns 0 or a negative errno. */
static int
send_port_status(struct net_bridge_port *p, uint8_t status)
{
        struct sk_buff *skb;
        struct nlattr *attr;
        struct ofp_port_status *ops;
        void *data;
        int err = -ENOMEM;


        skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
        if (!skb) {
                if (net_ratelimit())
                        printk("send_port_status: genlmsg_new failed\n");
                goto error;
        }

        data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
                           DP_GENL_C_OPENFLOW);
        if (data == NULL) {
                if (net_ratelimit())
                        printk("send_port_status: genlmsg_put failed\n");
                goto error;
        }

        /* NB: jumps to nla_put_failure if the skb runs out of room. */
        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, p->dp->dp_idx);

        attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, sizeof(*ops));
        if (!attr) {
                if (net_ratelimit())
                        printk("send_port_status: nla_reserve failed\n");
                goto error;
        }

        /* Build the OpenFlow port-status message in place. */
        ops = nla_data(attr);
        ops->header.version = OFP_VERSION;
        ops->header.type    = OFPT_PORT_STATUS;
        ops->header.length  = htons(sizeof(*ops));
        ops->header.xid     = htonl(0);

        ops->reason         = status;
        fill_port_desc(p, &ops->desc);

        err = genlmsg_end(skb, data);
        if (err < 0) {
                if (net_ratelimit())
                        printk("send_port_status: genlmsg_end failed\n");
                goto error;
        }

        err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "send_port_status: genlmsg_multicast failed: %d\n", err);

        return err;

nla_put_failure:
error:
        kfree_skb(skb);         /* kfree_skb() tolerates NULL */
        if (net_ratelimit())
                printk(KERN_ERR "send_port_status: failed to send: %d\n", err);
        return err;
}
795
/* Multicast an OFPT_FLOW_EXPIRED message reporting that 'flow' timed out
 * of 'dp', including its match and final packet/byte counters. */
int 
dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow)
{
        struct sk_buff *skb;
        struct nlattr *attr;
        struct ofp_flow_expired *ofe;
        void *data;
        unsigned long duration_j;       /* flow lifetime in jiffies */
        int err = -ENOMEM;


        skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
        if (!skb) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: genlmsg_new failed\n");
                goto error;
        }

        data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
                           DP_GENL_C_OPENFLOW);
        if (data == NULL) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: genlmsg_put failed\n");
                goto error;
        }

        /* NB: jumps to nla_put_failure if the skb runs out of room. */
        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp->dp_idx);

        attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, sizeof(*ofe));
        if (!attr) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: nla_reserve failed\n");
                goto error;
        }

        /* Build the OpenFlow flow-expired message in place. */
        ofe = nla_data(attr);
        ofe->header.version = OFP_VERSION;
        ofe->header.type    = OFPT_FLOW_EXPIRED;
        ofe->header.length  = htons(sizeof(*ofe));
        ofe->header.xid     = htonl(0);

        flow_fill_match(&ofe->match, &flow->key);
        /* NOTE(review): this assumes flow->timeout was computed as
         * init_time + lifetime + HZ*max_idle, so subtracting the idle
         * allowance and init_time recovers the lifetime -- confirm against
         * the flow-timeout code that sets these fields. */
        duration_j = (flow->timeout - HZ * flow->max_idle) - flow->init_time;
        ofe->duration   = htonl(duration_j / HZ);
        ofe->packet_count   = cpu_to_be64(flow->packet_count);
        ofe->byte_count     = cpu_to_be64(flow->byte_count);

        err = genlmsg_end(skb, data);
        if (err < 0) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: genlmsg_end failed\n");
                goto error;
        }

        err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "send_flow_expired: genlmsg_multicast failed: %d\n", err);

        return err;

nla_put_failure:
error:
        kfree_skb(skb);         /* kfree_skb() tolerates NULL */
        if (net_ratelimit())
                printk(KERN_ERR "send_flow_expired: failed to send: %d\n", err);
        return err;
}
863
864 /* Generic Netlink interface.
865  *
866  * See netlink(7) for an introduction to netlink.  See
867  * http://linux-net.osdl.org/index.php/Netlink for more information and
868  * pointers on how to work with netlink and Generic Netlink in the kernel and
869  * in userspace. */
870
/* The Generic Netlink family used for all datapath <-> userspace control
 * traffic.  GENL_ID_GENERATE lets the kernel assign a free family id at
 * registration time. */
static struct genl_family dp_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = 0,
        .name = DP_GENL_FAMILY_NAME,
        .version = 1,
        .maxattr = DP_GENL_A_MAX,
};

/* Attribute policy: what each attribute may contain.  */
static struct nla_policy dp_genl_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
        [DP_GENL_A_MC_GROUP] = { .type = NLA_U32 },
        [DP_GENL_A_PORTNAME] = { .type = NLA_STRING }
};
885
886 static int dp_genl_add(struct sk_buff *skb, struct genl_info *info)
887 {
888         if (!info->attrs[DP_GENL_A_DP_IDX])
889                 return -EINVAL;
890
891         return new_dp(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
892 }
893
/* Operation table entry binding DP_GENL_C_ADD_DP to dp_genl_add(). */
static struct genl_ops dp_genl_ops_add_dp = {
        .cmd = DP_GENL_C_ADD_DP,
        .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_policy,
        .doit = dp_genl_add,
        .dumpit = NULL,
};
901
902 struct datapath *dp_get(int dp_idx)
903 {
904         if (dp_idx < 0 || dp_idx > DP_MAX)
905                 return NULL;
906         return rcu_dereference(dps[dp_idx]);
907 }
908
909 static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
910 {
911         struct datapath *dp;
912         int err;
913
914         if (!info->attrs[DP_GENL_A_DP_IDX])
915                 return -EINVAL;
916
917         mutex_lock(&dp_mutex);
918         dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
919         if (!dp)
920                 err = -ENOENT;
921         else {
922                 del_dp(dp);
923                 err = 0;
924         }
925         mutex_unlock(&dp_mutex);
926         return err;
927 }
928
929 static struct genl_ops dp_genl_ops_del_dp = {
930         .cmd = DP_GENL_C_DEL_DP,
931         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
932         .policy = dp_genl_policy,
933         .doit = dp_genl_del,
934         .dumpit = NULL,
935 };
936
937 /* Queries a datapath for related information.  Currently the only relevant
938  * information is the datapath's multicast group ID.  Really we want one
939  * multicast group per datapath, but because of locking issues[*] we can't
940  * easily get one.  Thus, every datapath will currently return the same
941  * global multicast group ID, but in the future it would be nice to fix that.
942  *
943  * [*] dp_genl_add, to add a new datapath, is called under the genl_lock
944  *       mutex, and genl_register_mc_group, called to acquire a new multicast
945  *       group ID, also acquires genl_lock, thus deadlock.
946  */
947 static int dp_genl_query(struct sk_buff *skb, struct genl_info *info)
948 {
949         struct datapath *dp;
950         struct sk_buff *ans_skb = NULL;
951         int dp_idx;
952         int err = -ENOMEM;
953
954         if (!info->attrs[DP_GENL_A_DP_IDX])
955                 return -EINVAL;
956
957         rcu_read_lock();
958         dp_idx = nla_get_u32((info->attrs[DP_GENL_A_DP_IDX]));
959         dp = dp_get(dp_idx);
960         if (!dp)
961                 err = -ENOENT;
962         else {
963                 void *data;
964                 ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
965                 if (!ans_skb) {
966                         err = -ENOMEM;
967                         goto err;
968                 }
969                 data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
970                                          0, DP_GENL_C_QUERY_DP);
971                 if (data == NULL) {
972                         err = -ENOMEM;
973                         goto err;
974                 }
975                 NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp_idx);
976                 NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, mc_group.id);
977
978                 genlmsg_end(ans_skb, data);
979                 err = genlmsg_reply(ans_skb, info);
980                 if (!err)
981                         ans_skb = NULL;
982         }
983 err:
984 nla_put_failure:
985         if (ans_skb)
986                 kfree_skb(ans_skb);
987         rcu_read_unlock();
988         return err;
989 }
990
991 /*
992  * Fill flow entry for nl flow query.  Called with rcu_lock  
993  *
994  */
995 static
996 int
997 dp_fill_flow(struct ofp_flow_mod* ofm, struct swt_iterator* iter)
998 {
999         ofm->header.version  = OFP_VERSION;
1000         ofm->header.type     = OFPT_FLOW_MOD;
1001         ofm->header.length   = htons(sizeof(struct ofp_flow_mod) 
1002                                 + sizeof(ofm->actions[0]));
1003         ofm->header.xid      = htonl(0);
1004
1005         ofm->match.wildcards = htons(iter->flow->key.wildcards);
1006         ofm->match.in_port   = iter->flow->key.in_port;
1007         ofm->match.dl_vlan   = iter->flow->key.dl_vlan;
1008         memcpy(ofm->match.dl_src, iter->flow->key.dl_src, ETH_ALEN);
1009         memcpy(ofm->match.dl_dst, iter->flow->key.dl_dst, ETH_ALEN);
1010         ofm->match.dl_type   = iter->flow->key.dl_type;
1011         ofm->match.nw_src    = iter->flow->key.nw_src;
1012         ofm->match.nw_dst    = iter->flow->key.nw_dst;
1013         ofm->match.nw_proto  = iter->flow->key.nw_proto;
1014         ofm->match.tp_src    = iter->flow->key.tp_src;
1015         ofm->match.tp_dst    = iter->flow->key.tp_dst;
1016         ofm->group_id        = iter->flow->group_id;
1017         ofm->max_idle        = iter->flow->max_idle;
1018         /* TODO support multiple actions  */
1019         ofm->actions[0]      = iter->flow->actions[0];
1020
1021         return 0;
1022 }
1023
/* Handler for DP_GENL_C_SHOW_DP: replies with an ofp_data_hello describing
 * the datapath and its ports.
 *
 * Runs under dp_mutex (a sleeping lock), so the GFP_KERNEL allocation and
 * genlmsg_reply() below are safe here, unlike under rcu_read_lock().
 *
 * NOTE(review): when the datapath doesn't exist this returns -ENOMEM (the
 * initial value of err) rather than -ENOENT -- confirm whether userspace
 * cares before changing. */
static int dp_genl_show(struct sk_buff *skb, struct genl_info *info)
{
        struct datapath *dp;
        int err = -ENOMEM;
        struct sk_buff *ans_skb = NULL;
        void *data;
        struct nlattr *attr;
        struct ofp_data_hello *odh;
        size_t odh_max_len, odh_len, port_max_len, len;
        int port_count;

        if (!info->attrs[DP_GENL_A_DP_IDX])
                return -EINVAL;

        mutex_lock(&dp_mutex);
        dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
        if (!dp)
                goto error;

        /* Overallocate, since we can't reliably determine the number of
         * ports a priori. */
        port_max_len = sizeof(struct ofp_phy_port) * OFPP_MAX;

        len = nla_total_size(sizeof(*odh) + port_max_len)
                        + nla_total_size(sizeof(uint32_t));

        ans_skb = nlmsg_new(len, GFP_KERNEL);
        if (!ans_skb)
                goto error;

        data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
                                 0, DP_GENL_C_SHOW_DP);
        if (data == NULL) 
                goto error;

        NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp->dp_idx);

        /* Reserve worst-case space for the hello, fill it, then give back
         * the part we didn't use once the real port count is known. */
        odh_max_len = sizeof(*odh) + port_max_len;
        attr = nla_reserve(ans_skb, DP_GENL_A_DP_INFO, odh_max_len);
        if (!attr)
                goto error;
        odh = nla_data(attr);
        port_count = fill_data_hello(dp, odh);

        /* Only now that we know how many ports we've added can we say
         * say something about the length. */
        odh_len = sizeof(*odh) + (sizeof(struct ofp_phy_port) * port_count);
        odh->header.length = htons(odh_len);

        /* Take back the unused part that was reserved */
        nla_unreserve(ans_skb, attr, (odh_max_len - odh_len));

        genlmsg_end(ans_skb, data);
        err = genlmsg_reply(ans_skb, info);
        if (!err)
                ans_skb = NULL;  /* consumed by genlmsg_reply */

error:
nla_put_failure:
        if (ans_skb)
                kfree_skb(ans_skb);
        mutex_unlock(&dp_mutex);
        return err;
}
1088
1089 static struct genl_ops dp_genl_ops_show_dp = {
1090         .cmd = DP_GENL_C_SHOW_DP,
1091         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1092         .policy = dp_genl_policy,
1093         .doit = dp_genl_show,
1094         .dumpit = NULL,
1095 };
1096
/* Convenience function: start a DP_GENL_C_QUERY_FLOW reply in *skb and add
 * the datapath-index and table-index attributes.  Returns the genetlink
 * message handle (to pass to genlmsg_end) or NULL on failure; on failure
 * the caller still owns and must free skb. */
static
void* 
dp_init_nl_flow_msg(uint32_t dp_idx, uint16_t table_idx, 
                struct genl_info *info, struct sk_buff* skb)
{
        void* data;

        data = genlmsg_put_reply(skb, info, &dp_genl_family, 0, 
                                DP_GENL_C_QUERY_FLOW);
        if (data == NULL)
                return NULL;
        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX,   dp_idx);
        NLA_PUT_U16(skb, DP_GENL_A_TABLEIDX, table_idx);

        return data;

nla_put_failure:
        /* Jumped to by the NLA_PUT_* macros when the skb runs out of room. */
        return NULL;
}
1117
1118 /*  Iterate through the specified table and send all flow entries over
1119  *  netlink to userspace.  Each flow message has the following format:
1120  *
1121  *  32bit dpix
1122  *  16bit tabletype
1123  *  32bit number of flows
1124  *  openflow-flow-entries
1125  *
1126  *  The full table may require multiple messages.  A message with 0 flows
1127  *  signifies end-of message.
1128  */
1129
1130 static 
1131 int 
1132 dp_dump_table(struct datapath *dp, uint16_t table_idx, struct genl_info *info, struct ofp_flow_mod* matchme) 
1133
1134         struct sk_buff  *skb = 0; 
1135         struct sw_table *table = 0;
1136         struct swt_iterator iter;
1137         struct sw_flow_key in_flow; 
1138         struct nlattr   *attr;
1139         int count = 0, sum_count = 0;
1140         void *data; 
1141         uint8_t* ofm_ptr = 0;
1142         struct nlattr   *num_attr; 
1143         int err = -ENOMEM;
1144
1145         table = dp->chain->tables[table_idx]; 
1146         if ( table == NULL ) {
1147                 dprintk("dp::dp_dump_table error, non-existant table at position %d\n", table_idx);
1148                 return -EINVAL;
1149         }
1150
1151         if (!table->iterator(table, &iter)) {
1152                 dprintk("dp::dp_dump_table couldn't initialize empty table iterator\n");
1153                 return -ENOMEM;
1154         }
1155
1156         while (iter.flow) {
1157
1158                 /* verify that we can fit all NL_FLOWS_PER_MESSAGE in a single
1159                  * sk_buf */
1160                 if( (sizeof(dp_genl_family) + sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint32_t) + 
1161                                         (NL_FLOWS_PER_MESSAGE * sizeof(struct ofp_flow_mod))) > (8192 - 64)){
1162                         dprintk("dp::dp_dump_table NL_FLOWS_PER_MESSAGE may cause overrun in skbuf\n");
1163                         return -ENOMEM;
1164                 }
1165
1166                 skb = nlmsg_new(8192 - 64, GFP_ATOMIC);
1167                 if (skb == NULL) {
1168                         return -ENOMEM;
1169                 }
1170
1171                 data = dp_init_nl_flow_msg(dp->dp_idx, table_idx, info, skb);
1172                 if (data == NULL){
1173                         err= -ENOMEM;   
1174                         goto error_free_skb;
1175                 } 
1176
1177                 /* reserve space to put the number of flows for this message, to
1178                  * be filled after the loop*/
1179                 num_attr = nla_reserve(skb, DP_GENL_A_NUMFLOWS, sizeof(uint32_t));
1180                 if(!num_attr){
1181                         err = -ENOMEM;
1182                         goto error_free_skb;
1183                 }
1184
1185                 /* Only load NL_FLOWS_PER_MESSAGE flows at a time */
1186                 attr = nla_reserve(skb, DP_GENL_A_FLOW, 
1187                                 (sizeof(struct ofp_flow_mod) + sizeof(struct ofp_action)) * NL_FLOWS_PER_MESSAGE);
1188                 if (!attr){
1189                         err = -ENOMEM;
1190                         goto error_free_skb;
1191                 }
1192
1193                 /* internal loop to fill NL_FLOWS_PER_MESSAGE flows */
1194                 ofm_ptr = nla_data(attr);
1195                 flow_extract_match(&in_flow, &matchme->match);
1196                 while (iter.flow && count < NL_FLOWS_PER_MESSAGE) {
1197                         if(flow_matches(&in_flow, &iter.flow->key)){
1198                                 if((err = dp_fill_flow((struct ofp_flow_mod*)ofm_ptr, &iter))) 
1199                                         goto error_free_skb;
1200                                 count++; 
1201                                 /* TODO support multiple actions  */
1202                                 ofm_ptr += sizeof(struct ofp_flow_mod) + sizeof(struct ofp_action);
1203                         }
1204                         table->iterator_next(&iter);
1205                 }
1206
1207                 *((uint32_t*)nla_data(num_attr)) = count;
1208                 genlmsg_end(skb, data); 
1209
1210                 sum_count += count; 
1211                 count = 0;
1212
1213                 err = genlmsg_unicast(skb, info->snd_pid); 
1214                 skb = 0;
1215         }
1216
1217         /* send a sentinal message saying we're done */
1218         skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
1219         if (skb == NULL) {
1220                 return -ENOMEM;
1221         }
1222         data = dp_init_nl_flow_msg(dp->dp_idx, table_idx, info, skb);
1223         if (data == NULL){
1224                 err= -ENOMEM;   
1225                 goto error_free_skb;
1226         } 
1227
1228         NLA_PUT_U32(skb, DP_GENL_A_NUMFLOWS,   0);
1229         /* dummy flow so nl doesn't complain */
1230         attr = nla_reserve(skb, DP_GENL_A_FLOW, sizeof(struct ofp_flow_mod));
1231         if (!attr){
1232                 err = -ENOMEM;
1233                 goto error_free_skb;
1234         }
1235         genlmsg_end(skb, data); 
1236         err = genlmsg_reply(skb, info); skb = 0;
1237
1238 nla_put_failure:
1239 error_free_skb:
1240         if(skb)
1241                 kfree_skb(skb);
1242         return err;
1243 }
1244
1245 /* Helper function to query_table which creates and sends a message packed with
1246  * table stats.  Message form is:
1247  *
1248  * u32 DP_IDX
1249  * u32 NUM_TABLES
1250  * OFP_TABLE (list of OFP_TABLES)
1251  *
1252  */
1253
1254 static 
1255 int 
1256 dp_dump_table_stats(struct datapath *dp, int dp_idx, struct genl_info *info) 
1257
1258         struct sk_buff   *skb = 0; 
1259         struct ofp_table *ot = 0;
1260         struct nlattr   *attr;
1261         struct sw_table_stats stats; 
1262         void *data; 
1263         int err = -ENOMEM;
1264         int i = 0;
1265         int nt = dp->chain->n_tables;
1266
1267         /* u32 IDX, u32 NUMTABLES, list-of-tables */
1268         skb = nlmsg_new(4 + 4 + (sizeof(struct ofp_table) * nt), GFP_ATOMIC);
1269         if (skb == NULL) {
1270                 return -ENOMEM;
1271         }
1272         
1273         data = genlmsg_put_reply(skb, info, &dp_genl_family, 0, 
1274                                 DP_GENL_C_QUERY_TABLE);
1275         if (data == NULL){
1276                 return -ENOMEM;
1277         } 
1278
1279         NLA_PUT_U32(skb, DP_GENL_A_DP_IDX,      dp_idx);
1280         NLA_PUT_U32(skb, DP_GENL_A_NUMTABLES, nt);
1281
1282         /* ... we assume that all tables can fit in a single message.
1283          * Probably a reasonable assumption seeing that we only have
1284          * 3 atm */
1285         attr = nla_reserve(skb, DP_GENL_A_TABLE, (sizeof(struct ofp_table) * nt));
1286         if (!attr){
1287                 err = -ENOMEM;
1288                 goto error_free_skb;
1289         }
1290
1291         ot = nla_data(attr);
1292
1293         for (i = 0; i < nt; ++i) {
1294                 dp->chain->tables[i]->stats(dp->chain->tables[i], &stats);
1295                 ot->header.version = OFP_VERSION;
1296                 ot->header.type    = OFPT_TABLE;
1297                 ot->header.length  = htons(sizeof(struct ofp_table));
1298                 ot->header.xid     = htonl(0);
1299
1300                 strncpy(ot->name, stats.name, OFP_MAX_TABLE_NAME_LEN); 
1301                 ot->table_id  = htons(i);
1302                 ot->n_flows   = htonl(stats.n_flows);
1303                 ot->max_flows = htonl(stats.max_flows);
1304                 ot++;
1305         }
1306
1307
1308         genlmsg_end(skb, data); 
1309         err = genlmsg_reply(skb, info); skb = 0;
1310
1311 nla_put_failure:
1312 error_free_skb:
1313         if(skb)
1314                 kfree_skb(skb);
1315         return err;
1316 }
1317
1318 /* 
1319  * Queries a datapath for flow-table statistics 
1320  */
1321
1322
1323 static int dp_genl_table_query(struct sk_buff *skb, struct genl_info *info)
1324 {
1325         struct   datapath* dp;
1326         int       err = 0;
1327
1328         if (!info->attrs[DP_GENL_A_DP_IDX]) {
1329                 dprintk("dp::dp_genl_table_query received message with missing attributes\n");
1330                 return -EINVAL;
1331         }
1332
1333         rcu_read_lock();
1334         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1335         if (!dp) {
1336                 err = -ENOENT;
1337                 goto err_out;
1338         }
1339
1340         err = dp_dump_table_stats(dp, nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]), info); 
1341
1342 err_out:
1343         rcu_read_unlock();
1344         return err;
1345 }
1346
1347 /* 
1348  * Queries a datapath for flow-table entries.
1349  */
1350
1351 static int dp_genl_flow_query(struct sk_buff *skb, struct genl_info *info)
1352 {
1353         struct datapath* dp;
1354         struct ofp_flow_mod*  ofm;
1355         u16     table_idx;
1356         int     err = 0;
1357
1358         if (!info->attrs[DP_GENL_A_DP_IDX]
1359                                 || !info->attrs[DP_GENL_A_TABLEIDX]
1360                                 || !info->attrs[DP_GENL_A_FLOW]) {
1361                 dprintk("dp::dp_genl_flow_query received message with missing attributes\n");
1362                 return -EINVAL;
1363         }
1364
1365         rcu_read_lock();
1366         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1367         if (!dp) {
1368                 err = -ENOENT;
1369                 goto err_out;
1370         }
1371
1372         table_idx = nla_get_u16(info->attrs[DP_GENL_A_TABLEIDX]);
1373
1374         if (dp->chain->n_tables <= table_idx){
1375                 printk("table index %d invalid (dp has %d tables)\n",
1376                                 table_idx, dp->chain->n_tables);
1377         err = -EINVAL;
1378                 goto err_out;
1379         }
1380
1381         ofm = nla_data(info->attrs[DP_GENL_A_FLOW]);
1382         err = dp_dump_table(dp, table_idx, info, ofm); 
1383
1384 err_out:
1385         rcu_read_unlock();
1386         return err;
1387 }
1388
/* Attribute policy for flow queries.  NOTE(review): DP_GENL_A_FLOW has no
 * entry, so its payload is accepted unvalidated -- the handler trusts its
 * length; confirm that's intended. */
static struct nla_policy dp_genl_flow_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX]      = { .type = NLA_U32 },
        [DP_GENL_A_TABLEIDX] = { .type = NLA_U16 },
        [DP_GENL_A_NUMFLOWS]  = { .type = NLA_U32 },
};
1394
1395 static struct genl_ops dp_genl_ops_query_flow = {
1396         .cmd    = DP_GENL_C_QUERY_FLOW,
1397         .flags  = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1398         .policy = dp_genl_flow_policy,
1399         .doit   = dp_genl_flow_query,
1400         .dumpit = NULL,
1401 };
1402
/* Attribute policy for table-statistics queries. */
static struct nla_policy dp_genl_table_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX]      = { .type = NLA_U32 },
};
1406
1407 static struct genl_ops dp_genl_ops_query_table = {
1408         .cmd    = DP_GENL_C_QUERY_TABLE,
1409         .flags  = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1410         .policy = dp_genl_table_policy,
1411         .doit   = dp_genl_table_query,
1412         .dumpit = NULL,
1413 };
1414
1415
1416 static struct genl_ops dp_genl_ops_query_dp = {
1417         .cmd = DP_GENL_C_QUERY_DP,
1418         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1419         .policy = dp_genl_policy,
1420         .doit = dp_genl_query,
1421         .dumpit = NULL,
1422 };
1423
1424 static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
1425 {
1426         struct datapath *dp;
1427         struct net_device *port;
1428         int err;
1429
1430         if (!info->attrs[DP_GENL_A_DP_IDX] || !info->attrs[DP_GENL_A_PORTNAME])
1431                 return -EINVAL;
1432
1433         /* Get datapath. */
1434         mutex_lock(&dp_mutex);
1435         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1436         if (!dp) {
1437                 err = -ENOENT;
1438                 goto out;
1439         }
1440
1441         /* Get interface to add/remove. */
1442         port = dev_get_by_name(&init_net, 
1443                         nla_data(info->attrs[DP_GENL_A_PORTNAME]));
1444         if (!port) {
1445                 err = -ENOENT;
1446                 goto out;
1447         }
1448
1449         /* Execute operation. */
1450         if (info->genlhdr->cmd == DP_GENL_C_ADD_PORT)
1451                 err = add_switch_port(dp, port);
1452         else {
1453                 if (port->br_port == NULL || port->br_port->dp != dp) {
1454                         err = -ENOENT;
1455                         goto out_put;
1456                 }
1457                 err = del_switch_port(port->br_port);
1458         }
1459
1460 out_put:
1461         dev_put(port);
1462 out:
1463         mutex_unlock(&dp_mutex);
1464         return err;
1465 }
1466
1467 static struct genl_ops dp_genl_ops_add_port = {
1468         .cmd = DP_GENL_C_ADD_PORT,
1469         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1470         .policy = dp_genl_policy,
1471         .doit = dp_genl_add_del_port,
1472         .dumpit = NULL,
1473 };
1474
1475 static struct genl_ops dp_genl_ops_del_port = {
1476         .cmd = DP_GENL_C_DEL_PORT,
1477         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1478         .policy = dp_genl_policy,
1479         .doit = dp_genl_add_del_port,
1480         .dumpit = NULL,
1481 };
1482
1483 static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info)
1484 {
1485         struct nlattr *va = info->attrs[DP_GENL_A_OPENFLOW];
1486         struct datapath *dp;
1487         int err;
1488
1489         if (!info->attrs[DP_GENL_A_DP_IDX] || !va)
1490                 return -EINVAL;
1491
1492         rcu_read_lock();
1493         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1494         if (!dp) {
1495                 err = -ENOENT;
1496                 goto out;
1497         }
1498
1499         va = info->attrs[DP_GENL_A_OPENFLOW];
1500
1501         err = fwd_control_input(dp->chain, nla_data(va), nla_len(va));
1502
1503 out:
1504         rcu_read_unlock();
1505         return err;
1506 }
1507
/* Attribute policy for embedded OpenFlow messages.  NOTE(review): the
 * DP_GENL_A_OPENFLOW payload itself has no policy entry and is passed to
 * fwd_control_input() unvalidated. */
static struct nla_policy dp_genl_openflow_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
};
1511
1512 static struct genl_ops dp_genl_ops_openflow = {
1513         .cmd = DP_GENL_C_OPENFLOW,
1514         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1515         .policy = dp_genl_openflow_policy,
1516         .doit = dp_genl_openflow,
1517         .dumpit = NULL,
1518 };
1519
/* Attribute policy for the netlink benchmark command. */
static struct nla_policy dp_genl_benchmark_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
        [DP_GENL_A_NPACKETS] = { .type = NLA_U32 },
        [DP_GENL_A_PSIZE] = { .type = NLA_U32 },
};
1525
1526 static struct genl_ops dp_genl_ops_benchmark_nl = {
1527         .cmd = DP_GENL_C_BENCHMARK_NL,
1528         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1529         .policy = dp_genl_benchmark_policy,
1530         .doit = dp_genl_benchmark_nl,
1531         .dumpit = NULL,
1532 };
1533
/* All genetlink operations this module registers; dp_init_netlink walks
 * this table.  Order matters for performance only, not correctness. */
static struct genl_ops *dp_genl_all_ops[] = {
        /* Keep this operation first.  Generic Netlink dispatching
         * looks up operations with linear search, so we want it at the
         * front. */
        &dp_genl_ops_openflow,

        &dp_genl_ops_query_flow,
        &dp_genl_ops_query_table,
        &dp_genl_ops_show_dp,
        &dp_genl_ops_add_dp,
        &dp_genl_ops_del_dp,
        &dp_genl_ops_query_dp,
        &dp_genl_ops_add_port,
        &dp_genl_ops_del_port,
        &dp_genl_ops_benchmark_nl,
};
1550
1551 static int dp_init_netlink(void)
1552 {
1553         int err;
1554         int i;
1555
1556         err = genl_register_family(&dp_genl_family);
1557         if (err)
1558                 return err;
1559
1560         for (i = 0; i < ARRAY_SIZE(dp_genl_all_ops); i++) {
1561                 err = genl_register_ops(&dp_genl_family, dp_genl_all_ops[i]);
1562                 if (err)
1563                         goto err_unregister;
1564         }
1565
1566         strcpy(mc_group.name, "openflow");
1567         err = genl_register_mc_group(&dp_genl_family, &mc_group);
1568         if (err < 0)
1569                 goto err_unregister;
1570
1571         return 0;
1572
1573 err_unregister:
1574         genl_unregister_family(&dp_genl_family);
1575                 return err;
1576 }
1577
/* Unregister the genetlink family; this also removes all of its registered
 * operations and multicast groups. */
static void dp_uninit_netlink(void)
{
        genl_unregister_family(&dp_genl_family);
}
1582
/* Module identification strings used in the banner printed at load time. */
#define DRV_NAME                "openflow"
#define DRV_VERSION      VERSION
#define DRV_DESCRIPTION "OpenFlow switching datapath implementation"
#define DRV_COPYRIGHT   "Copyright (c) 2007, 2008 The Board of Trustees of The Leland Stanford Junior University"
1587
1588
1589 static int __init dp_init(void)
1590 {
1591         int err;
1592
1593         printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n");
1594         printk(KERN_INFO DRV_NAME ": " VERSION" built on "__DATE__" "__TIME__"\n");
1595         printk(KERN_INFO DRV_NAME ": " DRV_COPYRIGHT "\n");
1596
1597         err = flow_init();
1598         if (err)
1599                 goto error;
1600
1601         err = dp_init_netlink();
1602         if (err)
1603                 goto error_flow_exit;
1604
1605         /* Hook into callback used by the bridge to intercept packets.
1606          * Parasites we are. */
1607         if (br_handle_frame_hook)
1608                 printk("openflow: hijacking bridge hook\n");
1609         br_handle_frame_hook = dp_frame_hook;
1610
1611         return 0;
1612
1613 error_flow_exit:
1614         flow_exit();
1615 error:
1616         printk(KERN_EMERG "openflow: failed to install!");
1617         return err;
1618 }
1619
/* Module exit: tear down forwarding, netlink, and the flow subsystem, then
 * release the bridge frame hook.
 *
 * NOTE(review): the hook is cleared *after* fwd_exit()/flow_exit(), so
 * bridged packets could in principle still enter dp_frame_hook during
 * teardown -- confirm whether clearing the hook first (plus a grace
 * period) is needed. */
static void dp_cleanup(void)
{
        fwd_exit();
        dp_uninit_netlink();
        flow_exit();
        br_handle_frame_hook = NULL;
}
1627
/* Module registration boilerplate. */
module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR(DRV_COPYRIGHT);
MODULE_LICENSE("GPL");