openvswitch: datapath / datapath.c @ commit f42ead18308c6f9610a875c37137de092401019f
1 /*
2  * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
3  * Distributed under the terms of the GNU GPL version 2.
4  *
5  * Significant portions of this file may be copied from parts of the Linux
6  * kernel, by Linus Torvalds and others.
7  */
8
9 /* Functions for managing the dp interface/device. */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/fs.h>
16 #include <linux/if_arp.h>
17 #include <linux/if_vlan.h>
18 #include <linux/in.h>
19 #include <linux/ip.h>
20 #include <linux/jhash.h>
21 #include <linux/delay.h>
22 #include <linux/time.h>
23 #include <linux/etherdevice.h>
24 #include <linux/genetlink.h>
25 #include <linux/kernel.h>
26 #include <linux/kthread.h>
27 #include <linux/mutex.h>
28 #include <linux/percpu.h>
29 #include <linux/rcupdate.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/version.h>
33 #include <linux/ethtool.h>
34 #include <linux/wait.h>
35 #include <asm/system.h>
36 #include <asm/div64.h>
37 #include <asm/bug.h>
38 #include <linux/highmem.h>
39 #include <linux/netfilter_bridge.h>
40 #include <linux/netfilter_ipv4.h>
41 #include <linux/inetdevice.h>
42 #include <linux/list.h>
43 #include <linux/rculist.h>
44 #include <linux/dmi.h>
45 #include <net/inet_ecn.h>
46 #include <net/genetlink.h>
47 #include <linux/compat.h>
48
49 #include "openvswitch/datapath-protocol.h"
50 #include "checksum.h"
51 #include "datapath.h"
52 #include "actions.h"
53 #include "flow.h"
54 #include "loop_counter.h"
55 #include "table.h"
56 #include "vport-internal_dev.h"
57
58 int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
59 EXPORT_SYMBOL(dp_ioctl_hook);
60
61 /**
62  * DOC: Locking:
63  *
64  * Writes to device state (add/remove datapath, port, set operations on vports,
65  * etc.) are protected by RTNL.
66  *
67  * Writes to other state (flow table modifications, set miscellaneous datapath
68  * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
69  * lock nests inside genl_mutex.
70  *
71  * Reads are protected by RCU.
72  *
73  * There are a few special cases (mostly stats) that have their own
74  * synchronization, but they nest under all of the above and don't interact
75  * with each other.
76  */
77
78 /* Protected by genl_mutex. */
79 static struct datapath __rcu *dps[256];
80
81 static struct vport *new_vport(const struct vport_parms *);
82
83 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
84 struct datapath *get_dp(int dp_idx)
85 {
86         if (dp_idx < 0 || dp_idx >= ARRAY_SIZE(dps))
87                 return NULL;
88
89         return rcu_dereference_check(dps[dp_idx], rcu_read_lock_held() ||
90                                          lockdep_rtnl_is_held() ||
91                                          lockdep_genl_is_held());
92 }
93 EXPORT_SYMBOL_GPL(get_dp);
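
/*
 * Usage sketch (illustrative; not code from the original file): per the
 * locking rules above, a reader pins a datapath with rcu_read_lock()
 * around both the lookup and every dereference of the result:
 *
 *      rcu_read_lock();
 *      dp = get_dp(dp_idx);
 *      if (dp)
 *              ... read dp->table, dp->ports[], etc. ...
 *      rcu_read_unlock();
 *
 * Writers hold genl_mutex or RTNL instead, which is why the
 * rcu_dereference_check() above accepts any of the three.
 */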
94
95 /* Must be called with genl_mutex. */
96 static struct tbl *get_table_protected(struct datapath *dp)
97 {
98         return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
99 }
100
101 /* Must be called with rcu_read_lock or RTNL lock. */
102 static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
103 {
104         return rcu_dereference_rtnl(dp->ports[port_no]);
105 }
106
107 /* Must be called with rcu_read_lock or RTNL lock. */
108 const char *dp_name(const struct datapath *dp)
109 {
110         return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL]));
111 }
112
113 static inline size_t br_nlmsg_size(void)
114 {
115         return NLMSG_ALIGN(sizeof(struct ifinfomsg))
116                + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
117                + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
118                + nla_total_size(4) /* IFLA_MASTER */
119                + nla_total_size(4) /* IFLA_MTU */
120                + nla_total_size(4) /* IFLA_LINK */
121                + nla_total_size(1); /* IFLA_OPERSTATE */
122 }
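
/*
 * Note: each nla_total_size() term above budgets for one attribute that
 * dp_fill_ifinfo() emits below, so the two must be kept in sync; an
 * attribute added there without growing this estimate shows up as the
 * -EMSGSIZE WARN_ON() in dp_ifinfo_notify().
 */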
123
124 /* Caller must hold RTNL lock. */
125 static int dp_fill_ifinfo(struct sk_buff *skb,
126                           const struct vport *port,
127                           int event, unsigned int flags)
128 {
129         struct datapath *dp = port->dp;
130         int ifindex = vport_get_ifindex(port);
131         int iflink = vport_get_iflink(port);
132         struct ifinfomsg *hdr;
133         struct nlmsghdr *nlh;
134
135         if (ifindex < 0)
136                 return ifindex;
137
138         if (iflink < 0)
139                 return iflink;
140
141         nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
142         if (nlh == NULL)
143                 return -EMSGSIZE;
144
145         hdr = nlmsg_data(nlh);
146         hdr->ifi_family = AF_BRIDGE;
147         hdr->__ifi_pad = 0;
148         hdr->ifi_type = ARPHRD_ETHER;
149         hdr->ifi_index = ifindex;
150         hdr->ifi_flags = vport_get_flags(port);
151         hdr->ifi_change = 0;
152
153         NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
154         NLA_PUT_U32(skb, IFLA_MASTER,
155                 vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL)));
156         NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
157 #ifdef IFLA_OPERSTATE
158         NLA_PUT_U8(skb, IFLA_OPERSTATE,
159                    vport_is_running(port)
160                         ? vport_get_operstate(port)
161                         : IF_OPER_DOWN);
162 #endif
163
164         NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));
165
166         if (ifindex != iflink)
167                 NLA_PUT_U32(skb, IFLA_LINK, iflink);
168
169         return nlmsg_end(skb, nlh);
170
171 nla_put_failure:
172         nlmsg_cancel(skb, nlh);
173         return -EMSGSIZE;
174 }
175
176 /* Caller must hold RTNL lock. */
177 static void dp_ifinfo_notify(int event, struct vport *port)
178 {
179         struct sk_buff *skb;
180         int err = -ENOBUFS;
181
182         skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
183         if (skb == NULL)
184                 goto errout;
185
186         err = dp_fill_ifinfo(skb, port, event, 0);
187         if (err < 0) {
188                 /* -EMSGSIZE implies BUG in br_nlmsg_size() */
189                 WARN_ON(err == -EMSGSIZE);
190                 kfree_skb(skb);
191                 goto errout;
192         }
193         rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
194         return;
195 errout:
196         if (err < 0)
197                 rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
198 }
199
200 static void release_dp(struct kobject *kobj)
201 {
202         struct datapath *dp = container_of(kobj, struct datapath, ifobj);
203         kfree(dp);
204 }
205
206 static struct kobj_type dp_ktype = {
207         .release = release_dp
208 };
209
210 static void destroy_dp_rcu(struct rcu_head *rcu)
211 {
212         struct datapath *dp = container_of(rcu, struct datapath, rcu);
213
214         tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
215         free_percpu(dp->stats_percpu);
216         kobject_put(&dp->ifobj);
217 }
218
219 /* Called with RTNL lock and genl_lock. */
220 static struct vport *new_vport(const struct vport_parms *parms)
221 {
222         struct vport *vport;
223
224         vport = vport_add(parms);
225         if (!IS_ERR(vport)) {
226                 struct datapath *dp = parms->dp;
227
228                 rcu_assign_pointer(dp->ports[parms->port_no], vport);
229                 list_add(&vport->node, &dp->port_list);
230
231                 dp_ifinfo_notify(RTM_NEWLINK, vport);
232         }
233
234         return vport;
235 }
236
237 /* Called with RTNL lock. */
238 int dp_detach_port(struct vport *p)
239 {
240         ASSERT_RTNL();
241
242         if (p->port_no != ODPP_LOCAL)
243                 dp_sysfs_del_if(p);
244         dp_ifinfo_notify(RTM_DELLINK, p);
245
246         /* First drop references to device. */
247         list_del(&p->node);
248         rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
249
250         /* Then destroy it. */
251         return vport_del(p);
252 }
253
254 /* Must be called with rcu_read_lock. */
255 void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
256 {
257         struct datapath *dp = p->dp;
258         struct dp_stats_percpu *stats;
259         int stats_counter_off;
260         struct sw_flow_actions *acts;
261         struct loop_counter *loop;
262         int error;
263
264         OVS_CB(skb)->vport = p;
265
266         if (!OVS_CB(skb)->flow) {
267                 struct sw_flow_key key;
268                 struct tbl_node *flow_node;
269                 bool is_frag;
270
271                 /* Extract flow from 'skb' into 'key'. */
272                 error = flow_extract(skb, p->port_no, &key, &is_frag);
273                 if (unlikely(error)) {
274                         kfree_skb(skb);
275                         return;
276                 }
277
278                 if (is_frag && dp->drop_frags) {
279                         kfree_skb(skb);
280                         stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
281                         goto out;
282                 }
283
284                 /* Look up flow. */
285                 flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
286                                         flow_hash(&key), flow_cmp);
287                 if (unlikely(!flow_node)) {
288                         struct dp_upcall_info upcall;
289
290                         upcall.cmd = ODP_PACKET_CMD_MISS;
291                         upcall.key = &key;
292                         upcall.userdata = 0;
293                         upcall.sample_pool = 0;
294                         upcall.actions = NULL;
295                         upcall.actions_len = 0;
296                         dp_upcall(dp, skb, &upcall);
297                         stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
298                         goto out;
299                 }
300
301                 OVS_CB(skb)->flow = flow_cast(flow_node);
302         }
303
304         stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
305         flow_used(OVS_CB(skb)->flow, skb);
306
307         acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
308
309         /* Check whether we've looped too much. */
310         loop = loop_get_counter();
311         if (unlikely(++loop->count > MAX_LOOPS))
312                 loop->looping = true;
313         if (unlikely(loop->looping)) {
314                 loop_suppress(dp, acts);
315                 kfree_skb(skb);
316                 goto out_loop;
317         }
318
319         /* Execute actions. */
320         execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
321                         acts->actions_len);
322
323         /* Check whether sub-actions looped too much. */
324         if (unlikely(loop->looping))
325                 loop_suppress(dp, acts);
326
327 out_loop:
328         /* Decrement loop counter. */
329         if (!--loop->count)
330                 loop->looping = false;
331         loop_put_counter();
332
333 out:
334         /* Update datapath statistics. */
335         local_bh_disable();
336         stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
337
338         write_seqcount_begin(&stats->seqlock);
339         (*(u64 *)((u8 *)stats + stats_counter_off))++;
340         write_seqcount_end(&stats->seqlock);
341
342         local_bh_enable();
343 }
344
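/* Copies 'skb''s data into 'to', filling in the transport checksum that
 * was deferred to hardware (skb->ip_summed == CHECKSUM_PARTIAL), so that
 * the copy handed to userspace carries a complete checksum. */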
345 static void copy_and_csum_skb(struct sk_buff *skb, void *to)
346 {
347         u16 csum_start, csum_offset;
348         __wsum csum;
349
350         get_skb_csum_pointers(skb, &csum_start, &csum_offset);
351         csum_start -= skb_headroom(skb);
352         BUG_ON(csum_start >= skb_headlen(skb));
353
354         skb_copy_bits(skb, 0, to, csum_start);
355
356         csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
357                                       skb->len - csum_start, 0);
358         *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
359 }
360
361 static struct genl_family dp_packet_genl_family;
362 #define PACKET_N_MC_GROUPS 16
363
364 static int packet_mc_group(struct datapath *dp, u8 cmd)
365 {
366         BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);
367         return jhash_2words(dp->dp_idx, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
368 }
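
/*
 * For a given datapath, the MISS, ACTION, and SAMPLE upcalls therefore
 * hash to (usually distinct) groups in [0, PACKET_N_MC_GROUPS).  Userspace
 * need not recompute this hash: odp_dp_cmd_fill_info() reports the three
 * group numbers in the ODP_DP_ATTR_MCGROUPS nest.
 */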
369
370 /* Send each packet in the 'skb' list to userspace for 'dp' as directed by
371  * 'upcall_info'.  There will be only one packet unless we broke up a GSO
372  * packet.
373  */
374 static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
375                                  const struct dp_upcall_info *upcall_info)
376 {
377         u32 group = packet_mc_group(dp, upcall_info->cmd);
378         struct sk_buff *nskb;
379         int port_no;
380         int err;
381
382         if (OVS_CB(skb)->vport)
383                 port_no = OVS_CB(skb)->vport->port_no;
384         else
385                 port_no = ODPP_LOCAL;
386
387         do {
388                 struct odp_header *upcall;
389                 struct sk_buff *user_skb; /* to be queued to userspace */
390                 struct nlattr *nla;
391                 unsigned int len;
392
393                 nskb = skb->next;
394                 skb->next = NULL;
395
396                 len = sizeof(struct odp_header);
397                 len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */
398                 len += nla_total_size(skb->len);
399                 len += nla_total_size(FLOW_BUFSIZE);
400                 if (upcall_info->userdata)
401                         len += nla_total_size(8);
402                 if (upcall_info->sample_pool)
403                         len += nla_total_size(4);
404                 if (upcall_info->actions_len)
405                         len += nla_total_size(upcall_info->actions_len);
406
407                 user_skb = genlmsg_new(len, GFP_ATOMIC);
408                 if (!user_skb) {
                            err = -ENOBUFS;
409                         netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
410                         goto err_kfree_skbs;
411                 }
412
413                 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
414                 upcall->dp_idx = dp->dp_idx;
415
416                 nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
417                 flow_to_nlattrs(upcall_info->key, user_skb);
418                 nla_nest_end(user_skb, nla);
419
420                 if (upcall_info->userdata)
421                         nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata);
422                 if (upcall_info->sample_pool)
423                         nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
424                 if (upcall_info->actions_len) {
425                         const struct nlattr *actions = upcall_info->actions;
426                         u32 actions_len = upcall_info->actions_len;
427
428                         nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS);
429                         memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
430                         nla_nest_end(user_skb, nla);
431                 }
432
433                 nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len);
434                 if (skb->ip_summed == CHECKSUM_PARTIAL)
435                         copy_and_csum_skb(skb, nla_data(nla));
436                 else
437                         skb_copy_bits(skb, 0, nla_data(nla), skb->len);
438
439                 err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
440                 if (err)
441                         goto err_kfree_skbs;
442
443                 kfree_skb(skb);
444                 skb = nskb;
445         } while (skb);
446         return 0;
447
448 err_kfree_skbs:
449         kfree_skb(skb);
450         while ((skb = nskb) != NULL) {
451                 nskb = skb->next;
452                 kfree_skb(skb);
453         }
454         return err;
455 }
456
457 /* Generic Netlink multicast groups for upcalls.
458  *
459  * We really want three unique multicast groups per datapath, but we can't even
460  * get one, because genl_register_mc_group() takes genl_lock, which is also
461  * held during Generic Netlink message processing, so trying to acquire
462  * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
463  * preallocate a few groups and use them round-robin for datapaths.  Collision
464  * isn't fatal--multicast listeners should check that the family is the one
465  * that they want and discard others--but it wastes time and memory to receive
466  * unwanted messages.
467  */
468 static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
469
470 static struct genl_family dp_packet_genl_family = {
471         .id = GENL_ID_GENERATE,
472         .hdrsize = sizeof(struct odp_header),
473         .name = ODP_PACKET_FAMILY,
474         .version = 1,
475         .maxattr = ODP_PACKET_ATTR_MAX
476 };
477
478 static int packet_register_mc_groups(void)
479 {
480         int i;
481
482         for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
483                 struct genl_multicast_group *group = &packet_mc_groups[i];
484                 int error;
485
486                 sprintf(group->name, "packet%d", i);
487                 error = genl_register_mc_group(&dp_packet_genl_family, group);
488                 if (error)
489                         return error;
490         }
491         return 0;
492 }
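
/*
 * Userspace sketch (a minimal example assuming libnl-3; not code from this
 * file).  'miss_group' and 'action_group' are hypothetical variables
 * holding the group numbers taken from the ODP_DP_ATTR_MCGROUPS nest of an
 * ODP_DP_CMD_GET reply:
 *
 *      struct nl_sock *sk = nl_socket_alloc();
 *      genl_connect(sk);
 *      nl_socket_add_membership(sk, miss_group);
 *      nl_socket_add_membership(sk, action_group);
 *      nl_recvmsgs_default(sk);
 *
 * after which ODP_PACKET_CMD_* notifications arrive as Generic Netlink
 * messages of the dp_packet_genl_family.
 */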
493
494 int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
495 {
496         struct dp_stats_percpu *stats;
497         int err;
498
499         WARN_ON_ONCE(skb_shared(skb));
500
501         forward_ip_summed(skb);
502
503         err = vswitch_skb_checksum_setup(skb);
504         if (err)
505                 goto err_kfree_skb;
506
507         /* Break apart GSO packets into their component pieces.  Otherwise
508          * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
509         if (skb_is_gso(skb)) {
510                 struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
511
512                 kfree_skb(skb);
513                 skb = nskb;
514                 if (IS_ERR(skb)) {
515                         err = PTR_ERR(skb);
516                         goto err;
517                 }
518         }
519
520         return queue_control_packets(dp, skb, upcall_info);
521
522 err_kfree_skb:
523         kfree_skb(skb);
524 err:
525         local_bh_disable();
526         stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
527
528         write_seqcount_begin(&stats->seqlock);
529         stats->n_lost++;
530         write_seqcount_end(&stats->seqlock);
531
532         local_bh_enable();
533
534         return err;
535 }
536
537 /* Called with genl_mutex. */
538 static int flush_flows(int dp_idx)
539 {
540         struct tbl *old_table;
541         struct tbl *new_table;
542         struct datapath *dp;
543
544         dp = get_dp(dp_idx);
545         if (!dp)
546                 return -ENODEV;
547
548         old_table = get_table_protected(dp);
549         new_table = tbl_create(TBL_MIN_BUCKETS);
550         if (!new_table)
551                 return -ENOMEM;
552
553         rcu_assign_pointer(dp->table, new_table);
554
555         tbl_deferred_destroy(old_table, flow_free_tbl);
556
557         return 0;
558 }
559
560 static int validate_actions(const struct nlattr *actions, u32 actions_len)
561 {
562         const struct nlattr *a;
563         int rem;
564
565         nla_for_each_attr(a, actions, actions_len, rem) {
566                 static const u32 action_lens[ODPAT_MAX + 1] = {
567                         [ODPAT_OUTPUT] = 4,
568                         [ODPAT_CONTROLLER] = 8,
569                         [ODPAT_SET_DL_TCI] = 2,
570                         [ODPAT_STRIP_VLAN] = 0,
571                         [ODPAT_SET_DL_SRC] = ETH_ALEN,
572                         [ODPAT_SET_DL_DST] = ETH_ALEN,
573                         [ODPAT_SET_NW_SRC] = 4,
574                         [ODPAT_SET_NW_DST] = 4,
575                         [ODPAT_SET_NW_TOS] = 1,
576                         [ODPAT_SET_TP_SRC] = 2,
577                         [ODPAT_SET_TP_DST] = 2,
578                         [ODPAT_SET_TUNNEL] = 8,
579                         [ODPAT_SET_PRIORITY] = 4,
580                         [ODPAT_POP_PRIORITY] = 0,
581                         [ODPAT_DROP_SPOOFED_ARP] = 0,
582                 };
583                 int type = nla_type(a);
584
585                 if (type > ODPAT_MAX || nla_len(a) != action_lens[type])
586                         return -EINVAL;
587
588                 switch (type) {
589                 case ODPAT_UNSPEC:
590                         return -EINVAL;
591
592                 case ODPAT_CONTROLLER:
593                 case ODPAT_STRIP_VLAN:
594                 case ODPAT_SET_DL_SRC:
595                 case ODPAT_SET_DL_DST:
596                 case ODPAT_SET_NW_SRC:
597                 case ODPAT_SET_NW_DST:
598                 case ODPAT_SET_TP_SRC:
599                 case ODPAT_SET_TP_DST:
600                 case ODPAT_SET_TUNNEL:
601                 case ODPAT_SET_PRIORITY:
602                 case ODPAT_POP_PRIORITY:
603                 case ODPAT_DROP_SPOOFED_ARP:
604                         /* No validation needed. */
605                         break;
606
607                 case ODPAT_OUTPUT:
608                         if (nla_get_u32(a) >= DP_MAX_PORTS)
609                                 return -EINVAL;
610                         break;
611
612                 case ODPAT_SET_DL_TCI:
613                         if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
614                                 return -EINVAL;
615                         break;
616
617                 case ODPAT_SET_NW_TOS:
618                         if (nla_get_u8(a) & INET_ECN_MASK)
619                                 return -EINVAL;
620                         break;
621
622                 default:
623                         return -EOPNOTSUPP;
624                 }
625         }
626
627         if (rem > 0)
628                 return -EINVAL;
629
630         return 0;
631 }
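
/*
 * Sketch (illustrative): a minimal action list that validate_actions()
 * accepts, built with the ordinary netlink helpers into 'actions_skb' (a
 * hypothetical skb being filled with the action list) -- strip any VLAN
 * tag, then output to port 1:
 *
 *      nla_put_flag(actions_skb, ODPAT_STRIP_VLAN);    (zero-length payload)
 *      nla_put_u32(actions_skb, ODPAT_OUTPUT, 1);      (4-byte port number)
 *
 * Each attribute's payload length must equal action_lens[type] exactly,
 * and an ODPAT_OUTPUT port number must be below DP_MAX_PORTS.
 */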
632
633 struct dp_flowcmd {
634         u32 nlmsg_flags;
635         u32 dp_idx;
636         u32 total_len;
637         struct sw_flow_key key;
638         const struct nlattr *actions;
639         u32 actions_len;
640         bool clear;
641         u64 state;
642 };
643
644 static struct sw_flow_actions *get_actions(const struct dp_flowcmd *flowcmd)
645 {
646         struct sw_flow_actions *actions;
647
648         actions = flow_actions_alloc(flowcmd->actions_len);
649         if (!IS_ERR(actions) && flowcmd->actions_len)
650                 memcpy(actions->actions, flowcmd->actions, flowcmd->actions_len);
651         return actions;
652 }
653
654 static void clear_stats(struct sw_flow *flow)
655 {
656         flow->used = 0;
657         flow->tcp_flags = 0;
658         flow->packet_count = 0;
659         flow->byte_count = 0;
660 }
661
662 /* Called with genl_mutex. */
663 static int expand_table(struct datapath *dp)
664 {
665         struct tbl *old_table = get_table_protected(dp);
666         struct tbl *new_table;
667
668         new_table = tbl_expand(old_table);
669         if (IS_ERR(new_table))
670                 return PTR_ERR(new_table);
671
672         rcu_assign_pointer(dp->table, new_table);
673         tbl_deferred_destroy(old_table, NULL);
674
675         return 0;
676 }
677
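/* ODP_PACKET_CMD_EXECUTE handler: takes a packet and an action list from
 * userspace, validates the actions, and executes them on the packet as if
 * it had been received on a port of the named datapath. */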
678 static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
679 {
680         struct odp_header *odp_header = info->userhdr;
681         struct nlattr **a = info->attrs;
682         struct sk_buff *packet;
683         unsigned int actions_len;
684         struct nlattr *actions;
685         struct sw_flow_key key;
686         struct datapath *dp;
687         struct ethhdr *eth;
688         bool is_frag;
689         int err;
690
691         err = -EINVAL;
692         if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
693             nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
694                 goto exit;
695
696         actions = nla_data(a[ODP_PACKET_ATTR_ACTIONS]);
697         actions_len = nla_len(a[ODP_PACKET_ATTR_ACTIONS]);
698         err = validate_actions(actions, actions_len);
699         if (err)
700                 goto exit;
701
702         packet = skb_clone(skb, GFP_KERNEL);
703         err = -ENOMEM;
704         if (!packet)
705                 goto exit;
706         packet->data = nla_data(a[ODP_PACKET_ATTR_PACKET]);
707         packet->len = nla_len(a[ODP_PACKET_ATTR_PACKET]);
708
709         skb_reset_mac_header(packet);
710         eth = eth_hdr(packet);
711
712         /* Normally, setting the skb 'protocol' field would be handled by a
713          * call to eth_type_trans(), but it assumes there's a sending
714          * device, which we may not have. */
715         if (ntohs(eth->h_proto) >= 1536)
716                 packet->protocol = eth->h_proto;
717         else
718                 packet->protocol = htons(ETH_P_802_2);
719
720         err = flow_extract(packet, -1, &key, &is_frag);
721         if (err) {
                    kfree_skb(packet);
722                 goto exit;
            }
723
724         rcu_read_lock();
725         dp = get_dp(odp_header->dp_idx);
726         err = -ENODEV;
727         if (dp)
728                 err = execute_actions(dp, packet, &key, actions, actions_len);
            else
                    /* execute_actions() consumes 'packet'; free it only when
                     * it was never handed off. */
                    kfree_skb(packet);
729         rcu_read_unlock();
730
731 exit:
732         return err;
733 }
734
735 static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
736         [ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
737         [ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
738 };
739
740 static struct genl_ops dp_packet_genl_ops[] = {
741         { .cmd = ODP_PACKET_CMD_EXECUTE,
742           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
743           .policy = packet_policy,
744           .doit = odp_packet_cmd_execute
745         }
746 };
747
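/* Sums the per-CPU counters into 'stats'.  The seqcount read loop pairs
 * with the write_seqcount_begin()/write_seqcount_end() sections in
 * dp_process_received_packet() and dp_upcall(), giving a consistent
 * snapshot of each CPU's counters without locking the hot path. */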
748 static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
749 {
750         int i;
751
752         stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
753         for_each_possible_cpu(i) {
754                 const struct dp_stats_percpu *percpu_stats;
755                 struct dp_stats_percpu local_stats;
756                 unsigned seqcount;
757
758                 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
759
760                 do {
761                         seqcount = read_seqcount_begin(&percpu_stats->seqlock);
762                         local_stats = *percpu_stats;
763                 } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
764
765                 stats->n_frags += local_stats.n_frags;
766                 stats->n_hit += local_stats.n_hit;
767                 stats->n_missed += local_stats.n_missed;
768                 stats->n_lost += local_stats.n_lost;
769         }
770 }
771
772 /* MTU for the datapath's internal devices: the minimum MTU of its
773  * non-internal ports, or ETH_DATA_LEN if there are none.  Called with RTNL lock.
774  */
775 int dp_min_mtu(const struct datapath *dp)
776 {
777         struct vport *p;
778         int mtu = 0;
779
780         ASSERT_RTNL();
781
782         list_for_each_entry (p, &dp->port_list, node) {
783                 int dev_mtu;
784
785                 /* Skip any internal ports, since that's what we're trying to
786                  * set. */
787                 if (is_internal_vport(p))
788                         continue;
789
790                 dev_mtu = vport_get_mtu(p);
791                 if (!mtu || dev_mtu < mtu)
792                         mtu = dev_mtu;
793         }
794
795         return mtu ? mtu : ETH_DATA_LEN;
796 }
797
798 /* Sets the MTU of all of the datapath's internal devices to the minimum
799  * of its non-internal ports (see dp_min_mtu()).  Called with RTNL lock.
800  */
801 void set_internal_devs_mtu(const struct datapath *dp)
802 {
803         struct vport *p;
804         int mtu;
805
806         ASSERT_RTNL();
807
808         mtu = dp_min_mtu(dp);
809
810         list_for_each_entry (p, &dp->port_list, node) {
811                 if (is_internal_vport(p))
812                         vport_set_mtu(p, mtu);
813         }
814 }
815
816 static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
817         [ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
818         [ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
819         [ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
820         [ODP_FLOW_ATTR_STATE] = { .type = NLA_U64 },
821 };
822
823
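/* Formats 'flow' in the same odp_flow layout used for requests (a struct
 * odp_flow header followed by netlink attributes) and copies the result to
 * userspace at 'dst', failing with -EMSGSIZE if it does not fit within
 * 'total_len' bytes. */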
824 static int copy_flow_to_user(struct odp_flow __user *dst, struct datapath *dp,
825                              struct sw_flow *flow, u32 total_len, u64 state)
826 {
827         const struct sw_flow_actions *sf_acts;
828         struct odp_flow_stats stats;
829         struct odp_flow *odp_flow;
830         struct sk_buff *skb;
831         struct nlattr *nla;
832         unsigned long used;
833         u8 tcp_flags;
834         int err;
835
836         sf_acts = rcu_dereference_protected(flow->sf_acts,
837                                             lockdep_genl_is_held());
838
839         skb = alloc_skb(128 + FLOW_BUFSIZE + sf_acts->actions_len, GFP_KERNEL);
840         err = -ENOMEM;
841         if (!skb)
842                 goto exit;
843
844         odp_flow = (struct odp_flow *)__skb_put(skb, sizeof(struct odp_flow));
845         odp_flow->dp_idx = dp->dp_idx;
846         odp_flow->total_len = total_len;
847
848         nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
849         if (!nla)
850                 goto nla_put_failure;
851         err = flow_to_nlattrs(&flow->key, skb);
852         if (err)
853                 goto exit_free;
854         nla_nest_end(skb, nla);
855
856         nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
857         if (!nla || skb_tailroom(skb) < sf_acts->actions_len)
858                 goto nla_put_failure;
859         memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
860         nla_nest_end(skb, nla);
861
862         spin_lock_bh(&flow->lock);
863         used = flow->used;
864         stats.n_packets = flow->packet_count;
865         stats.n_bytes = flow->byte_count;
866         tcp_flags = flow->tcp_flags;
867         spin_unlock_bh(&flow->lock);
868
869         if (used)
870                 NLA_PUT_MSECS(skb, ODP_FLOW_ATTR_USED, used);
871
872         if (stats.n_packets)
873                 NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);
874
875         if (tcp_flags)
876                 NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);
877
878         if (state)
879                 NLA_PUT_U64(skb, ODP_FLOW_ATTR_STATE, state);
880
881         if (skb->len > total_len)
882                 goto nla_put_failure;
883
884         odp_flow->len = skb->len;
885         err = copy_to_user(dst, skb->data, skb->len) ? -EFAULT : 0;
886         goto exit_free;
887
888 nla_put_failure:
889         err = -EMSGSIZE;
890 exit_free:
891         kfree_skb(skb);
892 exit:
893         return err;
894 }
895
896 /* Called with genl_mutex. */
897 static struct sk_buff *copy_flow_from_user(struct odp_flow __user *uodp_flow,
898                                            struct dp_flowcmd *flowcmd)
899 {
900         struct nlattr *a[ODP_FLOW_ATTR_MAX + 1];
901         struct odp_flow *odp_flow;
902         struct sk_buff *skb;
903         u32 len;
904         int err;
905
906         if (get_user(len, &uodp_flow->len))
907                 return ERR_PTR(-EFAULT);
908         if (len < sizeof(struct odp_flow))
909                 return ERR_PTR(-EINVAL);
910
911         skb = alloc_skb(len, GFP_KERNEL);
912         if (!skb)
913                 return ERR_PTR(-ENOMEM);
914
915         err = -EFAULT;
916         if (copy_from_user(__skb_put(skb, len), uodp_flow, len))
917                 goto error_free_skb;
918
919         odp_flow = (struct odp_flow *)skb->data;
920         err = -EINVAL;
921         if (odp_flow->len != len)
922                 goto error_free_skb;
923
924         flowcmd->nlmsg_flags = odp_flow->nlmsg_flags;
925         flowcmd->dp_idx = odp_flow->dp_idx;
926         flowcmd->total_len = odp_flow->total_len;
927
928         err = nla_parse(a, ODP_FLOW_ATTR_MAX,
929                         (struct nlattr *)(skb->data + sizeof(struct odp_flow)),
930                         skb->len - sizeof(struct odp_flow), flow_policy);
931         if (err)
932                 goto error_free_skb;
933
934         /* ODP_FLOW_ATTR_KEY. */
935         if (a[ODP_FLOW_ATTR_KEY]) {
936                 err = flow_from_nlattrs(&flowcmd->key, a[ODP_FLOW_ATTR_KEY]);
937                 if (err)
938                         goto error_free_skb;
939         } else
940                 memset(&flowcmd->key, 0, sizeof(struct sw_flow_key));
941
942         /* ODP_FLOW_ATTR_ACTIONS. */
943         if (a[ODP_FLOW_ATTR_ACTIONS]) {
944                 flowcmd->actions = nla_data(a[ODP_FLOW_ATTR_ACTIONS]);
945                 flowcmd->actions_len = nla_len(a[ODP_FLOW_ATTR_ACTIONS]);
946                 err = validate_actions(flowcmd->actions, flowcmd->actions_len);
947                 if (err)
948                         goto error_free_skb;
949         } else {
950                 flowcmd->actions = NULL;
951                 flowcmd->actions_len = 0;
952         }
953
954         flowcmd->clear = a[ODP_FLOW_ATTR_CLEAR] != NULL;
955
956         flowcmd->state = a[ODP_FLOW_ATTR_STATE] ? nla_get_u64(a[ODP_FLOW_ATTR_STATE]) : 0;
957
958         return skb;
959
960 error_free_skb:
961         kfree_skb(skb);
962         return ERR_PTR(err);
963 }
964
965 static int new_flow(unsigned int cmd, struct odp_flow __user *uodp_flow)
966 {
967         struct tbl_node *flow_node;
968         struct dp_flowcmd flowcmd;
969         struct sw_flow *flow;
970         struct sk_buff *skb;
971         struct datapath *dp;
972         struct tbl *table;
973         u32 hash;
974         int error;
975
976         skb = copy_flow_from_user(uodp_flow, &flowcmd);
977         error = PTR_ERR(skb);
978         if (IS_ERR(skb))
979                 goto exit;
980
981         dp = get_dp(flowcmd.dp_idx);
982         error = -ENODEV;
983         if (!dp)
984                 goto error_kfree_skb;
985
986         hash = flow_hash(&flowcmd.key);
987         table = get_table_protected(dp);
988         flow_node = tbl_lookup(table, &flowcmd.key, hash, flow_cmp);
989         if (!flow_node) {
990                 struct sw_flow_actions *acts;
991
992                 /* Bail out if we're not allowed to create a new flow. */
993                 error = -ENOENT;
994                 if (cmd == ODP_FLOW_SET)
995                         goto error_kfree_skb;
996
997                 /* Expand table, if necessary, to make room. */
998                 if (tbl_count(table) >= tbl_n_buckets(table)) {
999                         error = expand_table(dp);
1000                         if (error)
1001                                 goto error_kfree_skb;
1002                         table = get_table_protected(dp);
1003                 }
1004
1005                 /* Allocate flow. */
1006                 flow = flow_alloc();
1007                 if (IS_ERR(flow)) {
1008                         error = PTR_ERR(flow);
1009                         goto error_kfree_skb;
1010                 }
1011                 flow->key = flowcmd.key;
1012                 clear_stats(flow);
1013
1014                 /* Obtain actions. */
1015                 acts = get_actions(&flowcmd);
1016                 error = PTR_ERR(acts);
1017                 if (IS_ERR(acts))
1018                         goto error_free_flow;
1019                 rcu_assign_pointer(flow->sf_acts, acts);
1020
1021                 error = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0);
1022                 if (error)
1023                         goto error_free_flow;
1024
1025                 /* Put flow in bucket. */
1026                 error = tbl_insert(table, &flow->tbl_node, hash);
1027                 if (error)
1028                         goto error_free_flow;
1029         } else {
1030                 /* We found a matching flow. */
1031                 struct sw_flow_actions *old_acts;
1032
1033                 /* Bail out if we're not allowed to modify an existing flow.
1034                  * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1035                  * because Generic Netlink treats the latter as a dump
1036                  * request.  We also accept NLM_F_EXCL in case that bug ever
1037                  * gets fixed.
1038                  */
1039                 error = -EEXIST;
1040                 if (flowcmd.nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1041                         goto error_kfree_skb;
1042
1043                 /* Update actions. */
1044                 flow = flow_cast(flow_node);
1045                 old_acts = rcu_dereference_protected(flow->sf_acts,
1046                                                      lockdep_genl_is_held());
1047                 if (flowcmd.actions &&
1048                     (old_acts->actions_len != flowcmd.actions_len ||
1049                      memcmp(old_acts->actions, flowcmd.actions,
1050                             flowcmd.actions_len))) {
1051                         struct sw_flow_actions *new_acts;
1052
1053                         new_acts = get_actions(&flowcmd);
1054                         error = PTR_ERR(new_acts);
1055                         if (IS_ERR(new_acts))
1056                                 goto error_kfree_skb;
1057
1058                         rcu_assign_pointer(flow->sf_acts, new_acts);
1059                         flow_deferred_free_acts(old_acts);
1060                 }
1061
1062                 error = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0);
1063                 if (error)
1064                         goto error_kfree_skb;
1065
1066                 /* Clear stats. */
1067                 if (flowcmd.clear) {
1068                         spin_lock_bh(&flow->lock);
1069                         clear_stats(flow);
1070                         spin_unlock_bh(&flow->lock);
1071                 }
1072         }
1073         kfree_skb(skb);
1074         return 0;
1075
1076 error_free_flow:
1077         flow_put(flow);
1078 error_kfree_skb:
1079         kfree_skb(skb);
1080 exit:
1081         return error;
1082 }
1083
1084 static int get_or_del_flow(unsigned int cmd, struct odp_flow __user *uodp_flow)
1085 {
1086         struct tbl_node *flow_node;
1087         struct dp_flowcmd flowcmd;
1088         struct sw_flow *flow;
1089         struct sk_buff *skb;
1090         struct datapath *dp;
1091         struct tbl *table;
1092         int err;
1093
1094         skb = copy_flow_from_user(uodp_flow, &flowcmd);
1095         if (IS_ERR(skb))
1096                 return PTR_ERR(skb);
1097
1098         dp = get_dp(flowcmd.dp_idx);
             err = -ENODEV;
1099         if (!dp)
1100                 goto exit_kfree_skb;
1101
1102         table = get_table_protected(dp);
1103         flow_node = tbl_lookup(table, &flowcmd.key, flow_hash(&flowcmd.key), flow_cmp);
             err = -ENOENT;
1104         if (!flow_node)
1105                 goto exit_kfree_skb;
1106
1107         if (cmd == ODP_FLOW_DEL) {
1108                 err = tbl_remove(table, flow_node);
1109                 if (err)
1110                         goto exit_kfree_skb;
1111         }
1112
1113         flow = flow_cast(flow_node);
1114         err = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0);
1115         if (!err && cmd == ODP_FLOW_DEL)
1116                 flow_deferred_free(flow);
1117
     exit_kfree_skb:
             /* Free the request copy made by copy_flow_from_user(). */
             kfree_skb(skb);
1118         return err;
1119 }
1120
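/* Dump cursor: the 64-bit ODP_FLOW_ATTR_STATE value packs the flow table
 * position as (bucket << 32) | object, so each dump_flow() call resumes
 * the walk exactly where the previous reply left off. */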
1121 static int dump_flow(struct odp_flow __user *uodp_flow)
1122 {
1123         struct tbl_node *flow_node;
1124         struct dp_flowcmd flowcmd;
1125         struct sw_flow *flow;
1126         struct sk_buff *skb;
1127         struct datapath *dp;
1128         u32 bucket, obj;
1129         int err;
1130
1131         skb = copy_flow_from_user(uodp_flow, &flowcmd);
1132         err = PTR_ERR(skb);
1133         if (IS_ERR(skb))
1134                 goto exit;
1135
1136         dp = get_dp(flowcmd.dp_idx);
1137         err = -ENODEV;
1138         if (!dp)
1139                 goto exit_kfree_skb;
1140
1141         bucket = flowcmd.state >> 32;
1142         obj = flowcmd.state;
1143         flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
1144         err = -ENODEV;
1145         if (!flow_node)
1146                 goto exit_kfree_skb;
1147
1148         flow = flow_cast(flow_node);
1149         err = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len,
1150                                 ((u64)bucket << 32) | obj);
1151
1152 exit_kfree_skb:
1153         kfree_skb(skb);
1154 exit:
1155         return err;
1156 }
1157
1158 static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
1159 #ifdef HAVE_NLA_NUL_STRING
1160         [ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1161 #endif
1162         [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
1163         [ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
1164 };
1165
1166 static struct genl_family dp_datapath_genl_family = {
1167         .id = GENL_ID_GENERATE,
1168         .hdrsize = sizeof(struct odp_header),
1169         .name = ODP_DATAPATH_FAMILY,
1170         .version = 1,
1171         .maxattr = ODP_DP_ATTR_MAX
1172 };
1173
1174 static struct genl_multicast_group dp_datapath_multicast_group = {
1175         .name = ODP_DATAPATH_MCGROUP
1176 };
1177
1178 static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1179                                 u32 pid, u32 seq, u32 flags, u8 cmd)
1180 {
1181         struct odp_header *odp_header;
1182         struct nlattr *nla;
1183         int err;
1184
1185         odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
1186                                    flags, cmd);
1187         if (!odp_header)
1188                 goto error;
1189
1190         odp_header->dp_idx = dp->dp_idx;
1191
1192         rcu_read_lock();
1193         err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
1194         rcu_read_unlock();
1195         if (err)
1196                 goto nla_put_failure;
1197
1198         nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
1199         if (!nla)
1200                 goto nla_put_failure;
1201         get_dp_stats(dp, nla_data(nla));
1202
1203         NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
1204                     dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);
1205
1206         if (dp->sflow_probability)
1207                 NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);
1208
1209         nla = nla_nest_start(skb, ODP_DP_ATTR_MCGROUPS);
1210         if (!nla)
1211                 goto nla_put_failure;
1212         NLA_PUT_U32(skb, ODP_PACKET_CMD_MISS, packet_mc_group(dp, ODP_PACKET_CMD_MISS));
1213         NLA_PUT_U32(skb, ODP_PACKET_CMD_ACTION, packet_mc_group(dp, ODP_PACKET_CMD_ACTION));
1214         NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
1215         nla_nest_end(skb, nla);
1216
1217         return genlmsg_end(skb, odp_header);
1218
1219 nla_put_failure:
1220         genlmsg_cancel(skb, odp_header);
1221 error:
1222         return -EMSGSIZE;
1223 }
1224
1225 static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid,
1226                                              u32 seq, u8 cmd)
1227 {
1228         struct sk_buff *skb;
1229         int retval;
1230
1231         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1232         if (!skb)
1233                 return ERR_PTR(-ENOMEM);
1234
1235         retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
1236         if (retval < 0) {
1237                 kfree_skb(skb);
1238                 return ERR_PTR(retval);
1239         }
1240         return skb;
1241 }
1242
1243 static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
1244 {
1245         if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
1246                 u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);
1247
1248                 if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
1249                         return -EINVAL;
1250         }
1251
1252         return VERIFY_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
1253 }
1254
1255 /* Called with genl_mutex and optionally with RTNL lock also. */
1256 static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
1257 {
1258         if (!a[ODP_DP_ATTR_NAME]) {
1259                 struct datapath *dp = get_dp(odp_header->dp_idx);
1260                 if (!dp)
1261                         return ERR_PTR(-ENODEV);
1262                 return dp;
1263         } else {
1264                 struct vport *vport;
1265                 int dp_idx;
1266
1267                 rcu_read_lock();
1268                 vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
1269                 dp_idx = vport && vport->port_no == ODPP_LOCAL ? vport->dp->dp_idx : -1;
1270                 rcu_read_unlock();
1271
1272                 if (dp_idx < 0)
1273                         return ERR_PTR(-ENODEV);
1274                 return vport->dp;
1275         }
1276 }
1277
1278 /* Called with genl_mutex. */
1279 static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
1280 {
1281         if (a[ODP_DP_ATTR_IPV4_FRAGS])
1282                 dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
1283         if (a[ODP_DP_ATTR_SAMPLING])
1284                 dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
1285 }
1286
1287 static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1288 {
1289         struct nlattr **a = info->attrs;
1290         struct odp_header *odp_header = info->userhdr;
1291         struct vport_parms parms;
1292         struct sk_buff *reply;
1293         struct datapath *dp;
1294         struct vport *vport;
1295         int dp_idx;
1296         int err;
1297
1298         err = -EINVAL;
1299         if (!a[ODP_DP_ATTR_NAME])
1300                 goto err;
1301
1302         err = odp_dp_cmd_validate(a);
1303         if (err)
1304                 goto err;
1305
1306         rtnl_lock();
1307         err = -ENODEV;
1308         if (!try_module_get(THIS_MODULE))
1309                 goto err_unlock_rtnl;
1310
1311         dp_idx = odp_header->dp_idx;
1312         if (dp_idx < 0) {
1313                 err = -EFBIG;
1314                 for (dp_idx = 0; dp_idx < ARRAY_SIZE(dps); dp_idx++) {
1315                         if (get_dp(dp_idx))
1316                                 continue;
1317                         err = 0;
1318                         break;
1319                 }
1320         } else if (dp_idx < ARRAY_SIZE(dps))
1321                 err = get_dp(dp_idx) ? -EBUSY : 0;
1322         else
1323                 err = -EINVAL;
1324         if (err)
1325                 goto err_put_module;
1326
1327         err = -ENOMEM;
1328         dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1329         if (dp == NULL)
1330                 goto err_put_module;
1331         INIT_LIST_HEAD(&dp->port_list);
1332         dp->dp_idx = dp_idx;
1333
1334         /* Initialize kobject for bridge.  This will be added as
1335          * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
1336         dp->ifobj.kset = NULL;
1337         kobject_init(&dp->ifobj, &dp_ktype);
1338
1339         /* Allocate table. */
1340         err = -ENOMEM;
1341         rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
1342         if (!dp->table)
1343                 goto err_free_dp;
1344
1345         /* Set up our datapath device. */
1346         parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
1347         parms.type = ODP_VPORT_TYPE_INTERNAL;
1348         parms.options = NULL;
1349         parms.dp = dp;
1350         parms.port_no = ODPP_LOCAL;
1351         vport = new_vport(&parms);
1352         if (IS_ERR(vport)) {
1353                 err = PTR_ERR(vport);
1354                 if (err == -EBUSY)
1355                         err = -EEXIST;
1356
1357                 goto err_destroy_table;
1358         }
1359
1360         dp->drop_frags = 0;
1361         dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1362         if (!dp->stats_percpu) {
1363                 err = -ENOMEM;
1364                 goto err_destroy_local_port;
1365         }
1366
1367         change_datapath(dp, a);
1368
1369         reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
1370         err = PTR_ERR(reply);
1371         if (IS_ERR(reply))
1372                 goto err_destroy_local_port;
1373
1374         rcu_assign_pointer(dps[dp_idx], dp);
1375         dp_sysfs_add_dp(dp);
1376
1377         rtnl_unlock();
1378
1379         genl_notify(reply, genl_info_net(info), info->snd_pid,
1380                     dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1381         return 0;
1382
1383 err_destroy_local_port:
1384         dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
1385 err_destroy_table:
1386         tbl_destroy(get_table_protected(dp), NULL);
1387 err_free_dp:
1388         kfree(dp);
1389 err_put_module:
1390         module_put(THIS_MODULE);
1391 err_unlock_rtnl:
1392         rtnl_unlock();
1393 err:
1394         return err;
1395 }
1396
1397 static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1398 {
1399         struct vport *vport, *next_vport;
1400         struct sk_buff *reply;
1401         struct datapath *dp;
1402         int err;
1403
1404         err = odp_dp_cmd_validate(info->attrs);
1405         if (err)
1406                 goto exit;
1407
1408         rtnl_lock();
1409         dp = lookup_datapath(info->userhdr, info->attrs);
1410         err = PTR_ERR(dp);
1411         if (IS_ERR(dp))
1412                 goto exit_unlock;
1413
1414         reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL);
1415         err = PTR_ERR(reply);
1416         if (IS_ERR(reply))
1417                 goto exit_unlock;
1418
1419         list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
1420                 if (vport->port_no != ODPP_LOCAL)
1421                         dp_detach_port(vport);
1422
1423         dp_sysfs_del_dp(dp);
1424         rcu_assign_pointer(dps[dp->dp_idx], NULL);
1425         dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
1426
1427         call_rcu(&dp->rcu, destroy_dp_rcu);
1428         module_put(THIS_MODULE);
1429
1430         genl_notify(reply, genl_info_net(info), info->snd_pid,
1431                     dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1432         err = 0;
1433
1434 exit_unlock:
1435         rtnl_unlock();
1436 exit:
1437         return err;
1438 }
1439
1440 static int odp_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1441 {
1442         struct sk_buff *reply;
1443         struct datapath *dp;
1444         int err;
1445
1446         err = odp_dp_cmd_validate(info->attrs);
1447         if (err)
1448                 return err;
1449
1450         dp = lookup_datapath(info->userhdr, info->attrs);
1451         if (IS_ERR(dp))
1452                 return PTR_ERR(dp);
1453
1454         change_datapath(dp, info->attrs);
1455
1456         reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
1457         if (IS_ERR(reply)) {
1458                 err = PTR_ERR(reply);
1459                 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1460                                 dp_datapath_multicast_group.id, err);
1461                 return 0;
1462         }
1463
1464         genl_notify(reply, genl_info_net(info), info->snd_pid,
1465                     dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1466         return 0;
1467 }
1468
1469 static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1470 {
1471         struct sk_buff *reply;
1472         struct datapath *dp;
1473         int err;
1474
1475         err = odp_dp_cmd_validate(info->attrs);
1476         if (err)
1477                 return err;
1478
1479         dp = lookup_datapath(info->userhdr, info->attrs);
1480         if (IS_ERR(dp))
1481                 return PTR_ERR(dp);
1482
1483         reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
1484         if (IS_ERR(reply))
1485                 return PTR_ERR(reply);
1486
1487         return genlmsg_reply(reply, info);
1488 }
1489
1490 static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1491 {
1492         u32 dp_idx;
1493
1494         for (dp_idx = cb->args[0]; dp_idx < ARRAY_SIZE(dps); dp_idx++) {
1495                 struct datapath *dp = get_dp(dp_idx);
1496                 if (!dp)
1497                         continue;
1498                 if (odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
1499                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
1500                                          ODP_DP_CMD_NEW) < 0)
1501                         break;
1502         }
1503
1504         cb->args[0] = dp_idx;
1505         return skb->len;
1506 }
1507
1508 static struct genl_ops dp_datapath_genl_ops[] = {
1509         { .cmd = ODP_DP_CMD_NEW,
1510           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1511           .policy = datapath_policy,
1512           .doit = odp_dp_cmd_new
1513         },
1514         { .cmd = ODP_DP_CMD_DEL,
1515           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1516           .policy = datapath_policy,
1517           .doit = odp_dp_cmd_del
1518         },
1519         { .cmd = ODP_DP_CMD_GET,
1520           .flags = 0,               /* OK for unprivileged users. */
1521           .policy = datapath_policy,
1522           .doit = odp_dp_cmd_get,
1523           .dumpit = odp_dp_cmd_dump
1524         },
1525         { .cmd = ODP_DP_CMD_SET,
1526           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1527           .policy = datapath_policy,
1528           .doit = odp_dp_cmd_set,
1529         },
1530 };
1531
1532 static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = {
1533         [ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1534         [ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1535         [ODP_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1536         [ODP_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
1537         [ODP_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
1538         [ODP_VPORT_ATTR_MTU] = { .type = NLA_U32 },
1539         [ODP_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1540 };
1541
1542 /* Called with RCU read lock. */
1543 static struct sk_buff *odp_vport_build_info(struct vport *vport, uint32_t total_len)
1544 {
1545         struct odp_vport *odp_vport;
1546         struct sk_buff *skb;
1547         struct nlattr *nla;
1548         int ifindex, iflink;
1549         int err;
1550
1551         skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
1552         err = -ENOMEM;
1553         if (!skb)
1554                 goto err;
1555
1556         odp_vport = (struct odp_vport *)__skb_put(skb, sizeof(struct odp_vport));
1557         odp_vport->dp_idx = vport->dp->dp_idx;
1558         odp_vport->total_len = total_len;
1559
1560         NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
1561         NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport));
1562         NLA_PUT_STRING(skb, ODP_VPORT_ATTR_NAME, vport_get_name(vport));
1563
1564         nla = nla_reserve(skb, ODP_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
1565         if (!nla)
1566                 goto nla_put_failure;
1567         if (vport_get_stats(vport, nla_data(nla)))
1568                 __skb_trim(skb, skb->len - nla->nla_len);
1569
1570         NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
1571
1572         NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, vport_get_mtu(vport));
1573
1574         err = vport_get_options(vport, skb);
             if (err)
                     goto err_free;
1575
1576         ifindex = vport_get_ifindex(vport);
1577         if (ifindex > 0)
1578                 NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFINDEX, ifindex);
1579
1580         iflink = vport_get_iflink(vport);
1581         if (iflink > 0)
1582                 NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFLINK, iflink);
1583
1584         err = -EMSGSIZE;
1585         if (skb->len > total_len)
1586                 goto err_free;
1587
1588         odp_vport->len = skb->len;
1589         return skb;
1590
1591 nla_put_failure:
1592         err = -EMSGSIZE;
1593 err_free:
1594         kfree_skb(skb);
1595 err:
1596         return ERR_PTR(err);
1597 }
1598
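/*
 * Copies an odp_vport header and its trailing netlink attributes from
 * userspace into a newly allocated skb and parses the attributes against
 * vport_policy, filling in 'a'.  On success the returned skb's data begins
 * with the struct odp_vport; on failure an ERR_PTR is returned.
 */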
static struct sk_buff *copy_vport_from_user(struct odp_vport __user *uodp_vport,
					    struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
	struct odp_vport *odp_vport;
	struct sk_buff *skb;
	u32 len;
	int err;

	if (get_user(len, &uodp_vport->len))
		return ERR_PTR(-EFAULT);
	if (len < sizeof(struct odp_vport))
		return ERR_PTR(-EINVAL);

	skb = alloc_skb(len, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	err = -EFAULT;
	if (copy_from_user(__skb_put(skb, len), uodp_vport, len))
		goto error_free_skb;

	odp_vport = (struct odp_vport *)skb->data;
	err = -EINVAL;
	if (odp_vport->len != len)
		goto error_free_skb;

	err = nla_parse(a, ODP_VPORT_ATTR_MAX, (struct nlattr *)(skb->data + sizeof(struct odp_vport)),
			skb->len - sizeof(struct odp_vport), vport_policy);
	if (err)
		goto error_free_skb;

	err = VERIFY_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
	if (err)
		goto error_free_skb;

	return skb;

error_free_skb:
	kfree_skb(skb);
	return ERR_PTR(err);
}

/* Called with RTNL lock or RCU read lock. */
static struct vport *lookup_vport(struct odp_vport *odp_vport,
				  struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[ODP_VPORT_ATTR_NAME]) {
		vport = vport_locate(nla_data(a[ODP_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[ODP_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EINVAL);

		dp = get_dp(odp_vport->dp_idx);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = get_vport_protected(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENOENT);
		return vport;
	} else
		return ERR_PTR(-EINVAL);
}

/* Called with RTNL lock. */
static int change_vport(struct vport *vport, struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
	int err = 0;

	if (a[ODP_VPORT_ATTR_STATS])
		err = vport_set_stats(vport, nla_data(a[ODP_VPORT_ATTR_STATS]));
	if (!err && a[ODP_VPORT_ATTR_ADDRESS])
		err = vport_set_addr(vport, nla_data(a[ODP_VPORT_ATTR_ADDRESS]));
	if (!err && a[ODP_VPORT_ATTR_MTU])
		err = vport_set_mtu(vport, nla_get_u32(a[ODP_VPORT_ATTR_MTU]));
	return err;
}

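/*
 * Handler for the ODP_VPORT_NEW ioctl.  Creates a vport in the datapath
 * named by the argument's dp_idx, at the requested port number or, if none
 * was requested, at the lowest free one, and copies a description of the
 * new vport back to userspace.
 */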
static int attach_vport(struct odp_vport __user *uodp_vport)
{
	struct nlattr *a[ODP_VPORT_ATTR_MAX + 1];
	struct odp_vport *odp_vport;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct sk_buff *skb;
	struct datapath *dp;
	u32 port_no;
	int err;

	skb = copy_vport_from_user(uodp_vport, a);
	err = PTR_ERR(skb);
	if (IS_ERR(skb))
		goto exit;
	odp_vport = (struct odp_vport *)skb->data;

	err = -EINVAL;
	if (!a[ODP_VPORT_ATTR_NAME] || !a[ODP_VPORT_ATTR_TYPE])
		goto exit_kfree_skb;

	rtnl_lock();
	dp = get_dp(odp_vport->dp_idx);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock;

	if (a[ODP_VPORT_ATTR_PORT_NO]) {
		port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

		err = -EFBIG;
		if (port_no >= DP_MAX_PORTS)
			goto exit_unlock;

		vport = get_vport_protected(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock;
	} else {
		/* No port number requested: pick the lowest free one. */
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock;
			}
			vport = get_vport_protected(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[ODP_VPORT_ATTR_NAME]);
	parms.type = nla_get_u32(a[ODP_VPORT_ATTR_TYPE]);
	parms.options = a[ODP_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock;

	set_internal_devs_mtu(dp);
	dp_sysfs_add_if(vport);

	err = change_vport(vport, a);
	if (err) {
		dp_detach_port(vport);
		goto exit_unlock;
	}

	reply = odp_vport_build_info(vport, odp_vport->total_len);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto exit_unlock;

	err = copy_to_user(uodp_vport, reply->data, reply->len) ? -EFAULT : 0;
	kfree_skb(reply);

exit_unlock:
	rtnl_unlock();
exit_kfree_skb:
	kfree_skb(skb);
exit:
	return err;
}

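/*
 * Handler for the ODP_VPORT_SET ioctl.  Under RTNL, updates an existing
 * vport's options plus whatever of stats, Ethernet address, and MTU the
 * caller supplied.
 */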
static int set_vport(unsigned int cmd, struct odp_vport __user *uodp_vport)
{
	struct nlattr *a[ODP_VPORT_ATTR_MAX + 1];
	struct vport *vport;
	struct sk_buff *skb;
	int err;

	skb = copy_vport_from_user(uodp_vport, a);
	err = PTR_ERR(skb);
	if (IS_ERR(skb))
		goto exit;

	rtnl_lock();
	vport = lookup_vport((struct odp_vport *)skb->data, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_free;

	err = 0;
	if (a[ODP_VPORT_ATTR_OPTIONS])
		err = vport_set_options(vport, a[ODP_VPORT_ATTR_OPTIONS]);
	if (!err)
		err = change_vport(vport, a);

exit_free:
	kfree_skb(skb);
	rtnl_unlock();
exit:
	return err;
}

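/* Handler for the ODP_VPORT_DEL ioctl.  Detaches the specified vport from
 * its datapath under RTNL. */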
static int del_vport(unsigned int cmd, struct odp_vport __user *uodp_vport)
{
	struct nlattr *a[ODP_VPORT_ATTR_MAX + 1];
	struct vport *vport;
	struct sk_buff *skb;
	int err;

	skb = copy_vport_from_user(uodp_vport, a);
	err = PTR_ERR(skb);
	if (IS_ERR(skb))
		goto exit;

	rtnl_lock();
	vport = lookup_vport((struct odp_vport *)skb->data, a);
	err = PTR_ERR(vport);
	if (!IS_ERR(vport))
		err = dp_detach_port(vport);

	kfree_skb(skb);
	rtnl_unlock();
exit:
	return err;
}

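/*
 * Handler for the ODP_VPORT_GET ioctl.  Looks the vport up by name or port
 * number under the RCU read lock and copies its description back to
 * userspace.
 */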
static int get_vport(struct odp_vport __user *uodp_vport)
{
	struct nlattr *a[ODP_VPORT_ATTR_MAX + 1];
	struct odp_vport *odp_vport;
	struct sk_buff *reply;
	struct vport *vport;
	struct sk_buff *skb;
	int err;

	skb = copy_vport_from_user(uodp_vport, a);
	err = PTR_ERR(skb);
	if (IS_ERR(skb))
		goto err;
	odp_vport = (struct odp_vport *)skb->data;

	rcu_read_lock();
	vport = lookup_vport(odp_vport, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto err_unlock_rcu;
	reply = odp_vport_build_info(vport, odp_vport->total_len);
	rcu_read_unlock();

	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto err_kfree_skb;

	err = copy_to_user(uodp_vport, reply->data, reply->len) ? -EFAULT : 0;
	kfree_skb(reply);
	kfree_skb(skb);

	return err;

err_unlock_rcu:
	rcu_read_unlock();
err_kfree_skb:
	kfree_skb(skb);
err:
	return err;
}

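/*
 * Handler for the ODP_VPORT_DUMP ioctl.  Copies back the description of
 * the first vport that exists at or after ODP_VPORT_ATTR_PORT_NO
 * (defaulting to 0), or returns -ENODEV if there is none; userspace is
 * presumably expected to iterate by re-issuing the call with the returned
 * port number plus one.
 */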
static int dump_vport(struct odp_vport __user *uodp_vport)
{
	struct nlattr *a[ODP_VPORT_ATTR_MAX + 1];
	struct odp_vport *odp_vport;
	struct sk_buff *skb;
	struct datapath *dp;
	u32 port_no;
	int err;

	skb = copy_vport_from_user(uodp_vport, a);
	err = PTR_ERR(skb);
	if (IS_ERR(skb))
		goto err;
	odp_vport = (struct odp_vport *)skb->data;

	dp = get_dp(odp_vport->dp_idx);
	err = -ENODEV;
	if (!dp)
		goto err_kfree_skb;

	port_no = 0;
	if (a[ODP_VPORT_ATTR_PORT_NO])
		port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

	rcu_read_lock();
	for (; port_no < DP_MAX_PORTS; port_no++) {
		struct sk_buff *skb_out;
		struct vport *vport;
		int retval;

		vport = get_vport_protected(dp, port_no);
		if (!vport)
			continue;

		skb_out = odp_vport_build_info(vport, odp_vport->total_len);
		rcu_read_unlock();

		err = PTR_ERR(skb_out);
		if (IS_ERR(skb_out))
			goto err_kfree_skb;

		retval = copy_to_user(uodp_vport, skb_out->data, skb_out->len);
		kfree_skb(skb_out);
		kfree_skb(skb);

		return retval ? -EFAULT : 0;
	}
	rcu_read_unlock();
	err = -ENODEV;

err_kfree_skb:
	kfree_skb(skb);
err:
	return err;
}

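/*
 * Dispatches the openvswitch character device ioctls.  genl_lock() is
 * taken here so that every operation below runs under genl_mutex, the same
 * lock that protects the genetlink datapath operations (see the Locking
 * comment at the top of this file).
 */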
static long openvswitch_ioctl(struct file *f, unsigned int cmd,
			      unsigned long argp)
{
	int err;

	genl_lock();
	switch (cmd) {
	case ODP_VPORT_NEW:
		err = attach_vport((struct odp_vport __user *)argp);
		goto exit;

	case ODP_VPORT_GET:
		err = get_vport((struct odp_vport __user *)argp);
		goto exit;

	case ODP_VPORT_DEL:
		err = del_vport(cmd, (struct odp_vport __user *)argp);
		goto exit;

	case ODP_VPORT_SET:
		err = set_vport(cmd, (struct odp_vport __user *)argp);
		goto exit;

	case ODP_VPORT_DUMP:
		err = dump_vport((struct odp_vport __user *)argp);
		goto exit;

	case ODP_FLOW_FLUSH:
		err = flush_flows(argp);
		goto exit;

	case ODP_FLOW_NEW:
	case ODP_FLOW_SET:
		err = new_flow(cmd, (struct odp_flow __user *)argp);
		goto exit;

	case ODP_FLOW_GET:
	case ODP_FLOW_DEL:
		err = get_or_del_flow(cmd, (struct odp_flow __user *)argp);
		goto exit;

	case ODP_FLOW_DUMP:
		err = dump_flow((struct odp_flow __user *)argp);
		goto exit;

	default:
		err = -ENOIOCTLCMD;
		break;
	}
exit:
	genl_unlock();
	return err;
}

#ifdef CONFIG_COMPAT
static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned long argp)
{
	switch (cmd) {
	case ODP_FLOW_FLUSH:
		/* Ioctls that don't need any translation at all. */
		return openvswitch_ioctl(f, cmd, argp);

	case ODP_VPORT_NEW:
	case ODP_VPORT_DEL:
	case ODP_VPORT_GET:
	case ODP_VPORT_SET:
	case ODP_VPORT_DUMP:
	case ODP_FLOW_NEW:
	case ODP_FLOW_DEL:
	case ODP_FLOW_GET:
	case ODP_FLOW_SET:
	case ODP_FLOW_DUMP:
		/* Ioctls that just need their pointer argument extended. */
		return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp));

	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static const struct file_operations openvswitch_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = openvswitch_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = openvswitch_compat_ioctl,
#endif
};

static int major;

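/* Ties a genetlink family to its operations and (optional) multicast group
 * so they can be registered and unregistered as a unit. */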
struct genl_family_and_ops {
	struct genl_family *family;
	struct genl_ops *ops;
	int n_ops;
	struct genl_multicast_group *group;
};

static const struct genl_family_and_ops dp_genl_families[] = {
	{ &dp_datapath_genl_family,
	  dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
	  &dp_datapath_multicast_group },
	{ &dp_packet_genl_family,
	  dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
	  NULL },
};

static void dp_unregister_genl(int n_families)
{
	int i;

	for (i = 0; i < n_families; i++)
		genl_unregister_family(dp_genl_families[i].family);
}

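/*
 * Registers each genetlink family in dp_genl_families, together with its
 * operations and (if any) multicast group.  On failure, everything that
 * was registered so far is unregistered again before the error is
 * returned.
 */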
static int dp_register_genl(void)
{
	int n_registered;
	int err;
	int i;

	n_registered = 0;
	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
		const struct genl_family_and_ops *f = &dp_genl_families[i];

		err = genl_register_family_with_ops(f->family, f->ops,
						    f->n_ops);
		if (err)
			goto error;
		n_registered++;

		if (f->group) {
			err = genl_register_mc_group(f->family, f->group);
			if (err)
				goto error;
		}
	}

	err = packet_register_mc_groups();
	if (err)
		goto error;
	return 0;

error:
	dp_unregister_genl(n_registered);
	return err;
}

static int __init dp_init(void)
{
	struct sk_buff *dummy_skb;
	int err;

	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));

	printk(KERN_INFO "Open vSwitch %s, built "__DATE__" "__TIME__"\n",
	       VERSION BUILDNR);

	err = flow_init();
	if (err)
		goto error;

	err = vport_init();
	if (err)
		goto error_flow_exit;

	err = register_netdevice_notifier(&dp_device_notifier);
	if (err)
		goto error_vport_exit;

	major = register_chrdev(0, "openvswitch", &openvswitch_fops);
	if (major < 0) {
		err = major;
		goto error_unreg_notifier;
	}

	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_chrdev;

	return 0;

error_unreg_chrdev:
	unregister_chrdev(major, "openvswitch");
error_unreg_notifier:
	unregister_netdevice_notifier(&dp_device_notifier);
error_vport_exit:
	vport_exit();
error_flow_exit:
	flow_exit();
error:
	return err;
}

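/*
 * Module unload.  rcu_barrier() waits for any outstanding RCU callbacks
 * (e.g. deferred frees of flows that concurrent readers may still hold) to
 * finish before the module's code and data disappear.
 */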
static void dp_cleanup(void)
{
	rcu_barrier();
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	unregister_chrdev(major, "openvswitch");
	unregister_netdevice_notifier(&dp_device_notifier);
	vport_exit();
	flow_exit();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");