ofproto: Reinterpret meaning of OpenFlow hard timeouts with OFPFC_MODIFY.
[openvswitch] / datapath / datapath.c
1 /*
2  * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
3  * Distributed under the terms of the GNU GPL version 2.
4  *
5  * Significant portions of this file may be copied from parts of the Linux
6  * kernel, by Linus Torvalds and others.
7  */
8
9 /* Functions for managing the dp interface/device. */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/if_arp.h>
16 #include <linux/if_vlan.h>
17 #include <linux/in.h>
18 #include <linux/ip.h>
19 #include <linux/jhash.h>
20 #include <linux/delay.h>
21 #include <linux/time.h>
22 #include <linux/etherdevice.h>
23 #include <linux/genetlink.h>
24 #include <linux/kernel.h>
25 #include <linux/kthread.h>
26 #include <linux/mutex.h>
27 #include <linux/percpu.h>
28 #include <linux/rcupdate.h>
29 #include <linux/tcp.h>
30 #include <linux/udp.h>
31 #include <linux/version.h>
32 #include <linux/ethtool.h>
33 #include <linux/wait.h>
34 #include <asm/system.h>
35 #include <asm/div64.h>
36 #include <asm/bug.h>
37 #include <linux/highmem.h>
38 #include <linux/netfilter_bridge.h>
39 #include <linux/netfilter_ipv4.h>
40 #include <linux/inetdevice.h>
41 #include <linux/list.h>
42 #include <linux/rculist.h>
43 #include <linux/dmi.h>
44 #include <net/inet_ecn.h>
45 #include <net/genetlink.h>
46
47 #include "openvswitch/datapath-protocol.h"
48 #include "checksum.h"
49 #include "datapath.h"
50 #include "actions.h"
51 #include "flow.h"
52 #include "vlan.h"
53 #include "tunnel.h"
54 #include "vport-internal_dev.h"
55
56 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
57     LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
58 #error Kernels before 2.6.18 or after 3.0 are not supported by this version of Open vSwitch.
59 #endif
60
/* Exported ioctl hook taking a network device, an ifreq and a command code.
 * NOTE(review): nothing in the visible part of this file assigns or invokes
 * it -- presumably another module installs the handler; confirm. */
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);
63
64 /**
65  * DOC: Locking:
66  *
67  * Writes to device state (add/remove datapath, port, set operations on vports,
68  * etc.) are protected by RTNL.
69  *
70  * Writes to other state (flow table modifications, set miscellaneous datapath
71  * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
72  * lock nests inside genl_mutex.
73  *
74  * Reads are protected by RCU.
75  *
76  * There are a few special cases (mostly stats) that have their own
77  * synchronization, but they nest under all of the above and don't interact
78  * with each other.
79  */
80
/* List of every datapath, kept so that all of them can be dumped.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

/* Forward declarations for helpers defined further down in this file. */
static struct vport *new_vport(const struct vport_parms *parms);
static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
                                   const struct dp_upcall_info *upcall_info);
89
90 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
91 struct datapath *get_dp(int dp_ifindex)
92 {
93         struct datapath *dp = NULL;
94         struct net_device *dev;
95
96         rcu_read_lock();
97         dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
98         if (dev) {
99                 struct vport *vport = internal_dev_get_vport(dev);
100                 if (vport)
101                         dp = vport->dp;
102         }
103         rcu_read_unlock();
104
105         return dp;
106 }
107 EXPORT_SYMBOL_GPL(get_dp);
108
109 /* Must be called with genl_mutex. */
110 static struct flow_table *get_table_protected(struct datapath *dp)
111 {
112         return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
113 }
114
115 /* Must be called with rcu_read_lock or RTNL lock. */
116 static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
117 {
118         return rcu_dereference_rtnl(dp->ports[port_no]);
119 }
120
121 /* Must be called with rcu_read_lock or RTNL lock. */
122 const char *dp_name(const struct datapath *dp)
123 {
124         return vport_get_name(rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]));
125 }
126
127 static inline size_t br_nlmsg_size(void)
128 {
129         return NLMSG_ALIGN(sizeof(struct ifinfomsg))
130                + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
131                + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
132                + nla_total_size(4) /* IFLA_MASTER */
133                + nla_total_size(4) /* IFLA_MTU */
134                + nla_total_size(1); /* IFLA_OPERSTATE */
135 }
136
137 /* Caller must hold RTNL lock. */
138 static int dp_fill_ifinfo(struct sk_buff *skb,
139                           const struct vport *port,
140                           int event, unsigned int flags)
141 {
142         struct datapath *dp = port->dp;
143         int ifindex = vport_get_ifindex(port);
144         struct ifinfomsg *hdr;
145         struct nlmsghdr *nlh;
146
147         if (ifindex < 0)
148                 return ifindex;
149
150         nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
151         if (nlh == NULL)
152                 return -EMSGSIZE;
153
154         hdr = nlmsg_data(nlh);
155         hdr->ifi_family = AF_BRIDGE;
156         hdr->__ifi_pad = 0;
157         hdr->ifi_type = ARPHRD_ETHER;
158         hdr->ifi_index = ifindex;
159         hdr->ifi_flags = vport_get_flags(port);
160         hdr->ifi_change = 0;
161
162         NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
163         NLA_PUT_U32(skb, IFLA_MASTER,
164                 vport_get_ifindex(get_vport_protected(dp, OVSP_LOCAL)));
165         NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
166 #ifdef IFLA_OPERSTATE
167         NLA_PUT_U8(skb, IFLA_OPERSTATE,
168                    vport_is_running(port)
169                         ? vport_get_operstate(port)
170                         : IF_OPER_DOWN);
171 #endif
172
173         NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));
174
175         return nlmsg_end(skb, nlh);
176
177 nla_put_failure:
178         nlmsg_cancel(skb, nlh);
179         return -EMSGSIZE;
180 }
181
182 /* Caller must hold RTNL lock. */
183 static void dp_ifinfo_notify(int event, struct vport *port)
184 {
185         struct sk_buff *skb;
186         int err = -ENOBUFS;
187
188         skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
189         if (skb == NULL)
190                 goto errout;
191
192         err = dp_fill_ifinfo(skb, port, event, 0);
193         if (err < 0) {
194                 /* -EMSGSIZE implies BUG in br_nlmsg_size() */
195                 WARN_ON(err == -EMSGSIZE);
196                 kfree_skb(skb);
197                 goto errout;
198         }
199         rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
200         return;
201 errout:
202         if (err < 0)
203                 rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
204 }
205
206 static void release_dp(struct kobject *kobj)
207 {
208         struct datapath *dp = container_of(kobj, struct datapath, ifobj);
209         kfree(dp);
210 }
211
212 static struct kobj_type dp_ktype = {
213         .release = release_dp
214 };
215
216 static void destroy_dp_rcu(struct rcu_head *rcu)
217 {
218         struct datapath *dp = container_of(rcu, struct datapath, rcu);
219
220         flow_tbl_destroy(dp->table);
221         free_percpu(dp->stats_percpu);
222         kobject_put(&dp->ifobj);
223 }
224
225 /* Called with RTNL lock and genl_lock. */
226 static struct vport *new_vport(const struct vport_parms *parms)
227 {
228         struct vport *vport;
229
230         vport = vport_add(parms);
231         if (!IS_ERR(vport)) {
232                 struct datapath *dp = parms->dp;
233
234                 rcu_assign_pointer(dp->ports[parms->port_no], vport);
235                 list_add(&vport->node, &dp->port_list);
236
237                 dp_ifinfo_notify(RTM_NEWLINK, vport);
238         }
239
240         return vport;
241 }
242
243 /* Called with RTNL lock. */
244 void dp_detach_port(struct vport *p)
245 {
246         ASSERT_RTNL();
247
248         if (p->port_no != OVSP_LOCAL)
249                 dp_sysfs_del_if(p);
250         dp_ifinfo_notify(RTM_DELLINK, p);
251
252         /* First drop references to device. */
253         list_del(&p->node);
254         rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
255
256         /* Then destroy it. */
257         vport_del(p);
258 }
259
260 /* Must be called with rcu_read_lock. */
261 void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
262 {
263         struct datapath *dp = p->dp;
264         struct sw_flow *flow;
265         struct dp_stats_percpu *stats;
266         int stats_counter_off;
267         int error;
268
269         OVS_CB(skb)->vport = p;
270
271         if (!OVS_CB(skb)->flow) {
272                 struct sw_flow_key key;
273                 int key_len;
274                 bool is_frag;
275
276                 /* Extract flow from 'skb' into 'key'. */
277                 error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
278                 if (unlikely(error)) {
279                         kfree_skb(skb);
280                         return;
281                 }
282
283                 if (is_frag && dp->drop_frags) {
284                         consume_skb(skb);
285                         stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
286                         goto out;
287                 }
288
289                 /* Look up flow. */
290                 flow = flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
291                 if (unlikely(!flow)) {
292                         struct dp_upcall_info upcall;
293
294                         upcall.cmd = OVS_PACKET_CMD_MISS;
295                         upcall.key = &key;
296                         upcall.userdata = 0;
297                         upcall.sample_pool = 0;
298                         upcall.actions = NULL;
299                         upcall.actions_len = 0;
300                         dp_upcall(dp, skb, &upcall);
301                         stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
302                         goto out;
303                 }
304
305                 OVS_CB(skb)->flow = flow;
306         }
307
308         stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
309         flow_used(OVS_CB(skb)->flow, skb);
310         execute_actions(dp, skb);
311
312 out:
313         /* Update datapath statistics. */
314         local_bh_disable();
315         stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
316
317         write_seqcount_begin(&stats->seqlock);
318         (*(u64 *)((u8 *)stats + stats_counter_off))++;
319         write_seqcount_end(&stats->seqlock);
320
321         local_bh_enable();
322 }
323
324 static void copy_and_csum_skb(struct sk_buff *skb, void *to)
325 {
326         u16 csum_start, csum_offset;
327         __wsum csum;
328
329         get_skb_csum_pointers(skb, &csum_start, &csum_offset);
330         csum_start -= skb_headroom(skb);
331
332         skb_copy_bits(skb, 0, to, csum_start);
333
334         csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
335                                       skb->len - csum_start, 0);
336         *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
337 }
338
339 static struct genl_family dp_packet_genl_family = {
340         .id = GENL_ID_GENERATE,
341         .hdrsize = sizeof(struct ovs_header),
342         .name = OVS_PACKET_FAMILY,
343         .version = 1,
344         .maxattr = OVS_PACKET_ATTR_MAX
345 };
346
347 /* Generic Netlink multicast groups for upcalls.
348  *
349  * We really want three unique multicast groups per datapath, but we can't even
350  * get one, because genl_register_mc_group() takes genl_lock, which is also
351  * held during Generic Netlink message processing, so trying to acquire
352  * multicast groups during OVS_DP_NEW processing deadlocks.  Instead, we
353  * preallocate a few groups and use them round-robin for datapaths.  Collision
354  * isn't fatal--multicast listeners should check that the family is the one
355  * that they want and discard others--but it wastes time and memory to receive
356  * unwanted messages.
357  */
358 #define PACKET_N_MC_GROUPS 16
359 static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
360
361 static u32 packet_mc_group(struct datapath *dp, u8 cmd)
362 {
363         u32 idx;
364         BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);
365
366         idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
367         return packet_mc_groups[idx].id;
368 }
369
370 static int packet_register_mc_groups(void)
371 {
372         int i;
373
374         for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
375                 struct genl_multicast_group *group = &packet_mc_groups[i];
376                 int error;
377
378                 sprintf(group->name, "packet%d", i);
379                 error = genl_register_mc_group(&dp_packet_genl_family, group);
380                 if (error)
381                         return error;
382         }
383         return 0;
384 }
385
386 int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
387 {
388         struct dp_stats_percpu *stats;
389         int err;
390
391         WARN_ON_ONCE(skb_shared(skb));
392
393         forward_ip_summed(skb, true);
394
395         /* Break apart GSO packets into their component pieces.  Otherwise
396          * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
397         if (skb_is_gso(skb)) {
398                 struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
399                 
400                 if (IS_ERR(nskb)) {
401                         kfree_skb(skb);
402                         err = PTR_ERR(nskb);
403                         goto err;
404                 }
405                 consume_skb(skb);
406                 skb = nskb;
407         }
408
409         err = queue_userspace_packets(dp, skb, upcall_info);
410         if (err)
411                 goto err;
412
413         return 0;
414
415 err:
416         local_bh_disable();
417         stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
418
419         write_seqcount_begin(&stats->seqlock);
420         stats->n_lost++;
421         write_seqcount_end(&stats->seqlock);
422
423         local_bh_enable();
424
425         return err;
426 }
427
428 /* Send each packet in the 'skb' list to userspace for 'dp' as directed by
429  * 'upcall_info'.  There will be only one packet unless we broke up a GSO
430  * packet.
431  */
432 static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
433                                  const struct dp_upcall_info *upcall_info)
434 {
435         u32 group = packet_mc_group(dp, upcall_info->cmd);
436         struct sk_buff *nskb;
437         int err;
438
439         do {
440                 struct ovs_header *upcall;
441                 struct sk_buff *user_skb; /* to be queued to userspace */
442                 struct nlattr *nla;
443                 unsigned int len;
444
445                 nskb = skb->next;
446                 skb->next = NULL;
447
448                 err = vlan_deaccel_tag(skb);
449                 if (unlikely(err))
450                         goto err_kfree_skbs;
451
452                 if (nla_attr_size(skb->len) > USHRT_MAX)
453                         goto err_kfree_skbs;
454
455                 len = sizeof(struct ovs_header);
456                 len += nla_total_size(skb->len);
457                 len += nla_total_size(FLOW_BUFSIZE);
458                 if (upcall_info->userdata)
459                         len += nla_total_size(8);
460                 if (upcall_info->sample_pool)
461                         len += nla_total_size(4);
462                 if (upcall_info->actions_len)
463                         len += nla_total_size(upcall_info->actions_len);
464
465                 user_skb = genlmsg_new(len, GFP_ATOMIC);
466                 if (!user_skb) {
467                         netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
468                         goto err_kfree_skbs;
469                 }
470
471                 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
472                 upcall->dp_ifindex = dp->dp_ifindex;
473
474                 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
475                 flow_to_nlattrs(upcall_info->key, user_skb);
476                 nla_nest_end(user_skb, nla);
477
478                 if (upcall_info->userdata)
479                         nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, upcall_info->userdata);
480                 if (upcall_info->sample_pool)
481                         nla_put_u32(user_skb, OVS_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
482                 if (upcall_info->actions_len) {
483                         const struct nlattr *actions = upcall_info->actions;
484                         u32 actions_len = upcall_info->actions_len;
485
486                         nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
487                         memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
488                         nla_nest_end(user_skb, nla);
489                 }
490
491                 nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
492                 if (skb->ip_summed == CHECKSUM_PARTIAL)
493                         copy_and_csum_skb(skb, nla_data(nla));
494                 else
495                         skb_copy_bits(skb, 0, nla_data(nla), skb->len);
496
497                 err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
498                 if (err)
499                         goto err_kfree_skbs;
500
501                 consume_skb(skb);
502                 skb = nskb;
503         } while (skb);
504         return 0;
505
506 err_kfree_skbs:
507         kfree_skb(skb);
508         while ((skb = nskb) != NULL) {
509                 nskb = skb->next;
510                 kfree_skb(skb);
511         }
512         return err;
513 }
514
515 /* Called with genl_mutex. */
516 static int flush_flows(int dp_ifindex)
517 {
518         struct flow_table *old_table;
519         struct flow_table *new_table;
520         struct datapath *dp;
521
522         dp = get_dp(dp_ifindex);
523         if (!dp)
524                 return -ENODEV;
525
526         old_table = get_table_protected(dp);
527         new_table = flow_tbl_alloc(TBL_MIN_BUCKETS);
528         if (!new_table)
529                 return -ENOMEM;
530
531         rcu_assign_pointer(dp->table, new_table);
532
533         flow_tbl_deferred_destroy(old_table);
534         return 0;
535 }
536
537 static int validate_actions(const struct nlattr *attr)
538 {
539         const struct nlattr *a;
540         int rem;
541
542         nla_for_each_nested(a, attr, rem) {
543                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
544                         [OVS_ACTION_ATTR_OUTPUT] = 4,
545                         [OVS_ACTION_ATTR_USERSPACE] = 8,
546                         [OVS_ACTION_ATTR_PUSH_VLAN] = 2,
547                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
548                         [OVS_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
549                         [OVS_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
550                         [OVS_ACTION_ATTR_SET_NW_SRC] = 4,
551                         [OVS_ACTION_ATTR_SET_NW_DST] = 4,
552                         [OVS_ACTION_ATTR_SET_NW_TOS] = 1,
553                         [OVS_ACTION_ATTR_SET_TP_SRC] = 2,
554                         [OVS_ACTION_ATTR_SET_TP_DST] = 2,
555                         [OVS_ACTION_ATTR_SET_TUNNEL] = 8,
556                         [OVS_ACTION_ATTR_SET_PRIORITY] = 4,
557                         [OVS_ACTION_ATTR_POP_PRIORITY] = 0,
558                 };
559                 int type = nla_type(a);
560
561                 if (type > OVS_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
562                         return -EINVAL;
563
564                 switch (type) {
565                 case OVS_ACTION_ATTR_UNSPEC:
566                         return -EINVAL;
567
568                 case OVS_ACTION_ATTR_USERSPACE:
569                 case OVS_ACTION_ATTR_POP_VLAN:
570                 case OVS_ACTION_ATTR_SET_DL_SRC:
571                 case OVS_ACTION_ATTR_SET_DL_DST:
572                 case OVS_ACTION_ATTR_SET_NW_SRC:
573                 case OVS_ACTION_ATTR_SET_NW_DST:
574                 case OVS_ACTION_ATTR_SET_TP_SRC:
575                 case OVS_ACTION_ATTR_SET_TP_DST:
576                 case OVS_ACTION_ATTR_SET_TUNNEL:
577                 case OVS_ACTION_ATTR_SET_PRIORITY:
578                 case OVS_ACTION_ATTR_POP_PRIORITY:
579                         /* No validation needed. */
580                         break;
581
582                 case OVS_ACTION_ATTR_OUTPUT:
583                         if (nla_get_u32(a) >= DP_MAX_PORTS)
584                                 return -EINVAL;
585                         break;
586
587                 case OVS_ACTION_ATTR_PUSH_VLAN:
588                         if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
589                                 return -EINVAL;
590                         break;
591
592                 case OVS_ACTION_ATTR_SET_NW_TOS:
593                         if (nla_get_u8(a) & INET_ECN_MASK)
594                                 return -EINVAL;
595                         break;
596
597                 default:
598                         return -EOPNOTSUPP;
599                 }
600         }
601
602         if (rem > 0)
603                 return -EINVAL;
604
605         return 0;
606 }
607 static void clear_stats(struct sw_flow *flow)
608 {
609         flow->used = 0;
610         flow->tcp_flags = 0;
611         flow->packet_count = 0;
612         flow->byte_count = 0;
613 }
614
615 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
616 {
617         struct ovs_header *ovs_header = info->userhdr;
618         struct nlattr **a = info->attrs;
619         struct sw_flow_actions *acts;
620         struct sk_buff *packet;
621         struct sw_flow *flow;
622         struct datapath *dp;
623         struct ethhdr *eth;
624         bool is_frag;
625         int len;
626         int err;
627         int key_len;
628
629         err = -EINVAL;
630         if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
631             !a[OVS_PACKET_ATTR_ACTIONS] ||
632             nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
633                 goto err;
634
635         err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS]);
636         if (err)
637                 goto err;
638
639         len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
640         packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
641         err = -ENOMEM;
642         if (!packet)
643                 goto err;
644         skb_reserve(packet, NET_IP_ALIGN);
645
646         memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
647
648         skb_reset_mac_header(packet);
649         eth = eth_hdr(packet);
650
651         /* Normally, setting the skb 'protocol' field would be handled by a
652          * call to eth_type_trans(), but it assumes there's a sending
653          * device, which we may not have. */
654         if (ntohs(eth->h_proto) >= 1536)
655                 packet->protocol = eth->h_proto;
656         else
657                 packet->protocol = htons(ETH_P_802_2);
658
659         /* Build an sw_flow for sending this packet. */
660         flow = flow_alloc();
661         err = PTR_ERR(flow);
662         if (IS_ERR(flow))
663                 goto err_kfree_skb;
664
665         err = flow_extract(packet, -1, &flow->key, &key_len, &is_frag);
666         if (err)
667                 goto err_flow_put;
668
669         err = flow_metadata_from_nlattrs(&flow->key.eth.in_port,
670                                          &flow->key.eth.tun_id,
671                                          a[OVS_PACKET_ATTR_KEY]);
672         if (err)
673                 goto err_flow_put;
674
675         flow->hash = flow_hash(&flow->key, key_len);
676
677         acts = flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
678         err = PTR_ERR(acts);
679         if (IS_ERR(acts))
680                 goto err_flow_put;
681         rcu_assign_pointer(flow->sf_acts, acts);
682
683         OVS_CB(packet)->flow = flow;
684
685         rcu_read_lock();
686         dp = get_dp(ovs_header->dp_ifindex);
687         err = -ENODEV;
688         if (!dp)
689                 goto err_unlock;
690         err = execute_actions(dp, packet);
691         rcu_read_unlock();
692
693         flow_put(flow);
694         return err;
695
696 err_unlock:
697         rcu_read_unlock();
698 err_flow_put:
699         flow_put(flow);
700 err_kfree_skb:
701         kfree_skb(packet);
702 err:
703         return err;
704 }
705
706 static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
707         [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
708         [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
709         [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
710 };
711
712 static struct genl_ops dp_packet_genl_ops[] = {
713         { .cmd = OVS_PACKET_CMD_EXECUTE,
714           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
715           .policy = packet_policy,
716           .doit = ovs_packet_cmd_execute
717         }
718 };
719
720 static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
721 {
722         int i;
723         struct flow_table *table = get_table_protected(dp);
724
725         stats->n_flows = flow_tbl_count(table);
726
727         stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
728         for_each_possible_cpu(i) {
729                 const struct dp_stats_percpu *percpu_stats;
730                 struct dp_stats_percpu local_stats;
731                 unsigned seqcount;
732
733                 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
734
735                 do {
736                         seqcount = read_seqcount_begin(&percpu_stats->seqlock);
737                         local_stats = *percpu_stats;
738                 } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
739
740                 stats->n_frags += local_stats.n_frags;
741                 stats->n_hit += local_stats.n_hit;
742                 stats->n_missed += local_stats.n_missed;
743                 stats->n_lost += local_stats.n_lost;
744         }
745 }
746
747 /* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
748  * Called with RTNL lock.
749  */
750 int dp_min_mtu(const struct datapath *dp)
751 {
752         struct vport *p;
753         int mtu = 0;
754
755         ASSERT_RTNL();
756
757         list_for_each_entry (p, &dp->port_list, node) {
758                 int dev_mtu;
759
760                 /* Skip any internal ports, since that's what we're trying to
761                  * set. */
762                 if (is_internal_vport(p))
763                         continue;
764
765                 dev_mtu = vport_get_mtu(p);
766                 if (!dev_mtu)
767                         continue;
768                 if (!mtu || dev_mtu < mtu)
769                         mtu = dev_mtu;
770         }
771
772         return mtu ? mtu : ETH_DATA_LEN;
773 }
774
775 /* Sets the MTU of all datapath devices to the minimum of the ports
776  * Called with RTNL lock.
777  */
778 void set_internal_devs_mtu(const struct datapath *dp)
779 {
780         struct vport *p;
781         int mtu;
782
783         ASSERT_RTNL();
784
785         mtu = dp_min_mtu(dp);
786
787         list_for_each_entry (p, &dp->port_list, node) {
788                 if (is_internal_vport(p))
789                         vport_set_mtu(p, mtu);
790         }
791 }
792
793 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
794         [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
795         [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
796         [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
797 };
798
799 static struct genl_family dp_flow_genl_family = {
800         .id = GENL_ID_GENERATE,
801         .hdrsize = sizeof(struct ovs_header),
802         .name = OVS_FLOW_FAMILY,
803         .version = 1,
804         .maxattr = OVS_FLOW_ATTR_MAX
805 };
806
807 static struct genl_multicast_group dp_flow_multicast_group = {
808         .name = OVS_FLOW_MCGROUP
809 };
810
811 /* Called with genl_lock. */
812 static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
813                                   struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
814 {
815         const int skb_orig_len = skb->len;
816         const struct sw_flow_actions *sf_acts;
817         struct ovs_flow_stats stats;
818         struct ovs_header *ovs_header;
819         struct nlattr *nla;
820         unsigned long used;
821         u8 tcp_flags;
822         int err;
823
824         sf_acts = rcu_dereference_protected(flow->sf_acts,
825                                             lockdep_genl_is_held());
826
827         ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
828         if (!ovs_header)
829                 return -EMSGSIZE;
830
831         ovs_header->dp_ifindex = dp->dp_ifindex;
832
833         nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
834         if (!nla)
835                 goto nla_put_failure;
836         err = flow_to_nlattrs(&flow->key, skb);
837         if (err)
838                 goto error;
839         nla_nest_end(skb, nla);
840
841         spin_lock_bh(&flow->lock);
842         used = flow->used;
843         stats.n_packets = flow->packet_count;
844         stats.n_bytes = flow->byte_count;
845         tcp_flags = flow->tcp_flags;
846         spin_unlock_bh(&flow->lock);
847
848         if (used)
849                 NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, flow_used_time(used));
850
851         if (stats.n_packets)
852                 NLA_PUT(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats);
853
854         if (tcp_flags)
855                 NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags);
856
857         /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
858          * this is the first flow to be dumped into 'skb'.  This is unusual for
859          * Netlink but individual action lists can be longer than
860          * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
861          * The userspace caller can always fetch the actions separately if it
862          * really wants them.  (Most userspace callers in fact don't care.)
863          *
864          * This can only fail for dump operations because the skb is always
865          * properly sized for single flows.
866          */
867         err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
868                       sf_acts->actions);
869         if (err < 0 && skb_orig_len)
870                 goto error;
871
872         return genlmsg_end(skb, ovs_header);
873
874 nla_put_failure:
875         err = -EMSGSIZE;
876 error:
877         genlmsg_cancel(skb, ovs_header);
878         return err;
879 }
880
881 static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
882 {
883         const struct sw_flow_actions *sf_acts;
884         int len;
885
886         sf_acts = rcu_dereference_protected(flow->sf_acts,
887                                             lockdep_genl_is_held());
888
889         len = nla_total_size(FLOW_BUFSIZE); /* OVS_FLOW_ATTR_KEY */
890         len += nla_total_size(sf_acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
891         len += nla_total_size(sizeof(struct ovs_flow_stats)); /* OVS_FLOW_ATTR_STATS */
892         len += nla_total_size(1); /* OVS_FLOW_ATTR_TCP_FLAGS */
893         len += nla_total_size(8); /* OVS_FLOW_ATTR_USED */
894         return genlmsg_new(NLMSG_ALIGN(sizeof(struct ovs_header)) + len, GFP_KERNEL);
895 }
896
897 static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
898                                                u32 pid, u32 seq, u8 cmd)
899 {
900         struct sk_buff *skb;
901         int retval;
902
903         skb = ovs_flow_cmd_alloc_info(flow);
904         if (!skb)
905                 return ERR_PTR(-ENOMEM);
906
907         retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
908         BUG_ON(retval < 0);
909         return skb;
910 }
911
912 static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
913 {
914         struct nlattr **a = info->attrs;
915         struct ovs_header *ovs_header = info->userhdr;
916         struct sw_flow_key key;
917         struct sw_flow *flow;
918         struct sk_buff *reply;
919         struct datapath *dp;
920         struct flow_table *table;
921         int error;
922         int key_len;
923
924         /* Extract key. */
925         error = -EINVAL;
926         if (!a[OVS_FLOW_ATTR_KEY])
927                 goto error;
928         error = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
929         if (error)
930                 goto error;
931
932         /* Validate actions. */
933         if (a[OVS_FLOW_ATTR_ACTIONS]) {
934                 error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS]);
935                 if (error)
936                         goto error;
937         } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
938                 error = -EINVAL;
939                 goto error;
940         }
941
942         dp = get_dp(ovs_header->dp_ifindex);
943         error = -ENODEV;
944         if (!dp)
945                 goto error;
946
947         table = get_table_protected(dp);
948         flow = flow_tbl_lookup(table, &key, key_len);
949         if (!flow) {
950                 struct sw_flow_actions *acts;
951
952                 /* Bail out if we're not allowed to create a new flow. */
953                 error = -ENOENT;
954                 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
955                         goto error;
956
957                 /* Expand table, if necessary, to make room. */
958                 if (flow_tbl_need_to_expand(table)) {
959                         struct flow_table *new_table;
960
961                         new_table = flow_tbl_expand(table);
962                         if (!IS_ERR(new_table)) {
963                                 rcu_assign_pointer(dp->table, new_table);
964                                 flow_tbl_deferred_destroy(table);
965                                 table = get_table_protected(dp);
966                         }
967                 }
968
969                 /* Allocate flow. */
970                 flow = flow_alloc();
971                 if (IS_ERR(flow)) {
972                         error = PTR_ERR(flow);
973                         goto error;
974                 }
975                 flow->key = key;
976                 clear_stats(flow);
977
978                 /* Obtain actions. */
979                 acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
980                 error = PTR_ERR(acts);
981                 if (IS_ERR(acts))
982                         goto error_free_flow;
983                 rcu_assign_pointer(flow->sf_acts, acts);
984
985                 /* Put flow in bucket. */
986                 flow->hash = flow_hash(&key, key_len);
987                 flow_tbl_insert(table, flow);
988
989                 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
990                                                 info->snd_seq, OVS_FLOW_CMD_NEW);
991         } else {
992                 /* We found a matching flow. */
993                 struct sw_flow_actions *old_acts;
994
995                 /* Bail out if we're not allowed to modify an existing flow.
996                  * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
997                  * because Generic Netlink treats the latter as a dump
998                  * request.  We also accept NLM_F_EXCL in case that bug ever
999                  * gets fixed.
1000                  */
1001                 error = -EEXIST;
1002                 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1003                     info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1004                         goto error;
1005
1006                 /* Update actions. */
1007                 old_acts = rcu_dereference_protected(flow->sf_acts,
1008                                                      lockdep_genl_is_held());
1009                 if (a[OVS_FLOW_ATTR_ACTIONS] &&
1010                     (old_acts->actions_len != nla_len(a[OVS_FLOW_ATTR_ACTIONS]) ||
1011                      memcmp(old_acts->actions, nla_data(a[OVS_FLOW_ATTR_ACTIONS]),
1012                             old_acts->actions_len))) {
1013                         struct sw_flow_actions *new_acts;
1014
1015                         new_acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
1016                         error = PTR_ERR(new_acts);
1017                         if (IS_ERR(new_acts))
1018                                 goto error;
1019
1020                         rcu_assign_pointer(flow->sf_acts, new_acts);
1021                         flow_deferred_free_acts(old_acts);
1022                 }
1023
1024                 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
1025                                                 info->snd_seq, OVS_FLOW_CMD_NEW);
1026
1027                 /* Clear stats. */
1028                 if (a[OVS_FLOW_ATTR_CLEAR]) {
1029                         spin_lock_bh(&flow->lock);
1030                         clear_stats(flow);
1031                         spin_unlock_bh(&flow->lock);
1032                 }
1033         }
1034
1035         if (!IS_ERR(reply))
1036                 genl_notify(reply, genl_info_net(info), info->snd_pid,
1037                             dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1038         else
1039                 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1040                                 dp_flow_multicast_group.id, PTR_ERR(reply));
1041         return 0;
1042
1043 error_free_flow:
1044         flow_put(flow);
1045 error:
1046         return error;
1047 }
1048
1049 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1050 {
1051         struct nlattr **a = info->attrs;
1052         struct ovs_header *ovs_header = info->userhdr;
1053         struct sw_flow_key key;
1054         struct sk_buff *reply;
1055         struct sw_flow *flow;
1056         struct datapath *dp;
1057         struct flow_table *table;
1058         int err;
1059         int key_len;
1060
1061         if (!a[OVS_FLOW_ATTR_KEY])
1062                 return -EINVAL;
1063         err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1064         if (err)
1065                 return err;
1066
1067         dp = get_dp(ovs_header->dp_ifindex);
1068         if (!dp)
1069                 return -ENODEV;
1070
1071         table = get_table_protected(dp);
1072         flow = flow_tbl_lookup(table, &key, key_len);
1073         if (!flow)
1074                 return -ENOENT;
1075
1076         reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, OVS_FLOW_CMD_NEW);
1077         if (IS_ERR(reply))
1078                 return PTR_ERR(reply);
1079
1080         return genlmsg_reply(reply, info);
1081 }
1082
1083 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1084 {
1085         struct nlattr **a = info->attrs;
1086         struct ovs_header *ovs_header = info->userhdr;
1087         struct sw_flow_key key;
1088         struct sk_buff *reply;
1089         struct sw_flow *flow;
1090         struct datapath *dp;
1091         struct flow_table *table;
1092         int err;
1093         int key_len;
1094
1095         if (!a[OVS_FLOW_ATTR_KEY])
1096                 return flush_flows(ovs_header->dp_ifindex);
1097         err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1098         if (err)
1099                 return err;
1100
1101         dp = get_dp(ovs_header->dp_ifindex);
1102         if (!dp)
1103                 return -ENODEV;
1104
1105         table = get_table_protected(dp);
1106         flow = flow_tbl_lookup(table, &key, key_len);
1107         if (!flow)
1108                 return -ENOENT;
1109
1110         reply = ovs_flow_cmd_alloc_info(flow);
1111         if (!reply)
1112                 return -ENOMEM;
1113
1114         flow_tbl_remove(table, flow);
1115
1116         err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
1117                                      info->snd_seq, 0, OVS_FLOW_CMD_DEL);
1118         BUG_ON(err < 0);
1119
1120         flow_deferred_free(flow);
1121
1122         genl_notify(reply, genl_info_net(info), info->snd_pid,
1123                     dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1124         return 0;
1125 }
1126
1127 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1128 {
1129         struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1130         struct datapath *dp;
1131
1132         dp = get_dp(ovs_header->dp_ifindex);
1133         if (!dp)
1134                 return -ENODEV;
1135
1136         for (;;) {
1137                 struct sw_flow *flow;
1138                 u32 bucket, obj;
1139
1140                 bucket = cb->args[0];
1141                 obj = cb->args[1];
1142                 flow = flow_tbl_next(get_table_protected(dp), &bucket, &obj);
1143                 if (!flow)
1144                         break;
1145
1146                 if (ovs_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
1147                                            cb->nlh->nlmsg_seq, NLM_F_MULTI,
1148                                            OVS_FLOW_CMD_NEW) < 0)
1149                         break;
1150
1151                 cb->args[0] = bucket;
1152                 cb->args[1] = obj;
1153         }
1154         return skb->len;
1155 }
1156
1157 static struct genl_ops dp_flow_genl_ops[] = {
1158         { .cmd = OVS_FLOW_CMD_NEW,
1159           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1160           .policy = flow_policy,
1161           .doit = ovs_flow_cmd_new_or_set
1162         },
1163         { .cmd = OVS_FLOW_CMD_DEL,
1164           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1165           .policy = flow_policy,
1166           .doit = ovs_flow_cmd_del
1167         },
1168         { .cmd = OVS_FLOW_CMD_GET,
1169           .flags = 0,               /* OK for unprivileged users. */
1170           .policy = flow_policy,
1171           .doit = ovs_flow_cmd_get,
1172           .dumpit = ovs_flow_cmd_dump
1173         },
1174         { .cmd = OVS_FLOW_CMD_SET,
1175           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1176           .policy = flow_policy,
1177           .doit = ovs_flow_cmd_new_or_set,
1178         },
1179 };
1180
1181 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1182 #ifdef HAVE_NLA_NUL_STRING
1183         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1184 #endif
1185         [OVS_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
1186         [OVS_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
1187 };
1188
1189 static struct genl_family dp_datapath_genl_family = {
1190         .id = GENL_ID_GENERATE,
1191         .hdrsize = sizeof(struct ovs_header),
1192         .name = OVS_DATAPATH_FAMILY,
1193         .version = 1,
1194         .maxattr = OVS_DP_ATTR_MAX
1195 };
1196
1197 static struct genl_multicast_group dp_datapath_multicast_group = {
1198         .name = OVS_DATAPATH_MCGROUP
1199 };
1200
1201 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1202                                 u32 pid, u32 seq, u32 flags, u8 cmd)
1203 {
1204         struct ovs_header *ovs_header;
1205         struct nlattr *nla;
1206         int err;
1207
1208         ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
1209                                    flags, cmd);
1210         if (!ovs_header)
1211                 goto error;
1212
1213         ovs_header->dp_ifindex = dp->dp_ifindex;
1214
1215         rcu_read_lock();
1216         err = nla_put_string(skb, OVS_DP_ATTR_NAME, dp_name(dp));
1217         rcu_read_unlock();
1218         if (err)
1219                 goto nla_put_failure;
1220
1221         nla = nla_reserve(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats));
1222         if (!nla)
1223                 goto nla_put_failure;
1224         get_dp_stats(dp, nla_data(nla));
1225
1226         NLA_PUT_U32(skb, OVS_DP_ATTR_IPV4_FRAGS,
1227                     dp->drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO);
1228
1229         if (dp->sflow_probability)
1230                 NLA_PUT_U32(skb, OVS_DP_ATTR_SAMPLING, dp->sflow_probability);
1231
1232         nla = nla_nest_start(skb, OVS_DP_ATTR_MCGROUPS);
1233         if (!nla)
1234                 goto nla_put_failure;
1235         NLA_PUT_U32(skb, OVS_PACKET_CMD_MISS, packet_mc_group(dp, OVS_PACKET_CMD_MISS));
1236         NLA_PUT_U32(skb, OVS_PACKET_CMD_ACTION, packet_mc_group(dp, OVS_PACKET_CMD_ACTION));
1237         NLA_PUT_U32(skb, OVS_PACKET_CMD_SAMPLE, packet_mc_group(dp, OVS_PACKET_CMD_SAMPLE));
1238         nla_nest_end(skb, nla);
1239
1240         return genlmsg_end(skb, ovs_header);
1241
1242 nla_put_failure:
1243         genlmsg_cancel(skb, ovs_header);
1244 error:
1245         return -EMSGSIZE;
1246 }
1247
1248 static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
1249                                              u32 seq, u8 cmd)
1250 {
1251         struct sk_buff *skb;
1252         int retval;
1253
1254         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1255         if (!skb)
1256                 return ERR_PTR(-ENOMEM);
1257
1258         retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
1259         if (retval < 0) {
1260                 kfree_skb(skb);
1261                 return ERR_PTR(retval);
1262         }
1263         return skb;
1264 }
1265
1266 static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1267 {
1268         if (a[OVS_DP_ATTR_IPV4_FRAGS]) {
1269                 u32 frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]);
1270
1271                 if (frags != OVS_DP_FRAG_ZERO && frags != OVS_DP_FRAG_DROP)
1272                         return -EINVAL;
1273         }
1274
1275         return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
1276 }
1277
1278 /* Called with genl_mutex and optionally with RTNL lock also. */
1279 static struct datapath *lookup_datapath(struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1280 {
1281         struct datapath *dp;
1282
1283         if (!a[OVS_DP_ATTR_NAME])
1284                 dp = get_dp(ovs_header->dp_ifindex);
1285         else {
1286                 struct vport *vport;
1287
1288                 rcu_read_lock();
1289                 vport = vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
1290                 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1291                 rcu_read_unlock();
1292         }
1293         return dp ? dp : ERR_PTR(-ENODEV);
1294 }
1295
1296 /* Called with genl_mutex. */
1297 static void change_datapath(struct datapath *dp, struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1298 {
1299         if (a[OVS_DP_ATTR_IPV4_FRAGS])
1300                 dp->drop_frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]) == OVS_DP_FRAG_DROP;
1301         if (a[OVS_DP_ATTR_SAMPLING])
1302                 dp->sflow_probability = nla_get_u32(a[OVS_DP_ATTR_SAMPLING]);
1303 }
1304
1305 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1306 {
1307         struct nlattr **a = info->attrs;
1308         struct vport_parms parms;
1309         struct sk_buff *reply;
1310         struct datapath *dp;
1311         struct vport *vport;
1312         int err;
1313
1314         err = -EINVAL;
1315         if (!a[OVS_DP_ATTR_NAME])
1316                 goto err;
1317
1318         err = ovs_dp_cmd_validate(a);
1319         if (err)
1320                 goto err;
1321
1322         rtnl_lock();
1323         err = -ENODEV;
1324         if (!try_module_get(THIS_MODULE))
1325                 goto err_unlock_rtnl;
1326
1327         err = -ENOMEM;
1328         dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1329         if (dp == NULL)
1330                 goto err_put_module;
1331         INIT_LIST_HEAD(&dp->port_list);
1332
1333         /* Initialize kobject for bridge.  This will be added as
1334          * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
1335         dp->ifobj.kset = NULL;
1336         kobject_init(&dp->ifobj, &dp_ktype);
1337
1338         /* Allocate table. */
1339         err = -ENOMEM;
1340         rcu_assign_pointer(dp->table, flow_tbl_alloc(TBL_MIN_BUCKETS));
1341         if (!dp->table)
1342                 goto err_free_dp;
1343
1344         /* Set up our datapath device. */
1345         parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1346         parms.type = OVS_VPORT_TYPE_INTERNAL;
1347         parms.options = NULL;
1348         parms.dp = dp;
1349         parms.port_no = OVSP_LOCAL;
1350         vport = new_vport(&parms);
1351         if (IS_ERR(vport)) {
1352                 err = PTR_ERR(vport);
1353                 if (err == -EBUSY)
1354                         err = -EEXIST;
1355
1356                 goto err_destroy_table;
1357         }
1358         dp->dp_ifindex = vport_get_ifindex(vport);
1359
1360         dp->drop_frags = 0;
1361         dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1362         if (!dp->stats_percpu) {
1363                 err = -ENOMEM;
1364                 goto err_destroy_local_port;
1365         }
1366
1367         change_datapath(dp, a);
1368
1369         reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
1370         err = PTR_ERR(reply);
1371         if (IS_ERR(reply))
1372                 goto err_destroy_local_port;
1373
1374         list_add_tail(&dp->list_node, &dps);
1375         dp_sysfs_add_dp(dp);
1376
1377         rtnl_unlock();
1378
1379         genl_notify(reply, genl_info_net(info), info->snd_pid,
1380                     dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1381         return 0;
1382
1383 err_destroy_local_port:
1384         dp_detach_port(get_vport_protected(dp, OVSP_LOCAL));
1385 err_destroy_table:
1386         flow_tbl_destroy(get_table_protected(dp));
1387 err_free_dp:
1388         kfree(dp);
1389 err_put_module:
1390         module_put(THIS_MODULE);
1391 err_unlock_rtnl:
1392         rtnl_unlock();
1393 err:
1394         return err;
1395 }
1396
1397 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1398 {
1399         struct vport *vport, *next_vport;
1400         struct sk_buff *reply;
1401         struct datapath *dp;
1402         int err;
1403
1404         err = ovs_dp_cmd_validate(info->attrs);
1405         if (err)
1406                 goto exit;
1407
1408         rtnl_lock();
1409         dp = lookup_datapath(info->userhdr, info->attrs);
1410         err = PTR_ERR(dp);
1411         if (IS_ERR(dp))
1412                 goto exit_unlock;
1413
1414         reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_DEL);
1415         err = PTR_ERR(reply);
1416         if (IS_ERR(reply))
1417                 goto exit_unlock;
1418
1419         list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
1420                 if (vport->port_no != OVSP_LOCAL)
1421                         dp_detach_port(vport);
1422
1423         dp_sysfs_del_dp(dp);
1424         list_del(&dp->list_node);
1425         dp_detach_port(get_vport_protected(dp, OVSP_LOCAL));
1426
1427         /* rtnl_unlock() will wait until all the references to devices that
1428          * are pending unregistration have been dropped.  We do it here to
1429          * ensure that any internal devices (which contain DP pointers) are
1430          * fully destroyed before freeing the datapath.
1431          */
1432         rtnl_unlock();
1433
1434         call_rcu(&dp->rcu, destroy_dp_rcu);
1435         module_put(THIS_MODULE);
1436
1437         genl_notify(reply, genl_info_net(info), info->snd_pid,
1438                     dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1439
1440         return 0;
1441
1442 exit_unlock:
1443         rtnl_unlock();
1444 exit:
1445         return err;
1446 }
1447
1448 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1449 {
1450         struct sk_buff *reply;
1451         struct datapath *dp;
1452         int err;
1453
1454         err = ovs_dp_cmd_validate(info->attrs);
1455         if (err)
1456                 return err;
1457
1458         dp = lookup_datapath(info->userhdr, info->attrs);
1459         if (IS_ERR(dp))
1460                 return PTR_ERR(dp);
1461
1462         change_datapath(dp, info->attrs);
1463
1464         reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
1465         if (IS_ERR(reply)) {
1466                 err = PTR_ERR(reply);
1467                 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1468                                 dp_datapath_multicast_group.id, err);
1469                 return 0;
1470         }
1471
1472         genl_notify(reply, genl_info_net(info), info->snd_pid,
1473                     dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1474         return 0;
1475 }
1476
1477 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1478 {
1479         struct sk_buff *reply;
1480         struct datapath *dp;
1481         int err;
1482
1483         err = ovs_dp_cmd_validate(info->attrs);
1484         if (err)
1485                 return err;
1486
1487         dp = lookup_datapath(info->userhdr, info->attrs);
1488         if (IS_ERR(dp))
1489                 return PTR_ERR(dp);
1490
1491         reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
1492         if (IS_ERR(reply))
1493                 return PTR_ERR(reply);
1494
1495         return genlmsg_reply(reply, info);
1496 }
1497
1498 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1499 {
1500         struct datapath *dp;
1501         int skip = cb->args[0];
1502         int i = 0;
1503
1504         list_for_each_entry (dp, &dps, list_node) {
1505                 if (i < skip)
1506                         continue;
1507                 if (ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
1508                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
1509                                          OVS_DP_CMD_NEW) < 0)
1510                         break;
1511                 i++;
1512         }
1513
1514         cb->args[0] = i;
1515
1516         return skb->len;
1517 }
1518
1519 static struct genl_ops dp_datapath_genl_ops[] = {
1520         { .cmd = OVS_DP_CMD_NEW,
1521           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1522           .policy = datapath_policy,
1523           .doit = ovs_dp_cmd_new
1524         },
1525         { .cmd = OVS_DP_CMD_DEL,
1526           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1527           .policy = datapath_policy,
1528           .doit = ovs_dp_cmd_del
1529         },
1530         { .cmd = OVS_DP_CMD_GET,
1531           .flags = 0,               /* OK for unprivileged users. */
1532           .policy = datapath_policy,
1533           .doit = ovs_dp_cmd_get,
1534           .dumpit = ovs_dp_cmd_dump
1535         },
1536         { .cmd = OVS_DP_CMD_SET,
1537           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1538           .policy = datapath_policy,
1539           .doit = ovs_dp_cmd_set,
1540         },
1541 };
1542
1543 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1544 #ifdef HAVE_NLA_NUL_STRING
1545         [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1546         [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1547         [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1548         [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
1549         [OVS_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
1550 #else
1551         [OVS_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
1552         [OVS_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
1553 #endif
1554         [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1555 };
1556
1557 static struct genl_family dp_vport_genl_family = {
1558         .id = GENL_ID_GENERATE,
1559         .hdrsize = sizeof(struct ovs_header),
1560         .name = OVS_VPORT_FAMILY,
1561         .version = 1,
1562         .maxattr = OVS_VPORT_ATTR_MAX
1563 };
1564
1565 struct genl_multicast_group dp_vport_multicast_group = {
1566         .name = OVS_VPORT_MCGROUP
1567 };
1568
1569 /* Called with RTNL lock or RCU read lock. */
1570 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1571                                    u32 pid, u32 seq, u32 flags, u8 cmd)
1572 {
1573         struct ovs_header *ovs_header;
1574         struct nlattr *nla;
1575         int ifindex;
1576         int err;
1577
1578         ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
1579                                  flags, cmd);
1580         if (!ovs_header)
1581                 return -EMSGSIZE;
1582
1583         ovs_header->dp_ifindex = vport->dp->dp_ifindex;
1584
1585         NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
1586         NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport_get_type(vport));
1587         NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport_get_name(vport));
1588
1589         nla = nla_reserve(skb, OVS_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
1590         if (!nla)
1591                 goto nla_put_failure;
1592         if (vport_get_stats(vport, nla_data(nla)))
1593                 __skb_trim(skb, skb->len - nla->nla_len);
1594
1595         NLA_PUT(skb, OVS_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
1596
1597         err = vport_get_options(vport, skb);
1598         if (err == -EMSGSIZE)
1599                 goto error;
1600
1601         ifindex = vport_get_ifindex(vport);
1602         if (ifindex > 0)
1603                 NLA_PUT_U32(skb, OVS_VPORT_ATTR_IFINDEX, ifindex);
1604
1605         return genlmsg_end(skb, ovs_header);
1606
1607 nla_put_failure:
1608         err = -EMSGSIZE;
1609 error:
1610         genlmsg_cancel(skb, ovs_header);
1611         return err;
1612 }
1613
1614 /* Called with RTNL lock or RCU read lock. */
1615 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
1616                                          u32 seq, u8 cmd)
1617 {
1618         struct sk_buff *skb;
1619         int retval;
1620
1621         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1622         if (!skb)
1623                 return ERR_PTR(-ENOMEM);
1624
1625         retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
1626         if (retval < 0) {
1627                 kfree_skb(skb);
1628                 return ERR_PTR(retval);
1629         }
1630         return skb;
1631 }
1632
1633 static int ovs_vport_cmd_validate(struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1634 {
1635         return CHECK_NUL_STRING(a[OVS_VPORT_ATTR_NAME], IFNAMSIZ - 1);
1636 }
1637
1638 /* Called with RTNL lock or RCU read lock. */
1639 static struct vport *lookup_vport(struct ovs_header *ovs_header,
1640                                   struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1641 {
1642         struct datapath *dp;
1643         struct vport *vport;
1644
1645         if (a[OVS_VPORT_ATTR_NAME]) {
1646                 vport = vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
1647                 if (!vport)
1648                         return ERR_PTR(-ENODEV);
1649                 return vport;
1650         } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1651                 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1652
1653                 if (port_no >= DP_MAX_PORTS)
1654                         return ERR_PTR(-EFBIG);
1655
1656                 dp = get_dp(ovs_header->dp_ifindex);
1657                 if (!dp)
1658                         return ERR_PTR(-ENODEV);
1659
1660                 vport = get_vport_protected(dp, port_no);
1661                 if (!vport)
1662                         return ERR_PTR(-ENOENT);
1663                 return vport;
1664         } else
1665                 return ERR_PTR(-EINVAL);
1666 }
1667
1668 /* Called with RTNL lock. */
1669 static int change_vport(struct vport *vport, struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1670 {
1671         int err = 0;
1672         if (a[OVS_VPORT_ATTR_STATS])
1673                 err = vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1674         if (!err && a[OVS_VPORT_ATTR_ADDRESS])
1675                 err = vport_set_addr(vport, nla_data(a[OVS_VPORT_ATTR_ADDRESS]));
1676         return err;
1677 }
1678
1679 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1680 {
1681         struct nlattr **a = info->attrs;
1682         struct ovs_header *ovs_header = info->userhdr;
1683         struct vport_parms parms;
1684         struct sk_buff *reply;
1685         struct vport *vport;
1686         struct datapath *dp;
1687         u32 port_no;
1688         int err;
1689
1690         err = -EINVAL;
1691         if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE])
1692                 goto exit;
1693
1694         err = ovs_vport_cmd_validate(a);
1695         if (err)
1696                 goto exit;
1697
1698         rtnl_lock();
1699         dp = get_dp(ovs_header->dp_ifindex);
1700         err = -ENODEV;
1701         if (!dp)
1702                 goto exit_unlock;
1703
1704         if (a[OVS_VPORT_ATTR_PORT_NO]) {
1705                 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1706
1707                 err = -EFBIG;
1708                 if (port_no >= DP_MAX_PORTS)
1709                         goto exit_unlock;
1710
1711                 vport = get_vport_protected(dp, port_no);
1712                 err = -EBUSY;
1713                 if (vport)
1714                         goto exit_unlock;
1715         } else {
1716                 for (port_no = 1; ; port_no++) {
1717                         if (port_no >= DP_MAX_PORTS) {
1718                                 err = -EFBIG;
1719                                 goto exit_unlock;
1720                         }
1721                         vport = get_vport_protected(dp, port_no);
1722                         if (!vport)
1723                                 break;
1724                 }
1725         }
1726
1727         parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1728         parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1729         parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1730         parms.dp = dp;
1731         parms.port_no = port_no;
1732
1733         vport = new_vport(&parms);
1734         err = PTR_ERR(vport);
1735         if (IS_ERR(vport))
1736                 goto exit_unlock;
1737
1738         set_internal_devs_mtu(dp);
1739         dp_sysfs_add_if(vport);
1740
1741         err = change_vport(vport, a);
1742         if (!err) {
1743                 reply = ovs_vport_cmd_build_info(vport, info->snd_pid,
1744                                                  info->snd_seq, OVS_VPORT_CMD_NEW);
1745                 if (IS_ERR(reply))
1746                         err = PTR_ERR(reply);
1747         }
1748         if (err) {
1749                 dp_detach_port(vport);
1750                 goto exit_unlock;
1751         }
1752         genl_notify(reply, genl_info_net(info), info->snd_pid,
1753                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1754
1755
1756 exit_unlock:
1757         rtnl_unlock();
1758 exit:
1759         return err;
1760 }
1761
1762 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1763 {
1764         struct nlattr **a = info->attrs;
1765         struct sk_buff *reply;
1766         struct vport *vport;
1767         int err;
1768
1769         err = ovs_vport_cmd_validate(a);
1770         if (err)
1771                 goto exit;
1772
1773         rtnl_lock();
1774         vport = lookup_vport(info->userhdr, a);
1775         err = PTR_ERR(vport);
1776         if (IS_ERR(vport))
1777                 goto exit_unlock;
1778
1779         err = 0;
1780         if (a[OVS_VPORT_ATTR_OPTIONS])
1781                 err = vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1782         if (!err)
1783                 err = change_vport(vport, a);
1784
1785         reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1786                                          OVS_VPORT_CMD_NEW);
1787         if (IS_ERR(reply)) {
1788                 err = PTR_ERR(reply);
1789                 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1790                                 dp_vport_multicast_group.id, err);
1791                 return 0;
1792         }
1793
1794         genl_notify(reply, genl_info_net(info), info->snd_pid,
1795                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1796
1797 exit_unlock:
1798         rtnl_unlock();
1799 exit:
1800         return err;
1801 }
1802
1803 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1804 {
1805         struct nlattr **a = info->attrs;
1806         struct sk_buff *reply;
1807         struct vport *vport;
1808         int err;
1809
1810         err = ovs_vport_cmd_validate(a);
1811         if (err)
1812                 goto exit;
1813
1814         rtnl_lock();
1815         vport = lookup_vport(info->userhdr, a);
1816         err = PTR_ERR(vport);
1817         if (IS_ERR(vport))
1818                 goto exit_unlock;
1819
1820         if (vport->port_no == OVSP_LOCAL) {
1821                 err = -EINVAL;
1822                 goto exit_unlock;
1823         }
1824
1825         reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1826                                          OVS_VPORT_CMD_DEL);
1827         err = PTR_ERR(reply);
1828         if (IS_ERR(reply))
1829                 goto exit_unlock;
1830
1831         dp_detach_port(vport);
1832
1833         genl_notify(reply, genl_info_net(info), info->snd_pid,
1834                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1835
1836 exit_unlock:
1837         rtnl_unlock();
1838 exit:
1839         return err;
1840 }
1841
1842 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1843 {
1844         struct nlattr **a = info->attrs;
1845         struct ovs_header *ovs_header = info->userhdr;
1846         struct sk_buff *reply;
1847         struct vport *vport;
1848         int err;
1849
1850         err = ovs_vport_cmd_validate(a);
1851         if (err)
1852                 goto exit;
1853
1854         rcu_read_lock();
1855         vport = lookup_vport(ovs_header, a);
1856         err = PTR_ERR(vport);
1857         if (IS_ERR(vport))
1858                 goto exit_unlock;
1859
1860         reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1861                                          OVS_VPORT_CMD_NEW);
1862         err = PTR_ERR(reply);
1863         if (IS_ERR(reply))
1864                 goto exit_unlock;
1865
1866         rcu_read_unlock();
1867
1868         return genlmsg_reply(reply, info);
1869
1870 exit_unlock:
1871         rcu_read_unlock();
1872 exit:
1873         return err;
1874 }
1875
1876 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1877 {
1878         struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1879         struct datapath *dp;
1880         u32 port_no;
1881         int retval;
1882
1883         dp = get_dp(ovs_header->dp_ifindex);
1884         if (!dp)
1885                 return -ENODEV;
1886
1887         rcu_read_lock();
1888         for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
1889                 struct vport *vport;
1890
1891                 vport = get_vport_protected(dp, port_no);
1892                 if (!vport)
1893                         continue;
1894
1895                 if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
1896                                             cb->nlh->nlmsg_seq, NLM_F_MULTI,
1897                                             OVS_VPORT_CMD_NEW) < 0)
1898                         break;
1899         }
1900         rcu_read_unlock();
1901
1902         cb->args[0] = port_no;
1903         retval = skb->len;
1904
1905         return retval;
1906 }
1907
1908 static struct genl_ops dp_vport_genl_ops[] = {
1909         { .cmd = OVS_VPORT_CMD_NEW,
1910           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1911           .policy = vport_policy,
1912           .doit = ovs_vport_cmd_new
1913         },
1914         { .cmd = OVS_VPORT_CMD_DEL,
1915           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1916           .policy = vport_policy,
1917           .doit = ovs_vport_cmd_del
1918         },
1919         { .cmd = OVS_VPORT_CMD_GET,
1920           .flags = 0,               /* OK for unprivileged users. */
1921           .policy = vport_policy,
1922           .doit = ovs_vport_cmd_get,
1923           .dumpit = ovs_vport_cmd_dump
1924         },
1925         { .cmd = OVS_VPORT_CMD_SET,
1926           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1927           .policy = vport_policy,
1928           .doit = ovs_vport_cmd_set,
1929         },
1930 };
1931
/**
 * struct genl_family_and_ops - one Generic Netlink family to register.
 * @family: the family itself.
 * @ops: array of operations to register with @family.
 * @n_ops: number of elements in @ops.
 * @group: multicast group to register with @family, or NULL for none.
 */
struct genl_family_and_ops {
	struct genl_family *family;
	struct genl_ops *ops;
	int n_ops;
	struct genl_multicast_group *group;
};
1938
1939 static const struct genl_family_and_ops dp_genl_families[] = {
1940         { &dp_datapath_genl_family,
1941           dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1942           &dp_datapath_multicast_group },
1943         { &dp_vport_genl_family,
1944           dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1945           &dp_vport_multicast_group },
1946         { &dp_flow_genl_family,
1947           dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1948           &dp_flow_multicast_group },
1949         { &dp_packet_genl_family,
1950           dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1951           NULL },
1952 };
1953
1954 static void dp_unregister_genl(int n_families)
1955 {
1956         int i;
1957
1958         for (i = 0; i < n_families; i++)
1959                 genl_unregister_family(dp_genl_families[i].family);
1960 }
1961
1962 static int dp_register_genl(void)
1963 {
1964         int n_registered;
1965         int err;
1966         int i;
1967
1968         n_registered = 0;
1969         for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
1970                 const struct genl_family_and_ops *f = &dp_genl_families[i];
1971
1972                 err = genl_register_family_with_ops(f->family, f->ops,
1973                                                     f->n_ops);
1974                 if (err)
1975                         goto error;
1976                 n_registered++;
1977
1978                 if (f->group) {
1979                         err = genl_register_mc_group(f->family, f->group);
1980                         if (err)
1981                                 goto error;
1982                 }
1983         }
1984
1985         err = packet_register_mc_groups();
1986         if (err)
1987                 goto error;
1988         return 0;
1989
1990 error:
1991         dp_unregister_genl(n_registered);
1992         return err;
1993 }
1994
1995 static int __init dp_init(void)
1996 {
1997         struct sk_buff *dummy_skb;
1998         int err;
1999
2000         BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
2001
2002         printk("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
2003
2004         err = tnl_init();
2005         if (err)
2006                 goto error;
2007
2008         err = flow_init();
2009         if (err)
2010                 goto error_tnl_exit;
2011
2012         err = vport_init();
2013         if (err)
2014                 goto error_flow_exit;
2015
2016         err = register_netdevice_notifier(&dp_device_notifier);
2017         if (err)
2018                 goto error_vport_exit;
2019
2020         err = dp_register_genl();
2021         if (err < 0)
2022                 goto error_unreg_notifier;
2023
2024         return 0;
2025
2026 error_unreg_notifier:
2027         unregister_netdevice_notifier(&dp_device_notifier);
2028 error_vport_exit:
2029         vport_exit();
2030 error_flow_exit:
2031         flow_exit();
2032 error_tnl_exit:
2033         tnl_exit();
2034 error:
2035         return err;
2036 }
2037
2038 static void dp_cleanup(void)
2039 {
2040         rcu_barrier();
2041         dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2042         unregister_netdevice_notifier(&dp_device_notifier);
2043         vport_exit();
2044         flow_exit();
2045         tnl_exit();
2046 }
2047
2048 module_init(dp_init);
2049 module_exit(dp_cleanup);
2050
2051 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2052 MODULE_LICENSE("GPL");