/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/module.h>
22 #include <linux/kernel.h>
23 #include <linux/uaccess.h>
24 #include <linux/completion.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_bridge.h>
27 #include <linux/netdevice.h>
28 #include <linux/rtnetlink.h>
29 #include <net/genetlink.h>
31 #include "openvswitch/brcompat-netlink.h"
34 static struct genl_family brc_genl_family;
35 static struct genl_multicast_group brc_mc_group;
37 /* Time to wait for ovs-vswitchd to respond to a datapath action, in
39 #define BRC_TIMEOUT (HZ * 5)
41 /* Mutex to serialize ovs-brcompatd callbacks. (Some callbacks naturally hold
42 * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
43 * ourselves and we don't want to hold the latter over a potentially long
45 static DEFINE_MUTEX(brc_serial);
47 /* Userspace communication. */
48 static DEFINE_SPINLOCK(brc_lock); /* Ensure atomic access to these vars. */
49 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
50 static struct sk_buff *brc_reply; /* Reply from userspace. */
51 static u32 brc_seq; /* Sequence number for current op. */
static struct sk_buff *brc_send_command(struct net *,
					struct sk_buff *,
					struct nlattr **attrs);
static int brc_send_simple_command(struct net *, struct sk_buff *);
58 static struct sk_buff *brc_make_request(int op, const char *bridge,
61 struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
65 genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
67 if (bridge && nla_put_string(skb, BRC_GENL_A_DP_NAME, bridge))
69 if (port && nla_put_string(skb, BRC_GENL_A_PORT_NAME, port))
80 static int brc_send_simple_command(struct net *net, struct sk_buff *request)
82 struct nlattr *attrs[BRC_GENL_A_MAX + 1];
83 struct sk_buff *reply;
86 reply = brc_send_command(net, request, attrs);
88 return PTR_ERR(reply);
90 error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
95 static int brc_add_del_bridge(struct net *net, char __user *uname, int add)
97 struct sk_buff *request;
100 if (!capable(CAP_NET_ADMIN))
103 if (copy_from_user(name, uname, IFNAMSIZ))
106 name[IFNAMSIZ - 1] = 0;
107 request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
112 return brc_send_simple_command(net, request);
115 static int brc_get_indices(struct net *net,
116 int op, const char *br_name,
117 int __user *uindices, int n)
119 struct nlattr *attrs[BRC_GENL_A_MAX + 1];
120 struct sk_buff *request, *reply;
130 request = brc_make_request(op, br_name, NULL);
134 reply = brc_send_command(net, request, attrs);
135 ret = PTR_ERR(reply);
139 ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
144 if (!attrs[BRC_GENL_A_IFINDEXES])
147 len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
148 indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
149 if (len % sizeof(int))
152 n = min_t(int, n, len / sizeof(int));
153 ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
161 /* Called with br_ioctl_mutex. */
162 static int brc_get_bridges(struct net *net, int __user *uindices, int n)
164 return brc_get_indices(net, BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
167 /* Legacy deviceless bridge ioctl's. Called with br_ioctl_mutex. */
168 static int old_deviceless(struct net *net, void __user *uarg)
170 unsigned long args[3];
172 if (copy_from_user(args, uarg, sizeof(args)))
176 case BRCTL_GET_BRIDGES:
177 return brc_get_bridges(net, (int __user *)args[1], args[2]);
179 case BRCTL_ADD_BRIDGE:
180 return brc_add_del_bridge(net, (void __user *)args[1], 1);
181 case BRCTL_DEL_BRIDGE:
182 return brc_add_del_bridge(net, (void __user *)args[1], 0);
188 /* Called with the br_ioctl_mutex. */
190 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
191 brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
193 struct net *net = NULL;
195 brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
201 return old_deviceless(net, uarg);
204 return brc_add_del_bridge(net, uarg, 1);
206 return brc_add_del_bridge(net, uarg, 0);
212 static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
214 struct sk_buff *request;
215 struct net_device *port;
218 if (!capable(CAP_NET_ADMIN))
221 port = __dev_get_by_index(dev_net(dev), port_ifindex);
225 /* Save name of dev and port because there's a race between the
226 * rtnl_unlock() and the brc_send_simple_command(). */
227 request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
228 dev->name, port->name);
233 err = brc_send_simple_command(dev_net(dev), request);
239 static int brc_get_bridge_info(struct net_device *dev,
240 struct __bridge_info __user *ub)
242 struct __bridge_info b;
244 memset(&b, 0, sizeof(struct __bridge_info));
246 /* First two bytes are the priority, which we should skip. This comes
247 * from struct bridge_id in br_private.h, which is unavailable to us.
249 memcpy((u8 *)&b.bridge_id + 2, dev->dev_addr, ETH_ALEN);
252 if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
258 static int brc_get_port_list(struct net_device *dev, int __user *uindices,
264 retval = brc_get_indices(dev_net(dev), BRC_GENL_C_GET_PORTS, dev->name,
272 * Format up to a page worth of forwarding table entries
273 * userbuf -- where to copy result
274 * maxnum -- maximum number of entries desired
275 * (limited to a page for sanity)
276 * offset -- number of records to skip
278 static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
279 unsigned long maxnum, unsigned long offset)
281 struct nlattr *attrs[BRC_GENL_A_MAX + 1];
282 struct sk_buff *request, *reply;
286 /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
287 if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
288 maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
290 request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
293 if (nla_put_u64(request, BRC_GENL_A_FDB_COUNT, maxnum) ||
294 nla_put_u64(request, BRC_GENL_A_FDB_SKIP, offset))
295 goto nla_put_failure;
298 reply = brc_send_command(dev_net(dev), request, attrs);
299 retval = PTR_ERR(reply);
303 retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
308 if (!attrs[BRC_GENL_A_FDB_DATA])
310 len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
311 if (len % sizeof(struct __fdb_entry) ||
312 len / sizeof(struct __fdb_entry) > maxnum)
315 retval = len / sizeof(struct __fdb_entry);
316 if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
330 /* Legacy ioctl's through SIOCDEVPRIVATE. Called with rtnl_lock. */
331 static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
333 unsigned long args[4];
335 if (copy_from_user(args, rq->ifr_data, sizeof(args)))
340 return brc_add_del_port(dev, args[1], 1);
342 return brc_add_del_port(dev, args[1], 0);
344 case BRCTL_GET_BRIDGE_INFO:
345 return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
347 case BRCTL_GET_PORT_LIST:
348 return brc_get_port_list(dev, (int __user *)args[1], args[2]);
350 case BRCTL_GET_FDB_ENTRIES:
351 return brc_get_fdb_entries(dev, (void __user *)args[1],
358 /* Called with the rtnl_lock. */
359 static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
365 err = old_dev_ioctl(dev, rq, cmd);
369 return brc_add_del_port(dev, rq->ifr_ifindex, 1);
371 return brc_add_del_port(dev, rq->ifr_ifindex, 0);
382 static struct genl_family brc_genl_family = {
383 .id = GENL_ID_GENERATE,
385 .name = BRC_GENL_FAMILY_NAME,
387 .maxattr = BRC_GENL_A_MAX,
391 static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
394 struct sk_buff *ans_skb;
397 ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
401 data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
402 0, BRC_GENL_C_QUERY_MC);
407 if (nla_put_u32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id))
408 goto nla_put_failure;
410 genlmsg_end(ans_skb, data);
411 return genlmsg_reply(ans_skb, info);
419 /* Attribute policy: what each attribute may contain. */
420 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
421 [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
422 [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
425 static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
427 unsigned long int flags;
430 if (!info->attrs[BRC_GENL_A_ERR_CODE])
433 skb = skb_clone(skb, GFP_KERNEL);
437 spin_lock_irqsave(&brc_lock, flags);
438 if (brc_seq == info->snd_seq) {
441 kfree_skb(brc_reply);
450 spin_unlock_irqrestore(&brc_lock, flags);
455 static struct genl_ops brc_genl_ops[] = {
456 { .cmd = BRC_GENL_C_QUERY_MC,
457 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
459 .doit = brc_genl_query,
461 { .cmd = BRC_GENL_C_DP_RESULT,
462 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
463 .policy = brc_genl_policy,
464 .doit = brc_genl_dp_result,
468 static struct sk_buff *brc_send_command(struct net *net,
469 struct sk_buff *request,
470 struct nlattr **attrs)
472 unsigned long int flags;
473 struct sk_buff *reply;
476 mutex_lock(&brc_serial);
478 /* Increment sequence number first, so that we ignore any replies
479 * to stale requests. */
480 spin_lock_irqsave(&brc_lock, flags);
481 nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
482 INIT_COMPLETION(brc_done);
483 spin_unlock_irqrestore(&brc_lock, flags);
485 nlmsg_end(request, nlmsg_hdr(request));
488 error = genlmsg_multicast_netns(net, request, 0,
489 brc_mc_group.id, GFP_KERNEL);
493 /* Wait for reply. */
495 if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) {
496 pr_warn("timed out waiting for userspace\n");
501 spin_lock_irqsave(&brc_lock, flags);
504 spin_unlock_irqrestore(&brc_lock, flags);
506 mutex_unlock(&brc_serial);
508 /* Re-parse message. Can't fail, since it parsed correctly once
510 error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
511 attrs, BRC_GENL_A_MAX, brc_genl_policy);
517 mutex_unlock(&brc_serial);
518 return ERR_PTR(error);
521 static int __init brc_init(void)
525 pr_info("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
527 /* Set the bridge ioctl handler */
528 brioctl_set(brc_ioctl_deviceless_stub);
530 /* Set the openvswitch device ioctl handler */
531 ovs_dp_ioctl_hook = brc_dev_ioctl;
533 /* Randomize the initial sequence number. This is not a security
534 * feature; it only helps avoid crossed wires between userspace and
535 * the kernel when the module is unloaded and reloaded. */
536 brc_seq = net_random();
538 /* Register generic netlink family to communicate changes to
540 err = genl_register_family_with_ops(&brc_genl_family,
541 brc_genl_ops, ARRAY_SIZE(brc_genl_ops));
545 strcpy(brc_mc_group.name, "brcompat");
546 err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
553 genl_unregister_family(&brc_genl_family);
555 pr_emerg("failed to install!\n");
559 static void brc_cleanup(void)
561 /* Unregister ioctl hooks */
562 ovs_dp_ioctl_hook = NULL;
565 genl_unregister_family(&brc_genl_family);
568 module_init(brc_init);
569 module_exit(brc_cleanup);
571 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
572 MODULE_AUTHOR("Nicira, Inc.");
573 MODULE_LICENSE("GPL");
575 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
577 * In kernels 2.6.36 and later, Open vSwitch can safely coexist with
578 * the Linux bridge module, but it does not make sense to load both bridge and
579 * brcompat, so this prevents it.
581 BRIDGE_MUTUAL_EXCLUSION;