From db322751d8bba32152e89eea476cb47a010616b1 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 7 Aug 2009 15:11:40 -0700 Subject: [PATCH] brcompat: Move BRCTL_GET_BRIDGES, BRCTL_GET_PORT_LIST into userspace. The Citrix QA scripts assume that "brctl show" will show a topology as if the Linux bridge were still in use; that is, as if there were one bridge per VLAN and as if bonds were network devices of their own instead of separate devices. However, we were showing the datapath topology, i.e. all VLANs and bond devices lumped together into a single datapath. This commit fixes the VLAN end of the problem, by moving the implementation of the ioctls that brctl uses into userspace in ovs-brcompatd and putting the necessary translation logic into ovs-brcompatd. By itself, this commit does not fix the problem for bonds: the port name for a bond does not (normally) under Open vSwitch exist as an actual Linux network device, and thus it has no ifindex, and so ovs-brcompatd can't pass it back to the kernel to report to brctl. This fix will have to wait for another commit. Bug NIC-19. --- datapath/brcompat.c | 98 +++++++-------- include/openvswitch/brcompat-netlink.h | 7 +- vswitchd/ovs-brcompatd.c | 166 +++++++++++++++++++++++-- 3 files changed, 202 insertions(+), 69 deletions(-) diff --git a/datapath/brcompat.c b/datapath/brcompat.c index be361ece..10b94f73 100644 --- a/datapath/brcompat.c +++ b/datapath/brcompat.c @@ -45,36 +45,6 @@ static u32 brc_seq; /* Sequence number for current op. */ static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs); static int brc_send_simple_command(struct sk_buff *); -static int -get_dp_ifindices(int *indices, int num) -{ - int i, index = 0; - - rcu_read_lock(); - for (i=0; i < ODP_MAX && index < num; i++) { - struct datapath *dp = get_dp(i); - if (!dp) - continue; - indices[index++] = dp->ports[ODPP_LOCAL]->dev->ifindex; - } - rcu_read_unlock(); - - return index; -} - -static void -get_port_ifindices(struct datapath *dp, int *ifindices, int num) -{ - struct net_bridge_port *p; - - rcu_read_lock(); - list_for_each_entry_rcu (p, &dp->port_list, node) { - if (p->port_no < num) - ifindices[p->port_no] = p->dev->ifindex; - } - rcu_read_unlock(); -} - static struct sk_buff * brc_make_request(int op, const char *bridge, const char *port) { @@ -83,7 +53,8 @@ brc_make_request(int op, const char *bridge, const char *port) goto error; genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op); - NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge); + if (bridge) + NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge); if (port) NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port); return skb; @@ -126,26 +97,57 @@ static int brc_add_del_bridge(char __user *uname, int add) return brc_send_simple_command(request); } -static int brc_get_bridges(int __user *uindices, int n) +static int brc_get_indices(int op, const char *br_name, + int __user *uindices, int n) { + struct nlattr *attrs[BRC_GENL_A_MAX + 1]; + struct sk_buff *request, *reply; int *indices; int ret; + int len; + if (n < 0) + return -EINVAL; if (n >= 2048) return -ENOMEM; - indices = kcalloc(n, sizeof(int), GFP_KERNEL); - if (indices == NULL) + request = brc_make_request(op, br_name, NULL); + if (!request) return -ENOMEM; - n = get_dp_ifindices(indices, n); + reply = brc_send_command(request, attrs); + ret = PTR_ERR(reply); + if (IS_ERR(reply)) + goto exit; + + ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]); + if (ret < 0) + goto exit_free_skb; + + ret = -EINVAL; + if (!attrs[BRC_GENL_A_IFINDEXES]) + goto exit_free_skb; + + len = nla_len(attrs[BRC_GENL_A_IFINDEXES]); + indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]); + if (len % sizeof(int)) + goto exit_free_skb; + n = min_t(int, n, len / sizeof(int)); ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n; - kfree(indices); +exit_free_skb: + kfree_skb(reply); +exit: return ret; } +/* Called with br_ioctl_mutex. */ +static int brc_get_bridges(int __user *uindices, int n) +{ + return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n); +} + /* Legacy deviceless bridge ioctl's. Called with br_ioctl_mutex. */ static int old_deviceless(void __user *uarg) @@ -238,26 +240,14 @@ brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub) static int brc_get_port_list(struct net_device *dev, int __user *uindices, int num) { - struct dp_dev *dp_dev = netdev_priv(dev); - struct datapath *dp = dp_dev->dp; - int *indices; - - if (num < 0) - return -EINVAL; - if (num == 0) - num = 256; - if (num > DP_MAX_PORTS) - num = DP_MAX_PORTS; + int retval; - indices = kcalloc(num, sizeof(int), GFP_KERNEL); - if (indices == NULL) - return -ENOMEM; + rtnl_unlock(); + retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name, + uindices, num); + rtnl_lock(); - get_port_ifindices(dp, indices, num); - if (copy_to_user(uindices, indices, num * sizeof(int))) - num = -EFAULT; - kfree(indices); - return num; + return retval; } /* diff --git a/include/openvswitch/brcompat-netlink.h b/include/openvswitch/brcompat-netlink.h index 694bdcc5..7d5ac4cf 100644 --- a/include/openvswitch/brcompat-netlink.h +++ b/include/openvswitch/brcompat-netlink.h @@ -47,8 +47,8 @@ enum { BRC_GENL_A_UNSPEC, /* - * "K:" messages appear in messages from the kernel to userspace. - * "U:" messages appear in messages from userspace to the kernel. + * "K:" attributes appear in messages from the kernel to userspace. + * "U:" attributes appear in messages from userspace to the kernel. */ /* BRC_GENL_C_DP_ADD, BRC_GENL_C_DP_DEL. */ @@ -75,6 +75,7 @@ enum { /* BRC_GENL_C_DP_RESULT. */ BRC_GENL_A_FDB_DATA, /* U: FDB records. */ + BRC_GENL_A_IFINDEXES, /* U: "int" ifindexes of bridges or ports. */ __BRC_GENL_A_MAX, BRC_GENL_A_MAX = __BRC_GENL_A_MAX - 1 @@ -96,6 +97,8 @@ enum brc_genl_command { BRC_GENL_C_QUERY_MC, /* U: Get multicast group for brcompat. */ BRC_GENL_C_SET_PROC, /* U: Set contents of file in /proc. */ BRC_GENL_C_FDB_QUERY, /* K: Read records from forwarding database. */ + BRC_GENL_C_GET_BRIDGES, /* K: Get ifindexes of all bridges. */ + BRC_GENL_C_GET_PORTS, /* K: Get ifindexes of all ports on a bridge. */ __BRC_GENL_C_MAX, BRC_GENL_C_MAX = __BRC_GENL_C_MAX - 1 diff --git a/vswitchd/ovs-brcompatd.c b/vswitchd/ovs-brcompatd.c index 3b24bc9f..a510f166 100644 --- a/vswitchd/ovs-brcompatd.c +++ b/vswitchd/ovs-brcompatd.c @@ -240,21 +240,14 @@ rewrite_and_reload_config(void) return 0; } -/* Get all the interfaces for 'bridge' as 'ifaces', breaking bonded interfaces - * down into their constituent parts. - * - * If 'vlan' < 0, all interfaces on 'bridge' are reported. If 'vlan' == 0, - * then only interfaces for trunk ports or ports with implicit VLAN 0 are - * reported. If 'vlan' > 0, only interfaces with implict VLAN 'vlan' are - * reported. */ static void -get_bridge_ifaces(const char *bridge, struct svec *ifaces, int vlan) +do_get_bridge_parts(const char *bridge, struct svec *parts, int vlan, + bool break_down_bonds) { struct svec ports; int i; svec_init(&ports); - svec_init(ifaces); cfg_get_all_keys(&ports, "bridge.%s.port", bridge); for (i = 0; i < ports.n; i++) { const char *port_name = ports.names[i]; @@ -267,19 +260,44 @@ get_bridge_ifaces(const char *bridge, struct svec *ifaces, int vlan) continue; } } - if (cfg_has_section("bonding.%s", port_name)) { + if (break_down_bonds && cfg_has_section("bonding.%s", port_name)) { struct svec slaves; svec_init(&slaves); cfg_get_all_keys(&slaves, "bonding.%s.slave", port_name); - svec_append(ifaces, &slaves); + svec_append(parts, &slaves); svec_destroy(&slaves); } else { - svec_add(ifaces, port_name); + svec_add(parts, port_name); } } svec_destroy(&ports); } +/* Add all the interfaces for 'bridge' to 'ifaces', breaking bonded interfaces + * down into their constituent parts. + * + * If 'vlan' < 0, all interfaces on 'bridge' are reported. If 'vlan' == 0, + * then only interfaces for trunk ports or ports with implicit VLAN 0 are + * reported. If 'vlan' > 0, only interfaces with implicit VLAN 'vlan' are + * reported. */ +static void +get_bridge_ifaces(const char *bridge, struct svec *ifaces, int vlan) +{ + do_get_bridge_parts(bridge, ifaces, vlan, true); +} + +/* Add all the ports for 'bridge' to 'ports'. Bonded ports are reported under + * the bond name, not broken down into their constituent interfaces. + * + * If 'vlan' < 0, all ports on 'bridge' are reported. If 'vlan' == 0, then + * only trunk ports or ports with implicit VLAN 0 are reported. If 'vlan' > 0, + * only port with implicit VLAN 'vlan' are reported. */ +static void +get_bridge_ports(const char *bridge, struct svec *ports, int vlan) +{ + do_get_bridge_parts(bridge, ports, vlan, false); +} + /* Go through the configuration file and remove any ports that no longer * exist associated with a bridge. */ static void @@ -302,6 +320,7 @@ prune_ports(void) struct svec ifaces; /* Check that each bridge interface exists. */ + svec_init(&ifaces); get_bridge_ifaces(br_name, &ifaces, -1); for (j = 0; j < ifaces.n; j++) { const char *iface_name = ifaces.names[j]; @@ -345,7 +364,6 @@ prune_ports(void) svec_destroy(&delete); } - /* Checks whether a network device named 'name' exists and returns true if so, * false otherwise. * @@ -660,6 +678,7 @@ handle_fdb_query_cmd(struct ofpbuf *buffer) /* Fetch the MAC address for each interface on the bridge, so that we can * fill in the is_local field in the response. */ + svec_init(&ifaces); get_bridge_ifaces(ovs_bridge, &ifaces, br_vlan); local_macs = xmalloc(ifaces.n * sizeof *local_macs); n_local_macs = 0; @@ -740,6 +759,119 @@ handle_fdb_query_cmd(struct ofpbuf *buffer) return 0; } +static void +send_ifindex_reply(uint32_t seq, struct svec *ifaces) +{ + struct ofpbuf *reply; + const char *iface; + size_t n_indices; + int *indices; + size_t i; + + /* Make sure that any given interface only occurs once. This shouldn't + * happen, but who knows what people put into their configuration files. */ + svec_sort_unique(ifaces); + + /* Convert 'ifaces' into ifindexes. */ + n_indices = 0; + indices = xmalloc(ifaces->n * sizeof *indices); + SVEC_FOR_EACH (i, iface, ifaces) { + int ifindex = if_nametoindex(iface); + if (ifindex) { + indices[n_indices++] = ifindex; + } + } + + /* Compose and send reply. */ + reply = compose_reply(seq, 0); + nl_msg_put_unspec(reply, BRC_GENL_A_IFINDEXES, + indices, n_indices * sizeof *indices); + send_reply(reply); + + /* Free memory. */ + free(indices); +} + +static int +handle_get_bridges_cmd(struct ofpbuf *buffer) +{ + struct svec bridges; + const char *br_name; + size_t i; + + uint32_t seq; + + int error; + + /* Parse Netlink command. + * + * The command doesn't actually have any arguments, but we need the + * sequence number to send the reply. */ + error = parse_command(buffer, &seq, NULL, NULL, NULL, NULL); + if (error) { + return error; + } + + /* Get all the real bridges and all the fake ones. */ + cfg_read(); + cfg_get_subsections(&bridges, "bridge"); + SVEC_FOR_EACH (i, br_name, &bridges) { + const char *iface_name; + struct svec ifaces; + size_t j; + + svec_init(&ifaces); + get_bridge_ifaces(br_name, &ifaces, -1); + SVEC_FOR_EACH (j, iface_name, &ifaces) { + if (cfg_get_bool(0, "iface.%s.fake-bridge", iface_name)) { + svec_add(&bridges, iface_name); + } + } + svec_destroy(&ifaces); + } + + send_ifindex_reply(seq, &bridges); + svec_destroy(&bridges); + + return 0; +} + +static int +handle_get_ports_cmd(struct ofpbuf *buffer) +{ + uint32_t seq; + + const char *linux_bridge; + char *ovs_bridge; + int br_vlan; + + struct svec ports; + + int error; + + /* Parse Netlink command. */ + error = parse_command(buffer, &seq, &linux_bridge, NULL, NULL, NULL); + if (error) { + return error; + } + + cfg_read(); + error = linux_bridge_to_ovs_bridge(linux_bridge, &ovs_bridge, &br_vlan); + if (error) { + send_simple_reply(seq, error); + return error; + } + + svec_init(&ports); + get_bridge_ports(ovs_bridge, &ports, br_vlan); + send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */ + svec_destroy(&ports); + + free(ovs_bridge); + + return 0; +} + static int brc_recv_update(void) { @@ -802,6 +934,14 @@ brc_recv_update(void) retval = handle_fdb_query_cmd(buffer); break; + case BRC_GENL_C_GET_BRIDGES: + retval = handle_get_bridges_cmd(buffer); + break; + + case BRC_GENL_C_GET_PORTS: + retval = handle_get_ports_cmd(buffer); + break; + default: retval = EPROTO; } -- 2.30.2