From 4ab581b949f787d3bb37b887a3e80c9b3ab60230 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 20 Mar 2009 13:50:51 -0700 Subject: [PATCH] vswitch: Fix duplicated DPIDs observed under XenServer. The procedure for choosing a bridge MAC address will, in the most ordinary case, also choose a unique MAC that we can use as a datapath ID. In some special cases, though, multiple bridges will end up with the same MAC address. This is OK for the bridges, but it will confuse the OpenFlow controller, because each datapath needs a unique datapath ID, so this commit adds some support for two common special cases on XenServer where DPIDs were not unique. It is also very desirable that they be stable from one run to the next, so that policy set on a datapath "sticks". The special cases handled by this commit are: 1. A bridge whose MAC address is taken from a VLAN network device (that is, a network device created with vconfig(8) or similar tool) will have the same MAC address as a bridge on the VLAN device's physical network device. We handle this case by hashing the physical network device MAC along with the VLAN identifier. 2. A purely internal bridge, that is, one that has no non-virtual network devices on it at all, is more difficult because it has no natural unique identifier at all. When the host is a XenServer, we handle this case by asking "xe" for the internal bridge's plaintext name (e.g. "Internal Network 1") and hashing that together with the XenServer's host UUID. When the host is not a XenServer, we punt by using a random MAC address on each run. --- lib/vlog-modules.def | 1 + vswitchd/automake.mk | 4 +- vswitchd/bridge.c | 126 +++++++++++++++++++++++++++++++++++--- vswitchd/xe.c | 140 +++++++++++++++++++++++++++++++++++++++++++ vswitchd/xe.h | 32 ++++++++++ 5 files changed, 293 insertions(+), 10 deletions(-) create mode 100644 vswitchd/xe.c create mode 100644 vswitchd/xe.h diff --git a/lib/vlog-modules.def b/lib/vlog-modules.def index 573da753..9ce1e822 100644 --- a/lib/vlog-modules.def +++ b/lib/vlog-modules.def @@ -53,6 +53,7 @@ VLOG_MODULE(vlog) VLOG_MODULE(vlog_socket) VLOG_MODULE(wcelim) VLOG_MODULE(vswitchd) +VLOG_MODULE(xe) #ifdef HAVE_EXT #include "ext/vlogext-modules.def" diff --git a/vswitchd/automake.mk b/vswitchd/automake.mk index 7594b3eb..143d4fcc 100644 --- a/vswitchd/automake.mk +++ b/vswitchd/automake.mk @@ -9,7 +9,9 @@ vswitchd_vswitchd_SOURCES = \ vswitchd/bridge.h \ vswitchd/mgmt.c \ vswitchd/mgmt.h \ - vswitchd/vswitchd.c + vswitchd/vswitchd.c \ + vswitchd/xe.c \ + vswitchd/xe.h vswitchd_vswitchd_LDADD = \ secchan/libsecchan.a \ lib/libopenflow.a \ diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 0248c8dd..e202ceec 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -65,6 +65,7 @@ #include "util.h" #include "vconn.h" #include "vconn-ssl.h" +#include "xe.h" #include "xtoxll.h" #define THIS_MODULE VLM_bridge @@ -203,7 +204,12 @@ static void bridge_get_all_ifaces(const struct bridge *, struct svec *ifaces); static void bridge_fetch_dp_ifaces(struct bridge *); static void bridge_flush(struct bridge *); static void bridge_pick_local_hw_addr(struct bridge *, - uint8_t ea[ETH_ADDR_LEN]); + uint8_t ea[ETH_ADDR_LEN], + const char **devname); +static uint64_t bridge_pick_datapath_id(struct bridge *, + const uint8_t bridge_ea[ETH_ADDR_LEN], + const char *devname); +static uint64_t dpid_from_hash(const void *, size_t nbytes); static void bond_run(struct bridge *); static void bond_wait(struct bridge *); @@ -434,6 +440,7 @@ bridge_reconfigure(void) for (i = 0; i < br->n_ports; ) { struct port *port = br->ports[i]; struct iface *local_iface = NULL; + const char *devname; uint8_t ea[8]; uint64_t dpid; struct svec nf_hosts; @@ -459,8 +466,8 @@ bridge_reconfigure(void) continue; } - /* Pick local port hardware address. */ - bridge_pick_local_hw_addr(br, ea); + /* Pick local port hardware address, datapath ID. */ + bridge_pick_local_hw_addr(br, ea, &devname); if (local_iface) { int error = netdev_nodev_set_etheraddr(local_iface->name, ea); if (error) { @@ -472,11 +479,7 @@ bridge_reconfigure(void) } } - /* Pick datapath ID. */ - dpid = cfg_get_dpid(0, "bridge.%s.datapath-id", br->name); - if (!dpid) { - dpid = eth_addr_to_uint64(ea); - } + dpid = bridge_pick_datapath_id(br, ea, devname); ofproto_set_datapath_id(br->ofproto, dpid); /* Set NetFlow configuration on this bridge. */ @@ -502,12 +505,15 @@ bridge_reconfigure(void) } static void -bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN]) +bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], + const char **devname) { uint64_t requested_ea; size_t i, j; int error; + *devname = NULL; + /* Did the user request a particular MAC? */ requested_ea = cfg_get_mac(0, "bridge.%s.mac", br->name); if (requested_ea) { @@ -544,6 +550,7 @@ bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN]) !eth_addr_is_zero(iface_ea) && memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0) { memcpy(ea, iface_ea, ETH_ADDR_LEN); + *devname = iface->name; } } else { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -554,6 +561,7 @@ bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN]) } if (eth_addr_is_multicast(ea) || eth_addr_is_vif(ea)) { memcpy(ea, br->default_ea, ETH_ADDR_LEN); + *devname = NULL; VLOG_WARN("bridge %s: using default bridge Ethernet " "address "ETH_ADDR_FMT, br->name, ETH_ADDR_ARGS(ea)); } else { @@ -562,6 +570,106 @@ bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN]) } } +/* Choose and returns the datapath ID for bridge 'br' given that the bridge + * Ethernet address is 'bridge_ea'. If 'bridge_ea' is the Ethernet address of + * a network device, then that network device's name must be passed in as + * 'devname'; if 'bridge_ea' was derived some other way, then 'devname' must be + * passed in as a null pointer. */ +static uint64_t +bridge_pick_datapath_id(struct bridge *br, + const uint8_t bridge_ea[ETH_ADDR_LEN], + const char *devname) +{ + /* + * The procedure for choosing a bridge MAC address will, in the most + * ordinary case, also choose a unique MAC that we can use as a datapath + * ID. In some special cases, though, multiple bridges will end up with + * the same MAC address. This is OK for the bridges, but it will confuse + * the OpenFlow controller, because each datapath needs a unique datapath + * ID. + * + * Datapath IDs must be unique. It is also very desirable that they be + * stable from one run to the next, so that policy set on a datapath + * "sticks". + */ + uint64_t dpid; + + dpid = cfg_get_dpid(0, "bridge.%s.datapath-id", br->name); + if (dpid) { + return dpid; + } + + if (devname) { + int vlan; + if (!netdev_get_vlan_vid(devname, &vlan)) { + /* + * A bridge whose MAC address is taken from a VLAN network device + * (that is, a network device created with vconfig(8) or similar + * tool) will have the same MAC address as a bridge on the VLAN + * device's physical network device. + * + * Handle this case by hashing the physical network device MAC + * along with the VLAN identifier. + */ + uint8_t buf[ETH_ADDR_LEN + 2]; + memcpy(buf, bridge_ea, ETH_ADDR_LEN); + buf[ETH_ADDR_LEN] = vlan >> 8; + buf[ETH_ADDR_LEN + 1] = vlan; + return dpid_from_hash(buf, sizeof buf); + } else { + /* + * Assume that this bridge's MAC address is unique, since it + * doesn't fit any of the cases we handle specially. + */ + } + } else { + /* + * A purely internal bridge, that is, one that has no non-virtual + * network devices on it at all, is more difficult because it has no + * natural unique identifier at all. + * + * When the host is a XenServer, we handle this case by asking "xe" for + * the internal bridge's plaintext name (e.g. "Internal Network 1") and + * hashing that together with the XenServer's host UUID. + * + * When the host is not a XenServer, we punt by using a random MAC + * address on each run. + */ + char *host_uuid; + char *network_query, *network_name; + + host_uuid = xe_query_simple("host-list params=uuid"); + network_query = xasprintf("network-list 'bridge=%s' params=name-label", + br->name); + network_name = xe_query_simple(network_query); + free(network_query); + + if (host_uuid && network_name) { + char *combined = xasprintf("%s%s", host_uuid, network_name); + dpid = dpid_from_hash(combined, strlen(combined)); + free(combined); + free(host_uuid); + free(network_name); + return dpid; + } + free(host_uuid); + free(network_name); + } + + return eth_addr_to_uint64(bridge_ea); +} + +static uint64_t +dpid_from_hash(const void *data, size_t n) +{ + uint8_t hash[SHA1HashSize]; + + BUILD_ASSERT_DECL(sizeof hash >= ETH_ADDR_LEN); + SHA1Bytes(data, n, hash); + eth_addr_mark_random(hash); + return eth_addr_to_uint64(hash); +} + int bridge_run(void) { diff --git a/vswitchd/xe.c b/vswitchd/xe.c new file mode 100644 index 00000000..ca623417 --- /dev/null +++ b/vswitchd/xe.c @@ -0,0 +1,140 @@ +/* Copyright (c) 2009 Nicira Networks + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * In addition, as a special exception, Nicira Networks gives permission + * to link the code of its release of vswitchd with the OpenSSL project's + * "OpenSSL" library (or with modified versions of it that use the same + * license as the "OpenSSL" library), and distribute the linked + * executables. You must obey the GNU General Public License in all + * respects for all of the code used other than "OpenSSL". If you modify + * this file, you may extend this exception to your version of the file, + * but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. + */ + +#include +#include "xe.h" +#include +#include +#include +#include +#include +#include "dynamic-string.h" +#include "process.h" + +#include "vlog.h" +#define THIS_MODULE VLM_xe + +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + +static const char *find_xe(void); + +char * +xe_query_simple(const char *query) +{ + const char *binary; + char *cmd = NULL; + FILE *stream = NULL; + struct ds line = DS_EMPTY_INITIALIZER; + char *answer = NULL; + const char *p; + size_t len; + int status; + + binary = find_xe(); + if (!binary) { + goto exit; + } + + cmd = xasprintf("%s %s", binary, query); + stream = popen(cmd, "r"); + if (!stream) { + VLOG_ERR_RL(&rl, "failed to start \"%s\": %s", cmd, strerror(errno)); + goto exit; + } + + if (ds_get_line(&line, stream)) { + if (ferror(stream)) { + VLOG_ERR_RL(&rl, "error reading output from \"%s\": %s", + cmd, strerror(errno)); + } else { + VLOG_ERR_RL(&rl, "end of file reading output from \"%s\"", cmd); + } + goto exit; + } + + p = strchr(ds_cstr(&line), ':'); + if (!p) { + VLOG_ERR_RL(&rl, "parse error reading output from \"%s\" " + "(\"%s\" does not contain a colon)", cmd, line.string); + goto exit; + } + + p++; + while (isspace((unsigned char) *p)) { + p++; + } + len = strlen(p); + while (len > 0 && isspace((unsigned char) p[len - 1])) { + len--; + } + answer = xmemdup0(p, len); + + status = pclose(stream); + stream = NULL; + if (status == -1) { + VLOG_WARN_RL(&rl, "pclose of \"%s\" command failed: %s", + cmd, strerror(errno)); + } else if (status) { + char *msg = process_status_msg(status); + VLOG_WARN_RL(&rl, "pclose(\"%s\") reported subprocess failure: %s", + cmd, msg); + free(msg); + } + +exit: + if (answer) { + VLOG_DBG("\"%s\" answered \"%s\"", query, answer); + } + free(cmd); + if (stream) { + pclose(stream); + } + ds_destroy(&line); + return answer; +} + +static const char * +find_xe(void) +{ + static char *xe_binary; + static bool searched = false; + if (!searched) { + searched = true; + if (getenv("XE")) { + xe_binary = getenv("XE"); + VLOG_INFO("using xe binary %s from XE", + xe_binary); + } else { + xe_binary = process_search_path("xe"); + if (xe_binary) { + VLOG_INFO("using xe binary %s (from PATH)", xe_binary); + } else { + VLOG_WARN("xe binary not found in PATH or XE"); + } + } + } + return xe_binary; +} diff --git a/vswitchd/xe.h b/vswitchd/xe.h new file mode 100644 index 00000000..deb6ede9 --- /dev/null +++ b/vswitchd/xe.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2009 Nicira Networks + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * In addition, as a special exception, Nicira Networks gives permission + * to link the code of its release of vswitchd with the OpenSSL project's + * "OpenSSL" library (or with modified versions of it that use the same + * license as the "OpenSSL" library), and distribute the linked + * executables. You must obey the GNU General Public License in all + * respects for all of the code used other than "OpenSSL". If you modify + * this file, you may extend this exception to your version of the file, + * but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. + */ + +#ifndef VSWITCHD_XE_H +#define VSWITCHD_XE_H 1 + +char *xe_query_simple(const char *query); + +#endif /* xe.h */ -- 2.30.2