X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=vswitchd%2Fbridge.c;h=69c50901afbb4ac09e02b3bbdf114c3c4a2100c6;hb=0efaf4b50c99bfdc255a27bb55312744feb34cc6;hp=8d0a2ef6ec06c5101d7ae35f8d462f6f7bae43f1;hpb=2303f3b2fcbfdc8249dc2678dd3005890a81c72e;p=openvswitch diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 8d0a2ef6..69c50901 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -1,28 +1,16 @@ /* Copyright (c) 2008, 2009 Nicira Networks - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * In addition, as a special exception, Nicira Networks gives permission - * to link the code of its release of vswitchd with the OpenSSL project's - * "OpenSSL" library (or with modified versions of it that use the same - * license as the "OpenSSL" library), and distribute the linked - * executables. You must obey the GNU General Public License in all - * respects for all of the code used other than "OpenSSL". If you modify - * this file, you may extend this exception to your version of the file, - * but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ #include @@ -39,6 +27,7 @@ #include #include #include +#include #include #include "bitmap.h" #include "cfg.h" @@ -54,16 +43,18 @@ #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "ofproto/ofproto.h" +#include "packets.h" #include "poll-loop.h" #include "port-array.h" #include "proc-net-compat.h" #include "process.h" -#include "secchan/ofproto.h" #include "socket-util.h" #include "stp.h" #include "svec.h" #include "timeval.h" #include "util.h" +#include "unixctl.h" #include "vconn.h" #include "vconn-ssl.h" #include "xenserver.h" @@ -167,7 +158,7 @@ struct bridge { struct ofproto *ofproto; /* OpenFlow switch. */ /* Kernel datapath information. */ - struct dpif dpif; /* Kernel datapath. */ + struct dpif *dpif; /* Datapath. */ struct port_array ifaces; /* Indexed by kernel datapath port number. */ /* Bridge ports. */ @@ -215,6 +206,9 @@ static uint64_t bridge_pick_datapath_id(struct bridge *, const char *devname); static uint64_t dpid_from_hash(const void *, size_t nbytes); +static void bridge_unixctl_fdb_show(struct unixctl_conn *, const char *args); + +static void bond_init(void); static void bond_run(struct bridge *); static void bond_wait(struct bridge *); static void bond_rebalance_port(struct port *); @@ -224,6 +218,7 @@ static void port_create(struct bridge *, const char *name); static void port_reconfigure(struct port *); static void port_destroy(struct port *); static struct port *port_lookup(const struct bridge *, const char *name); +static struct iface *port_lookup_iface(const struct port *, const char *name); static struct port *port_from_dp_ifidx(const struct bridge *, uint16_t dp_ifidx); static void port_update_bond_compat(struct port *); @@ -266,8 +261,8 @@ bridge_get_ifaces(struct svec *svec) for (j = 0; j < port->n_ifaces; j++) { struct iface *iface = port->ifaces[j]; if (iface->dp_ifidx < 0) { - VLOG_ERR("%s interface not in dp%u, ignoring", - iface->name, dpif_id(&br->dpif)); + VLOG_ERR("%s interface not in datapath %s, ignoring", + iface->name, dpif_name(br->dpif)); } else { if (iface->dp_ifidx != ODPP_LOCAL) { svec_add(svec, iface->name); @@ -282,28 +277,37 @@ bridge_get_ifaces(struct svec *svec) void bridge_init(void) { - int retval; - int i; + struct svec dpif_names; + size_t i; - for (i = 0; i < DP_MAX; i++) { - struct dpif dpif; - char devname[16]; + unixctl_command_register("fdb/show", bridge_unixctl_fdb_show); - sprintf(devname, "dp%d", i); - retval = dpif_open(devname, &dpif); + dp_enumerate(&dpif_names); + for (i = 0; i < dpif_names.n; i++) { + const char *dpif_name = dpif_names.names[i]; + struct dpif *dpif; + int retval; + + retval = dpif_open(dpif_name, &dpif); if (!retval) { - char dpif_name[IF_NAMESIZE]; - if (dpif_get_name(&dpif, dpif_name, sizeof dpif_name) - || !cfg_has("bridge.%s.port", dpif_name)) { - dpif_delete(&dpif); + struct svec all_names; + size_t j; + + svec_init(&all_names); + dpif_get_all_names(dpif, &all_names); + for (j = 0; j < all_names.n; j++) { + if (cfg_has("bridge.%s.port", all_names.names[j])) { + goto found; + } } - dpif_close(&dpif); - } else if (retval != ENODEV) { - VLOG_ERR("failed to delete datapath dp%d: %s", - i, strerror(retval)); + dpif_delete(dpif); + found: + svec_destroy(&all_names); + dpif_close(dpif); } } + bond_init(); bridge_reconfigure(); } @@ -329,6 +333,7 @@ bridge_configure_ssl(void) static char *private_key_file; static char *certificate_file; static char *cacert_file; + struct stat s; if (config_string_change("ssl.private-key", &private_key_file)) { vconn_ssl_set_private_key_file(private_key_file); @@ -338,7 +343,13 @@ bridge_configure_ssl(void) vconn_ssl_set_certificate_file(certificate_file); } - if (config_string_change("ssl.ca-cert", &cacert_file)) { + /* We assume that even if the filename hasn't changed, if the CA cert + * file has been removed, that we want to move back into + * boot-strapping mode. This opens a small security hole, because + * the old certificate will still be trusted until vSwitch is + * restarted. We may want to address this in vconn's SSL library. */ + if (config_string_change("ssl.ca-cert", &cacert_file) + || (cacert_file && stat(cacert_file, &s) && errno == ENOENT)) { vconn_ssl_set_ca_cert_file(cacert_file, cfg_get_bool(0, "ssl.bootstrap-ca-cert")); } @@ -348,33 +359,19 @@ bridge_configure_ssl(void) void bridge_reconfigure(void) { - struct svec old_br, new_br, raw_new_br; + struct svec old_br, new_br; struct bridge *br, *next; size_t i, j; COVERAGE_INC(bridge_reconfigure); - /* Collect old bridges. */ + /* Collect old and new bridges. */ svec_init(&old_br); + svec_init(&new_br); LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { svec_add(&old_br, br->name); } - - /* Collect new bridges. */ - svec_init(&raw_new_br); - cfg_get_subsections(&raw_new_br, "bridge"); - svec_init(&new_br); - for (i = 0; i < raw_new_br.n; i++) { - const char *name = raw_new_br.names[i]; - if ((!strncmp(name, "dp", 2) && isdigit(name[2])) || - (!strncmp(name, "nl:", 3) && isdigit(name[3]))) { - VLOG_ERR("%s is not a valid bridge name (bridges may not be " - "named \"dp\" or \"nl:\" followed by a digit)", name); - } else { - svec_add(&new_br, name); - } - } - svec_destroy(&raw_new_br); + cfg_get_subsections(&new_br, "bridge"); /* Get rid of deleted bridges and add new bridges. */ svec_sort(&old_br); @@ -415,16 +412,17 @@ bridge_reconfigure(void) size_t n_dpif_ports; struct svec want_ifaces; - dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports); + dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports); bridge_get_all_ifaces(br, &want_ifaces); for (i = 0; i < n_dpif_ports; i++) { const struct odp_port *p = &dpif_ports[i]; if (!svec_contains(&want_ifaces, p->devname) && strcmp(p->devname, br->name)) { - int retval = dpif_port_del(&br->dpif, p->port); + int retval = dpif_port_del(br->dpif, p->port); if (retval) { - VLOG_ERR("failed to remove %s interface from dp%u: %s", - p->devname, dpif_id(&br->dpif), strerror(retval)); + VLOG_ERR("failed to remove %s interface from %s: %s", + p->devname, dpif_name(br->dpif), + strerror(retval)); } } } @@ -435,9 +433,8 @@ bridge_reconfigure(void) struct odp_port *dpif_ports; size_t n_dpif_ports; struct svec cur_ifaces, want_ifaces, add_ifaces; - int next_port_no; - dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports); + dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports); svec_init(&cur_ifaces); for (i = 0; i < n_dpif_ports; i++) { svec_add(&cur_ifaces, dpif_ports[i].devname); @@ -447,28 +444,20 @@ bridge_reconfigure(void) bridge_get_all_ifaces(br, &want_ifaces); svec_diff(&want_ifaces, &cur_ifaces, &add_ifaces, NULL, NULL); - next_port_no = 1; for (i = 0; i < add_ifaces.n; i++) { const char *if_name = add_ifaces.names[i]; - for (;;) { - int internal = cfg_get_bool(0, "iface.%s.internal", if_name); - int error = dpif_port_add(&br->dpif, if_name, next_port_no++, - internal ? ODP_PORT_INTERNAL : 0); - if (error != EEXIST) { - if (next_port_no >= 256) { - VLOG_ERR("ran out of valid port numbers on dp%u", - dpif_id(&br->dpif)); - goto out; - } - if (error) { - VLOG_ERR("failed to add %s interface to dp%u: %s", - if_name, dpif_id(&br->dpif), strerror(error)); - } - break; - } + int internal = cfg_get_bool(0, "iface.%s.internal", if_name); + int flags = internal ? ODP_PORT_INTERNAL : 0; + int error = dpif_port_add(br->dpif, if_name, flags, NULL); + if (error == EXFULL) { + VLOG_ERR("ran out of valid port numbers on %s", + dpif_name(br->dpif)); + break; + } else if (error) { + VLOG_ERR("failed to add %s interface to %s: %s", + if_name, dpif_name(br->dpif), strerror(error)); } } - out: svec_destroy(&cur_ifaces); svec_destroy(&want_ifaces); svec_destroy(&add_ifaces); @@ -478,8 +467,7 @@ bridge_reconfigure(void) uint64_t dpid; struct iface *local_iface = NULL; const char *devname; - uint8_t engine_type = br->dpif.minor; - uint8_t engine_id = br->dpif.minor; + uint8_t engine_type, engine_id; bool add_id_to_iface = false; struct svec nf_hosts; @@ -490,15 +478,16 @@ bridge_reconfigure(void) for (j = 0; j < port->n_ifaces; ) { struct iface *iface = port->ifaces[j]; if (iface->dp_ifidx < 0) { - VLOG_ERR("%s interface not in dp%u, dropping", - iface->name, dpif_id(&br->dpif)); + VLOG_ERR("%s interface not in %s, dropping", + iface->name, dpif_name(br->dpif)); iface_destroy(iface); } else { if (iface->dp_ifidx == ODPP_LOCAL) { local_iface = iface; } - VLOG_DBG("dp%u has interface %s on port %d", - dpif_id(&br->dpif), iface->name, iface->dp_ifidx); + VLOG_DBG("%s has interface %s on port %d", + dpif_name(br->dpif), + iface->name, iface->dp_ifidx); j++; } } @@ -526,6 +515,7 @@ bridge_reconfigure(void) ofproto_set_datapath_id(br->ofproto, dpid); /* Set NetFlow configuration on this bridge. */ + dpif_get_netflow_ids(br->dpif, &engine_type, &engine_id); if (cfg_has("netflow.%s.engine-type", br->name)) { engine_type = cfg_get_int(0, "netflow.%s.engine-type", br->name); @@ -726,10 +716,10 @@ bridge_pick_datapath_id(struct bridge *br, static uint64_t dpid_from_hash(const void *data, size_t n) { - uint8_t hash[SHA1HashSize]; + uint8_t hash[SHA1_DIGEST_SIZE]; BUILD_ASSERT_DECL(sizeof hash >= ETH_ADDR_LEN); - SHA1Bytes(data, n, hash); + sha1_bytes(data, n, hash); eth_addr_mark_random(hash); return eth_addr_to_uint64(hash); } @@ -786,6 +776,35 @@ bridge_flush(struct bridge *br) } } +/* Bridge unixctl user interface functions. */ +static void +bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + const struct bridge *br; + + br = bridge_lookup(args); + if (!br) { + unixctl_command_reply(conn, 501, "no such bridge"); + return; + } + + ds_put_cstr(&ds, " port VLAN MAC Age\n"); + if (br->ml) { + const struct mac_entry *e; + LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) { + if (e->port < 0 || e->port >= br->n_ports) { + continue; + } + ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n", + br->ports[e->port]->ifaces[0]->dp_ifidx, + e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e)); + } + } + unixctl_command_reply(conn, 200, ds_cstr(&ds)); + ds_destroy(&ds); +} + /* Bridge reconfiguration functions. */ static struct bridge * @@ -798,7 +817,7 @@ bridge_create(const char *name) br = xcalloc(1, sizeof *br); error = dpif_create(name, &br->dpif); - if (error == EEXIST) { + if (error == EEXIST || error == EBUSY) { error = dpif_open(name, &br->dpif); if (error) { VLOG_ERR("datapath %s already exists but cannot be opened: %s", @@ -806,7 +825,7 @@ bridge_create(const char *name) free(br); return NULL; } - dpif_flow_flush(&br->dpif); + dpif_flow_flush(br->dpif); } else if (error) { VLOG_ERR("failed to create datapath %s: %s", name, strerror(error)); free(br); @@ -816,8 +835,8 @@ bridge_create(const char *name) error = ofproto_create(name, &bridge_ofhooks, br, &br->ofproto); if (error) { VLOG_ERR("failed to create switch %s: %s", name, strerror(error)); - dpif_delete(&br->dpif); - dpif_close(&br->dpif); + dpif_delete(br->dpif); + dpif_close(br->dpif); free(br); return NULL; } @@ -834,7 +853,7 @@ bridge_create(const char *name) list_push_back(&all_bridges, &br->node); - VLOG_INFO("created bridge %s on dp%u", br->name, dpif_id(&br->dpif)); + VLOG_INFO("created bridge %s on %s", br->name, dpif_name(br->dpif)); return br; } @@ -849,12 +868,12 @@ bridge_destroy(struct bridge *br) port_destroy(br->ports[br->n_ports - 1]); } list_remove(&br->node); - error = dpif_delete(&br->dpif); + error = dpif_delete(br->dpif); if (error && error != ENOENT) { - VLOG_ERR("failed to delete dp%u: %s", - dpif_id(&br->dpif), strerror(error)); + VLOG_ERR("failed to delete %s: %s", + dpif_name(br->dpif), strerror(error)); } - dpif_close(&br->dpif); + dpif_close(br->dpif); ofproto_destroy(br->ofproto); free(br->controller); mac_learning_destroy(br->ml); @@ -945,9 +964,16 @@ bridge_reconfigure_one(struct bridge *br) svec_init(&new_ports); cfg_get_all_keys(&new_ports, "bridge.%s.port", br->name); svec_sort(&new_ports); - if (bridge_get_controller(br) && !svec_contains(&new_ports, br->name)) { - svec_add(&new_ports, br->name); - svec_sort(&new_ports); + if (bridge_get_controller(br)) { + char local_name[IF_NAMESIZE]; + int error; + + error = dpif_port_get_name(br->dpif, ODPP_LOCAL, + local_name, sizeof local_name); + if (!error && !svec_contains(&new_ports, local_name)) { + svec_add(&new_ports, local_name); + svec_sort(&new_ports); + } } if (!svec_is_unique(&new_ports)) { VLOG_WARN("bridge %s: %s specified twice as bridge port", @@ -1074,11 +1100,17 @@ bridge_reconfigure_controller(struct bridge *br) int rate_limit, burst_limit; if (!strcmp(controller, "discover")) { + bool update_resolv_conf = true; + + if (cfg_has("%s.update-resolv.conf", pfx)) { + update_resolv_conf = cfg_get_bool(0, "%s.update-resolv.conf", + pfx); + } ofproto_set_discovery(br->ofproto, true, cfg_get_string(0, "%s.accept-regex", pfx), - cfg_get_bool(0, "%s.update-resolv.conf", - pfx)); + update_resolv_conf); } else { + char local_name[IF_NAMESIZE]; struct netdev *netdev; bool in_band; int error; @@ -1089,7 +1121,11 @@ bridge_reconfigure_controller(struct bridge *br) ofproto_set_discovery(br->ofproto, false, NULL, NULL); ofproto_set_in_band(br->ofproto, in_band); - error = netdev_open(br->name, NETDEV_ETH_TYPE_NONE, &netdev); + error = dpif_port_get_name(br->dpif, ODPP_LOCAL, + local_name, sizeof local_name); + if (!error) { + error = netdev_open(local_name, NETDEV_ETH_TYPE_NONE, &netdev); + } if (!error) { if (cfg_is_valid(CFG_IP | CFG_REQUIRED, "%s.ip", pfx)) { struct in_addr ip, mask, gateway; @@ -1109,7 +1145,7 @@ bridge_reconfigure_controller(struct bridge *br) } if (gateway.s_addr) { - if (!netdev_add_router(gateway)) { + if (!netdev_add_router(netdev, gateway)) { VLOG_INFO("bridge %s: configured gateway "IP_FMT, br->name, IP_ARGS(&gateway.s_addr)); } @@ -1129,8 +1165,13 @@ bridge_reconfigure_controller(struct bridge *br) || !strcmp(fail_mode, "open"))); probe = cfg_get_int(0, "%s.inactivity-probe", pfx); - ofproto_set_probe_interval(br->ofproto, - probe ? probe : cfg_get_int(0, "mgmt.inactivity-probe")); + if (probe < 5) { + probe = cfg_get_int(0, "mgmt.inactivity-probe"); + if (probe < 5) { + probe = 15; + } + } + ofproto_set_probe_interval(br->ofproto, probe); max_backoff = cfg_get_int(0, "%s.max-backoff", pfx); if (!max_backoff) { @@ -1237,17 +1278,17 @@ bridge_fetch_dp_ifaces(struct bridge *br) } port_array_clear(&br->ifaces); - dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports); + dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports); for (i = 0; i < n_dpif_ports; i++) { struct odp_port *p = &dpif_ports[i]; struct iface *iface = iface_lookup(br, p->devname); if (iface) { if (iface->dp_ifidx >= 0) { - VLOG_WARN("dp%u reported interface %s twice", - dpif_id(&br->dpif), p->devname); + VLOG_WARN("%s reported interface %s twice", + dpif_name(br->dpif), p->devname); } else if (iface_from_dp_ifidx(br, p->port)) { - VLOG_WARN("dp%u reported interface %"PRIu16" twice", - dpif_id(&br->dpif), p->port); + VLOG_WARN("%s reported interface %"PRIu16" twice", + dpif_name(br->dpif), p->port); } else { port_array_set(&br->ifaces, p->port, iface); iface->dp_ifidx = p->port; @@ -1259,11 +1300,16 @@ bridge_fetch_dp_ifaces(struct bridge *br) /* Bridge packet processing functions. */ +static int +bond_hash(const uint8_t mac[ETH_ADDR_LEN]) +{ + return hash_bytes(mac, ETH_ADDR_LEN, 0) & BOND_MASK; +} + static struct bond_entry * lookup_bond_entry(const struct port *port, const uint8_t mac[ETH_ADDR_LEN]) { - size_t h = hash_bytes(mac, ETH_ADDR_LEN, 0); - return &port->bond_hash[h & BOND_MASK]; + return &port->bond_hash[bond_hash(mac)]; } static int @@ -1324,6 +1370,10 @@ bond_link_status_update(struct iface *iface, bool carrier) iface->delay_expires = LLONG_MAX; VLOG_INFO_RL(&rl, "interface %s: will not be %s", iface->name, carrier ? "disabled" : "enabled"); + } else if (carrier && port->updelay && port->active_iface < 0) { + iface->delay_expires = time_msec(); + VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no " + "other interface is up", iface->name, port->updelay); } else { int delay = carrier ? port->updelay : port->downdelay; iface->delay_expires = time_msec() + delay; @@ -1354,6 +1404,38 @@ bond_choose_active_iface(struct port *port) } } +static void +bond_enable_slave(struct iface *iface, bool enable) +{ + struct port *port = iface->port; + struct bridge *br = port->bridge; + + iface->delay_expires = LLONG_MAX; + if (enable == iface->enabled) { + return; + } + + iface->enabled = enable; + if (!iface->enabled) { + VLOG_WARN("interface %s: disabled", iface->name); + ofproto_revalidate(br->ofproto, iface->tag); + if (iface->port_ifidx == port->active_iface) { + ofproto_revalidate(br->ofproto, + port->active_iface_tag); + bond_choose_active_iface(port); + } + bond_send_learning_packets(port); + } else { + VLOG_WARN("interface %s: enabled", iface->name); + if (port->active_iface < 0) { + ofproto_revalidate(br->ofproto, port->no_ifaces_tag); + bond_choose_active_iface(port); + bond_send_learning_packets(port); + } + iface->tag = tag_create_random(); + } +} + static void bond_run(struct bridge *br) { @@ -1367,27 +1449,7 @@ bond_run(struct bridge *br) for (j = 0; j < port->n_ifaces; j++) { struct iface *iface = port->ifaces[j]; if (time_msec() >= iface->delay_expires) { - iface->delay_expires = LLONG_MAX; - iface->enabled = !iface->enabled; - VLOG_WARN("interface %s: %s", - iface->name, - iface->enabled ? "enabled" : "disabled"); - if (!iface->enabled) { - ofproto_revalidate(br->ofproto, iface->tag); - if (iface->port_ifidx == port->active_iface) { - ofproto_revalidate(br->ofproto, - port->active_iface_tag); - bond_choose_active_iface(port); - } - bond_send_learning_packets(port); - } else { - if (port->active_iface < 0) { - ofproto_revalidate(br->ofproto, port->no_ifaces_tag); - bond_choose_active_iface(port); - bond_send_learning_packets(port); - } - iface->tag = tag_create_random(); - } + bond_enable_slave(iface, !iface->enabled); } } } @@ -1721,12 +1783,32 @@ process_flow(struct bridge *br, const flow_t *flow, goto done; } - /* Drop multicast and broadcast packets on inactive bonded interfaces, to + /* Multicast (and broadcast) packets on bonds need special attention, to * avoid receiving duplicates. */ if (in_port->n_ifaces > 1 && eth_addr_is_multicast(flow->dl_dst)) { *tags |= in_port->active_iface_tag; if (in_port->active_iface != in_iface->port_ifidx) { + /* Drop all multicast packets on inactive slaves. */ goto done; + } else { + /* Drop all multicast packets for which we have learned a different + * input port, because we probably sent the packet on one slaves + * and got it back on the active slave. Broadcast ARP replies are + * an exception to this rule: the host has moved to another + * switch. */ + int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); + if (src_idx != -1 && src_idx != in_port->port_idx) { + if (packet) { + if (!is_bcast_arp_reply(flow, packet)) { + goto done; + } + } else { + /* No way to know whether it's an ARP reply, because the + * flow entry doesn't include enough information and we + * don't have a packet. Punt. */ + return false; + } + } } } @@ -1734,27 +1816,9 @@ process_flow(struct bridge *br, const flow_t *flow, out_port = FLOOD_PORT; if (br->ml) { int out_port_idx; - bool may_learn; - - if (!packet) { - /* Don't try to learn from revalidation. */ - may_learn = false; - } else if (in_port->n_ifaces > 1) { - /* If the packet arrived on a bonded port, don't learn from it - * unless we haven't learned any port at all for that address - * (because we probably sent the packet on one bonded interface and - * got it back on the other). Broadcast ARP replies are an - * exception to this rule: the host has moved to another switch. */ - int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); - may_learn = (src_idx < 0 - || src_idx == in_port->port_idx - || is_bcast_arp_reply(flow, packet)); - } else { - may_learn = true; - } - /* Learn source MAC. */ - if (may_learn) { + /* Learn source MAC (but don't try to learn from revalidation). */ + if (packet) { tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src, vlan, in_port->port_idx); if (rev_tag) { @@ -2062,7 +2126,6 @@ bond_shift_load(struct slave_balance *from, struct slave_balance *to, ofproto_revalidate(port->bridge->ofproto, hash->iface_tag); hash->iface_idx = to->iface->port_ifidx; hash->iface_tag = tag_create_random(); - } static void @@ -2255,6 +2318,286 @@ bond_send_learning_packets(struct port *port) } } +/* Bonding unixctl user interface functions. */ + +static void +bond_unixctl_list(struct unixctl_conn *conn, const char *args UNUSED) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + const struct bridge *br; + + ds_put_cstr(&ds, "bridge\tbond\tslaves\n"); + + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + size_t i; + + for (i = 0; i < br->n_ports; i++) { + const struct port *port = br->ports[i]; + if (port->n_ifaces > 1) { + size_t j; + + ds_put_format(&ds, "%s\t%s\t", br->name, port->name); + for (j = 0; j < port->n_ifaces; j++) { + const struct iface *iface = port->ifaces[j]; + if (j) { + ds_put_cstr(&ds, ", "); + } + ds_put_cstr(&ds, iface->name); + } + ds_put_char(&ds, '\n'); + } + } + } + unixctl_command_reply(conn, 200, ds_cstr(&ds)); + ds_destroy(&ds); +} + +static struct port * +bond_find(const char *name) +{ + const struct bridge *br; + + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + size_t i; + + for (i = 0; i < br->n_ports; i++) { + struct port *port = br->ports[i]; + if (!strcmp(port->name, name) && port->n_ifaces > 1) { + return port; + } + } + } + return NULL; +} + +static void +bond_unixctl_show(struct unixctl_conn *conn, const char *args) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + const struct port *port; + size_t j; + + port = bond_find(args); + if (!port) { + unixctl_command_reply(conn, 501, "no such bond"); + return; + } + + ds_put_format(&ds, "updelay: %d ms\n", port->updelay); + ds_put_format(&ds, "downdelay: %d ms\n", port->downdelay); + ds_put_format(&ds, "next rebalance: %lld ms\n", + port->bridge->bond_next_rebalance - time_msec()); + for (j = 0; j < port->n_ifaces; j++) { + const struct iface *iface = port->ifaces[j]; + struct bond_entry *be; + + /* Basic info. */ + ds_put_format(&ds, "slave %s: %s\n", + iface->name, iface->enabled ? "enabled" : "disabled"); + if (j == port->active_iface) { + ds_put_cstr(&ds, "\tactive slave\n"); + } + if (iface->delay_expires != LLONG_MAX) { + ds_put_format(&ds, "\t%s expires in %lld ms\n", + iface->enabled ? "downdelay" : "updelay", + iface->delay_expires - time_msec()); + } + + /* Hashes. */ + for (be = port->bond_hash; be <= &port->bond_hash[BOND_MASK]; be++) { + int hash = be - port->bond_hash; + struct mac_entry *me; + + if (be->iface_idx != j) { + continue; + } + + ds_put_format(&ds, "\thash %d: %lld kB load\n", + hash, be->tx_bytes / 1024); + + /* MACs. */ + if (!port->bridge->ml) { + break; + } + + LIST_FOR_EACH (me, struct mac_entry, lru_node, + &port->bridge->ml->lrus) { + uint16_t dp_ifidx; + tag_type tags = 0; + if (bond_hash(me->mac) == hash + && me->port != port->port_idx + && choose_output_iface(port, me->mac, &dp_ifidx, &tags) + && dp_ifidx == iface->dp_ifidx) + { + ds_put_format(&ds, "\t\t"ETH_ADDR_FMT"\n", + ETH_ADDR_ARGS(me->mac)); + } + } + } + } + unixctl_command_reply(conn, 200, ds_cstr(&ds)); + ds_destroy(&ds); +} + +static void +bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_) +{ + char *args = (char *) args_; + char *save_ptr = NULL; + char *bond_s, *hash_s, *slave_s; + uint8_t mac[ETH_ADDR_LEN]; + struct port *port; + struct iface *iface; + struct bond_entry *entry; + int hash; + + bond_s = strtok_r(args, " ", &save_ptr); + hash_s = strtok_r(NULL, " ", &save_ptr); + slave_s = strtok_r(NULL, " ", &save_ptr); + if (!slave_s) { + unixctl_command_reply(conn, 501, + "usage: bond/migrate BOND HASH SLAVE"); + return; + } + + port = bond_find(bond_s); + if (!port) { + unixctl_command_reply(conn, 501, "no such bond"); + return; + } + + if (sscanf(hash_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac)) + == ETH_ADDR_SCAN_COUNT) { + hash = bond_hash(mac); + } else if (strspn(hash_s, "0123456789") == strlen(hash_s)) { + hash = atoi(hash_s) & BOND_MASK; + } else { + unixctl_command_reply(conn, 501, "bad hash"); + return; + } + + iface = port_lookup_iface(port, slave_s); + if (!iface) { + unixctl_command_reply(conn, 501, "no such slave"); + return; + } + + if (!iface->enabled) { + unixctl_command_reply(conn, 501, "cannot migrate to disabled slave"); + return; + } + + entry = &port->bond_hash[hash]; + ofproto_revalidate(port->bridge->ofproto, entry->iface_tag); + entry->iface_idx = iface->port_ifidx; + entry->iface_tag = tag_create_random(); + unixctl_command_reply(conn, 200, "migrated"); +} + +static void +bond_unixctl_set_active_slave(struct unixctl_conn *conn, const char *args_) +{ + char *args = (char *) args_; + char *save_ptr = NULL; + char *bond_s, *slave_s; + struct port *port; + struct iface *iface; + + bond_s = strtok_r(args, " ", &save_ptr); + slave_s = strtok_r(NULL, " ", &save_ptr); + if (!slave_s) { + unixctl_command_reply(conn, 501, + "usage: bond/set-active-slave BOND SLAVE"); + return; + } + + port = bond_find(bond_s); + if (!port) { + unixctl_command_reply(conn, 501, "no such bond"); + return; + } + + iface = port_lookup_iface(port, slave_s); + if (!iface) { + unixctl_command_reply(conn, 501, "no such slave"); + return; + } + + if (!iface->enabled) { + unixctl_command_reply(conn, 501, "cannot make disabled slave active"); + return; + } + + if (port->active_iface != iface->port_ifidx) { + ofproto_revalidate(port->bridge->ofproto, port->active_iface_tag); + port->active_iface = iface->port_ifidx; + port->active_iface_tag = tag_create_random(); + VLOG_INFO("port %s: active interface is now %s", + port->name, iface->name); + bond_send_learning_packets(port); + unixctl_command_reply(conn, 200, "done"); + } else { + unixctl_command_reply(conn, 200, "no change"); + } +} + +static void +enable_slave(struct unixctl_conn *conn, const char *args_, bool enable) +{ + char *args = (char *) args_; + char *save_ptr = NULL; + char *bond_s, *slave_s; + struct port *port; + struct iface *iface; + + bond_s = strtok_r(args, " ", &save_ptr); + slave_s = strtok_r(NULL, " ", &save_ptr); + if (!slave_s) { + unixctl_command_reply(conn, 501, + "usage: bond/enable/disable-slave BOND SLAVE"); + return; + } + + port = bond_find(bond_s); + if (!port) { + unixctl_command_reply(conn, 501, "no such bond"); + return; + } + + iface = port_lookup_iface(port, slave_s); + if (!iface) { + unixctl_command_reply(conn, 501, "no such slave"); + return; + } + + bond_enable_slave(iface, enable); + unixctl_command_reply(conn, 501, enable ? "enabled" : "disabled"); +} + +static void +bond_unixctl_enable_slave(struct unixctl_conn *conn, const char *args) +{ + enable_slave(conn, args, true); +} + +static void +bond_unixctl_disable_slave(struct unixctl_conn *conn, const char *args) +{ + enable_slave(conn, args, false); +} + +static void +bond_init(void) +{ + unixctl_command_register("bond/list", bond_unixctl_list); + unixctl_command_register("bond/show", bond_unixctl_show); + unixctl_command_register("bond/migrate", bond_unixctl_migrate); + unixctl_command_register("bond/set-active-slave", + bond_unixctl_set_active_slave); + unixctl_command_register("bond/enable-slave", bond_unixctl_enable_slave); + unixctl_command_register("bond/disable-slave", bond_unixctl_disable_slave); +} + /* Port functions. */ static void @@ -2456,6 +2799,20 @@ port_lookup(const struct bridge *br, const char *name) return NULL; } +static struct iface * +port_lookup_iface(const struct port *port, const char *name) +{ + size_t j; + + for (j = 0; j < port->n_ifaces; j++) { + struct iface *iface = port->ifaces[j]; + if (!strcmp(iface->name, name)) { + return iface; + } + } + return NULL; +} + static void port_update_bonding(struct port *port) {