X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=vswitchd%2Fbridge.c;h=ac993bfce05c0036ca5d7ddba761769374630787;hb=d0c9d304ffe9282c1f75d44c86e471d614af632c;hp=da80eed7e51cbc15c559673c4ded18bedba62c07;hpb=f7ef6533d8e48565f21a67021e8477b6f5a59ae3;p=openvswitch diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index da80eed7..ac993bfc 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -43,12 +43,12 @@ #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "ofproto/ofproto.h" #include "packets.h" #include "poll-loop.h" #include "port-array.h" #include "proc-net-compat.h" #include "process.h" -#include "secchan/ofproto.h" #include "socket-util.h" #include "stp.h" #include "svec.h" @@ -71,17 +71,18 @@ struct dst { extern uint64_t mgmt_id; struct iface { + /* These members are always valid. */ struct port *port; /* Containing port. */ size_t port_ifidx; /* Index within containing port. */ - char *name; /* Host network device name. */ - int dp_ifidx; /* Index within kernel datapath. */ - - uint8_t mac[ETH_ADDR_LEN]; /* Ethernet address (all zeros if unknowns). */ - tag_type tag; /* Tag associated with this interface. */ - bool enabled; /* May be chosen for flows? */ long long delay_expires; /* Time after which 'enabled' may change. */ + + /* These members are valid only after bridge_reconfigure() causes them to + * be initialized.*/ + int dp_ifidx; /* Index within kernel datapath. */ + struct netdev *netdev; /* Network device. */ + bool enabled; /* May be chosen for flows? */ }; #define BOND_MASK 0xff @@ -130,6 +131,7 @@ struct port { tag_type active_iface_tag; /* Tag for bcast flows. */ tag_type no_ifaces_tag; /* Tag for flows when all ifaces disabled. */ int updelay, downdelay; /* Delay before iface goes up/down, in ms. */ + bool bond_compat_is_stale; /* Need to call port_update_bond_compat()? */ /* Port mirroring info. */ mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */ @@ -192,6 +194,7 @@ enum { DP_MAX = 256 }; static struct bridge *bridge_create(const char *name); static void bridge_destroy(struct bridge *); static struct bridge *bridge_lookup(const char *name); +static void bridge_unixctl_dump_flows(struct unixctl_conn *, const char *); static int bridge_run_one(struct bridge *); static void bridge_reconfigure_one(struct bridge *); static void bridge_reconfigure_controller(struct bridge *); @@ -200,12 +203,15 @@ static void bridge_fetch_dp_ifaces(struct bridge *); static void bridge_flush(struct bridge *); static void bridge_pick_local_hw_addr(struct bridge *, uint8_t ea[ETH_ADDR_LEN], - const char **devname); + struct iface **hw_addr_iface); static uint64_t bridge_pick_datapath_id(struct bridge *, const uint8_t bridge_ea[ETH_ADDR_LEN], - const char *devname); + struct iface *hw_addr_iface); +static struct iface *bridge_get_local_iface(struct bridge *); static uint64_t dpid_from_hash(const void *, size_t nbytes); +static void bridge_unixctl_fdb_show(struct unixctl_conn *, const char *args); + static void bond_init(void); static void bond_run(struct bridge *); static void bond_wait(struct bridge *); @@ -221,6 +227,7 @@ static struct port *port_from_dp_ifidx(const struct bridge *, uint16_t dp_ifidx); static void port_update_bond_compat(struct port *); static void port_update_vlan_compat(struct port *); +static void port_update_bonding(struct port *); static void mirror_create(struct bridge *, const char *name); static void mirror_destroy(struct mirror *); @@ -275,31 +282,41 @@ bridge_get_ifaces(struct svec *svec) void bridge_init(void) { - int retval; - int i; + struct svec dpif_names; + size_t i; - bond_init(); + unixctl_command_register("fdb/show", bridge_unixctl_fdb_show); - for (i = 0; i < DP_MAX; i++) { + svec_init(&dpif_names); + dp_enumerate(&dpif_names); + for (i = 0; i < dpif_names.n; i++) { + const char *dpif_name = dpif_names.names[i]; struct dpif *dpif; - char devname[16]; + int retval; - sprintf(devname, "dp%d", i); - retval = dpif_open(devname, &dpif); + retval = dpif_open(dpif_name, &dpif); if (!retval) { - char dpif_name[IF_NAMESIZE]; - if (dpif_port_get_name(dpif, ODPP_LOCAL, - dpif_name, sizeof dpif_name) - || !cfg_has("bridge.%s.port", dpif_name)) { - dpif_delete(dpif); + struct svec all_names; + size_t j; + + svec_init(&all_names); + dpif_get_all_names(dpif, &all_names); + for (j = 0; j < all_names.n; j++) { + if (cfg_has("bridge.%s.port", all_names.names[j])) { + goto found; + } } + dpif_delete(dpif); + found: + svec_destroy(&all_names); dpif_close(dpif); - } else if (retval != ENODEV) { - VLOG_ERR("failed to delete datapath dp%d: %s", - i, strerror(retval)); } } + svec_destroy(&dpif_names); + unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows); + + bond_init(); bridge_reconfigure(); } @@ -341,42 +358,105 @@ bridge_configure_ssl(void) * the old certificate will still be trusted until vSwitch is * restarted. We may want to address this in vconn's SSL library. */ if (config_string_change("ssl.ca-cert", &cacert_file) - || (stat(cacert_file, &s) && errno == ENOENT)) { + || (cacert_file && stat(cacert_file, &s) && errno == ENOENT)) { vconn_ssl_set_ca_cert_file(cacert_file, cfg_get_bool(0, "ssl.bootstrap-ca-cert")); } } #endif +/* iterate_and_prune_ifaces() callback function that opens the network device + * for 'iface', if it is not already open, and retrieves the interface's MAC + * address and carrier status. */ +static bool +init_iface_netdev(struct bridge *br UNUSED, struct iface *iface, + void *aux UNUSED) +{ + if (iface->netdev) { + return true; + } else if (!netdev_open(iface->name, NETDEV_ETH_TYPE_NONE, + &iface->netdev)) { + netdev_get_carrier(iface->netdev, &iface->enabled); + return true; + } else { + /* If the network device can't be opened, then we're not going to try + * to do anything with this interface. */ + return false; + } +} + +static bool +check_iface_dp_ifidx(struct bridge *br, struct iface *iface, void *aux UNUSED) +{ + if (iface->dp_ifidx >= 0) { + VLOG_DBG("%s has interface %s on port %d", + dpif_name(br->dpif), + iface->name, iface->dp_ifidx); + return true; + } else { + VLOG_ERR("%s interface not in %s, dropping", + iface->name, dpif_name(br->dpif)); + return false; + } +} + +static bool +set_iface_policing(struct bridge *br UNUSED, struct iface *iface, + void *aux UNUSED) +{ + int rate = cfg_get_int(0, "port.%s.ingress.policing-rate", iface->name); + int burst = cfg_get_int(0, "port.%s.ingress.policing-burst", iface->name); + netdev_set_policing(iface->netdev, rate, burst); + return true; +} + +/* Calls 'cb' for each interfaces in 'br', passing along the 'aux' argument. + * Deletes from 'br' all the interfaces for which 'cb' returns false, and then + * deletes from 'br' any ports that no longer have any interfaces. */ +static void +iterate_and_prune_ifaces(struct bridge *br, + bool (*cb)(struct bridge *, struct iface *, + void *aux), + void *aux) +{ + size_t i, j; + + for (i = 0; i < br->n_ports; ) { + struct port *port = br->ports[i]; + for (j = 0; j < port->n_ifaces; ) { + struct iface *iface = port->ifaces[j]; + if (cb(br, iface, aux)) { + j++; + } else { + iface_destroy(iface); + } + } + + if (port->n_ifaces) { + i++; + } else { + VLOG_ERR("%s port has no interfaces, dropping", port->name); + port_destroy(port); + } + } +} + void bridge_reconfigure(void) { - struct svec old_br, new_br, raw_new_br; + struct svec old_br, new_br; struct bridge *br, *next; - size_t i, j; + size_t i; COVERAGE_INC(bridge_reconfigure); - /* Collect old bridges. */ + /* Collect old and new bridges. */ svec_init(&old_br); + svec_init(&new_br); LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { svec_add(&old_br, br->name); } - - /* Collect new bridges. */ - svec_init(&raw_new_br); - cfg_get_subsections(&raw_new_br, "bridge"); - svec_init(&new_br); - for (i = 0; i < raw_new_br.n; i++) { - const char *name = raw_new_br.names[i]; - if (!strncmp(name, "dp", 2) && isdigit(name[2])) { - VLOG_ERR("%s is not a valid bridge name (bridges may not be " - "named \"dp\" followed by a digit)", name); - } else { - svec_add(&new_br, name); - } - } - svec_destroy(&raw_new_br); + cfg_get_subsections(&new_br, "bridge"); /* Get rid of deleted bridges and add new bridges. */ svec_sort(&old_br); @@ -438,7 +518,6 @@ bridge_reconfigure(void) struct odp_port *dpif_ports; size_t n_dpif_ports; struct svec cur_ifaces, want_ifaces, add_ifaces; - int next_port_no; dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports); svec_init(&cur_ifaces); @@ -450,29 +529,34 @@ bridge_reconfigure(void) bridge_get_all_ifaces(br, &want_ifaces); svec_diff(&want_ifaces, &cur_ifaces, &add_ifaces, NULL, NULL); - next_port_no = 1; for (i = 0; i < add_ifaces.n; i++) { const char *if_name = add_ifaces.names[i]; - for (;;) { - int internal = cfg_get_bool(0, "iface.%s.internal", if_name); - int error = dpif_port_add(br->dpif, if_name, next_port_no++, - internal ? ODP_PORT_INTERNAL : 0); - if (error != EEXIST) { - if (next_port_no >= 256) { - VLOG_ERR("ran out of valid port numbers on %s", - dpif_name(br->dpif)); - goto out; - } - if (error) { - VLOG_ERR("failed to add %s interface to %s: %s", - if_name, dpif_name(br->dpif), - strerror(error)); - } - break; + bool internal; + int error; + + /* It's an internal interface if it's marked that way, or if + * it's a bonded interface for which we're faking up a network + * device. */ + internal = cfg_get_bool(0, "iface.%s.internal", if_name); + if (cfg_get_bool(0, "bonding.%s.fake-iface", if_name)) { + struct port *port = port_lookup(br, if_name); + if (port && port->n_ifaces > 1) { + internal = true; } } + + /* Add to datapath. */ + error = dpif_port_add(br->dpif, if_name, + internal ? ODP_PORT_INTERNAL : 0, NULL); + if (error == EFBIG) { + VLOG_ERR("ran out of valid port numbers on %s", + dpif_name(br->dpif)); + break; + } else if (error) { + VLOG_ERR("failed to add %s interface to %s: %s", + if_name, dpif_name(br->dpif), strerror(error)); + } } - out: svec_destroy(&cur_ifaces); svec_destroy(&want_ifaces); svec_destroy(&add_ifaces); @@ -480,44 +564,22 @@ bridge_reconfigure(void) LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { uint8_t ea[8]; uint64_t dpid; - struct iface *local_iface = NULL; - const char *devname; + struct iface *local_iface; + struct iface *hw_addr_iface; uint8_t engine_type, engine_id; bool add_id_to_iface = false; struct svec nf_hosts; bridge_fetch_dp_ifaces(br); - for (i = 0; i < br->n_ports; ) { - struct port *port = br->ports[i]; + iterate_and_prune_ifaces(br, init_iface_netdev, NULL); - for (j = 0; j < port->n_ifaces; ) { - struct iface *iface = port->ifaces[j]; - if (iface->dp_ifidx < 0) { - VLOG_ERR("%s interface not in %s, dropping", - iface->name, dpif_name(br->dpif)); - iface_destroy(iface); - } else { - if (iface->dp_ifidx == ODPP_LOCAL) { - local_iface = iface; - } - VLOG_DBG("%s has interface %s on port %d", - dpif_name(br->dpif), - iface->name, iface->dp_ifidx); - j++; - } - } - if (!port->n_ifaces) { - VLOG_ERR("%s port has no interfaces, dropping", port->name); - port_destroy(port); - continue; - } - i++; - } + iterate_and_prune_ifaces(br, check_iface_dp_ifidx, NULL); /* Pick local port hardware address, datapath ID. */ - bridge_pick_local_hw_addr(br, ea, &devname); + bridge_pick_local_hw_addr(br, ea, &hw_addr_iface); + local_iface = bridge_get_local_iface(br); if (local_iface) { - int error = netdev_nodev_set_etheraddr(local_iface->name, ea); + int error = netdev_set_etheraddr(local_iface->netdev, ea); if (error) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_ERR_RL(&rl, "bridge %s: failed to set bridge " @@ -526,7 +588,7 @@ bridge_reconfigure(void) } } - dpid = bridge_pick_datapath_id(br, ea, devname); + dpid = bridge_pick_datapath_id(br, ea, hw_addr_iface); ofproto_set_datapath_id(br->ofproto, dpid); /* Set NetFlow configuration on this bridge. */ @@ -575,22 +637,24 @@ bridge_reconfigure(void) for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; port_update_vlan_compat(port); + port_update_bonding(port); } } LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { brstp_reconfigure(br); + iterate_and_prune_ifaces(br, set_iface_policing, NULL); } } static void bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], - const char **devname) + struct iface **hw_addr_iface) { uint64_t requested_ea; size_t i, j; int error; - *devname = NULL; + *hw_addr_iface = NULL; /* Did the user request a particular MAC? */ requested_ea = cfg_get_mac(0, "bridge.%s.mac", br->name); @@ -612,35 +676,79 @@ bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], memset(ea, 0xff, sizeof ea); for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; + uint8_t iface_ea[ETH_ADDR_LEN]; + uint64_t iface_ea_u64; + struct iface *iface; + + /* Mirror output ports don't participate. */ if (port->is_mirror_output_port) { continue; } - for (j = 0; j < port->n_ifaces; j++) { - struct iface *iface = port->ifaces[j]; - uint8_t iface_ea[ETH_ADDR_LEN]; + + /* Choose the MAC address to represent the port. */ + iface_ea_u64 = cfg_get_mac(0, "port.%s.mac", port->name); + if (iface_ea_u64) { + /* User specified explicitly. */ + eth_addr_from_uint64(iface_ea_u64, iface_ea); + + /* Find the interface with this Ethernet address (if any) so that + * we can provide the correct devname to the caller. */ + iface = NULL; + for (j = 0; j < port->n_ifaces; j++) { + struct iface *candidate = port->ifaces[j]; + uint8_t candidate_ea[ETH_ADDR_LEN]; + if (!netdev_get_etheraddr(candidate->netdev, candidate_ea) + && eth_addr_equals(iface_ea, candidate_ea)) { + iface = candidate; + } + } + } else { + /* Choose the interface whose MAC address will represent the port. + * The Linux kernel bonding code always chooses the MAC address of + * the first slave added to a bond, and the Fedora networking + * scripts always add slaves to a bond in alphabetical order, so + * for compatibility we choose the interface with the name that is + * first in alphabetical order. */ + iface = port->ifaces[0]; + for (j = 1; j < port->n_ifaces; j++) { + struct iface *candidate = port->ifaces[j]; + if (strcmp(candidate->name, iface->name) < 0) { + iface = candidate; + } + } + + /* The local port doesn't count (since we're trying to choose its + * MAC address anyway). Other internal ports don't count because + * we really want a physical MAC if we can get it, and internal + * ports typically have randomly generated MACs. */ if (iface->dp_ifidx == ODPP_LOCAL || cfg_get_bool(0, "iface.%s.internal", iface->name)) { continue; } - error = netdev_nodev_get_etheraddr(iface->name, iface_ea); - if (!error) { - if (!eth_addr_is_multicast(iface_ea) && - !eth_addr_is_reserved(iface_ea) && - !eth_addr_is_zero(iface_ea) && - memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0) { - memcpy(ea, iface_ea, ETH_ADDR_LEN); - *devname = iface->name; - } - } else { + + /* Grab MAC. */ + error = netdev_get_etheraddr(iface->netdev, iface_ea); + if (error) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_ERR_RL(&rl, "failed to obtain Ethernet address of %s: %s", iface->name, strerror(error)); + continue; } } + + /* Compare against our current choice. */ + if (!eth_addr_is_multicast(iface_ea) && + !eth_addr_is_reserved(iface_ea) && + !eth_addr_is_zero(iface_ea) && + memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0) + { + memcpy(ea, iface_ea, ETH_ADDR_LEN); + *hw_addr_iface = iface; + } } if (eth_addr_is_multicast(ea) || eth_addr_is_vif(ea)) { memcpy(ea, br->default_ea, ETH_ADDR_LEN); - *devname = NULL; + *hw_addr_iface = NULL; VLOG_WARN("bridge %s: using default bridge Ethernet " "address "ETH_ADDR_FMT, br->name, ETH_ADDR_ARGS(ea)); } else { @@ -651,13 +759,13 @@ bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], /* Choose and returns the datapath ID for bridge 'br' given that the bridge * Ethernet address is 'bridge_ea'. If 'bridge_ea' is the Ethernet address of - * a network device, then that network device's name must be passed in as - * 'devname'; if 'bridge_ea' was derived some other way, then 'devname' must be - * passed in as a null pointer. */ + * an interface on 'br', then that interface must be passed in as + * 'hw_addr_iface'; if 'bridge_ea' was derived some other way, then + * 'hw_addr_iface' must be passed in as a null pointer. */ static uint64_t bridge_pick_datapath_id(struct bridge *br, const uint8_t bridge_ea[ETH_ADDR_LEN], - const char *devname) + struct iface *hw_addr_iface) { /* * The procedure for choosing a bridge MAC address will, in the most @@ -678,9 +786,9 @@ bridge_pick_datapath_id(struct bridge *br, return dpid; } - if (devname) { + if (hw_addr_iface) { int vlan; - if (!netdev_get_vlan_vid(devname, &vlan)) { + if (!netdev_get_vlan_vid(hw_addr_iface->netdev, &vlan)) { /* * A bridge whose MAC address is taken from a VLAN network device * (that is, a network device created with vconfig(8) or similar @@ -790,6 +898,55 @@ bridge_flush(struct bridge *br) mac_learning_flush(br->ml); } } + +/* Returns the 'br' interface for the ODPP_LOCAL port, or null if 'br' has no + * such interface. */ +static struct iface * +bridge_get_local_iface(struct bridge *br) +{ + size_t i, j; + + for (i = 0; i < br->n_ports; i++) { + struct port *port = br->ports[i]; + for (j = 0; j < port->n_ifaces; j++) { + struct iface *iface = port->ifaces[j]; + if (iface->dp_ifidx == ODPP_LOCAL) { + return iface; + } + } + } + + return NULL; +} + +/* Bridge unixctl user interface functions. */ +static void +bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + const struct bridge *br; + + br = bridge_lookup(args); + if (!br) { + unixctl_command_reply(conn, 501, "no such bridge"); + return; + } + + ds_put_cstr(&ds, " port VLAN MAC Age\n"); + if (br->ml) { + const struct mac_entry *e; + LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) { + if (e->port < 0 || e->port >= br->n_ports) { + continue; + } + ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n", + br->ports[e->port]->ifaces[0]->dp_ifidx, + e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e)); + } + } + unixctl_command_reply(conn, 200, ds_cstr(&ds)); + ds_destroy(&ds); +} /* Bridge reconfiguration functions. */ @@ -803,7 +960,7 @@ bridge_create(const char *name) br = xcalloc(1, sizeof *br); error = dpif_create(name, &br->dpif); - if (error == EEXIST) { + if (error == EEXIST || error == EBUSY) { error = dpif_open(name, &br->dpif); if (error) { VLOG_ERR("datapath %s already exists but cannot be opened: %s", @@ -896,6 +1053,27 @@ bridge_get_datapathid(const char *name) return br ? ofproto_get_datapath_id(br->ofproto) : 0; } +/* Handle requests for a listing of all flows known by the OpenFlow + * stack, including those normally hidden. */ +static void +bridge_unixctl_dump_flows(struct unixctl_conn *conn, const char *args) +{ + struct bridge *br; + struct ds results; + + br = bridge_lookup(args); + if (!br) { + unixctl_command_reply(conn, 501, "Unknown bridge"); + return; + } + + ds_init(&results); + ofproto_get_all_flows(br->ofproto, &results); + + unixctl_command_reply(conn, 200, ds_cstr(&results)); + ds_destroy(&results); +} + static int bridge_run_one(struct bridge *br) { @@ -930,13 +1108,29 @@ bridge_get_controller(const struct bridge *br) return controller && controller[0] ? controller : NULL; } +static bool +check_duplicate_ifaces(struct bridge *br, struct iface *iface, void *ifaces_) +{ + struct svec *ifaces = ifaces_; + if (!svec_contains(ifaces, iface->name)) { + svec_add(ifaces, iface->name); + svec_sort(ifaces); + return true; + } else { + VLOG_ERR("bridge %s: %s interface is on multiple ports, " + "removing from %s", + br->name, iface->name, iface->port->name); + return false; + } +} + static void bridge_reconfigure_one(struct bridge *br) { struct svec old_ports, new_ports, ifaces; struct svec listeners, old_listeners; struct svec snoops, old_snoops; - size_t i, j; + size_t i; /* Collect old ports. */ svec_init(&old_ports); @@ -950,9 +1144,16 @@ bridge_reconfigure_one(struct bridge *br) svec_init(&new_ports); cfg_get_all_keys(&new_ports, "bridge.%s.port", br->name); svec_sort(&new_ports); - if (bridge_get_controller(br) && !svec_contains(&new_ports, br->name)) { - svec_add(&new_ports, br->name); - svec_sort(&new_ports); + if (bridge_get_controller(br)) { + char local_name[IF_NAMESIZE]; + int error; + + error = dpif_port_get_name(br->dpif, ODPP_LOCAL, + local_name, sizeof local_name); + if (!error && !svec_contains(&new_ports, local_name)) { + svec_add(&new_ports, local_name); + svec_sort(&new_ports); + } } if (!svec_is_unique(&new_ports)) { VLOG_WARN("bridge %s: %s specified twice as bridge port", @@ -987,28 +1188,7 @@ bridge_reconfigure_one(struct bridge *br) /* Check and delete duplicate interfaces. */ svec_init(&ifaces); - for (i = 0; i < br->n_ports; ) { - struct port *port = br->ports[i]; - for (j = 0; j < port->n_ifaces; ) { - struct iface *iface = port->ifaces[j]; - if (svec_contains(&ifaces, iface->name)) { - VLOG_ERR("bridge %s: %s interface is on multiple ports, " - "removing from %s", - br->name, iface->name, port->name); - iface_destroy(iface); - } else { - svec_add(&ifaces, iface->name); - svec_sort(&ifaces); - j++; - } - } - if (!port->n_ifaces) { - VLOG_ERR("%s port has no interfaces, dropping", port->name); - port_destroy(port); - } else { - i++; - } - } + iterate_and_prune_ifaces(br, check_duplicate_ifaces, &ifaces); svec_destroy(&ifaces); /* Delete all flows if we're switching from connected to standalone or vice @@ -1079,14 +1259,18 @@ bridge_reconfigure_controller(struct bridge *br) int rate_limit, burst_limit; if (!strcmp(controller, "discover")) { + bool update_resolv_conf = true; + + if (cfg_has("%s.update-resolv.conf", pfx)) { + update_resolv_conf = cfg_get_bool(0, "%s.update-resolv.conf", + pfx); + } ofproto_set_discovery(br->ofproto, true, cfg_get_string(0, "%s.accept-regex", pfx), - cfg_get_bool(0, "%s.update-resolv.conf", - pfx)); + update_resolv_conf); } else { - struct netdev *netdev; + struct iface *local_iface; bool in_band; - int error; in_band = (!cfg_is_valid(CFG_BOOL | CFG_REQUIRED, "%s.in-band", pfx) @@ -1094,33 +1278,32 @@ bridge_reconfigure_controller(struct bridge *br) ofproto_set_discovery(br->ofproto, false, NULL, NULL); ofproto_set_in_band(br->ofproto, in_band); - error = netdev_open(br->name, NETDEV_ETH_TYPE_NONE, &netdev); - if (!error) { - if (cfg_is_valid(CFG_IP | CFG_REQUIRED, "%s.ip", pfx)) { - struct in_addr ip, mask, gateway; - ip.s_addr = cfg_get_ip(0, "%s.ip", pfx); - mask.s_addr = cfg_get_ip(0, "%s.netmask", pfx); - gateway.s_addr = cfg_get_ip(0, "%s.gateway", pfx); - - netdev_turn_flags_on(netdev, NETDEV_UP, true); - if (!mask.s_addr) { - mask.s_addr = guess_netmask(ip.s_addr); - } - if (!netdev_set_in4(netdev, ip, mask)) { - VLOG_INFO("bridge %s: configured IP address "IP_FMT", " - "netmask "IP_FMT, - br->name, IP_ARGS(&ip.s_addr), - IP_ARGS(&mask.s_addr)); - } + local_iface = bridge_get_local_iface(br); + if (local_iface + && cfg_is_valid(CFG_IP | CFG_REQUIRED, "%s.ip", pfx)) { + struct netdev *netdev = local_iface->netdev; + struct in_addr ip, mask, gateway; + ip.s_addr = cfg_get_ip(0, "%s.ip", pfx); + mask.s_addr = cfg_get_ip(0, "%s.netmask", pfx); + gateway.s_addr = cfg_get_ip(0, "%s.gateway", pfx); + + netdev_turn_flags_on(netdev, NETDEV_UP, true); + if (!mask.s_addr) { + mask.s_addr = guess_netmask(ip.s_addr); + } + if (!netdev_set_in4(netdev, ip, mask)) { + VLOG_INFO("bridge %s: configured IP address "IP_FMT", " + "netmask "IP_FMT, + br->name, IP_ARGS(&ip.s_addr), + IP_ARGS(&mask.s_addr)); + } - if (gateway.s_addr) { - if (!netdev_add_router(gateway)) { - VLOG_INFO("bridge %s: configured gateway "IP_FMT, - br->name, IP_ARGS(&gateway.s_addr)); - } + if (gateway.s_addr) { + if (!netdev_add_router(netdev, gateway)) { + VLOG_INFO("bridge %s: configured gateway "IP_FMT, + br->name, IP_ARGS(&gateway.s_addr)); } } - netdev_close(netdev); } } @@ -1137,7 +1320,7 @@ bridge_reconfigure_controller(struct bridge *br) if (probe < 5) { probe = cfg_get_int(0, "mgmt.inactivity-probe"); if (probe < 5) { - probe = 15; + probe = 5; } } ofproto_set_probe_interval(br->ofproto, probe); @@ -1146,7 +1329,7 @@ bridge_reconfigure_controller(struct bridge *br) if (!max_backoff) { max_backoff = cfg_get_int(0, "mgmt.max-backoff"); if (!max_backoff) { - max_backoff = 15; + max_backoff = 8; } } ofproto_set_max_backoff(br->ofproto, max_backoff); @@ -1217,9 +1400,12 @@ bridge_get_all_ifaces(const struct bridge *br, struct svec *ifaces) struct iface *iface = port->ifaces[j]; svec_add(ifaces, iface->name); } + if (port->n_ifaces > 1 + && cfg_get_bool(0, "bonding.%s.fake-iface", port->name)) { + svec_add(ifaces, port->name); + } } - svec_sort(ifaces); - assert(svec_is_unique(ifaces)); + svec_sort_unique(ifaces); } /* For robustness, in case the administrator moves around datapath ports behind @@ -1314,6 +1500,7 @@ choose_output_iface(const struct port *port, const uint8_t *dl_src, return false; } e->iface_tag = tag_create_random(); + ((struct port *) port)->bond_compat_is_stale = true; } *tags |= e->iface_tag; iface = port->ifaces[e->iface_idx]; @@ -1339,6 +1526,10 @@ bond_link_status_update(struct iface *iface, bool carrier) iface->delay_expires = LLONG_MAX; VLOG_INFO_RL(&rl, "interface %s: will not be %s", iface->name, carrier ? "disabled" : "enabled"); + } else if (carrier && port->updelay && port->active_iface < 0) { + iface->delay_expires = time_msec(); + VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no " + "other interface is up", iface->name, port->updelay); } else { int delay = carrier ? port->updelay : port->downdelay; iface->delay_expires = time_msec() + delay; @@ -1382,7 +1573,7 @@ bond_enable_slave(struct iface *iface, bool enable) iface->enabled = enable; if (!iface->enabled) { - VLOG_WARN("interface %s: enabled", iface->name); + VLOG_WARN("interface %s: disabled", iface->name); ofproto_revalidate(br->ofproto, iface->tag); if (iface->port_ifidx == port->active_iface) { ofproto_revalidate(br->ofproto, @@ -1391,7 +1582,7 @@ bond_enable_slave(struct iface *iface, bool enable) } bond_send_learning_packets(port); } else { - VLOG_WARN("interface %s: disabled", iface->name); + VLOG_WARN("interface %s: enabled", iface->name); if (port->active_iface < 0) { ofproto_revalidate(br->ofproto, port->no_ifaces_tag); bond_choose_active_iface(port); @@ -1408,6 +1599,12 @@ bond_run(struct bridge *br) for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; + + if (port->bond_compat_is_stale) { + port->bond_compat_is_stale = false; + port_update_bond_compat(port); + } + if (port->n_ifaces < 2) { continue; } @@ -1578,12 +1775,14 @@ compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan, for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; if (port_includes_vlan(port, m->out_vlan) - && set_dst(dst, flow, in_port, port, tags) - && !dst_is_duplicate(dsts, dst - dsts, dst)) + && set_dst(dst, flow, in_port, port, tags)) { if (port->vlan < 0) { dst->vlan = m->out_vlan; } + if (dst_is_duplicate(dsts, dst - dsts, dst)) { + continue; + } if (dst->dp_ifidx == flow->in_port && dst->vlan == vlan) { /* Don't send out input port on same VLAN. */ @@ -1757,7 +1956,7 @@ process_flow(struct bridge *br, const flow_t *flow, goto done; } else { /* Drop all multicast packets for which we have learned a different - * input port, because we probably sent the packet on one slaves + * input port, because we probably sent the packet on one slave * and got it back on the active slave. Broadcast ARP replies are * an exception to this rule: the host has moved to another * switch. */ @@ -1856,7 +2055,6 @@ bridge_port_changed_ofhook_cb(enum ofp_port_reason reason, bridge_flush(br); } else { - memcpy(iface->mac, opp->hw_addr, ETH_ADDR_LEN); if (port->n_ifaces > 1) { bool up = !(opp->state & OFPPS_LINK_DOWN); bond_link_status_update(iface, up); @@ -2190,6 +2388,7 @@ bond_rebalance_port(struct port *port) } else { from++; } + port->bond_compat_is_stale = true; } } @@ -2431,8 +2630,8 @@ bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_) return; } - if (sscanf(hash_s, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8, - &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) == 6) { + if (sscanf(hash_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac)) + == ETH_ADDR_SCAN_COUNT) { hash = bond_hash(mac); } else if (strspn(hash_s, "0123456789") == strlen(hash_s)) { hash = atoi(hash_s) & BOND_MASK; @@ -2456,6 +2655,7 @@ bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_) ofproto_revalidate(port->bridge->ofproto, entry->iface_tag); entry->iface_idx = iface->port_ifidx; entry->iface_tag = tag_create_random(); + port->bond_compat_is_stale = true; unixctl_command_reply(conn, 200, "migrated"); } @@ -2551,6 +2751,25 @@ bond_unixctl_disable_slave(struct unixctl_conn *conn, const char *args) enable_slave(conn, args, false); } +static void +bond_unixctl_hash(struct unixctl_conn *conn, const char *args) +{ + uint8_t mac[ETH_ADDR_LEN]; + uint8_t hash; + char *hash_cstr; + + if (sscanf(args, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac)) + == ETH_ADDR_SCAN_COUNT) { + hash = bond_hash(mac); + + hash_cstr = xasprintf("%u", hash); + unixctl_command_reply(conn, 200, hash_cstr); + free(hash_cstr); + } else { + unixctl_command_reply(conn, 501, "invalid mac"); + } +} + static void bond_init(void) { @@ -2561,6 +2780,7 @@ bond_init(void) bond_unixctl_set_active_slave); unixctl_command_register("bond/enable-slave", bond_unixctl_enable_slave); unixctl_command_register("bond/disable-slave", bond_unixctl_disable_slave); + unixctl_command_register("bond/hash", bond_unixctl_hash); } /* Port functions. */ @@ -2720,6 +2940,7 @@ port_destroy(struct port *port) size_t i; proc_net_compat_update_vlan(port->name, NULL, 0); + proc_net_compat_update_bond(port->name, NULL); for (i = 0; i < MAX_MIRRORS; i++) { struct mirror *m = br->mirrors[i]; @@ -2786,7 +3007,7 @@ port_update_bonding(struct port *port) if (port->bond_hash) { free(port->bond_hash); port->bond_hash = NULL; - proc_net_compat_update_bond(port->name, NULL); + port->bond_compat_is_stale = true; } } else { if (!port->bond_hash) { @@ -2801,23 +3022,39 @@ port_update_bonding(struct port *port) port->no_ifaces_tag = tag_create_random(); bond_choose_active_iface(port); } - port_update_bond_compat(port); + port->bond_compat_is_stale = true; } } static void port_update_bond_compat(struct port *port) { + struct compat_bond_hash compat_hashes[BOND_MASK + 1]; struct compat_bond bond; size_t i; if (port->n_ifaces < 2) { + proc_net_compat_update_bond(port->name, NULL); return; } bond.up = false; bond.updelay = port->updelay; bond.downdelay = port->downdelay; + + bond.n_hashes = 0; + bond.hashes = compat_hashes; + if (port->bond_hash) { + const struct bond_entry *e; + for (e = port->bond_hash; e <= &port->bond_hash[BOND_MASK]; e++) { + if (e->iface_idx >= 0 && e->iface_idx < port->n_ifaces) { + struct compat_bond_hash *cbh = &bond.hashes[bond.n_hashes++]; + cbh->hash = e - port->bond_hash; + cbh->netdev_name = port->ifaces[e->iface_idx]->name; + } + } + } + bond.n_slaves = port->n_ifaces; bond.slaves = xmalloc(port->n_ifaces * sizeof *bond.slaves); for (i = 0; i < port->n_ifaces; i++) { @@ -2829,8 +3066,9 @@ port_update_bond_compat(struct port *port) if (slave->up) { bond.up = true; } - memcpy(slave->mac, iface->mac, ETH_ADDR_LEN); + netdev_get_etheraddr(iface->netdev, slave->mac); } + proc_net_compat_update_bond(port->name, &bond); free(bond.slaves); } @@ -2859,7 +3097,8 @@ port_update_vlan_compat(struct port *port) && p->n_ifaces && (!vlandev_name || strcmp(p->name, vlandev_name) <= 0)) { - const uint8_t *ea = p->ifaces[0]->mac; + uint8_t ea[ETH_ADDR_LEN]; + netdev_get_etheraddr(p->ifaces[0]->netdev, ea); if (!eth_addr_is_multicast(ea) && !eth_addr_is_reserved(ea) && !eth_addr_is_zero(ea)) { @@ -2885,9 +3124,7 @@ iface_create(struct port *port, const char *name) iface->dp_ifidx = -1; iface->tag = tag_create_random(); iface->delay_expires = LLONG_MAX; - - netdev_nodev_get_etheraddr(name, iface->mac); - netdev_nodev_get_carrier(name, &iface->enabled); + iface->netdev = NULL; if (port->n_ifaces >= port->allocated_ifaces) { port->ifaces = x2nrealloc(port->ifaces, &port->allocated_ifaces, @@ -2900,7 +3137,6 @@ iface_create(struct port *port, const char *name) VLOG_DBG("attached network device %s to port %s", iface->name, port->name); - port_update_bonding(port); bridge_flush(port->bridge); } @@ -2920,6 +3156,7 @@ iface_destroy(struct iface *iface) del = port->ifaces[iface->port_ifidx] = port->ifaces[--port->n_ifaces]; del->port_ifidx = iface->port_ifidx; + netdev_close(iface->netdev); free(iface->name); free(iface); @@ -2929,7 +3166,6 @@ iface_destroy(struct iface *iface) bond_send_learning_packets(port); } - port_update_bonding(port); bridge_flush(port->bridge); } } @@ -3156,6 +3392,7 @@ mirror_reconfigure_one(struct mirror *m) int *vlans; size_t i; bool mirror_all_ports; + bool any_ports_specified; /* Get output port. */ out_port_name = cfg_get_key(0, "mirror.%s.%s.output.port", @@ -3194,11 +3431,18 @@ mirror_reconfigure_one(struct mirror *m) cfg_get_all_keys(&src_ports, "%s.select.src-port", pfx); cfg_get_all_keys(&dst_ports, "%s.select.dst-port", pfx); cfg_get_all_keys(&ports, "%s.select.port", pfx); + any_ports_specified = src_ports.n || dst_ports.n || ports.n; svec_append(&src_ports, &ports); svec_append(&dst_ports, &ports); svec_destroy(&ports); prune_ports(m, &src_ports); prune_ports(m, &dst_ports); + if (any_ports_specified && !src_ports.n && !dst_ports.n) { + VLOG_ERR("%s: none of the specified ports exist; " + "disabling port mirror %s", pfx, pfx); + mirror_destroy(m); + goto exit; + } /* Get all the vlans, and drop duplicate and invalid vlans. */ svec_init(&vlan_strings); @@ -3250,6 +3494,7 @@ mirror_reconfigure_one(struct mirror *m) } /* Clean up. */ +exit: svec_destroy(&src_ports); svec_destroy(&dst_ports); free(pfx); @@ -3268,23 +3513,25 @@ brstp_send_bpdu(struct ofpbuf *pkt, int port_no, void *br_) if (!iface) { VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d", br->name, port_no); - } else if (eth_addr_is_zero(iface->mac)) { - VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d with unknown MAC", - br->name, port_no); } else { - union ofp_action action; struct eth_header *eth = pkt->l2; - flow_t flow; - memcpy(eth->eth_src, iface->mac, ETH_ADDR_LEN); + netdev_get_etheraddr(iface->netdev, eth->eth_src); + if (eth_addr_is_zero(eth->eth_src)) { + VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d " + "with unknown MAC", br->name, port_no); + } else { + union ofp_action action; + flow_t flow; - memset(&action, 0, sizeof action); - action.type = htons(OFPAT_OUTPUT); - action.output.len = htons(sizeof action); - action.output.port = htons(port_no); + memset(&action, 0, sizeof action); + action.type = htons(OFPAT_OUTPUT); + action.output.len = htons(sizeof action); + action.output.port = htons(port_no); - flow_extract(pkt, ODPP_NONE, &flow); - ofproto_send_packet(br->ofproto, &flow, &action, 1, pkt); + flow_extract(pkt, ODPP_NONE, &flow); + ofproto_send_packet(br->ofproto, &flow, &action, 1, pkt); + } } ofpbuf_delete(pkt); }