X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=vswitchd%2Fbridge.c;h=933f4afa5d9604f0de8e34081781ac3b66b582c8;hb=1a487cec005de9f77cb9d1a2492681af293f73fb;hp=8a63ef8bdcfa6aaa8a3b74b35c2875629d5becc6;hpb=07c318f40481d0fd10516193fdf472c6356f0cd3;p=openvswitch diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 8a63ef8b..933f4afa 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -131,6 +131,7 @@ struct port { tag_type active_iface_tag; /* Tag for bcast flows. */ tag_type no_ifaces_tag; /* Tag for flows when all ifaces disabled. */ int updelay, downdelay; /* Delay before iface goes up/down, in ms. */ + bool bond_compat_is_stale; /* Need to call port_update_bond_compat()? */ /* Port mirroring info. */ mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */ @@ -193,6 +194,7 @@ enum { DP_MAX = 256 }; static struct bridge *bridge_create(const char *name); static void bridge_destroy(struct bridge *); static struct bridge *bridge_lookup(const char *name); +static void bridge_unixctl_dump_flows(struct unixctl_conn *, const char *); static int bridge_run_one(struct bridge *); static void bridge_reconfigure_one(struct bridge *); static void bridge_reconfigure_controller(struct bridge *); @@ -285,6 +287,7 @@ bridge_init(void) unixctl_command_register("fdb/show", bridge_unixctl_fdb_show); + svec_init(&dpif_names); dp_enumerate(&dpif_names); for (i = 0; i < dpif_names.n; i++) { const char *dpif_name = dpif_names.names[i]; @@ -309,6 +312,9 @@ bridge_init(void) dpif_close(dpif); } } + svec_destroy(&dpif_names); + + unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows); bond_init(); bridge_reconfigure(); @@ -525,10 +531,24 @@ bridge_reconfigure(void) for (i = 0; i < add_ifaces.n; i++) { const char *if_name = add_ifaces.names[i]; - int internal = cfg_get_bool(0, "iface.%s.internal", if_name); - int flags = internal ? ODP_PORT_INTERNAL : 0; - int error = dpif_port_add(br->dpif, if_name, flags, NULL); - if (error == EXFULL) { + bool internal; + int error; + + /* It's an internal interface if it's marked that way, or if + * it's a bonded interface for which we're faking up a network + * device. */ + internal = cfg_get_bool(0, "iface.%s.internal", if_name); + if (cfg_get_bool(0, "bonding.%s.fake-iface", if_name)) { + struct port *port = port_lookup(br, if_name); + if (port && port->n_ifaces > 1) { + internal = true; + } + } + + /* Add to datapath. */ + error = dpif_port_add(br->dpif, if_name, + internal ? ODP_PORT_INTERNAL : 0, NULL); + if (error == EFBIG) { VLOG_ERR("ran out of valid port numbers on %s", dpif_name(br->dpif)); break; @@ -601,6 +621,7 @@ bridge_reconfigure(void) VLOG_ERR("bridge %s: problem setting netflow collectors", br->name); } + svec_destroy(&nf_hosts); /* Update the controller and related settings. It would be more * straightforward to call this from bridge_reconfigure_one(), but we @@ -656,31 +677,75 @@ bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], memset(ea, 0xff, sizeof ea); for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; + uint8_t iface_ea[ETH_ADDR_LEN]; + uint64_t iface_ea_u64; + struct iface *iface; + + /* Mirror output ports don't participate. */ if (port->is_mirror_output_port) { continue; } - for (j = 0; j < port->n_ifaces; j++) { - struct iface *iface = port->ifaces[j]; - uint8_t iface_ea[ETH_ADDR_LEN]; + + /* Choose the MAC address to represent the port. */ + iface_ea_u64 = cfg_get_mac(0, "port.%s.mac", port->name); + if (iface_ea_u64) { + /* User specified explicitly. */ + eth_addr_from_uint64(iface_ea_u64, iface_ea); + + /* Find the interface with this Ethernet address (if any) so that + * we can provide the correct devname to the caller. */ + iface = NULL; + for (j = 0; j < port->n_ifaces; j++) { + struct iface *candidate = port->ifaces[j]; + uint8_t candidate_ea[ETH_ADDR_LEN]; + if (!netdev_get_etheraddr(candidate->netdev, candidate_ea) + && eth_addr_equals(iface_ea, candidate_ea)) { + iface = candidate; + } + } + } else { + /* Choose the interface whose MAC address will represent the port. + * The Linux kernel bonding code always chooses the MAC address of + * the first slave added to a bond, and the Fedora networking + * scripts always add slaves to a bond in alphabetical order, so + * for compatibility we choose the interface with the name that is + * first in alphabetical order. */ + iface = port->ifaces[0]; + for (j = 1; j < port->n_ifaces; j++) { + struct iface *candidate = port->ifaces[j]; + if (strcmp(candidate->name, iface->name) < 0) { + iface = candidate; + } + } + + /* The local port doesn't count (since we're trying to choose its + * MAC address anyway). Other internal ports don't count because + * we really want a physical MAC if we can get it, and internal + * ports typically have randomly generated MACs. */ if (iface->dp_ifidx == ODPP_LOCAL || cfg_get_bool(0, "iface.%s.internal", iface->name)) { continue; } + + /* Grab MAC. */ error = netdev_get_etheraddr(iface->netdev, iface_ea); - if (!error) { - if (!eth_addr_is_multicast(iface_ea) && - !eth_addr_is_reserved(iface_ea) && - !eth_addr_is_zero(iface_ea) && - memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0) { - memcpy(ea, iface_ea, ETH_ADDR_LEN); - *hw_addr_iface = iface; - } - } else { + if (error) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_ERR_RL(&rl, "failed to obtain Ethernet address of %s: %s", iface->name, strerror(error)); + continue; } } + + /* Compare against our current choice. */ + if (!eth_addr_is_multicast(iface_ea) && + !eth_addr_is_reserved(iface_ea) && + !eth_addr_is_zero(iface_ea) && + memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0) + { + memcpy(ea, iface_ea, ETH_ADDR_LEN); + *hw_addr_iface = iface; + } } if (eth_addr_is_multicast(ea) || eth_addr_is_vif(ea)) { memcpy(ea, br->default_ea, ETH_ADDR_LEN); @@ -724,8 +789,7 @@ bridge_pick_datapath_id(struct bridge *br, if (hw_addr_iface) { int vlan; - if (!netdev_get_vlan_vid(netdev_get_name(hw_addr_iface->netdev), - &vlan)) { + if (!netdev_get_vlan_vid(hw_addr_iface->netdev, &vlan)) { /* * A bridge whose MAC address is taken from a VLAN network device * (that is, a network device created with vconfig(8) or similar @@ -990,6 +1054,27 @@ bridge_get_datapathid(const char *name) return br ? ofproto_get_datapath_id(br->ofproto) : 0; } +/* Handle requests for a listing of all flows known by the OpenFlow + * stack, including those normally hidden. */ +static void +bridge_unixctl_dump_flows(struct unixctl_conn *conn, const char *args) +{ + struct bridge *br; + struct ds results; + + br = bridge_lookup(args); + if (!br) { + unixctl_command_reply(conn, 501, "Unknown bridge"); + return; + } + + ds_init(&results); + ofproto_get_all_flows(br->ofproto, &results); + + unixctl_command_reply(conn, 200, ds_cstr(&results)); + ds_destroy(&results); +} + static int bridge_run_one(struct bridge *br) { @@ -1236,7 +1321,7 @@ bridge_reconfigure_controller(struct bridge *br) if (probe < 5) { probe = cfg_get_int(0, "mgmt.inactivity-probe"); if (probe < 5) { - probe = 15; + probe = 5; } } ofproto_set_probe_interval(br->ofproto, probe); @@ -1245,7 +1330,7 @@ bridge_reconfigure_controller(struct bridge *br) if (!max_backoff) { max_backoff = cfg_get_int(0, "mgmt.max-backoff"); if (!max_backoff) { - max_backoff = 15; + max_backoff = 8; } } ofproto_set_max_backoff(br->ofproto, max_backoff); @@ -1316,9 +1401,12 @@ bridge_get_all_ifaces(const struct bridge *br, struct svec *ifaces) struct iface *iface = port->ifaces[j]; svec_add(ifaces, iface->name); } + if (port->n_ifaces > 1 + && cfg_get_bool(0, "bonding.%s.fake-iface", port->name)) { + svec_add(ifaces, port->name); + } } - svec_sort(ifaces); - assert(svec_is_unique(ifaces)); + svec_sort_unique(ifaces); } /* For robustness, in case the administrator moves around datapath ports behind @@ -1413,6 +1501,7 @@ choose_output_iface(const struct port *port, const uint8_t *dl_src, return false; } e->iface_tag = tag_create_random(); + ((struct port *) port)->bond_compat_is_stale = true; } *tags |= e->iface_tag; iface = port->ifaces[e->iface_idx]; @@ -1502,6 +1591,7 @@ bond_enable_slave(struct iface *iface, bool enable) } iface->tag = tag_create_random(); } + port_update_bond_compat(port); } static void @@ -1511,6 +1601,12 @@ bond_run(struct bridge *br) for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; + + if (port->bond_compat_is_stale) { + port->bond_compat_is_stale = false; + port_update_bond_compat(port); + } + if (port->n_ifaces < 2) { continue; } @@ -1681,12 +1777,14 @@ compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan, for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; if (port_includes_vlan(port, m->out_vlan) - && set_dst(dst, flow, in_port, port, tags) - && !dst_is_duplicate(dsts, dst - dsts, dst)) + && set_dst(dst, flow, in_port, port, tags)) { if (port->vlan < 0) { dst->vlan = m->out_vlan; } + if (dst_is_duplicate(dsts, dst - dsts, dst)) { + continue; + } if (dst->dp_ifidx == flow->in_port && dst->vlan == vlan) { /* Don't send out input port on same VLAN. */ @@ -1860,7 +1958,7 @@ process_flow(struct bridge *br, const flow_t *flow, goto done; } else { /* Drop all multicast packets for which we have learned a different - * input port, because we probably sent the packet on one slaves + * input port, because we probably sent the packet on one slave * and got it back on the active slave. Broadcast ARP replies are * an exception to this rule: the host has moved to another * switch. */ @@ -2292,6 +2390,7 @@ bond_rebalance_port(struct port *port) } else { from++; } + port->bond_compat_is_stale = true; } } @@ -2317,10 +2416,7 @@ bond_send_learning_packets(struct port *port) ofpbuf_init(&packet, 128); error = n_packets = n_errors = 0; LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) { - static const char s[] = "Open vSwitch Bond Failover"; union ofp_action actions[2], *a; - struct eth_header *eth; - struct llc_snap_header *llc_snap; uint16_t dp_ifidx; tag_type tags = 0; flow_t flow; @@ -2331,23 +2427,6 @@ bond_send_learning_packets(struct port *port) continue; } - /* Compose packet to send. */ - ofpbuf_clear(&packet); - eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN); - llc_snap = ofpbuf_put_zeros(&packet, LLC_SNAP_HEADER_LEN); - ofpbuf_put(&packet, s, sizeof s); /* Includes null byte. */ - ofpbuf_put(&packet, e->mac, ETH_ADDR_LEN); - - memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN); - memcpy(eth->eth_src, e->mac, ETH_ADDR_LEN); - eth->eth_type = htons(packet.size - ETH_HEADER_LEN); - - llc_snap->llc.llc_dsap = LLC_DSAP_SNAP; - llc_snap->llc.llc_ssap = LLC_SSAP_SNAP; - llc_snap->llc.llc_cntl = LLC_CNTL_SNAP; - memcpy(llc_snap->snap.snap_org, "\x00\x23\x20", 3); - llc_snap->snap.snap_type = htons(0xf177); /* Random number. */ - /* Compose actions. */ memset(actions, 0, sizeof actions); a = actions; @@ -2364,6 +2443,8 @@ bond_send_learning_packets(struct port *port) /* Send packet. */ n_packets++; + compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177, + e->mac); flow_extract(&packet, ODPP_NONE, &flow); retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions, &packet); @@ -2558,6 +2639,7 @@ bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_) ofproto_revalidate(port->bridge->ofproto, entry->iface_tag); entry->iface_idx = iface->port_ifidx; entry->iface_tag = tag_create_random(); + port->bond_compat_is_stale = true; unixctl_command_reply(conn, 200, "migrated"); } @@ -2653,6 +2735,25 @@ bond_unixctl_disable_slave(struct unixctl_conn *conn, const char *args) enable_slave(conn, args, false); } +static void +bond_unixctl_hash(struct unixctl_conn *conn, const char *args) +{ + uint8_t mac[ETH_ADDR_LEN]; + uint8_t hash; + char *hash_cstr; + + if (sscanf(args, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac)) + == ETH_ADDR_SCAN_COUNT) { + hash = bond_hash(mac); + + hash_cstr = xasprintf("%u", hash); + unixctl_command_reply(conn, 200, hash_cstr); + free(hash_cstr); + } else { + unixctl_command_reply(conn, 501, "invalid mac"); + } +} + static void bond_init(void) { @@ -2663,6 +2764,7 @@ bond_init(void) bond_unixctl_set_active_slave); unixctl_command_register("bond/enable-slave", bond_unixctl_enable_slave); unixctl_command_register("bond/disable-slave", bond_unixctl_disable_slave); + unixctl_command_register("bond/hash", bond_unixctl_hash); } /* Port functions. */ @@ -2822,6 +2924,7 @@ port_destroy(struct port *port) size_t i; proc_net_compat_update_vlan(port->name, NULL, 0); + proc_net_compat_update_bond(port->name, NULL); for (i = 0; i < MAX_MIRRORS; i++) { struct mirror *m = br->mirrors[i]; @@ -2888,7 +2991,7 @@ port_update_bonding(struct port *port) if (port->bond_hash) { free(port->bond_hash); port->bond_hash = NULL; - proc_net_compat_update_bond(port->name, NULL); + port->bond_compat_is_stale = true; } } else { if (!port->bond_hash) { @@ -2903,36 +3006,66 @@ port_update_bonding(struct port *port) port->no_ifaces_tag = tag_create_random(); bond_choose_active_iface(port); } - port_update_bond_compat(port); + port->bond_compat_is_stale = true; } } static void port_update_bond_compat(struct port *port) { + struct compat_bond_hash compat_hashes[BOND_MASK + 1]; struct compat_bond bond; size_t i; if (port->n_ifaces < 2) { + proc_net_compat_update_bond(port->name, NULL); return; } bond.up = false; bond.updelay = port->updelay; bond.downdelay = port->downdelay; + + bond.n_hashes = 0; + bond.hashes = compat_hashes; + if (port->bond_hash) { + const struct bond_entry *e; + for (e = port->bond_hash; e <= &port->bond_hash[BOND_MASK]; e++) { + if (e->iface_idx >= 0 && e->iface_idx < port->n_ifaces) { + struct compat_bond_hash *cbh = &bond.hashes[bond.n_hashes++]; + cbh->hash = e - port->bond_hash; + cbh->netdev_name = port->ifaces[e->iface_idx]->name; + } + } + } + bond.n_slaves = port->n_ifaces; bond.slaves = xmalloc(port->n_ifaces * sizeof *bond.slaves); for (i = 0; i < port->n_ifaces; i++) { struct iface *iface = port->ifaces[i]; struct compat_bond_slave *slave = &bond.slaves[i]; slave->name = iface->name; - slave->up = ((iface->enabled && iface->delay_expires == LLONG_MAX) || - (!iface->enabled && iface->delay_expires != LLONG_MAX)); + + /* We need to make the same determination as the Linux bonding + * code to determine whether a slave should be consider "up". + * The Linux function bond_miimon_inspect() supports four + * BOND_LINK_* states: + * + * - BOND_LINK_UP: carrier detected, updelay has passed. + * - BOND_LINK_FAIL: carrier lost, downdelay in progress. + * - BOND_LINK_DOWN: carrier lost, downdelay has passed. + * - BOND_LINK_BACK: carrier detected, updelay in progress. + * + * The function bond_info_show_slave() only considers BOND_LINK_UP + * to be "up" and anything else to be "down". + */ + slave->up = iface->enabled && iface->delay_expires == LLONG_MAX; if (slave->up) { bond.up = true; } netdev_get_etheraddr(iface->netdev, slave->mac); } + proc_net_compat_update_bond(port->name, &bond); free(bond.slaves); } @@ -3256,6 +3389,7 @@ mirror_reconfigure_one(struct mirror *m) int *vlans; size_t i; bool mirror_all_ports; + bool any_ports_specified; /* Get output port. */ out_port_name = cfg_get_key(0, "mirror.%s.%s.output.port", @@ -3294,11 +3428,18 @@ mirror_reconfigure_one(struct mirror *m) cfg_get_all_keys(&src_ports, "%s.select.src-port", pfx); cfg_get_all_keys(&dst_ports, "%s.select.dst-port", pfx); cfg_get_all_keys(&ports, "%s.select.port", pfx); + any_ports_specified = src_ports.n || dst_ports.n || ports.n; svec_append(&src_ports, &ports); svec_append(&dst_ports, &ports); svec_destroy(&ports); prune_ports(m, &src_ports); prune_ports(m, &dst_ports); + if (any_ports_specified && !src_ports.n && !dst_ports.n) { + VLOG_ERR("%s: none of the specified ports exist; " + "disabling port mirror %s", pfx, pfx); + mirror_destroy(m); + goto exit; + } /* Get all the vlans, and drop duplicate and invalid vlans. */ svec_init(&vlan_strings); @@ -3350,6 +3491,7 @@ mirror_reconfigure_one(struct mirror *m) } /* Clean up. */ +exit: svec_destroy(&src_ports); svec_destroy(&dst_ports); free(pfx);