X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=vswitchd%2Fbridge.c;h=10c6fee25e85ae9c28985791d7029b4b8a9b3a63;hb=c93b1d6a4c7f96c5f75f7ec0972fe62e94d369dc;hp=ff5d35298af64f31933219e14b56a285c3b2fea5;hpb=c2633c26a1066c394cf71d579819bb230ee281b0;p=openvswitch diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index ff5d3529..10c6fee2 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -1,5 +1,5 @@ /* Copyright (c) 2008, 2009 Nicira Networks - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or @@ -54,6 +54,7 @@ #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "packets.h" #include "poll-loop.h" #include "port-array.h" #include "proc-net-compat.h" @@ -64,6 +65,7 @@ #include "svec.h" #include "timeval.h" #include "util.h" +#include "unixctl.h" #include "vconn.h" #include "vconn-ssl.h" #include "xenserver.h" @@ -215,14 +217,17 @@ static uint64_t bridge_pick_datapath_id(struct bridge *, const char *devname); static uint64_t dpid_from_hash(const void *, size_t nbytes); +static void bond_init(void); static void bond_run(struct bridge *); static void bond_wait(struct bridge *); static void bond_rebalance_port(struct port *); +static void bond_send_learning_packets(struct port *); static void port_create(struct bridge *, const char *name); static void port_reconfigure(struct port *); static void port_destroy(struct port *); static struct port *port_lookup(const struct bridge *, const char *name); +static struct iface *port_lookup_iface(const struct port *, const char *name); static struct port *port_from_dp_ifidx(const struct bridge *, uint16_t dp_ifidx); static void port_update_bond_compat(struct port *); @@ -284,6 +289,8 @@ bridge_init(void) int retval; int i; + bond_init(); + for (i = 0; i < DP_MAX; i++) { struct dpif dpif; char devname[16]; @@ 
-482,7 +489,6 @@ bridge_reconfigure(void) bool add_id_to_iface = false; struct svec nf_hosts; - bridge_fetch_dp_ifaces(br); for (i = 0; i < br->n_ports; ) { struct port *port = br->ports[i]; @@ -1259,11 +1265,16 @@ bridge_fetch_dp_ifaces(struct bridge *br) /* Bridge packet processing functions. */ +static int +bond_hash(const uint8_t mac[ETH_ADDR_LEN]) +{ + return hash_bytes(mac, ETH_ADDR_LEN, 0) & BOND_MASK; +} + static struct bond_entry * lookup_bond_entry(const struct port *port, const uint8_t mac[ETH_ADDR_LEN]) { - size_t h = hash_bytes(mac, ETH_ADDR_LEN, 0); - return &port->bond_hash[h & BOND_MASK]; + return &port->bond_hash[bond_hash(mac)]; } static int @@ -1279,7 +1290,7 @@ bond_choose_iface(const struct port *port) } static bool -choose_output_iface(const struct port *port, const flow_t *flow, +choose_output_iface(const struct port *port, const uint8_t *dl_src, uint16_t *dp_ifidx, tag_type *tags) { struct iface *iface; @@ -1288,7 +1299,7 @@ choose_output_iface(const struct port *port, const flow_t *flow, if (port->n_ifaces == 1) { iface = port->ifaces[0]; } else { - struct bond_entry *e = lookup_bond_entry(port, flow->dl_src); + struct bond_entry *e = lookup_bond_entry(port, dl_src); if (e->iface_idx < 0 || e->iface_idx >= port->n_ifaces || !port->ifaces[e->iface_idx]->enabled) { /* XXX select interface properly. 
The current interface selection @@ -1354,6 +1365,38 @@ bond_choose_active_iface(struct port *port) } } +static void +bond_enable_slave(struct iface *iface, bool enable) +{ /* Sets bonded slave 'iface' to the 'enable' state and clears its pending delay timer (delay_expires = LLONG_MAX); does nothing further if the state is unchanged. */ + struct port *port = iface->port; + struct bridge *br = port->bridge; + + iface->delay_expires = LLONG_MAX; + if (enable == iface->enabled) { + return; + } + + iface->enabled = enable; + if (!iface->enabled) { /* Slave was just disabled: revalidate its flows and pick a new active slave if it was the active one. */ + VLOG_WARN("interface %s: disabled", iface->name); + ofproto_revalidate(br->ofproto, iface->tag); + if (iface->port_ifidx == port->active_iface) { + ofproto_revalidate(br->ofproto, + port->active_iface_tag); + bond_choose_active_iface(port); + } + bond_send_learning_packets(port); + } else { /* Slave was just enabled: if the bond had no active slave, choose one now. */ + VLOG_WARN("interface %s: enabled", iface->name); + if (port->active_iface < 0) { + ofproto_revalidate(br->ofproto, port->no_ifaces_tag); + bond_choose_active_iface(port); + bond_send_learning_packets(port); + } + iface->tag = tag_create_random(); + } +} + static void bond_run(struct bridge *br) { @@ -1367,25 +1410,7 @@ bond_run(struct bridge *br) for (j = 0; j < port->n_ifaces; j++) { struct iface *iface = port->ifaces[j]; if (time_msec() >= iface->delay_expires) { - iface->delay_expires = LLONG_MAX; - iface->enabled = !iface->enabled; - VLOG_WARN("interface %s: %s", - iface->name, - iface->enabled ? "enabled" : "disabled"); - if (!iface->enabled) { - ofproto_revalidate(br->ofproto, iface->tag); - if (iface->port_ifidx == port->active_iface) { - ofproto_revalidate(br->ofproto, - port->active_iface_tag); - bond_choose_active_iface(port); - } - } else { - if (port->active_iface < 0) { - ofproto_revalidate(br->ofproto, port->no_ifaces_tag); - bond_choose_active_iface(port); - } - iface->tag = tag_create_random(); - } + bond_enable_slave(iface, !iface->enabled); } } } @@ -1430,7 +1455,7 @@ set_dst(struct dst *p, const flow_t *flow, p->vlan = (out_port->vlan >= 0 ? OFP_VLAN_NONE : in_port->vlan >= 0 ? 
in_port->vlan : ntohs(flow->dl_vlan)); - return choose_output_iface(out_port, flow, &p->dp_ifidx, tags); + return choose_output_iface(out_port, flow->dl_src, &p->dp_ifidx, tags); } static void @@ -1719,12 +1744,25 @@ process_flow(struct bridge *br, const flow_t *flow, goto done; } - /* Drop multicast and broadcast packets on inactive bonded interfaces, to + /* Multicast (and broadcast) packets on bonds need special attention, to * avoid receiving duplicates. */ if (in_port->n_ifaces > 1 && eth_addr_is_multicast(flow->dl_dst)) { *tags |= in_port->active_iface_tag; if (in_port->active_iface != in_iface->port_ifidx) { + /* Drop all multicast packets on inactive slaves. */ goto done; + } else { + /* Drop all multicast packets for which we have learned a different + * input port, because we probably sent the packet on one slaves + * and got it back on the active slave. Broadcast ARP replies are + * an exception to this rule: the host has moved to another + * switch. */ + int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); + if (src_idx != -1 + && src_idx != in_port->port_idx + && !is_bcast_arp_reply(flow, packet)) { + goto done; + } } } @@ -1732,27 +1770,9 @@ process_flow(struct bridge *br, const flow_t *flow, out_port = FLOOD_PORT; if (br->ml) { int out_port_idx; - bool may_learn; - - if (!packet) { - /* Don't try to learn from revalidation. */ - may_learn = false; - } else if (in_port->n_ifaces > 1) { - /* If the packet arrived on a bonded port, don't learn from it - * unless we haven't learned any port at all for that address - * (because we probably sent the packet on one bonded interface and - * got it back on the other). Broadcast ARP replies are an - * exception to this rule: the host has moved to another switch. */ - int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); - may_learn = (src_idx < 0 - || src_idx == in_port->port_idx - || is_bcast_arp_reply(flow, packet)); - } else { - may_learn = true; - } - /* Learn source MAC. 
*/ - if (may_learn) { + /* Learn source MAC (but don't try to learn from revalidation). */ + if (packet) { tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src, vlan, in_port->port_idx); if (rev_tag) { @@ -1908,6 +1928,8 @@ static struct ofhooks bridge_ofhooks = { bridge_account_checkpoint_ofhook_cb, }; +/* Bonding functions. */ + /* Statistics for a single interface on a bonded port, used for load-based * bond rebalancing. */ struct slave_balance { @@ -2058,7 +2080,6 @@ bond_shift_load(struct slave_balance *from, struct slave_balance *to, ofproto_revalidate(port->bridge->ofproto, hash->iface_tag); hash->iface_idx = to->iface->port_ifidx; hash->iface_tag = tag_create_random(); - } static void @@ -2167,6 +2188,369 @@ bond_rebalance_port(struct port *port) e->tx_bytes /= 2; } } + +static void +bond_send_learning_packets(struct port *port) +{ + struct bridge *br = port->bridge; + struct mac_entry *e; + struct ofpbuf packet; + int error, n_packets, n_errors; + + if (!port->n_ifaces || port->active_iface < 0 || !br->ml) { + return; + } + + ofpbuf_init(&packet, 128); + error = n_packets = n_errors = 0; + LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) { + static const char s[] = "Open vSwitch Bond Failover"; + union ofp_action actions[2], *a; + struct eth_header *eth; + struct llc_snap_header *llc_snap; + uint16_t dp_ifidx; + tag_type tags = 0; + flow_t flow; + int retval; + + if (e->port == port->port_idx + || !choose_output_iface(port, e->mac, &dp_ifidx, &tags)) { + continue; + } + + /* Compose packet to send. */ + ofpbuf_clear(&packet); + eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN); + llc_snap = ofpbuf_put_zeros(&packet, LLC_SNAP_HEADER_LEN); + ofpbuf_put(&packet, s, sizeof s); /* Includes null byte. 
*/ + ofpbuf_put(&packet, e->mac, ETH_ADDR_LEN); + + memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN); + memcpy(eth->eth_src, e->mac, ETH_ADDR_LEN); + eth->eth_type = htons(packet.size - ETH_HEADER_LEN); + + llc_snap->llc.llc_dsap = LLC_DSAP_SNAP; + llc_snap->llc.llc_ssap = LLC_SSAP_SNAP; + llc_snap->llc.llc_cntl = LLC_CNTL_SNAP; + memcpy(llc_snap->snap.snap_org, "\x00\x23\x20", 3); + llc_snap->snap.snap_type = htons(0xf177); /* Random number. */ + + /* Compose actions. */ + memset(actions, 0, sizeof actions); + a = actions; + if (e->vlan) { + a->vlan_vid.type = htons(OFPAT_SET_VLAN_VID); + a->vlan_vid.len = htons(sizeof *a); + a->vlan_vid.vlan_vid = htons(e->vlan); + a++; + } + a->output.type = htons(OFPAT_OUTPUT); + a->output.len = htons(sizeof *a); + a->output.port = htons(odp_port_to_ofp_port(dp_ifidx)); + a++; + + /* Send packet. */ + n_packets++; + flow_extract(&packet, ODPP_NONE, &flow); + retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions, + &packet); + if (retval) { + error = retval; + n_errors++; + } + } + ofpbuf_uninit(&packet); + + if (n_errors) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning " + "packets, last error was: %s", + port->name, n_errors, n_packets, strerror(error)); + } else { + VLOG_DBG("bond %s: sent %d gratuitous learning packets", + port->name, n_packets); + } +} + +/* Bonding unixctl user interface functions. 
*/ + +static void +bond_unixctl_list(struct unixctl_conn *conn, const char *args UNUSED) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + const struct bridge *br; + + ds_put_cstr(&ds, "bridge\tbond\tslaves\n"); + + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + size_t i; + + for (i = 0; i < br->n_ports; i++) { + const struct port *port = br->ports[i]; + if (port->n_ifaces > 1) { + size_t j; + + ds_put_format(&ds, "%s\t%s\t", br->name, port->name); + for (j = 0; j < port->n_ifaces; j++) { + const struct iface *iface = port->ifaces[j]; + if (j) { + ds_put_cstr(&ds, ", "); + } + ds_put_cstr(&ds, iface->name); + } + ds_put_char(&ds, '\n'); + } + } + } + unixctl_command_reply(conn, 200, ds_cstr(&ds)); + ds_destroy(&ds); +} + +static struct port * +bond_find(const char *name) +{ + const struct bridge *br; + + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + size_t i; + + for (i = 0; i < br->n_ports; i++) { + struct port *port = br->ports[i]; + if (!strcmp(port->name, name) && port->n_ifaces > 1) { + return port; + } + } + } + return NULL; +} + +static void +bond_unixctl_show(struct unixctl_conn *conn, const char *args) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + const struct port *port; + size_t j; + + port = bond_find(args); + if (!port) { + unixctl_command_reply(conn, 501, "no such bond"); + return; + } + + ds_put_format(&ds, "updelay: %d ms\n", port->updelay); + ds_put_format(&ds, "downdelay: %d ms\n", port->downdelay); + ds_put_format(&ds, "next rebalance: %lld ms\n", + port->bridge->bond_next_rebalance - time_msec()); + for (j = 0; j < port->n_ifaces; j++) { + const struct iface *iface = port->ifaces[j]; + struct bond_entry *be; + + /* Basic info. */ + ds_put_format(&ds, "slave %s: %s\n", + iface->name, iface->enabled ? "enabled" : "disabled"); + if (j == port->active_iface) { + ds_put_cstr(&ds, "\tactive slave\n"); + } + if (iface->delay_expires != LLONG_MAX) { + ds_put_format(&ds, "\t%s expires in %lld ms\n", + iface->enabled ? 
"downdelay" : "updelay", + iface->delay_expires - time_msec()); + } + + /* Hashes. */ + for (be = port->bond_hash; be <= &port->bond_hash[BOND_MASK]; be++) { + int hash = be - port->bond_hash; + struct mac_entry *me; + + if (be->iface_idx != j) { + continue; + } + + ds_put_format(&ds, "\thash %d: %lld kB load\n", + hash, be->tx_bytes / 1024); + + /* MACs. */ + if (!port->bridge->ml) { + break; + } + + LIST_FOR_EACH (me, struct mac_entry, lru_node, + &port->bridge->ml->lrus) { + uint16_t dp_ifidx; + tag_type tags = 0; + if (bond_hash(me->mac) == hash + && me->port != port->port_idx + && choose_output_iface(port, me->mac, &dp_ifidx, &tags) + && dp_ifidx == iface->dp_ifidx) + { + ds_put_format(&ds, "\t\t"ETH_ADDR_FMT"\n", + ETH_ADDR_ARGS(me->mac)); + } + } + } + } + unixctl_command_reply(conn, 200, ds_cstr(&ds)); + ds_destroy(&ds); +} + +static void +bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_) +{ + char *args = (char *) args_; + char *save_ptr = NULL; + char *bond_s, *hash_s, *slave_s; + uint8_t mac[ETH_ADDR_LEN]; + struct port *port; + struct iface *iface; + struct bond_entry *entry; + int hash; + + bond_s = strtok_r(args, " ", &save_ptr); + hash_s = strtok_r(NULL, " ", &save_ptr); + slave_s = strtok_r(NULL, " ", &save_ptr); + if (!slave_s) { + unixctl_command_reply(conn, 501, + "usage: bond/migrate BOND HASH SLAVE"); + return; + } + + port = bond_find(bond_s); + if (!port) { + unixctl_command_reply(conn, 501, "no such bond"); + return; + } + + if (sscanf(hash_s, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8, + &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) == 6) { + hash = bond_hash(mac); + } else if (strspn(hash_s, "0123456789") == strlen(hash_s)) { + hash = atoi(hash_s) & BOND_MASK; + } else { + unixctl_command_reply(conn, 501, "bad hash"); + return; + } + + iface = port_lookup_iface(port, slave_s); + if (!iface) { + unixctl_command_reply(conn, 501, "no such slave"); + return; + } + + if (!iface->enabled) { + 
unixctl_command_reply(conn, 501, "cannot migrate to disabled slave"); + return; + } + + entry = &port->bond_hash[hash]; + ofproto_revalidate(port->bridge->ofproto, entry->iface_tag); + entry->iface_idx = iface->port_ifidx; + entry->iface_tag = tag_create_random(); + unixctl_command_reply(conn, 200, "migrated"); +} + +static void +bond_unixctl_set_active_slave(struct unixctl_conn *conn, const char *args_) +{ + char *args = (char *) args_; + char *save_ptr = NULL; + char *bond_s, *slave_s; + struct port *port; + struct iface *iface; + + bond_s = strtok_r(args, " ", &save_ptr); + slave_s = strtok_r(NULL, " ", &save_ptr); + if (!slave_s) { + unixctl_command_reply(conn, 501, + "usage: bond/set-active-slave BOND SLAVE"); + return; + } + + port = bond_find(bond_s); + if (!port) { + unixctl_command_reply(conn, 501, "no such bond"); + return; + } + + iface = port_lookup_iface(port, slave_s); + if (!iface) { + unixctl_command_reply(conn, 501, "no such slave"); + return; + } + + if (!iface->enabled) { + unixctl_command_reply(conn, 501, "cannot make disabled slave active"); + return; + } + + if (port->active_iface != iface->port_ifidx) { + ofproto_revalidate(port->bridge->ofproto, port->active_iface_tag); + port->active_iface = iface->port_ifidx; + port->active_iface_tag = tag_create_random(); + VLOG_INFO("port %s: active interface is now %s", + port->name, iface->name); + bond_send_learning_packets(port); + unixctl_command_reply(conn, 200, "done"); + } else { + unixctl_command_reply(conn, 200, "no change"); + } +} + +static void +enable_slave(struct unixctl_conn *conn, const char *args_, bool enable) +{ + char *args = (char *) args_; + char *save_ptr = NULL; + char *bond_s, *slave_s; + struct port *port; + struct iface *iface; + + bond_s = strtok_r(args, " ", &save_ptr); + slave_s = strtok_r(NULL, " ", &save_ptr); + if (!slave_s) { + unixctl_command_reply(conn, 501, + "usage: bond/enable/disable-slave BOND SLAVE"); + return; + } + + port = bond_find(bond_s); + if (!port) { + 
unixctl_command_reply(conn, 501, "no such bond"); + return; + } + + iface = port_lookup_iface(port, slave_s); + if (!iface) { + unixctl_command_reply(conn, 501, "no such slave"); + return; + } + + bond_enable_slave(iface, enable); /* The request succeeded, so reply with 200 as the other bond commands do on success; 501 is used only for errors. */ + unixctl_command_reply(conn, 200, enable ? "enabled" : "disabled"); +} + +static void +bond_unixctl_enable_slave(struct unixctl_conn *conn, const char *args) +{ /* unixctl handler for "bond/enable-slave". */ + enable_slave(conn, args, true); +} + +static void +bond_unixctl_disable_slave(struct unixctl_conn *conn, const char *args) +{ /* unixctl handler for "bond/disable-slave". */ + enable_slave(conn, args, false); +} + +static void +bond_init(void) +{ /* Registers the bonding unixctl commands with their handlers. */ + unixctl_command_register("bond/list", bond_unixctl_list); + unixctl_command_register("bond/show", bond_unixctl_show); + unixctl_command_register("bond/migrate", bond_unixctl_migrate); + unixctl_command_register("bond/set-active-slave", + bond_unixctl_set_active_slave); + unixctl_command_register("bond/enable-slave", bond_unixctl_enable_slave); + unixctl_command_register("bond/disable-slave", bond_unixctl_disable_slave); +} /* Port functions. */ @@ -2369,6 +2753,20 @@ port_lookup(const struct bridge *br, const char *name) return NULL; } +static struct iface * +port_lookup_iface(const struct port *port, const char *name) +{ /* Returns the interface on 'port' whose name equals 'name', or NULL if there is none. */ + size_t j; + + for (j = 0; j < port->n_ifaces; j++) { + struct iface *iface = port->ifaces[j]; + if (!strcmp(iface->name, name)) { + return iface; + } + } + return NULL; +} + static void port_update_bonding(struct port *port) { @@ -2517,6 +2915,7 @@ iface_destroy(struct iface *iface) if (del_active) { ofproto_revalidate(port->bridge->ofproto, port->active_iface_tag); bond_choose_active_iface(port); + bond_send_learning_packets(port); } port_update_bonding(port);