X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=vswitchd%2Fbridge.c;h=b1e4e165f39cf91c6ccc212f05a89afae2d262af;hb=44528c546359a95c57e8a0e4b3703c5086b15f24;hp=bece2522070149d7e3b91a15e7ba6c0e5a1edaab;hpb=e8b52a913a8fc3d2ef0dae82eda5905668ebfae1;p=openvswitch diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index bece2522..b1e4e165 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -194,7 +194,7 @@ enum { DP_MAX = 256 }; static struct bridge *bridge_create(const char *name); static void bridge_destroy(struct bridge *); static struct bridge *bridge_lookup(const char *name); -static void bridge_unixctl_dump_flows(struct unixctl_conn *, const char *); +static unixctl_cb_func bridge_unixctl_dump_flows; static int bridge_run_one(struct bridge *); static void bridge_reconfigure_one(struct bridge *); static void bridge_reconfigure_controller(struct bridge *); @@ -210,7 +210,7 @@ static uint64_t bridge_pick_datapath_id(struct bridge *, static struct iface *bridge_get_local_iface(struct bridge *); static uint64_t dpid_from_hash(const void *, size_t nbytes); -static void bridge_unixctl_fdb_show(struct unixctl_conn *, const char *args); +static unixctl_cb_func bridge_unixctl_fdb_show; static void bond_init(void); static void bond_run(struct bridge *); @@ -287,7 +287,7 @@ bridge_init(void) struct svec dpif_names; size_t i; - unixctl_command_register("fdb/show", bridge_unixctl_fdb_show); + unixctl_command_register("fdb/show", bridge_unixctl_fdb_show, NULL); svec_init(&dpif_names); dp_enumerate(&dpif_names); @@ -316,7 +316,8 @@ bridge_init(void) } svec_destroy(&dpif_names); - unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows); + unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows, + NULL); bond_init(); bridge_reconfigure(); @@ -926,7 +927,8 @@ bridge_get_local_iface(struct bridge *br) /* Bridge unixctl user interface functions. */ static void -bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args) +bridge_unixctl_fdb_show(struct unixctl_conn *conn, + const char *args, void *aux UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; const struct bridge *br; @@ -962,7 +964,7 @@ bridge_create(const char *name) int error; assert(!bridge_lookup(name)); - br = xcalloc(1, sizeof *br); + br = xzalloc(sizeof *br); error = dpif_create(name, &br->dpif); if (error == EEXIST || error == EBUSY) { @@ -1061,7 +1063,8 @@ bridge_get_datapathid(const char *name) /* Handle requests for a listing of all flows known by the OpenFlow * stack, including those normally hidden. */ static void -bridge_unixctl_dump_flows(struct unixctl_conn *conn, const char *args) +bridge_unixctl_dump_flows(struct unixctl_conn *conn, + const char *args, void *aux UNUSED) { struct bridge *br; struct ds results; @@ -1783,14 +1786,26 @@ compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan, if (port_includes_vlan(port, m->out_vlan) && set_dst(dst, flow, in_port, port, tags)) { + int flow_vlan; + if (port->vlan < 0) { dst->vlan = m->out_vlan; } if (dst_is_duplicate(dsts, dst - dsts, dst)) { continue; } - if (dst->dp_ifidx == flow->in_port - && dst->vlan == vlan) { + + /* Use the vlan tag on the original flow instead of + * the one passed in the vlan parameter. This ensures + * that we compare the vlan from before any implicit + * tagging tags place. This is necessary because + * dst->vlan is the final vlan, after removing implicit + * tags. */ + flow_vlan = ntohs(flow->dl_vlan); + if (flow_vlan == 0) { + flow_vlan = OFP_VLAN_NONE; + } + if (port == in_port && dst->vlan == flow_vlan) { /* Don't send out input port on same VLAN. */ continue; } @@ -1953,33 +1968,27 @@ process_flow(struct bridge *br, const flow_t *flow, goto done; } - /* Multicast (and broadcast) packets on bonds need special attention, to - * avoid receiving duplicates. */ - if (in_port->n_ifaces > 1 && eth_addr_is_multicast(flow->dl_dst)) { - *tags |= in_port->active_iface_tag; - if (in_port->active_iface != in_iface->port_ifidx) { - /* Drop all multicast packets on inactive slaves. */ - goto done; - } else { - /* Drop all multicast packets for which we have learned a different - * input port, because we probably sent the packet on one slave - * and got it back on the active slave. Broadcast ARP replies are - * an exception to this rule: the host has moved to another - * switch. */ - int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); - if (src_idx != -1 && src_idx != in_port->port_idx) { - if (packet) { - if (!is_bcast_arp_reply(flow, packet)) { - goto done; - } - } else { - /* No way to know whether it's an ARP reply, because the - * flow entry doesn't include enough information and we - * don't have a packet. Punt. */ - return false; - } + /* Packets received on bonds need special attention to avoid duplicates. */ + if (in_port->n_ifaces > 1) { + int src_idx; + + if (eth_addr_is_multicast(flow->dl_dst)) { + *tags |= in_port->active_iface_tag; + if (in_port->active_iface != in_iface->port_ifidx) { + /* Drop all multicast packets on inactive slaves. */ + goto done; } } + + /* Drop all packets for which we have learned a different input + * port, because we probably sent the packet on one slave and got + * it back on the other. Broadcast ARP replies are an exception + * to this rule: the host has moved to another switch. */ + src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); + if (src_idx != -1 && src_idx != in_port->port_idx && + (!packet || !is_bcast_arp_reply(flow, packet))) { + goto done; + } } /* MAC learning. */ @@ -2009,6 +2018,11 @@ process_flow(struct bridge *br, const flow_t *flow, tags); if (out_port_idx >= 0 && out_port_idx < br->n_ports) { out_port = br->ports[out_port_idx]; + } else if (!packet) { + /* If we are revalidating but don't have a learning entry then + * eject the flow. Installing a flow that floods packets will + * prevent us from seeing future packets and learning properly. */ + return false; } } @@ -2259,8 +2273,9 @@ log_bals(const struct slave_balance *bals, size_t n_bals, struct port *port) /* Shifts 'hash' from 'from' to 'to' within 'port'. */ static void bond_shift_load(struct slave_balance *from, struct slave_balance *to, - struct bond_entry *hash) + int hash_idx) { + struct bond_entry *hash = from->hashes[hash_idx]; struct port *port = from->iface->port; uint64_t delta = hash->tx_bytes; @@ -2278,12 +2293,11 @@ bond_shift_load(struct slave_balance *from, struct slave_balance *to, * it require more work, the only purpose it would be to allow that hash to * be migrated to another slave in this rebalancing run, and there is no * point in doing that. */ - if (from->hashes[0] == hash) { + if (hash_idx == 0) { from->hashes++; } else { - int i = hash - from->hashes[0]; - memmove(from->hashes + i, from->hashes + i + 1, - (from->n_hashes - (i + 1)) * sizeof *from->hashes); + memmove(from->hashes + hash_idx, from->hashes + hash_idx + 1, + (from->n_hashes - (hash_idx + 1)) * sizeof *from->hashes); } from->n_hashes--; @@ -2368,22 +2382,60 @@ bond_rebalance_port(struct port *port) /* 'from' is carrying significantly more load than 'to', and that * load is split across at least two different hashes. Pick a hash * to migrate to 'to' (the least-loaded slave), given that doing so - * must not cause 'to''s load to exceed 'from''s load. + * must decrease the ratio of the load on the two slaves by at + * least 0.1. * * The sort order we use means that we prefer to shift away the * smallest hashes instead of the biggest ones. There is little * reason behind this decision; we could use the opposite sort * order to shift away big hashes ahead of small ones. */ size_t i; + bool order_swapped; for (i = 0; i < from->n_hashes; i++) { + double old_ratio, new_ratio; uint64_t delta = from->hashes[i]->tx_bytes; - if (to->tx_bytes + delta < from->tx_bytes - delta) { + + if (delta == 0 || from->tx_bytes - delta == 0) { + /* Pointless move. */ + continue; + } + + order_swapped = from->tx_bytes - delta < to->tx_bytes + delta; + + if (to->tx_bytes == 0) { + /* Nothing on the new slave, move it. */ + break; + } + + old_ratio = (double)from->tx_bytes / to->tx_bytes; + new_ratio = (double)(from->tx_bytes - delta) / + (to->tx_bytes + delta); + + if (new_ratio == 0) { + /* Should already be covered but check to prevent division + * by zero. */ + continue; + } + + if (new_ratio < 1) { + new_ratio = 1 / new_ratio; + } + + if (old_ratio - new_ratio > 0.1) { + /* Would decrease the ratio, move it. */ break; } } if (i < from->n_hashes) { - bond_shift_load(from, to, from->hashes[i]); + bond_shift_load(from, to, i); + port->bond_compat_is_stale = true; + + /* If the result of the migration changed the relative order of + * 'from' and 'to' swap them back to maintain invariants. */ + if (order_swapped) { + swap_bals(from, to); + } /* Re-sort 'bals'. Note that this may make 'from' and 'to' * point to different slave_balance structures. It is only @@ -2394,7 +2446,6 @@ bond_rebalance_port(struct port *port) } else { from++; } - port->bond_compat_is_stale = true; } } @@ -2473,7 +2524,8 @@ bond_send_learning_packets(struct port *port) /* Bonding unixctl user interface functions. */ static void -bond_unixctl_list(struct unixctl_conn *conn, const char *args UNUSED) +bond_unixctl_list(struct unixctl_conn *conn, + const char *args UNUSED, void *aux UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; const struct bridge *br; @@ -2523,7 +2575,8 @@ bond_find(const char *name) } static void -bond_unixctl_show(struct unixctl_conn *conn, const char *args) +bond_unixctl_show(struct unixctl_conn *conn, + const char *args, void *aux UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; const struct port *port; @@ -2592,7 +2645,8 @@ bond_unixctl_show(struct unixctl_conn *conn, const char *args) } static void -bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_) +bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_, + void *aux UNUSED) { char *args = (char *) args_; char *save_ptr = NULL; @@ -2648,7 +2702,8 @@ bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_) } static void -bond_unixctl_set_active_slave(struct unixctl_conn *conn, const char *args_) +bond_unixctl_set_active_slave(struct unixctl_conn *conn, const char *args_, + void *aux UNUSED) { char *args = (char *) args_; char *save_ptr = NULL; @@ -2728,19 +2783,22 @@ enable_slave(struct unixctl_conn *conn, const char *args_, bool enable) } static void -bond_unixctl_enable_slave(struct unixctl_conn *conn, const char *args) +bond_unixctl_enable_slave(struct unixctl_conn *conn, const char *args, + void *aux UNUSED) { enable_slave(conn, args, true); } static void -bond_unixctl_disable_slave(struct unixctl_conn *conn, const char *args) +bond_unixctl_disable_slave(struct unixctl_conn *conn, const char *args, + void *aux UNUSED) { enable_slave(conn, args, false); } static void -bond_unixctl_hash(struct unixctl_conn *conn, const char *args) +bond_unixctl_hash(struct unixctl_conn *conn, const char *args, + void *aux UNUSED) { uint8_t mac[ETH_ADDR_LEN]; uint8_t hash; @@ -2761,14 +2819,16 @@ bond_unixctl_hash(struct unixctl_conn *conn, const char *args) static void bond_init(void) { - unixctl_command_register("bond/list", bond_unixctl_list); - unixctl_command_register("bond/show", bond_unixctl_show); - unixctl_command_register("bond/migrate", bond_unixctl_migrate); + unixctl_command_register("bond/list", bond_unixctl_list, NULL); + unixctl_command_register("bond/show", bond_unixctl_show, NULL); + unixctl_command_register("bond/migrate", bond_unixctl_migrate, NULL); unixctl_command_register("bond/set-active-slave", - bond_unixctl_set_active_slave); - unixctl_command_register("bond/enable-slave", bond_unixctl_enable_slave); - unixctl_command_register("bond/disable-slave", bond_unixctl_disable_slave); - unixctl_command_register("bond/hash", bond_unixctl_hash); + bond_unixctl_set_active_slave, NULL); + unixctl_command_register("bond/enable-slave", bond_unixctl_enable_slave, + NULL); + unixctl_command_register("bond/disable-slave", bond_unixctl_disable_slave, + NULL); + unixctl_command_register("bond/hash", bond_unixctl_hash, NULL); } /* Port functions. */ @@ -2778,7 +2838,7 @@ port_create(struct bridge *br, const char *name) { struct port *port; - port = xcalloc(1, sizeof *port); + port = xzalloc(sizeof *port); port->bridge = br; port->port_idx = br->n_ports; port->vlan = -1; @@ -3131,7 +3191,7 @@ iface_create(struct port *port, const char *name) { struct iface *iface; - iface = xcalloc(1, sizeof *iface); + iface = xzalloc(sizeof *iface); iface->port = port; iface->port_ifidx = port->n_ifaces; iface->name = xstrdup(name); @@ -3338,7 +3398,7 @@ mirror_create(struct bridge *br, const char *name) VLOG_INFO("created port mirror %s on bridge %s", name, br->name); bridge_flush(br); - br->mirrors[i] = m = xcalloc(1, sizeof *m); + br->mirrors[i] = m = xzalloc(sizeof *m); m->bridge = br; m->idx = i; m->name = xstrdup(name);