From 9f5073d8a8c6c21c4e72660b8478a0e9d78764be Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Thu, 27 Jan 2011 20:25:03 -0800 Subject: [PATCH] vswitchd: Implement balance-tcp bonding. This commit implements a new bonding mode "balance-tcp" which takes into account L4 flow information when hashing. If LACP negotiation is unsuccessful it automatically falls back to "balance-slb" bonding. Bug #4213. --- vswitchd/bridge.c | 98 +++++++++++++++++++++++++++----------- vswitchd/vswitch.ovsschema | 6 +-- vswitchd/vswitch.xml | 45 ++++++++++++----- 3 files changed, 108 insertions(+), 41 deletions(-) diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 39a94e12..54830dcd 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -136,7 +136,8 @@ struct bond_entry { }; enum bond_mode { - BM_SLB, /* Source Load Balance (Default). */ + BM_TCP, /* Transport Layer Load Balance. */ + BM_SLB, /* Source Load Balance. */ BM_AB /* Active Backup. */ }; @@ -2105,18 +2106,42 @@ bridge_fetch_dp_ifaces(struct bridge *br) /* Bridge packet processing functions. */ +static bool +bond_is_tcp_hash(const struct port *port) +{ + return port->bond_mode == BM_TCP && port->lacp & LACP_NEGOTIATED; +} + static int -bond_hash(const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan) +bond_hash_src(const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan) { return hash_bytes(mac, ETH_ADDR_LEN, vlan) & BOND_MASK; } +static int bond_hash_tcp(const struct flow *flow, uint16_t vlan) +{ + struct flow hash_flow; + + memcpy(&hash_flow, flow, sizeof hash_flow); + hash_flow.vlan_tci = 0; + + /* The symmetric quality of this hash function is not required, but + * flow_hash_symmetric_l4 already exists, and is sufficient for our + * purposes, so we use it out of convenience. 
*/ + return flow_hash_symmetric_l4(&hash_flow, vlan) & BOND_MASK; +} + static struct bond_entry * -lookup_bond_entry(const struct port *port, const uint8_t mac[ETH_ADDR_LEN], +lookup_bond_entry(const struct port *port, const struct flow *flow, uint16_t vlan) { - assert(port->bond_mode == BM_SLB); - return &port->bond_hash[bond_hash(mac, vlan)]; + assert(port->bond_mode != BM_AB); + + if (bond_is_tcp_hash(port)) { + return &port->bond_hash[bond_hash_tcp(flow, vlan)]; + } else { + return &port->bond_hash[bond_hash_src(flow->dl_src, vlan)]; + } } static int @@ -2152,7 +2177,7 @@ bond_choose_iface(const struct port *port) } static bool -choose_output_iface(const struct port *port, const uint8_t *dl_src, +choose_output_iface(const struct port *port, const struct flow *flow, uint16_t vlan, uint16_t *dp_ifidx, tag_type *tags) { struct iface *iface; @@ -2166,8 +2191,8 @@ choose_output_iface(const struct port *port, const uint8_t *dl_src, return false; } iface = port->ifaces[port->active_iface]; - } else if (port->bond_mode == BM_SLB){ - struct bond_entry *e = lookup_bond_entry(port, dl_src, vlan); + } else { + struct bond_entry *e = lookup_bond_entry(port, flow, vlan); if (e->iface_idx < 0 || e->iface_idx >= port->n_ifaces || !port->ifaces[e->iface_idx]->enabled) { /* XXX select interface properly. The current interface selection @@ -2182,8 +2207,6 @@ choose_output_iface(const struct port *port, const uint8_t *dl_src, } *tags |= e->iface_tag; iface = port->ifaces[e->iface_idx]; - } else { - NOT_REACHED(); } *dp_ifidx = iface->dp_ifidx; *tags |= iface->tag; /* Currently only used for bonding. */ @@ -2470,7 +2493,7 @@ set_dst(struct dst *dst, const struct flow *flow, : in_port->vlan >= 0 ? in_port->vlan : flow->vlan_tci == 0 ? 
OFP_VLAN_NONE : vlan_tci_to_vid(flow->vlan_tci)); - return choose_output_iface(out_port, flow->dl_src, dst->vlan, + return choose_output_iface(out_port, flow, dst->vlan, &dst->dp_ifidx, tags); } @@ -3035,8 +3058,7 @@ bridge_account_flow_ofhook_cb(const struct flow *flow, tag_type tags, uint16_t vlan = (flow->vlan_tci ? vlan_tci_to_vid(flow->vlan_tci) : OFP_VLAN_NONE); - struct bond_entry *e = lookup_bond_entry(out_port, - flow->dl_src, vlan); + struct bond_entry *e = lookup_bond_entry(out_port, flow, vlan); e->tx_bytes += n_bytes; } } @@ -3275,10 +3297,12 @@ static const char * bond_mode_to_string(enum bond_mode bm) { static char *bm_slb = "balance-slb"; static char *bm_ab = "active-backup"; + static char *bm_tcp = "balance-tcp"; switch (bm) { case BM_SLB: return bm_slb; case BM_AB: return bm_ab; + case BM_TCP: return bm_tcp; } NOT_REACHED(); @@ -3437,7 +3461,7 @@ bond_rebalance_port(struct port *port) struct bond_entry *e; size_t i; - assert(port->bond_mode == BM_SLB); + assert(port->bond_mode != BM_AB); /* Sets up 'bals' to describe each of the port's interfaces, sorted in * descending order of tx_bytes, so that bals[0] represents the most @@ -3585,7 +3609,7 @@ bond_send_learning_packets(struct port *port) struct ofpbuf packet; int error, n_packets, n_errors; - if (!port->n_ifaces || port->active_iface < 0) { + if (!port->n_ifaces || port->active_iface < 0 || bond_is_tcp_hash(port)) { return; } @@ -3598,8 +3622,15 @@ bond_send_learning_packets(struct port *port) struct flow flow; int retval; - if (e->port == port->port_idx - || !choose_output_iface(port, e->mac, e->vlan, &dp_ifidx, &tags)) { + if (e->port == port->port_idx) { + continue; + } + + compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177, + e->mac); + flow_extract(&packet, 0, ODPP_NONE, &flow); + + if (!choose_output_iface(port, &flow, e->vlan, &dp_ifidx, &tags)) { continue; } @@ -3619,9 +3650,6 @@ bond_send_learning_packets(struct port *port) /* Send packet. 
*/ n_packets++; - compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177, - e->mac); - flow_extract(&packet, 0, ODPP_NONE, &flow); retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions, &packet); if (retval) { @@ -3756,6 +3784,12 @@ bond_unixctl_show(struct unixctl_conn *conn, ds_put_cstr(&ds, "\tlacp: off\n"); } + if (port->bond_mode != BM_AB) { + ds_put_format(&ds, "bond-hash-algorithm: %s\n", + bond_is_tcp_hash(port) ? "balance-tcp" : "balance-slb"); + } + + ds_put_format(&ds, "bond-detect-mode: %s\n", port->miimon ? "miimon" : "carrier"); @@ -3767,7 +3801,7 @@ bond_unixctl_show(struct unixctl_conn *conn, ds_put_format(&ds, "updelay: %d ms\n", port->updelay); ds_put_format(&ds, "downdelay: %d ms\n", port->downdelay); - if (port->bond_mode == BM_SLB) { + if (port->bond_mode != BM_AB) { ds_put_format(&ds, "next rebalance: %lld ms\n", port->bond_next_rebalance - time_msec()); } @@ -3775,6 +3809,7 @@ bond_unixctl_show(struct unixctl_conn *conn, for (j = 0; j < port->n_ifaces; j++) { const struct iface *iface = port->ifaces[j]; struct bond_entry *be; + struct flow flow; /* Basic info. */ ds_put_format(&ds, "slave %s: %s\n", @@ -3848,11 +3883,12 @@ bond_unixctl_show(struct unixctl_conn *conn, ds_put_cstr(&ds, "\n\n"); } - if (port->bond_mode != BM_SLB) { + if (port->bond_mode == BM_AB) { continue; } /* Hashes. */ + memset(&flow, 0, sizeof flow); for (be = port->bond_hash; be <= &port->bond_hash[BOND_MASK]; be++) { int hash = be - port->bond_hash; struct mac_entry *me; @@ -3864,13 +3900,19 @@ bond_unixctl_show(struct unixctl_conn *conn, ds_put_format(&ds, "\thash %d: %"PRIu64" kB load\n", hash, be->tx_bytes / 1024); + if (port->bond_mode != BM_SLB) { + continue; + } + /* MACs. 
*/ LIST_FOR_EACH (me, lru_node, &port->bridge->ml->lrus) { uint16_t dp_ifidx; tag_type tags = 0; - if (bond_hash(me->mac, me->vlan) == hash + + memcpy(flow.dl_src, me->mac, ETH_ADDR_LEN); + if (bond_hash_src(me->mac, me->vlan) == hash && me->port != port->port_idx - && choose_output_iface(port, me->mac, me->vlan, + && choose_output_iface(port, &flow, me->vlan, &dp_ifidx, &tags) && dp_ifidx == iface->dp_ifidx) { @@ -4063,7 +4105,7 @@ bond_unixctl_hash(struct unixctl_conn *conn, const char *args_, if (sscanf(mac_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac)) == ETH_ADDR_SCAN_COUNT) { - hash = bond_hash(mac, vlan); + hash = bond_hash_src(mac, vlan); hash_cstr = xasprintf("%u", hash); unixctl_command_reply(conn, 200, hash_cstr); @@ -4222,6 +4264,8 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg) port->bond_mode = BM_SLB; } else if (!strcmp(port->cfg->bond_mode, bond_mode_to_string(BM_AB))) { port->bond_mode = BM_AB; + } else if (!strcmp(port->cfg->bond_mode, bond_mode_to_string(BM_TCP))) { + port->bond_mode = BM_TCP; } else { port->bond_mode = BM_SLB; VLOG_WARN("port %s: unknown bond_mode %s, defaulting to %s", @@ -4480,7 +4524,7 @@ port_update_bonding(struct port *port) } else { size_t i; - if (port->bond_mode == BM_SLB && !port->bond_hash) { + if (port->bond_mode != BM_AB && !port->bond_hash) { port->bond_hash = xcalloc(BOND_MASK + 1, sizeof *port->bond_hash); for (i = 0; i <= BOND_MASK; i++) { struct bond_entry *e = &port->bond_hash[i]; @@ -4495,7 +4539,7 @@ port_update_bonding(struct port *port) if (port->cfg->bond_fake_iface) { port->bond_next_fake_iface_update = time_msec(); } - } else if (port->bond_mode != BM_SLB) { + } else if (port->bond_mode == BM_AB) { free(port->bond_hash); port->bond_hash = NULL; } diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema index 6282a825..e15925ec 100644 --- a/vswitchd/vswitch.ovsschema +++ b/vswitchd/vswitch.ovsschema @@ -1,6 +1,6 @@ {"name": "Open_vSwitch", - "version": "1.3.0", - "cksum": 
"1230201059 15724", + "version": "1.3.1", + "cksum": "557026156 15739", "tables": { "Open_vSwitch": { "columns": { @@ -122,7 +122,7 @@ "min": 0, "max": 1}}, "bond_mode": { "type": {"key": {"type": "string", - "enum": ["set", ["balance-slb", "active-backup"]]}, + "enum": ["set", ["balance-tcp", "balance-slb", "active-backup"]]}, "min": 0, "max": 1}}, "lacp": { "type": {"key": {"type": "string", diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index d9e71b33..bb7afe88 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -508,22 +508,45 @@

A port that has more than one interface is a ``bonded port.'' Bonding - allows for load balancing and fail-over. Open vSwitch supports - ``source load balancing'' (SLB) and "active backup" bonding. SLB - bonding assigns flows to slaves based on source MAC address and output - VLAN, with periodic rebalancing as traffic patterns change. Active - backup bonding assigns all flows to one slave, failing over to a backup - slave when the active slave is disabled. Neither form of bonding - require 802.3ad or other special support from the upstream switch to - which the slave devices are connected.

+ allows for load balancing and fail-over. Some kinds of bonding will + work with any kind of upstream switch:

+ +
+
balance-slb
+
+ Balances flows among slaves based on source MAC address and output + VLAN, with periodic rebalancing as traffic patterns change. +
+ +
active-backup
+
+ Assigns all flows to one slave, failing over to a backup slave when + the active slave is disabled. +
+
+ +

+ The following mode requires the upstream switch to support 802.3ad with + successful LACP negotiation. If LACP negotiation fails, then + balance-slb mode is used as a fallback: +

+ +
+
balance-tcp
+
+ Balances flows among slaves based on L2, L3, and L4 protocol + information such as destination MAC address, IP address, and TCP + port. +
+

These columns apply only to bonded ports. Their values are otherwise ignored.

-

The type of bonding used for a bonded port. Currently supported - values are balance-slb and active-backup. - Defaults to SLB if unset.

+

The type of bonding used for a bonded port. Defaults to + balance-slb if unset. +

-- 2.30.2