From be02e7c3712a927993d5ff00a98b824582e1f5e2 Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Thu, 6 Jan 2011 12:14:23 -0800 Subject: [PATCH] vswitchd: Active backup bonding. This commit adds active backup bonding support to vswitchd. Bug #4210. --- vswitchd/bridge.c | 93 +++++++++++++++++++++++++++++++++----- vswitchd/ovs-vswitchd.8.in | 20 ++++---- vswitchd/vswitch.ovsschema | 8 +++- vswitchd/vswitch.xml | 24 ++++++---- 4 files changed, 113 insertions(+), 32 deletions(-) diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index f7890e95..8b07b3e0 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -118,6 +118,11 @@ struct bond_entry { tag_type iface_tag; /* Tag associated with iface_idx. */ }; +enum bond_type { + BT_SLB, /* Source Load Balance (Default). */ + BT_AB /* Active Backup. */ +}; + #define MAX_MIRRORS 32 typedef uint32_t mirror_mask_t; #define MIRROR_MASK_C(X) UINT32_C(X) @@ -155,7 +160,7 @@ struct port { size_t n_ifaces, allocated_ifaces; /* Bonding info. */ - struct bond_entry *bond_hash; /* An array of (BOND_MASK + 1) elements. */ + enum bond_type bond_type; /* Type of the bond. BT_SLB is the default. */ int active_iface; /* Ifidx on which bcasts accepted, or -1. */ tag_type active_iface_tag; /* Tag for bcast flows. */ tag_type no_ifaces_tag; /* Tag for flows when all ifaces disabled. */ @@ -163,9 +168,12 @@ struct port { bool bond_compat_is_stale; /* Need to call port_update_bond_compat()? */ bool bond_fake_iface; /* Fake a bond interface for legacy compat? */ long long int bond_next_fake_iface_update; /* Time of next update. */ + struct netdev_monitor *monitor; /* Tracks carrier up/down status. */ + + /* SLB specific bonding info. */ + struct bond_entry *bond_hash; /* An array of (BOND_MASK + 1) elements. */ int bond_rebalance_interval; /* Interval between rebalances, in ms. */ long long int bond_next_rebalance; /* Next rebalancing time. */ - struct netdev_monitor *monitor; /* Tracks carrier up/down status. */ /* Port mirroring info. 
*/ mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */ @@ -1964,6 +1972,7 @@ static struct bond_entry * lookup_bond_entry(const struct port *port, const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan) { + assert(port->bond_type == BT_SLB); return &port->bond_hash[bond_hash(mac, vlan)]; } @@ -2006,7 +2015,13 @@ choose_output_iface(const struct port *port, const uint8_t *dl_src, assert(port->n_ifaces); if (port->n_ifaces == 1) { iface = port->ifaces[0]; - } else { + } else if (port->bond_type == BT_AB) { + if (port->active_iface < 0) { + *tags |= port->no_ifaces_tag; + return false; + } + iface = port->ifaces[port->active_iface]; + } else if (port->bond_type == BT_SLB){ struct bond_entry *e = lookup_bond_entry(port, dl_src, vlan); if (e->iface_idx < 0 || e->iface_idx >= port->n_ifaces || !port->ifaces[e->iface_idx]->enabled) { @@ -2022,6 +2037,8 @@ choose_output_iface(const struct port *port, const uint8_t *dl_src, } *tags |= e->iface_tag; iface = port->ifaces[e->iface_idx]; + } else { + NOT_REACHED(); } *dp_ifidx = iface->dp_ifidx; *tags |= iface->tag; /* Currently only used for bonding. */ @@ -2793,7 +2810,8 @@ bridge_account_flow_ofhook_cb(const struct flow *flow, tag_type tags, NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) { if (nl_attr_type(a) == ODPAT_OUTPUT) { struct port *out_port = port_from_dp_ifidx(br, nl_attr_get_u32(a)); - if (out_port && out_port->n_ifaces >= 2) { + if (out_port && out_port->n_ifaces >= 2 && + out_port->bond_type == BT_SLB) { uint16_t vlan = (flow->vlan_tci ? 
vlan_tci_to_vid(flow->vlan_tci) : OFP_VLAN_NONE); @@ -2819,7 +2837,8 @@ bridge_account_checkpoint_ofhook_cb(void *br_) now = time_msec(); for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; - if (port->n_ifaces > 1 && now >= port->bond_next_rebalance) { + if (port->n_ifaces > 1 && port->bond_type == BT_SLB + && now >= port->bond_next_rebalance) { port->bond_next_rebalance = now + port->bond_rebalance_interval; bond_rebalance_port(port); } @@ -2846,6 +2865,20 @@ struct slave_balance { size_t n_hashes; }; +static const char * +bond_type_to_string(enum bond_type bt) { + static char *bt_slb = "slb"; + static char *bt_ab = "active-backup"; + + switch (bt) { + case BT_SLB: return bt_slb; + case BT_AB: return bt_ab; + } + + NOT_REACHED(); + return NULL; +} + /* Sorts pointers to pointers to bond_entries in ascending order by the * interface to which they are assigned, and within a single interface in * ascending order of bytes transmitted. */ @@ -2954,6 +2987,8 @@ bond_shift_load(struct slave_balance *from, struct slave_balance *to, struct port *port = from->iface->port; uint64_t delta = hash->tx_bytes; + assert(port->bond_type == BT_SLB); + VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %td) " "from %s to %s (now carrying %"PRIu64"kB and " "%"PRIu64"kB load, respectively)", @@ -2996,6 +3031,8 @@ bond_rebalance_port(struct port *port) struct bond_entry *e; size_t i; + assert(port->bond_type == BT_SLB); + /* Sets up 'bals' to describe each of the port's interfaces, sorted in * descending order of tx_bytes, so that bals[0] represents the most * heavily loaded slave and bals[n_bals - 1] represents the least heavily @@ -3208,7 +3245,7 @@ bond_unixctl_list(struct unixctl_conn *conn, struct ds ds = DS_EMPTY_INITIALIZER; const struct bridge *br; - ds_put_cstr(&ds, "bridge\tbond\tslaves\n"); + ds_put_cstr(&ds, "bridge\tbond\ttype\tslaves\n"); LIST_FOR_EACH (br, node, &all_bridges) { size_t i; @@ -3218,7 +3255,8 @@ bond_unixctl_list(struct unixctl_conn 
*conn, if (port->n_ifaces > 1) { size_t j; - ds_put_format(&ds, "%s\t%s\t", br->name, port->name); + ds_put_format(&ds, "%s\t%s\t%s\t", br->name, port->name, + bond_type_to_string(port->bond_type)); for (j = 0; j < port->n_ifaces; j++) { const struct iface *iface = port->ifaces[j]; if (j) { @@ -3266,10 +3304,16 @@ bond_unixctl_show(struct unixctl_conn *conn, return; } + ds_put_format(&ds, "bond_type: %s\n", + bond_type_to_string(port->bond_type)); ds_put_format(&ds, "updelay: %d ms\n", port->updelay); ds_put_format(&ds, "downdelay: %d ms\n", port->downdelay); - ds_put_format(&ds, "next rebalance: %lld ms\n", - port->bond_next_rebalance - time_msec()); + + if (port->bond_type == BT_SLB) { + ds_put_format(&ds, "next rebalance: %lld ms\n", + port->bond_next_rebalance - time_msec()); + } + for (j = 0; j < port->n_ifaces; j++) { const struct iface *iface = port->ifaces[j]; struct bond_entry *be; @@ -3286,6 +3330,10 @@ bond_unixctl_show(struct unixctl_conn *conn, iface->delay_expires - time_msec()); } + if (port->bond_type != BT_SLB) { + continue; + } + /* Hashes. 
*/ for (be = port->bond_hash; be <= &port->bond_hash[BOND_MASK]; be++) { int hash = be - port->bond_hash; @@ -3345,6 +3393,11 @@ bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_, return; } + if (port->bond_type != BT_SLB) { + unixctl_command_reply(conn, 501, "not an SLB bond"); + return; + } + if (strspn(hash_s, "0123456789") == strlen(hash_s)) { hash = atoi(hash_s) & BOND_MASK; } else { @@ -3611,6 +3664,18 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg) port->bond_next_rebalance = next_rebalance; } + if (!port->cfg->bond_type || + !strcmp(port->cfg->bond_type, bond_type_to_string(BT_SLB))) { + port->bond_type = BT_SLB; + } else if (!strcmp(port->cfg->bond_type, bond_type_to_string(BT_AB))) { + port->bond_type = BT_AB; + } else { + port->bond_type = BT_SLB; + VLOG_WARN("port %s: unknown bond_type %s, defaulting to %s", + port->name, port->cfg->bond_type, + bond_type_to_string(port->bond_type)); + } + /* Add new interfaces and update 'cfg' member of existing ones. 
*/ shash_init(&new_ifaces); for (i = 0; i < cfg->n_interfaces; i++) { @@ -3777,12 +3842,13 @@ port_update_bonding(struct port *port) free(port->bond_hash); port->bond_hash = NULL; port->bond_compat_is_stale = true; - port->bond_fake_iface = false; } + + port->bond_fake_iface = false; } else { size_t i; - if (!port->bond_hash) { + if (port->bond_type == BT_SLB && !port->bond_hash) { port->bond_hash = xcalloc(BOND_MASK + 1, sizeof *port->bond_hash); for (i = 0; i <= BOND_MASK; i++) { struct bond_entry *e = &port->bond_hash[i]; @@ -3797,6 +3863,9 @@ port_update_bonding(struct port *port) if (port->cfg->bond_fake_iface) { port->bond_next_fake_iface_update = time_msec(); } + } else if (port->bond_type != BT_SLB) { + free(port->bond_hash); + port->bond_hash = NULL; } port->bond_compat_is_stale = true; port->bond_fake_iface = port->cfg->bond_fake_iface; @@ -3815,7 +3884,7 @@ port_update_bond_compat(struct port *port) struct compat_bond bond; size_t i; - if (port->n_ifaces < 2) { + if (port->n_ifaces < 2 || port->bond_type != BT_SLB) { proc_net_compat_update_bond(port->name, NULL); return; } diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in index 822c0f29..5c525268 100644 --- a/vswitchd/ovs-vswitchd.8.in +++ b/vswitchd/ovs-vswitchd.8.in @@ -139,12 +139,12 @@ since it is not allowed to modify or override them. .SS "BOND COMMANDS" These commands manage bonded ports on an Open vSwitch's bridges. To understand some of these commands, it is important to understand a -detail of the bonding implementation called ``MAC hashing.'' Instead -of directly assigning Ethernet source addresses to slaves, the bonding -implementation computes a function that maps an 48-bit Ethernet source -addresses into an 8-bit value (a ``MAC hash'' value). All of the -Ethernet addresses that map to a single 8-bit value are then assigned -to a single slave. +detail of the bonding implementation called ``source load balancing'' +(SLB). 
Instead of directly assigning Ethernet source addresses to +slaves, the bonding implementation computes a function that maps a +48-bit Ethernet source address into an 8-bit value (a ``MAC hash'' +value). All of the Ethernet addresses that map to a single 8-bit +value are then assigned to a single slave. .IP "\fBbond/list\fR" Lists all of the bonds, and their slaves, on each bridge. . @@ -156,10 +156,10 @@ the time to completion of an updelay or downdelay if one is in progress, whether it is the active slave, the MAC hashes assigned to the slave, and the MAC learning table entries that hash to each MAC. .IP "\fBbond/migrate\fR \fIport\fR \fIhash\fR \fIslave\fR" -Assigns a given MAC hash to a new slave. \fIport\fR specifies the -bond port, \fIhash\fR the MAC hash to be migrated (as a decimal -number between 0 and 255), and \fIslave\fR the new slave to be -assigned. +Only valid for SLB bonds. Assigns a given MAC hash to a new slave. +\fIport\fR specifies the bond port, \fIhash\fR the MAC hash to be +migrated (as a decimal number between 0 and 255), and \fIslave\fR the +new slave to be assigned.
.IP The reassignment is not permanent: rebalancing or fail-over will cause the MAC hash to be shifted to a new slave in the usual diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema index d21a85c1..a8140b8a 100644 --- a/vswitchd/vswitch.ovsschema +++ b/vswitchd/vswitch.ovsschema @@ -1,6 +1,6 @@ {"name": "Open_vSwitch", - "version": "1.0.1", - "cksum": "665434435 14130", + "version": "1.0.2", + "cksum": "3196651018 14282", "tables": { "Open_vSwitch": { "columns": { @@ -120,6 +120,10 @@ "mac": { "type": {"key": {"type": "string"}, "min": 0, "max": 1}}, + "bond_type": { + "type": {"key": {"type": "string", + "enum": ["set", ["slb", "active-backup"]]}, + "min": 0, "max": 1}}, "bond_updelay": { "type": "integer"}, "bond_downdelay": { diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 77906882..7a70909e 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -507,17 +507,25 @@ -

A port that has more than one interface is a ``bonded port.'' - Bonding allows for load balancing and fail-over. Open vSwitch - supports ``source load balancing'' (SLB) bonding, which - assigns flows to slaves based on source MAC address and output VLAN, - with periodic rebalancing as traffic patterns change. This form of - bonding does not require 802.3ad or other special support from the - upstream switch to which the slave devices are connected.

+

A port that has more than one interface is a ``bonded port.'' Bonding + allows for load balancing and fail-over. Open vSwitch supports + ``source load balancing'' (SLB) and ``active backup'' bonding. SLB + bonding assigns flows to slaves based on source MAC address and output + VLAN, with periodic rebalancing as traffic patterns change. Active + backup bonding assigns all flows to one slave, failing over to a backup + slave when the active slave is disabled. Neither form of bonding + requires 802.3ad or other special support from the upstream switch to + which the slave devices are connected.

These columns apply only to bonded ports. Their values are otherwise ignored.

+ +

The type of bonding used for a bonded port. Currently supported + values are slb and active-backup. Defaults + to SLB if unset.

+
+

For a bonded port, the number of milliseconds for which carrier must stay up on an interface before the interface is considered to be up. @@ -583,7 +591,7 @@

An Ethernet address in the form xx:xx:xx:xx:xx:xx.
bond-rebalance-interval
-
For a bonded port, the number of milliseconds between +
For an SLB bonded port, the number of milliseconds between successive attempts to rebalance the bond, that is, to move source MACs and their flows from one interface on the bond to another in an attempt to keep usage of each -- 2.30.2