-choose_output_iface(const struct port *port, const struct flow *flow,
- uint16_t vlan, uint16_t *dp_ifidx, tag_type *tags)
-{
- struct iface *iface;
-
- assert(port->n_ifaces);
- if (port->n_ifaces == 1) {
- iface = port->ifaces[0];
- } else if (port->bond_mode == BM_AB) {
- if (port->active_iface < 0) {
- *tags |= port->no_ifaces_tag;
- return false;
- }
- iface = port->ifaces[port->active_iface];
- } else {
- struct bond_entry *e = lookup_bond_entry(port, flow, vlan);
- if (e->iface_idx < 0 || e->iface_idx >= port->n_ifaces
- || !port->ifaces[e->iface_idx]->enabled) {
- /* XXX select interface properly. The current interface selection
- * is only good for testing the rebalancing code. */
- e->iface_idx = bond_choose_iface(port);
- if (e->iface_idx < 0) {
- *tags |= port->no_ifaces_tag;
- return false;
- }
- e->iface_tag = tag_create_random();
- ((struct port *) port)->bond_compat_is_stale = true;
- }
- *tags |= e->iface_tag;
- iface = port->ifaces[e->iface_idx];
- }
- *dp_ifidx = iface->dp_ifidx;
- *tags |= iface->tag; /* Currently only used for bonding. */
- return true;
-}
-
-/* Re-evaluates whether 'iface' should count as up for bonding and, when that
- * disagrees with its current enabled state, starts the matching delay timer
- * (or enables it at once if the port has no active interface).  When the two
- * agree again while a timer is pending, the timer is cancelled. */
-static void
-bond_link_status_update(struct iface *iface)
-{
-    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
-    struct port *port = iface->port;
-    int updelay = port->updelay;
-    int downdelay = port->downdelay;
-    bool up = iface->up;
-    bool state_matches, timer_idle;
-    int delay;
-
-    /* Both delays are dropped when the port's LACP_NEGOTIATED flag is set. */
-    if (port->lacp & LACP_NEGOTIATED) {
-        updelay = 0;
-        downdelay = 0;
-    }
-
-    if (up && port->lacp) {
-        /* With LACP, an interface is up only if it is attached to an
-         * aggregator and its partner is synchronized.  Defaulted links are
-         * the exception: they have no partner at all, so no synchronized
-         * partner is required.  They are only attached, however, if
-         * negotiations failed on all interfaces in the bond. */
-        bool partner_ok = (iface->lacp_partner.state & LACP_STATE_SYNC)
-                          || iface->lacp_status == LACP_STATUS_DEFAULTED;
-
-        up = iface->lacp_attached && partner_ok;
-    }
-
-    state_matches = up == iface->enabled;
-    timer_idle = iface->delay_expires == LLONG_MAX;
-    if (state_matches == timer_idle) {
-        /* Either everything is settled, or the right timer already runs. */
-        return;
-    }
-
-    VLOG_INFO_RL(&rl, "interface %s: link state %s",
-                 iface->name, up ? "up" : "down");
-
-    if (state_matches) {
-        /* The link returned to its enabled state: cancel the pending
-         * transition. */
-        iface->delay_expires = LLONG_MAX;
-        VLOG_INFO_RL(&rl, "interface %s: will not be %s",
-                     iface->name, up ? "disabled" : "enabled");
-        return;
-    }
-
-    if (up && port->active_iface < 0) {
-        /* No active interface on the port, so enable without waiting. */
-        bond_enable_slave(iface, true);
-        if (updelay) {
-            VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
-                         "other interface is up", iface->name, updelay);
-        }
-        return;
-    }
-
-    delay = up ? updelay : downdelay;
-    iface->delay_expires = time_msec() + delay;
-    if (delay) {
-        VLOG_INFO_RL(&rl,
-                     "interface %s: will be %s if it stays %s for %d ms",
-                     iface->name,
-                     up ? "enabled" : "disabled",
-                     up ? "up" : "down",
-                     delay);
-    }
-}
-
-/* Stores the result of bond_choose_iface() as the active interface of 'port',
- * gives 'port->active_iface_tag' a fresh random tag, and logs the outcome. */
-static void
-bond_choose_active_iface(struct port *port)
-{
-    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
-
-    port->active_iface = bond_choose_iface(port);
-    port->active_iface_tag = tag_create_random();
-
-    if (port->active_iface < 0) {
-        /* A negative index means no interface could be chosen. */
-        VLOG_WARN_RL(&rl, "port %s: all ports disabled, no active interface",
-                     port->name);
-        return;
-    }
-
-    VLOG_INFO_RL(&rl, "port %s: active interface is now %s",
-                 port->name, port->ifaces[port->active_iface]->name);
-}
-
-/* Sets the enabled state of 'iface' to 'enable'.
- *
- * The pending delay timer of 'iface' is always cancelled (delay_expires is
- * reset to LLONG_MAX).  If 'iface' is already in the requested state, nothing
- * else happens.
- *
- * On disable: flows tagged with the interface's tag are revalidated.  If
- * 'iface' was the port's active interface, flows tagged with the port's
- * active_iface_tag are revalidated too and a new active interface is chosen.
- * Learning packets are then sent.
- *
- * On enable: if the port has no active interface and this is not a reentrant
- * call made while the active interface is being moved, flows tagged with the
- * port's no_ifaces_tag are revalidated, an active interface is chosen and
- * learning packets are sent.  The interface always gets a fresh random tag.
- *
- * Whenever the state actually changes, the port's bond_compat_is_stale flag
- * is set. */
-static void
-bond_enable_slave(struct iface *iface, bool enable)
-{
- struct port *port = iface->port;
- struct bridge *br = port->bridge;
-
- /* This acts as a recursion check. If the act of disabling a slave
- * causes a different slave to be enabled, the flag will allow us to
- * skip redundant work when we reenter this function. It must be
- * cleared on exit to keep things safe with multiple bonds. */
- static bool moving_active_iface = false;
-
- /* Cancel any pending enable/disable timer before checking for a no-op. */
- iface->delay_expires = LLONG_MAX;
- if (enable == iface->enabled) {
- return;
- }
-
- iface->enabled = enable;
- if (!iface->enabled) {
- VLOG_WARN("interface %s: disabled", iface->name);
- ofproto_revalidate(br->ofproto, iface->tag);
- if (iface->port_ifidx == port->active_iface) {
- ofproto_revalidate(br->ofproto,
- port->active_iface_tag);
-
- /* Disabling a slave can lead to another slave being immediately
- * enabled if there will be no active slaves but one is waiting
- * on an updelay. In this case we do not need to run most of the
- * code for the newly enabled slave since there was no period
- * without an active slave and it is redundant with the disabling
- * path. */
- moving_active_iface = true;
- bond_choose_active_iface(port);
- }
- bond_send_learning_packets(port);
- } else {
- VLOG_WARN("interface %s: enabled", iface->name);
- /* Skipped when reentered from the disabling path above, which does the
- * active-interface selection and learning packets itself. */
- if (port->active_iface < 0 && !moving_active_iface) {
- ofproto_revalidate(br->ofproto, port->no_ifaces_tag);
- bond_choose_active_iface(port);
- bond_send_learning_packets(port);
- }
- iface->tag = tag_create_random();
- }
-
- /* Clear the recursion flag on every exit that may have set or seen it. */
- moving_active_iface = false;
- port->bond_compat_is_stale = true;
-}
-
-/* Sums the packet and byte counters of every slave of 'port' and tries to
- * publish the totals on the fake bond device named after the port.  Slaves
- * whose statistics cannot be read are left out of the sum, and nothing is
- * published if the bond device cannot be opened. */
-static void
-bond_update_fake_iface_stats(struct port *port)
-{
-    struct netdev_stats totals;
-    struct netdev *fake_dev;
-    size_t idx;
-
-    memset(&totals, 0, sizeof totals);
-
-    for (idx = 0; idx < port->n_ifaces; idx++) {
-        struct netdev_stats cur;
-
-        if (netdev_get_stats(port->ifaces[idx]->netdev, &cur)) {
-            continue;
-        }
-
-        /* XXX: rx and tx are deliberately swapped here because they are
-         * swapped back when reported by the internal device.  Internal
-         * devices normally represent packets going into the system, but when
-         * used as a fake bond device they represent packets leaving the
-         * system.  This really belongs in the internal device itself, since
-         * doing it here reverses the counts from the perspective of the
-         * switch.  The internal device does not know what kind of device it
-         * represents, though, so for now it has to happen here. */
-        totals.rx_packets += cur.tx_packets;
-        totals.rx_bytes += cur.tx_bytes;
-        totals.tx_packets += cur.rx_packets;
-        totals.tx_bytes += cur.rx_bytes;
-    }
-
-    if (netdev_open_default(port->name, &fake_dev)) {
-        return;
-    }
-    netdev_set_stats(fake_dev, &totals);
-    netdev_close(fake_dev);
-}
-
-/* Records a carrier change on 'iface'.  Does nothing if 'carrier' equals the
- * stored 'iface->up'.  Otherwise, an interface whose LACP status is
- * LACP_STATUS_CURRENT is first passed to iface_set_lacp_expired(); then the
- * new carrier state is stored, 'lacp_tx' is reset to 0 and the port's bond
- * compat information is marked stale. */
-static void
-bond_link_carrier_update(struct iface *iface, bool carrier)
-{
-    if (carrier != iface->up) {
-        if (iface->lacp_status == LACP_STATUS_CURRENT) {
-            iface_set_lacp_expired(iface);
-        }
-
-        iface->up = carrier;
-        iface->lacp_tx = 0;
-        iface->port->bond_compat_is_stale = true;
-    }
-}
-
-/* Periodic bonding work for every port of 'br'.
- *
- * For ports with at least two interfaces this refreshes carrier state (from
- * the netdev monitor if the port has one, otherwise by polling miimon once
- * the miimon deadline has passed), re-evaluates each interface's link status,
- * enables or disables interfaces whose delay timer has expired, and refreshes
- * the fake bond interface statistics once its deadline has passed.
- *
- * For every port, stale bond compat information is regenerated. */
-static void
-bond_run(struct bridge *br)
-{
-    size_t port_idx;
-
-    for (port_idx = 0; port_idx < br->n_ports; port_idx++) {
-        struct port *port = br->ports[port_idx];
-
-        if (port->n_ifaces >= 2) {
-            size_t k;
-
-            if (!port->monitor) {
-                assert(port->miimon);
-
-                if (time_msec() >= port->bond_miimon_next_update) {
-                    for (k = 0; k < port->n_ifaces; k++) {
-                        struct iface *slave = port->ifaces[k];
-                        bool link_up = netdev_get_miimon(slave->netdev);
-
-                        bond_link_carrier_update(slave, link_up);
-                    }
-                    port->bond_miimon_next_update = time_msec() +
-                        port->bond_miimon_interval;
-                }
-            } else {
-                char *changed_name;
-
-                assert(!port->miimon);
-
-                /* Drain the monitor: each successful poll yields the name of
-                 * a device to re-check, which must be freed here. */
-                while (!netdev_monitor_poll(port->monitor, &changed_name)) {
-                    struct iface *slave;
-
-                    slave = port_lookup_iface(port, changed_name);
-                    if (slave) {
-                        bool link_up = netdev_get_carrier(slave->netdev);
-
-                        bond_link_carrier_update(slave, link_up);
-                    }
-                    free(changed_name);
-                }
-            }
-
-            /* Re-evaluate every interface before acting on any expired
-             * timer; the two passes are intentionally kept separate. */
-            for (k = 0; k < port->n_ifaces; k++) {
-                bond_link_status_update(port->ifaces[k]);
-            }
-
-            for (k = 0; k < port->n_ifaces; k++) {
-                struct iface *slave = port->ifaces[k];
-
-                if (time_msec() >= slave->delay_expires) {
-                    bond_enable_slave(slave, !slave->enabled);
-                }
-            }
-
-            if (port->bond_fake_iface
-                && time_msec() >= port->bond_next_fake_iface_update) {
-                bond_update_fake_iface_stats(port);
-                port->bond_next_fake_iface_update = time_msec() + 1000;
-            }
-        }
-
-        if (port->bond_compat_is_stale) {
-            port->bond_compat_is_stale = false;
-            port_update_bond_compat(port);
-        }
-    }
-}
-
-/* Registers poll-loop wakeups for the bonding work on 'br'.  Only ports with
- * at least two interfaces are considered.  For those, this waits on the
- * netdev monitor (if any), the miimon deadline (if miimon is in use), every
- * running interface delay timer, and the fake interface statistics deadline
- * (if the port has a fake bond interface). */
-static void
-bond_wait(struct bridge *br)
-{
-    size_t port_idx;
-
-    for (port_idx = 0; port_idx < br->n_ports; port_idx++) {
-        struct port *port = br->ports[port_idx];
-
-        if (port->n_ifaces >= 2) {
-            size_t k;
-
-            if (port->monitor) {
-                netdev_monitor_poll_wait(port->monitor);
-            }
-
-            if (port->miimon) {
-                poll_timer_wait_until(port->bond_miimon_next_update);
-            }
-
-            for (k = 0; k < port->n_ifaces; k++) {
-                struct iface *slave = port->ifaces[k];
-
-                /* LLONG_MAX marks a timer that is not running. */
-                if (slave->delay_expires != LLONG_MAX) {
-                    poll_timer_wait_until(slave->delay_expires);
-                }
-            }
-
-            if (port->bond_fake_iface) {
-                poll_timer_wait_until(port->bond_next_fake_iface_update);
-            }
-        }
-    }
-}
-
-/* Fills in 'dst' for sending 'flow', received on 'in_port', out of 'out_port'.
- *
- * 'dst->vlan' is chosen by the first rule that applies:
- *   - 'out_port' has a VLAN of its own (vlan >= 0): OFP_VLAN_NONE.
- *   - 'in_port' has a VLAN of its own: that VLAN.
- *   - 'flow' has a zero vlan_tci: OFP_VLAN_NONE.
- *   - otherwise: the VID extracted from the flow's vlan_tci.
- *
- * The datapath interface index is then picked by choose_output_iface(), whose
- * result is returned; that call may also add to '*tags'. */
-static bool
-set_dst(struct dst *dst, const struct flow *flow,
-        const struct port *in_port, const struct port *out_port,
-        tag_type *tags)
-{
-    if (out_port->vlan >= 0) {
-        dst->vlan = OFP_VLAN_NONE;
-    } else if (in_port->vlan >= 0) {
-        dst->vlan = in_port->vlan;
-    } else if (flow->vlan_tci == 0) {
-        dst->vlan = OFP_VLAN_NONE;
-    } else {
-        dst->vlan = vlan_tci_to_vid(flow->vlan_tci);
-    }
-
-    return choose_output_iface(out_port, flow, dst->vlan,
-                               &dst->dp_ifidx, tags);
-}
-
-/* Exchanges the contents of '*p' and '*q'. */
-static void
-swap_dst(struct dst *p, struct dst *q)
-{
-    const struct dst saved = *q;
-
-    *q = *p;
-    *p = saved;
-}
-
-/* Reorders the dsts in 'set' so that every entry with vlan == 'vlan' comes
- * before every entry with a different vlan.  (This may help performance by
- * reducing the number of VLAN changes pushed to the datapath.  The array
- * could be fully sorted by vlan, but in most cases there are at most two
- * different vlan tags, so that is possibly overkill.) */
-static void
-partition_dsts(struct dst_set *set, int vlan)
-{
-    struct dst *dsts = set->dsts;
-    size_t lo = 0;
-    size_t hi = set->n;
-
-    while (lo != hi) {
-        /* Invariants:
-         *   - Every entry at an index < lo has vlan == 'vlan'.
-         *   - Every entry at an index >= hi has vlan != 'vlan'.
-         *   - lo < hi. */
-        while (dsts[lo].vlan == vlan) {
-            lo++;
-            if (lo == hi) {
-                return;
-            }
-        }
-
-        /* Now additionally dsts[lo].vlan != 'vlan'. */
-        while (dsts[hi - 1].vlan != vlan) {
-            hi--;
-            if (hi == lo) {
-                return;
-            }
-        }
-
-        /* Now additionally dsts[hi - 1].vlan == 'vlan', so the two entries
-         * are on the wrong sides: exchange them and shrink the window. */
-        hi--;
-        swap_dst(&dsts[lo], &dsts[hi]);
-        lo++;
-    }
-}
-
-/* Returns ffs() of 'mask'.  A build-time assertion checks that 'mask' is no
- * wider than an unsigned int, so no bits are lost in the call. */
-static int
-mirror_mask_ffs(mirror_mask_t mask)
-{
-    int lowest_bit;
-
-    BUILD_ASSERT_DECL(sizeof(unsigned int) >= sizeof(mask));
-    lowest_bit = ffs(mask);
-    return lowest_bit;
-}
-
-/* Initializes 'set' as empty, backed by its builtin array. */
-static void
-dst_set_init(struct dst_set *set)
-{
-    set->n = 0;
-    set->allocated = ARRAY_SIZE(set->builtin);
-    set->dsts = set->builtin;
-}
-
-/* Appends a copy of '*dst' to 'set'.  When the current storage is full, a new
- * array of twice the capacity is allocated, the existing entries are copied
- * over and the old storage is released through dst_set_free(). */
-static void
-dst_set_add(struct dst_set *set, const struct dst *dst)
-{
-    if (set->n >= set->allocated) {
-        size_t grown = set->allocated * 2;
-        struct dst *bigger = xmalloc(grown * sizeof *bigger);
-
-        memcpy(bigger, set->dsts, set->n * sizeof *bigger);
-
-        /* Release the old array before repointing 'set->dsts'. */
-        dst_set_free(set);
-
-        set->dsts = bigger;
-        set->allocated = grown;
-    }
-
-    set->dsts[set->n] = *dst;
-    set->n++;
-}
-
-/* Frees the storage of 'set' unless it is the builtin array. */
-static void
-dst_set_free(struct dst_set *set)
-{
-    if (set->dsts == set->builtin) {
-        return;
-    }
-    free(set->dsts);
-}
-
-static bool
-dst_is_duplicate(const struct dst_set *set, const struct dst *test)