From 6333182946ffd368eb7623d7408185a521058b46 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Fri, 7 Jan 2011 16:22:34 -0800
Subject: [PATCH] vswitchd: Add miimon support.

This commit allows users to check link status in bonded ports using
MII instead of carrier.
---
 lib/netdev-dummy.c         |  1 +
 lib/netdev-linux.c         | 45 +++++++++++++++++++
 lib/netdev-provider.h      |  8 ++++
 lib/netdev-vport.c         |  1 +
 lib/netdev.c               | 28 ++++++++++++
 lib/netdev.h               |  1 +
 vswitchd/bridge.c          | 89 +++++++++++++++++++++++++++++++-------
 vswitchd/vswitch.ovsschema |  4 +-
 vswitchd/vswitch.xml       | 10 +++++
 9 files changed, 169 insertions(+), 18 deletions(-)

diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 4569f974..37058435 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -296,6 +296,7 @@ static const struct netdev_class dummy_class = {
     netdev_dummy_get_mtu,
     NULL,                       /* get_ifindex */
     NULL,                       /* get_carrier */
+    NULL,                       /* get_miimon */
     netdev_dummy_get_stats,
     netdev_dummy_set_stats,
 
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 97f9a647..f953cfc7 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -25,6 +25,7 @@
 #include <linux/ip.h>
 #include <linux/types.h>
 #include <linux/ethtool.h>
+#include <linux/mii.h>
 #include <linux/pkt_sched.h>
 #include <linux/rtnetlink.h>
 #include <linux/sockios.h>
@@ -1007,6 +1008,49 @@ exit:
     return error;
 }
 
+static int
+netdev_linux_get_miimon(const struct netdev *netdev_, bool *miimon)
+{
+    int error;
+    struct ifreq ifr;
+    const char *name = netdev_get_name(netdev_);
+
+    *miimon = false;
+    memset(&ifr, 0, sizeof ifr);
+
+    error = netdev_linux_do_ioctl(name, &ifr, SIOCGMIIPHY, "SIOCGMIIPHY");
+    if (!error) {
+        struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr.ifr_data;
+
+        /* data->phy_id is filled out by previous SIOCGMIIPHY ioctl call. */
+        data->reg_num = MII_BMSR;
+        error = netdev_linux_do_ioctl(name, &ifr, SIOCGMIIREG, "SIOCGMIIREG");
+
+        if (!error) {
+            *miimon = !!(data->val_out & BMSR_LSTATUS);
+        } else {
+            VLOG_WARN_RL(&rl, "%s: failed to query MII", name);
+        }
+    } else {
+        struct ethtool_cmd ecmd;
+        struct ethtool_value *eval = (struct ethtool_value *) &ecmd;
+
+        VLOG_DBG_RL(&rl, "%s: failed to query MII, falling back to ethtool",
+                    name);
+
+        memset(&ecmd, 0, sizeof ecmd);
+        error = netdev_linux_do_ethtool(name, &ecmd, ETHTOOL_GLINK,
+                                        "ETHTOOL_GLINK");
+        if (!error) {
+            *miimon = !!eval->data;
+        } else {
+            VLOG_WARN_RL(&rl, "%s: ethtool link status failed", name);
+        }
+    }
+
+    return error;
+}
+
 /* Check whether we can we use RTM_GETLINK to get network device statistics.
  * In pre-2.6.19 kernels, this was only available if wireless extensions were
  * enabled. */
@@ -2152,6 +2196,7 @@ netdev_linux_poll_remove(struct netdev_notifier *notifier_)
     netdev_linux_get_mtu,                                       \
     netdev_linux_get_ifindex,                                   \
     netdev_linux_get_carrier,                                   \
+    netdev_linux_get_miimon,                                    \
     netdev_linux_get_stats,                                     \
     SET_STATS,                                                  \
                                                                 \
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index da0ad45f..9c75ccb6 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -266,6 +266,14 @@ struct netdev_class {
      */
     int (*get_carrier)(const struct netdev *netdev, bool *carrier);
 
+    /* Sets 'miimon' to true if 'netdev' is up according to its MII.  If
+     * 'netdev' does not support MII, may fall back to another method or return
+     * EOPNOTSUPP.
+     *
+     * This function may be set to null if it would always return EOPNOTSUPP.
+     */
+    int (*get_miimon)(const struct netdev *netdev, bool *miimon);
+
     /* Retrieves current device stats for 'netdev' into 'stats'.
      *
      * A network device that supports some statistics but not others, it should
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index cf717cc7..b006a677 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -963,6 +963,7 @@ parse_patch_config(const struct netdev_dev *dev, const struct shash *args,
     netdev_vport_get_mtu,                                   \
     NULL,                       /* get_ifindex */           \
     NULL,                       /* get_carrier */           \
+    NULL,                       /* get_miimon */            \
     netdev_vport_get_stats,                                 \
     netdev_vport_set_stats,                                 \
                                                             \
diff --git a/lib/netdev.c b/lib/netdev.c
index 0f223271..b2c4d663 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -935,6 +935,34 @@ netdev_get_carrier(const struct netdev *netdev)
     return carrier;
 }
 
+/* Returns true if 'netdev' is up according to its MII. */
+bool
+netdev_get_miimon(const struct netdev *netdev)
+{
+    int error;
+    enum netdev_flags flags;
+    bool miimon;
+
+    netdev_get_flags(netdev, &flags);
+    if (!(flags & NETDEV_UP)) {
+        return false;
+    }
+
+    if (!netdev_get_dev(netdev)->netdev_class->get_miimon) {
+        return true;
+    }
+
+    error = netdev_get_dev(netdev)->netdev_class->get_miimon(netdev, &miimon);
+
+    if (error) {
+        VLOG_DBG("%s: failed to get network device MII status, assuming "
+                 "down: %s", netdev_get_name(netdev), strerror(error));
+        miimon = false;
+    }
+
+    return miimon;
+}
+
 /* Retrieves current device stats for 'netdev'. */
 int
 netdev_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
diff --git a/lib/netdev.h b/lib/netdev.h
index 50ad5a3b..179628c6 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -126,6 +126,7 @@ int netdev_get_etheraddr(const struct netdev *, uint8_t mac[6]);
 
 /* PHY interface. */
 bool netdev_get_carrier(const struct netdev *);
+bool netdev_get_miimon(const struct netdev *);
 int netdev_get_features(struct netdev *,
                         uint32_t *current, uint32_t *advertised,
                         uint32_t *supported, uint32_t *peer);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index c15ef364..7fa819cf 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -167,6 +167,9 @@ struct port {
     int updelay, downdelay;     /* Delay before iface goes up/down, in ms. */
     bool bond_compat_is_stale;  /* Need to call port_update_bond_compat()? */
     bool bond_fake_iface;       /* Fake a bond interface for legacy compat? */
+    bool miimon;                /* Use miimon instead of carrier? */
+    long long int bond_miimon_interval; /* Miimon status refresh interval. */
+    long long int bond_miimon_next_update; /* Time of next miimon update. */
     long long int bond_next_fake_iface_update; /* Time of next update. */
     struct netdev_monitor *monitor; /* Tracks carrier up/down status. */
 
@@ -2073,8 +2076,8 @@ bond_link_status_update(struct iface *iface, bool carrier)
         /* Nothing to do. */
         return;
     }
-    VLOG_INFO_RL(&rl, "interface %s: carrier %s",
-                 iface->name, carrier ? "detected" : "dropped");
+    VLOG_INFO_RL(&rl, "interface %s: link state %s",
+                 iface->name, carrier ? "up" : "down");
     if (carrier == iface->enabled) {
         iface->delay_expires = LLONG_MAX;
         VLOG_INFO_RL(&rl, "interface %s: will not be %s",
@@ -2212,18 +2215,36 @@ bond_run(struct bridge *br)
         if (port->n_ifaces >= 2) {
             char *devname;
 
-            /* Track carrier going up and down on interfaces. */
-            while (!netdev_monitor_poll(port->monitor, &devname)) {
-                struct iface *iface;
+            if (port->monitor) {
+                assert(!port->miimon);
 
-                iface = port_lookup_iface(port, devname);
-                if (iface) {
-                    bool carrier = netdev_get_carrier(iface->netdev);
+                /* Track carrier going up and down on interfaces. */
+                while (!netdev_monitor_poll(port->monitor, &devname)) {
+                    struct iface *iface;
+
+                    iface = port_lookup_iface(port, devname);
+                    if (iface) {
+                        bool up = netdev_get_carrier(iface->netdev);
+
+                        bond_link_status_update(iface, up);
+                        port_update_bond_compat(port);
+                    }
+                    free(devname);
+                }
+            } else {
+                assert(port->miimon);
 
-                    bond_link_status_update(iface, carrier);
-                    port_update_bond_compat(port);
+                if (time_msec() >= port->bond_miimon_next_update) {
+                    for (j = 0; j < port->n_ifaces; j++) {
+                        struct iface *iface = port->ifaces[j];
+                        bool up = netdev_get_miimon(iface->netdev);
+
+                        bond_link_status_update(iface, up);
+                        port_update_bond_compat(port);
+                    }
+                    port->bond_miimon_next_update = time_msec() +
+                        port->bond_miimon_interval;
                 }
-                free(devname);
             }
 
             for (j = 0; j < port->n_ifaces; j++) {
@@ -2257,7 +2278,15 @@ bond_wait(struct bridge *br)
         if (port->n_ifaces < 2) {
             continue;
         }
-        netdev_monitor_poll_wait(port->monitor);
+
+        if (port->monitor) {
+            netdev_monitor_poll_wait(port->monitor);
+        }
+
+        if (port->miimon) {
+            poll_timer_wait_until(port->bond_miimon_next_update);
+        }
+
         for (j = 0; j < port->n_ifaces; j++) {
             struct iface *iface = port->ifaces[j];
             if (iface->delay_expires != LLONG_MAX) {
@@ -3324,6 +3353,8 @@ bond_unixctl_show(struct unixctl_conn *conn,
 
     ds_put_format(&ds, "bond_mode: %s\n",
                   bond_mode_to_string(port->bond_mode));
+    ds_put_format(&ds, "bond-detect-mode: %s\n",
+                  port->miimon ? "miimon" : "carrier");
     ds_put_format(&ds, "updelay: %d ms\n", port->updelay);
     ds_put_format(&ds, "downdelay: %d ms\n", port->downdelay);
 
@@ -3655,8 +3686,9 @@ port_del_ifaces(struct port *port, const struct ovsrec_port *cfg)
 static void
 port_reconfigure(struct port *port, const struct ovsrec_port *cfg)
 {
+    const char *detect_mode;
     struct shash new_ifaces;
-    long long int next_rebalance;
+    long long int next_rebalance, miimon_next_update;
     unsigned long *trunks;
     int vlan;
     size_t i;
@@ -3682,6 +3714,29 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg)
         port->bond_next_rebalance = next_rebalance;
     }
 
+    detect_mode = get_port_other_config(cfg, "bond-detect-mode",
+                                        "carrier");
+
+    if (!strcmp(detect_mode, "carrier")) {
+        port->miimon = false;
+    } else if (!strcmp(detect_mode, "miimon")) {
+        port->miimon = true;
+    } else {
+        port->miimon = false;
+        VLOG_WARN("port %s: unsupported bond-detect-mode %s, defaulting to "
+                  "carrier", port->name, detect_mode);
+    }
+
+    port->bond_miimon_interval = atoi(
+        get_port_other_config(cfg, "bond-miimon-interval", "200"));
+    if (port->bond_miimon_interval < 100) {
+        port->bond_miimon_interval = 100;
+    }
+    miimon_next_update = time_msec() + port->bond_miimon_interval;
+    if (port->bond_miimon_next_update > miimon_next_update) {
+        port->bond_miimon_next_update = miimon_next_update;
+    }
+
     if (!port->cfg->bond_mode ||
         !strcmp(port->cfg->bond_mode, bond_mode_to_string(BM_SLB))) {
         port->bond_mode = BM_SLB;
@@ -3888,9 +3943,11 @@ port_update_bonding(struct port *port)
         port->bond_compat_is_stale = true;
         port->bond_fake_iface = port->cfg->bond_fake_iface;
 
-        port->monitor = netdev_monitor_create();
-        for (i = 0; i < port->n_ifaces; i++) {
-            netdev_monitor_add(port->monitor, port->ifaces[i]->netdev);
+        if (!port->miimon) {
+            port->monitor = netdev_monitor_create();
+            for (i = 0; i < port->n_ifaces; i++) {
+                netdev_monitor_add(port->monitor, port->ifaces[i]->netdev);
+            }
         }
     }
 }
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index beb83315..70e56f4c 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
 {"name": "Open_vSwitch",
- "version": "1.0.4",
- "cksum": "1999497811 14145",
+ "version": "1.0.5",
+ "cksum": "2737967217 14145",
  "tables": {
    "Open_vSwitch": {
      "columns": {
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 923d1be6..53c2650d 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -597,6 +597,16 @@
             the bond to another in an attempt to keep usage of each
             interface roughly equal.  The default is 10000 (10
             seconds), and the minimum is 1000 (1 second).</dd>
+          <dt><code>bond-detect-mode</code></dt>
+          <dd> Sets the method used to detect link failures in a bonded port.
+            Options are <code>carrier</code> and <code>miimon</code>. Defaults
+            to <code>carrier</code> which uses each interface's carrier to detect
+            failures.  When set to <code>miimon</code>, will check for failures
+            by polling each interface's MII. </dd>
+          <dt><code>bond-miimon-interval</code></dt>
+          <dd> The number of milliseconds between successive attempts to
+            poll each interface's MII.  Only relevant on ports which use
+            <code>miimon</code> to detect failures. </dd>
         </dl>
       </column>
     </group>
-- 
2.30.2