Added handling of previously ignored cfm faults.
author Mehak Mahajan <mmahajan@nicira.com>
Thu, 5 Apr 2012 00:36:00 +0000 (17:36 -0700)
committer Mehak Mahajan <mmahajan@nicira.com>
Fri, 6 Apr 2012 18:59:46 +0000 (11:59 -0700)
CCM frames that are out of sequence or that carry an unexpected cfm_interval
were previously accepted without raising a fault.  With this patch such frames
are no longer treated as valid: they raise the new "interval" and "sequence"
CFM fault reasons.

Signed-off-by: Mehak Mahajan <mmahajan@nicira.com>
lib/cfm.c
lib/cfm.h
vswitchd/vswitch.xml

index ea39e27a1829b0d52476050b365518c0022f9cea..d0ae55d1acb62f2fdb761dd22ead4c871ace78b8 100644 (file)
--- a/lib/cfm.c
+++ b/lib/cfm.c
@@ -87,8 +87,9 @@ struct cfm {
 
     uint64_t mpid;
     bool extended;         /* Extended mode. */
-    int fault;             /* Connectivity fault status. */
-    int recv_fault;        /* Bit mask of faults occuring on receive. */
+    enum cfm_fault_reason fault;  /* Connectivity fault status. */
+    enum cfm_fault_reason recv_fault;  /* Bit mask of faults occurring on
+                                          receive. */
     bool opup;             /* Operational State. */
     bool remote_opup;      /* Remote Operational State. */
 
@@ -127,8 +128,6 @@ struct remote_mp {
     struct hmap_node node; /* Node in 'remote_mps' map. */
 
     bool recv;           /* CCM was received since last fault check. */
-    bool rdi;            /* Remote Defect Indicator. Indicates remote_mp isn't
-                            receiving CCMs that it's expecting to. */
     bool opup;           /* Operational State. */
     uint32_t seq;        /* Most recently received sequence number. */
     uint8_t num_health_ccm; /* Number of received ccm frames every
@@ -384,18 +383,6 @@ cfm_run(struct cfm *cfm)
             } else {
                 rmp->recv = false;
 
-                if (rmp->mpid == cfm->mpid) {
-                    VLOG_WARN_RL(&rl,"%s: received CCM with local MPID"
-                                 " %"PRIu64, cfm->name, rmp->mpid);
-                    cfm->fault |= CFM_FAULT_LOOPBACK;
-                }
-
-                if (rmp->rdi) {
-                    VLOG_DBG("%s: RDI bit flagged from RMP %"PRIu64, cfm->name,
-                             rmp->mpid);
-                    cfm->fault |= CFM_FAULT_RDI;
-                }
-
                 if (!rmp->opup) {
                     cfm->remote_opup = rmp->opup;
                 }
@@ -563,7 +550,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
      * expensive changes to the network topology.  It seems prudent to trigger
      * them judiciously, especially when CFM is used to check slave status of
      * bonds. Furthermore, faults can be maliciously triggered by crafting
-     * invalid CCMs. */
+     * unexpected CCMs. */
     if (memcmp(ccm->maid, cfm->maid, sizeof ccm->maid)) {
         cfm->recv_fault |= CFM_FAULT_MAID;
         VLOG_WARN_RL(&rl, "%s: Received unexpected remote MAID from MAC "
@@ -577,7 +564,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
         uint64_t ccm_mpid;
         uint32_t ccm_seq;
         bool ccm_opdown;
-        bool fault = false;
+        enum cfm_fault_reason cfm_fault = 0;
 
         if (cfm->extended) {
             ccm_mpid = ntohll(ccm->mpid64);
@@ -589,18 +576,18 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
         ccm_seq = ntohl(ccm->seq);
 
         if (ccm_interval != cfm->ccm_interval) {
-            VLOG_WARN_RL(&rl, "%s: received a CCM with an invalid interval"
+            cfm_fault |= CFM_FAULT_INTERVAL;
+            VLOG_WARN_RL(&rl, "%s: received a CCM with an unexpected interval"
                          " (%"PRIu8") from RMP %"PRIu64, cfm->name,
                          ccm_interval, ccm_mpid);
-            fault = true;
         }
 
         if (cfm->extended && ccm_interval == 0
             && ccm_interval_ms_x != cfm->ccm_interval_ms) {
-            VLOG_WARN_RL(&rl, "%s: received a CCM with an invalid extended"
+            cfm_fault |= CFM_FAULT_INTERVAL;
+            VLOG_WARN_RL(&rl, "%s: received a CCM with an unexpected extended"
                          " interval (%"PRIu16"ms) from RMP %"PRIu64, cfm->name,
                          ccm_interval_ms_x, ccm_mpid);
-            fault = true;
         }
 
         rmp = lookup_remote_mp(cfm, ccm_mpid);
@@ -609,38 +596,46 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
                 rmp = xzalloc(sizeof *rmp);
                 hmap_insert(&cfm->remote_mps, &rmp->node, hash_mpid(ccm_mpid));
             } else {
-                cfm->recv_fault |= CFM_FAULT_OVERFLOW;
+                cfm_fault |= CFM_FAULT_OVERFLOW;
                 VLOG_WARN_RL(&rl,
                              "%s: dropped CCM with MPID %"PRIu64" from MAC "
                              ETH_ADDR_FMT, cfm->name, ccm_mpid,
                              ETH_ADDR_ARGS(eth->eth_src));
-                fault = true;
             }
         }
 
+        if (ccm_rdi) {
+            cfm_fault |= CFM_FAULT_RDI;
+            VLOG_DBG("%s: RDI bit flagged from RMP %"PRIu64, cfm->name,
+                     rmp->mpid);
+        }
+
         VLOG_DBG("%s: received CCM (seq %"PRIu32") (mpid %"PRIu64")"
                  " (interval %"PRIu8") (RDI %s)", cfm->name, ccm_seq,
                  ccm_mpid, ccm_interval, ccm_rdi ? "true" : "false");
 
-        if (ccm_rdi) {
-            fault = true;
-        }
         if (rmp) {
+            if (rmp->mpid == cfm->mpid) {
+                cfm_fault |= CFM_FAULT_LOOPBACK;
+                VLOG_WARN_RL(&rl,"%s: received CCM with local MPID"
+                             " %"PRIu64, cfm->name, rmp->mpid);
+            }
+
             if (rmp->seq && ccm_seq != (rmp->seq + 1)) {
+                cfm_fault |= CFM_FAULT_SEQUENCE;
                 VLOG_WARN_RL(&rl, "%s: (mpid %"PRIu64") detected sequence"
                              " numbers which indicate possible connectivity"
                              " problems (previous %"PRIu32") (current %"PRIu32
                              ")", cfm->name, ccm_mpid, rmp->seq, ccm_seq);
-                fault = true;
             }
 
             rmp->mpid = ccm_mpid;
-            rmp->recv = true;
-            if (!fault) {
+            if (!cfm_fault) {
                 rmp->num_health_ccm++;
             }
+            rmp->recv = true;
+            cfm->recv_fault |= cfm_fault;
             rmp->seq = ccm_seq;
-            rmp->rdi = ccm_rdi;
             rmp->opup = !ccm_opdown;
         }
     }
@@ -735,9 +730,7 @@ cfm_print_details(struct ds *ds, const struct cfm *cfm)
                   timer_msecs_until_expired(&cfm->fault_timer));
 
     HMAP_FOR_EACH (rmp, node, &cfm->remote_mps) {
-        ds_put_format(ds, "Remote MPID %"PRIu64":%s\n",
-                      rmp->mpid,
-                      rmp->rdi ? " rdi" : "");
+        ds_put_format(ds, "Remote MPID %"PRIu64"\n", rmp->mpid);
         ds_put_format(ds, "\trecv since check: %s\n",
                       rmp->recv ? "true" : "false");
         ds_put_format(ds, "\topstate: %s\n", rmp->opup? "up" : "down");
index 2b4f888f576ac9e33bbe8526856f3dc80cd6506f..509da2b21c3e1b42b03878069d819327651e099c 100644 (file)
--- a/lib/cfm.h
+++ b/lib/cfm.h
@@ -32,7 +32,9 @@ struct ofpbuf;
     CFM_FAULT_REASON(MAID, maid)           \
     CFM_FAULT_REASON(LOOPBACK, loopback)   \
     CFM_FAULT_REASON(OVERFLOW, overflow)   \
-    CFM_FAULT_REASON(OVERRIDE, override)
+    CFM_FAULT_REASON(OVERRIDE, override)   \
+    CFM_FAULT_REASON(INTERVAL, interval)   \
+    CFM_FAULT_REASON(SEQUENCE, sequence)
 
 enum cfm_fault_bit_index {
 #define CFM_FAULT_REASON(NAME, STR) CFM_FAULT_INDEX_##NAME,
index 03c8539d0a448e31a5594f6173c00ae6f514b278..1128db945f7e75c0d49f085d7d02bb1a103c17d9 100644 (file)
         an <code>ovs-appctl</code> command.
       </column>
 
+      <column name="cfm_fault_status" key="interval">
+        Indicates a CFM fault was triggered due to the reception of a CCM
+        frame having an unexpected interval.
+      </column>
+
+      <column name="cfm_fault_status" key="sequence">
+        Indicates a CFM fault was triggered because the CFM module received
+        a CCM frame with a sequence number that it was not expecting.
+      </column>
+
       <column name="cfm_health">
         <p>
           Indicates the health of the interface as a percentage of CCM frames