From: Ethan Jackson
Date: Tue, 7 Feb 2012 22:35:09 +0000 (-0800)
Subject: cfm: Expose detailed fault status in the database.
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b93803967e38ee4b65cd1e720decd8dcbb58d04f;p=openvswitch
cfm: Expose detailed fault status in the database.
The cfm_fault column of the database is the logical OR of a number
of reasons that CFM can be in a faulted state. A controller may
want more specific information, in which case it can look at the
cfm_fault_status column, which this patch adds.
Signed-off-by: Ethan Jackson
---
diff --git a/lib/cfm.c b/lib/cfm.c
index 537eeaa1..d373f422 100644
--- a/lib/cfm.c
+++ b/lib/cfm.c
@@ -85,8 +85,8 @@ struct cfm {
uint64_t mpid;
bool extended; /* Extended mode. */
- bool fault; /* Indicates connectivity fault. */
- bool unexpected_recv; /* Received an unexpected CCM. */
+ int fault; /* Connectivity fault status. */
+ int recv_fault; /* Bit mask of faults occurring on receive. */
bool opup; /* Operational State. */
bool remote_opup; /* Remote Operational State. */
@@ -136,6 +136,36 @@ cfm_ccm_addr(const struct cfm *cfm)
return cfm->extended ? eth_addr_ccm_x : eth_addr_ccm;
}
+/* Returns the string representation of the given cfm_fault_reason 'reason'. */
+const char *
+cfm_fault_reason_to_str(int reason) {
+ switch (reason) {
+#define CFM_FAULT_REASON(NAME, STR) case CFM_FAULT_##NAME: return #STR;
+ CFM_FAULT_REASONS
+#undef CFM_FAULT_REASON
+ default: return "";
+ }
+}
+
+static void
+ds_put_cfm_fault(struct ds *ds, int fault)
+{
+ size_t length = ds->length;
+ int i;
+
+ for (i = 0; i < CFM_FAULT_N_REASONS; i++) {
+ int reason = 1 << i;
+
+ if (fault & reason) {
+ ds_put_format(ds, "%s ", cfm_fault_reason_to_str(reason));
+ }
+ }
+
+ if (ds->length > length) {
+ ds_truncate(ds, ds->length - 1);
+ }
+}
+
static void
cfm_generate_maid(struct cfm *cfm)
{
@@ -291,8 +321,8 @@ cfm_run(struct cfm *cfm)
struct remote_mp *rmp, *rmp_next;
bool old_cfm_fault = cfm->fault;
- cfm->fault = cfm->unexpected_recv;
- cfm->unexpected_recv = false;
+ cfm->fault = cfm->recv_fault;
+ cfm->recv_fault = 0;
cfm->rmps_array_len = 0;
free(cfm->rmps_array);
@@ -313,13 +343,13 @@ cfm_run(struct cfm *cfm)
if (rmp->mpid == cfm->mpid) {
VLOG_WARN_RL(&rl,"%s: received CCM with local MPID"
" %"PRIu64, cfm->name, rmp->mpid);
- cfm->fault = true;
+ cfm->fault |= CFM_FAULT_LOOPBACK;
}
if (rmp->rdi) {
VLOG_DBG("%s: RDI bit flagged from RMP %"PRIu64, cfm->name,
rmp->mpid);
- cfm->fault = true;
+ cfm->fault |= CFM_FAULT_RDI;
}
if (!rmp->opup) {
@@ -331,12 +361,16 @@ cfm_run(struct cfm *cfm)
}
if (hmap_is_empty(&cfm->remote_mps)) {
- cfm->fault = true;
+ cfm->fault |= CFM_FAULT_RECV;
}
if (old_cfm_fault != cfm->fault) {
- VLOG_INFO_RL(&rl, "%s: CFM fault status changed to %s",
- cfm->name, cfm->fault ? "true" : "false");
+ struct ds ds = DS_EMPTY_INITIALIZER;
+
+ ds_put_cfm_fault(&ds, cfm->fault);
+ VLOG_INFO_RL(&rl, "%s: CFM fault status changed: %s", cfm->name,
+ ds_cstr_ro(&ds));
+ ds_destroy(&ds);
}
timer_set_duration(&cfm->fault_timer, interval);
@@ -481,7 +515,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
* bonds. Furthermore, faults can be maliciously triggered by crafting
* invalid CCMs. */
if (memcmp(ccm->maid, cfm->maid, sizeof ccm->maid)) {
- cfm->unexpected_recv = true;
+ cfm->recv_fault |= CFM_FAULT_MAID;
VLOG_WARN_RL(&rl, "%s: Received unexpected remote MAID from MAC "
ETH_ADDR_FMT, cfm->name, ETH_ADDR_ARGS(eth->eth_src));
} else {
@@ -522,7 +556,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
rmp = xzalloc(sizeof *rmp);
hmap_insert(&cfm->remote_mps, &rmp->node, hash_mpid(ccm_mpid));
} else {
- cfm->unexpected_recv = true;
+ cfm->recv_fault |= CFM_FAULT_OVERFLOW;
VLOG_WARN_RL(&rl,
"%s: dropped CCM with MPID %"PRIu64" from MAC "
ETH_ADDR_FMT, cfm->name, ccm_mpid,
@@ -551,13 +585,14 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
}
}
-/* Gets the fault status of 'cfm'. Returns true when 'cfm' has detected
- * connectivity problems, false otherwise. */
-bool
+/* Gets the fault status of 'cfm'. Returns a bit mask of 'cfm_fault_reason's
+ * indicating the cause of the connectivity fault, or zero if there is no
+ * fault. */
+int
cfm_get_fault(const struct cfm *cfm)
{
if (cfm->fault_override >= 0) {
- return cfm->fault_override;
+ return cfm->fault_override ? CFM_FAULT_OVERRIDE : 0;
}
return cfm->fault;
}
@@ -602,11 +637,16 @@ cfm_print_details(struct ds *ds, const struct cfm *cfm)
struct remote_mp *rmp;
ds_put_format(ds, "---- %s ----\n", cfm->name);
- ds_put_format(ds, "MPID %"PRIu64":%s%s%s%s\n", cfm->mpid,
+ ds_put_format(ds, "MPID %"PRIu64":%s%s\n", cfm->mpid,
cfm->extended ? " extended" : "",
- cfm_get_fault(cfm) ? " fault" : "",
- cfm->fault_override >= 0 ? " fault_override" : "",
- cfm->unexpected_recv ? " unexpected_recv" : "");
+ cfm->fault_override >= 0 ? " fault_override" : "");
+
+
+ if (cfm_get_fault(cfm)) {
+ ds_put_cstr(ds, "\tfault: ");
+ ds_put_cfm_fault(ds, cfm_get_fault(cfm));
+ ds_put_cstr(ds, "\n");
+ }
ds_put_format(ds, "\topstate: %s\n", cfm->opup ? "up" : "down");
ds_put_format(ds, "\tremote_opstate: %s\n",
diff --git a/lib/cfm.h b/lib/cfm.h
index 5106a51b..6d23293f 100644
--- a/lib/cfm.h
+++ b/lib/cfm.h
@@ -24,6 +24,28 @@
struct flow;
struct ofpbuf;
+#define CFM_FAULT_REASONS \
+ CFM_FAULT_REASON(RECV, recv) \
+ CFM_FAULT_REASON(RDI, rdi) \
+ CFM_FAULT_REASON(MAID, maid) \
+ CFM_FAULT_REASON(LOOPBACK, loopback) \
+ CFM_FAULT_REASON(OVERFLOW, overflow) \
+ CFM_FAULT_REASON(OVERRIDE, override)
+
+enum cfm_fault_bit_index {
+#define CFM_FAULT_REASON(NAME, STR) CFM_FAULT_INDEX_##NAME,
+ CFM_FAULT_REASONS
+#undef CFM_FAULT_REASON
+ CFM_FAULT_N_REASONS
+};
+
+enum cfm_fault_reason {
+#define CFM_FAULT_REASON(NAME, STR) \
+ CFM_FAULT_##NAME = 1 << CFM_FAULT_INDEX_##NAME,
+ CFM_FAULT_REASONS
+#undef CFM_FAULT_REASON
+};
+
struct cfm_settings {
uint64_t mpid; /* The MPID of this CFM. */
int interval; /* The requested transmission interval. */
@@ -43,9 +65,10 @@ void cfm_wait(struct cfm *);
bool cfm_configure(struct cfm *, const struct cfm_settings *);
bool cfm_should_process_flow(const struct cfm *cfm, const struct flow *);
void cfm_process_heartbeat(struct cfm *, const struct ofpbuf *packet);
-bool cfm_get_fault(const struct cfm *);
+int cfm_get_fault(const struct cfm *);
bool cfm_get_opup(const struct cfm *);
void cfm_get_remote_mpids(const struct cfm *, const uint64_t **rmps,
size_t *n_rmps);
+const char *cfm_fault_reason_to_str(int fault);
#endif /* cfm.h */
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index cb97188a..3927a2e9 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -961,9 +961,10 @@ struct ofproto_class {
* support CFM, as does a null pointer. */
int (*set_cfm)(struct ofport *ofport, const struct cfm_settings *s);
- /* Checks the fault status of CFM configured on 'ofport'. Returns 1 if CFM
- * is faulted (generally indicating a connectivity problem), 0 if CFM is
- * not faulted, or -1 if CFM is not enabled on 'port'
+ /* Checks the fault status of CFM configured on 'ofport'. Returns a
+ * bitmask of 'cfm_fault_reason's to indicate a CFM fault (generally
+ * indicating a connectivity problem). Returns zero if CFM is not faulted,
+ * and -1 if CFM is not enabled on 'ofport'.
*
* This function may be a null pointer if the ofproto implementation does
* not support CFM. */
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 473ae41b..7e94b219 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2449,9 +2449,10 @@ ofproto_get_netflow_ids(const struct ofproto *ofproto,
ofproto->ofproto_class->get_netflow_ids(ofproto, engine_type, engine_id);
}
-/* Checks the fault status of CFM for 'ofp_port' within 'ofproto'. Returns 1
- * if CFM is faulted (generally indiciating a connectivity problem), 0 if CFM
- * is not faulted, and -1 if CFM is not enabled on 'ofp_port'. */
+/* Checks the fault status of CFM for 'ofp_port' within 'ofproto'. Returns a
+ * bitmask of 'cfm_fault_reason's to indicate a CFM fault (generally
+ * indicating a connectivity problem). Returns zero if CFM is not faulted,
+ * and -1 if CFM is not enabled on 'ofp_port'. */
int
ofproto_port_get_cfm_fault(const struct ofproto *ofproto, uint16_t ofp_port)
{
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index c175c58f..15fb632d 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -285,6 +285,7 @@ bridge_init(const char *remote)
ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_statistics);
ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_status);
ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_fault);
+ ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_fault_status);
ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_remote_mpids);
ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_lacp_current);
ovsdb_idl_omit(idl, &ovsrec_interface_col_external_ids);
@@ -1554,10 +1555,23 @@ iface_refresh_cfm_stats(struct iface *iface)
fault = ofproto_port_get_cfm_fault(iface->port->bridge->ofproto,
iface->ofp_port);
if (fault >= 0) {
+ const char *reasons[CFM_FAULT_N_REASONS];
bool fault_bool = fault;
+ size_t i, j;
+
+ j = 0;
+ for (i = 0; i < CFM_FAULT_N_REASONS; i++) {
+ int reason = 1 << i;
+ if (fault & reason) {
+ reasons[j++] = cfm_fault_reason_to_str(reason);
+ }
+ }
+
ovsrec_interface_set_cfm_fault(cfg, &fault_bool, 1);
+ ovsrec_interface_set_cfm_fault_status(cfg, (char **) reasons, j);
} else {
ovsrec_interface_set_cfm_fault(cfg, NULL, 0);
+ ovsrec_interface_set_cfm_fault_status(cfg, NULL, 0);
}
error = ofproto_port_get_cfm_remote_mpids(iface->port->bridge->ofproto,
@@ -3046,6 +3060,7 @@ iface_clear_db_record(const struct ovsrec_interface *if_cfg)
ovsrec_interface_set_link_state(if_cfg, NULL);
ovsrec_interface_set_mtu(if_cfg, NULL, 0);
ovsrec_interface_set_cfm_fault(if_cfg, NULL, 0);
+ ovsrec_interface_set_cfm_fault_status(if_cfg, NULL, 0);
ovsrec_interface_set_cfm_remote_mpids(if_cfg, NULL, 0);
ovsrec_interface_set_lacp_current(if_cfg, NULL, 0);
ovsrec_interface_set_statistics(if_cfg, NULL, NULL, 0);
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 6f2c458f..c260c43d 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
{"name": "Open_vSwitch",
- "version": "6.5.0",
- "cksum": "2847700438 16419",
+ "version": "6.6.0",
+ "cksum": "3676036878 16515",
"tables": {
"Open_vSwitch": {
"columns": {
@@ -205,6 +205,8 @@
"min": 0,
"max": 1},
"ephemeral": true},
+ "cfm_fault_status": {
+ "type": {"key": "string", "min": 0, "max": "unlimited"}},
"lacp_current": {
"type": {"key": {"type": "boolean"},
"min": 0, "max": 1},
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 7be78911..9e38d7f2 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1699,6 +1699,43 @@
+ <column name="cfm_fault_status" key="recv">
+ Indicates a CFM fault was triggered due to a lack of CCMs received on
+ the <ref table="Interface"/>.
+ </column>
+
+ <column name="cfm_fault_status" key="rdi">
+ Indicates a CFM fault was triggered due to the reception of a CCM with
+ the RDI bit flagged. Endpoints set the RDI bit in their CCMs when they
+ are not receiving CCMs themselves. This typically indicates a
+ unidirectional connectivity failure.
+ </column>
+
+ <column name="cfm_fault_status" key="maid">
+ Indicates a CFM fault was triggered due to the reception of a CCM with
+ a MAID other than the one Open vSwitch uses. CFM broadcasts are tagged
+ with an identification number in addition to the MPID called the MAID.
+ Open vSwitch only supports receiving CCM broadcasts tagged with the
+ MAID it uses internally.
+ </column>
+
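+ <column name="cfm_fault_status" key="loopback">
+ Indicates a CFM fault was triggered due to the reception of a CCM
+ advertising the same MPID configured in the <ref column="cfm_mpid"/>
+ column of this <ref table="Interface"/>. This may indicate a loop in
+ the network.
+ </column>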
+
+ <column name="cfm_fault_status" key="overflow">
+ Indicates a CFM fault was triggered because the CFM module received
+ CCMs from more remote endpoints than it can keep track of.
+ </column>
+
+ <column name="cfm_fault_status" key="override">
+ Indicates a CFM fault was manually triggered by an administrator using
+ an <code>ovs-appctl</code> command.
+ </column>
+
When CFM is properly configured, Open vSwitch will occasionally
receive CCM broadcasts. These broadcasts contain the MPID of the