From: Ethan Jackson Date: Tue, 7 Feb 2012 22:35:09 +0000 (-0800) Subject: cfm: Expose detailed fault status in the database. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b93803967e38ee4b65cd1e720decd8dcbb58d04f;p=openvswitch cfm: Expose detailed fault status in the database. The cfm_fault column of the database is the logical OR of a number of reasons that CFM can be in a faulted state. A controller may want to have more specific information in which case it can look at the cfm_fault_status column which this patch adds. Signed-off-by: Ethan Jackson --- diff --git a/lib/cfm.c b/lib/cfm.c index 537eeaa1..d373f422 100644 --- a/lib/cfm.c +++ b/lib/cfm.c @@ -85,8 +85,8 @@ struct cfm { uint64_t mpid; bool extended; /* Extended mode. */ - bool fault; /* Indicates connectivity fault. */ - bool unexpected_recv; /* Received an unexpected CCM. */ + int fault; /* Connectivity fault status. */ + int recv_fault; /* Bit mask of faults occuring on receive. */ bool opup; /* Operational State. */ bool remote_opup; /* Remote Operational State. */ @@ -136,6 +136,36 @@ cfm_ccm_addr(const struct cfm *cfm) return cfm->extended ? eth_addr_ccm_x : eth_addr_ccm; } +/* Returns the string representation of the given cfm_fault_reason 'reason'. */ +const char * +cfm_fault_reason_to_str(int reason) { + switch (reason) { +#define CFM_FAULT_REASON(NAME, STR) case CFM_FAULT_##NAME: return #STR; + CFM_FAULT_REASONS +#undef CFM_FAULT_REASON + default: return ""; + } +} + +static void +ds_put_cfm_fault(struct ds *ds, int fault) +{ + size_t length = ds->length; + int i; + + for (i = 0; i < CFM_FAULT_N_REASONS; i++) { + int reason = 1 << i; + + if (fault & reason) { + ds_put_format(ds, "%s ", cfm_fault_reason_to_str(reason)); + } + } + + if (ds->length > length) { + ds_truncate(ds, ds->length - 1); + } +} + static void cfm_generate_maid(struct cfm *cfm) { @@ -291,8 +321,8 @@ cfm_run(struct cfm *cfm) struct remote_mp *rmp, *rmp_next; bool old_cfm_fault = cfm->fault; - cfm->fault = cfm->unexpected_recv; - cfm->unexpected_recv = false; + cfm->fault = cfm->recv_fault; + cfm->recv_fault = 0; cfm->rmps_array_len = 0; free(cfm->rmps_array); @@ -313,13 +343,13 @@ cfm_run(struct cfm *cfm) if (rmp->mpid == cfm->mpid) { VLOG_WARN_RL(&rl,"%s: received CCM with local MPID" " %"PRIu64, cfm->name, rmp->mpid); - cfm->fault = true; + cfm->fault |= CFM_FAULT_LOOPBACK; } if (rmp->rdi) { VLOG_DBG("%s: RDI bit flagged from RMP %"PRIu64, cfm->name, rmp->mpid); - cfm->fault = true; + cfm->fault |= CFM_FAULT_RDI; } if (!rmp->opup) { @@ -331,12 +361,16 @@ cfm_run(struct cfm *cfm) } if (hmap_is_empty(&cfm->remote_mps)) { - cfm->fault = true; + cfm->fault |= CFM_FAULT_RECV; } if (old_cfm_fault != cfm->fault) { - VLOG_INFO_RL(&rl, "%s: CFM fault status changed to %s", - cfm->name, cfm->fault ? "true" : "false"); + struct ds ds = DS_EMPTY_INITIALIZER; + + ds_put_cfm_fault(&ds, cfm->fault); + VLOG_INFO_RL(&rl, "%s: CFM fault status changed: %s", cfm->name, + ds_cstr_ro(&ds)); + ds_destroy(&ds); } timer_set_duration(&cfm->fault_timer, interval); @@ -481,7 +515,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) * bonds. Furthermore, faults can be maliciously triggered by crafting * invalid CCMs. */ if (memcmp(ccm->maid, cfm->maid, sizeof ccm->maid)) { - cfm->unexpected_recv = true; + cfm->recv_fault |= CFM_FAULT_MAID; VLOG_WARN_RL(&rl, "%s: Received unexpected remote MAID from MAC " ETH_ADDR_FMT, cfm->name, ETH_ADDR_ARGS(eth->eth_src)); } else { @@ -522,7 +556,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) rmp = xzalloc(sizeof *rmp); hmap_insert(&cfm->remote_mps, &rmp->node, hash_mpid(ccm_mpid)); } else { - cfm->unexpected_recv = true; + cfm->recv_fault |= CFM_FAULT_OVERFLOW; VLOG_WARN_RL(&rl, "%s: dropped CCM with MPID %"PRIu64" from MAC " ETH_ADDR_FMT, cfm->name, ccm_mpid, @@ -551,13 +585,14 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) } } -/* Gets the fault status of 'cfm'. Returns true when 'cfm' has detected - * connectivity problems, false otherwise. */ -bool +/* Gets the fault status of 'cfm'. Returns a bit mask of 'cfm_fault_reason's + * indicating the cause of the connectivity fault, or zero if there is no + * fault. */ +int cfm_get_fault(const struct cfm *cfm) { if (cfm->fault_override >= 0) { - return cfm->fault_override; + return cfm->fault_override ? CFM_FAULT_OVERRIDE : 0; } return cfm->fault; } @@ -602,11 +637,16 @@ cfm_print_details(struct ds *ds, const struct cfm *cfm) struct remote_mp *rmp; ds_put_format(ds, "---- %s ----\n", cfm->name); - ds_put_format(ds, "MPID %"PRIu64":%s%s%s%s\n", cfm->mpid, + ds_put_format(ds, "MPID %"PRIu64":%s%s\n", cfm->mpid, cfm->extended ? " extended" : "", - cfm_get_fault(cfm) ? " fault" : "", - cfm->fault_override >= 0 ? " fault_override" : "", - cfm->unexpected_recv ? " unexpected_recv" : ""); + cfm->fault_override >= 0 ? " fault_override" : ""); + + + if (cfm_get_fault(cfm)) { + ds_put_cstr(ds, "\tfault: "); + ds_put_cfm_fault(ds, cfm_get_fault(cfm)); + ds_put_cstr(ds, "\n"); + } ds_put_format(ds, "\topstate: %s\n", cfm->opup ? "up" : "down"); ds_put_format(ds, "\tremote_opstate: %s\n", diff --git a/lib/cfm.h b/lib/cfm.h index 5106a51b..6d23293f 100644 --- a/lib/cfm.h +++ b/lib/cfm.h @@ -24,6 +24,28 @@ struct flow; struct ofpbuf; +#define CFM_FAULT_REASONS \ + CFM_FAULT_REASON(RECV, recv) \ + CFM_FAULT_REASON(RDI, rdi) \ + CFM_FAULT_REASON(MAID, maid) \ + CFM_FAULT_REASON(LOOPBACK, loopback) \ + CFM_FAULT_REASON(OVERFLOW, overflow) \ + CFM_FAULT_REASON(OVERRIDE, override) + +enum cfm_fault_bit_index { +#define CFM_FAULT_REASON(NAME, STR) CFM_FAULT_INDEX_##NAME, + CFM_FAULT_REASONS +#undef CFM_FAULT_REASON + CFM_FAULT_N_REASONS +}; + +enum cfm_fault_reason { +#define CFM_FAULT_REASON(NAME, STR) \ + CFM_FAULT_##NAME = 1 << CFM_FAULT_INDEX_##NAME, + CFM_FAULT_REASONS +#undef CFM_FAULT_REASON +}; + struct cfm_settings { uint64_t mpid; /* The MPID of this CFM. */ int interval; /* The requested transmission interval. */ @@ -43,9 +65,10 @@ void cfm_wait(struct cfm *); bool cfm_configure(struct cfm *, const struct cfm_settings *); bool cfm_should_process_flow(const struct cfm *cfm, const struct flow *); void cfm_process_heartbeat(struct cfm *, const struct ofpbuf *packet); -bool cfm_get_fault(const struct cfm *); +int cfm_get_fault(const struct cfm *); bool cfm_get_opup(const struct cfm *); void cfm_get_remote_mpids(const struct cfm *, const uint64_t **rmps, size_t *n_rmps); +const char *cfm_fault_reason_to_str(int fault); #endif /* cfm.h */ diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index cb97188a..3927a2e9 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -961,9 +961,10 @@ struct ofproto_class { * support CFM, as does a null pointer. */ int (*set_cfm)(struct ofport *ofport, const struct cfm_settings *s); - /* Checks the fault status of CFM configured on 'ofport'. Returns 1 if CFM - * is faulted (generally indicating a connectivity problem), 0 if CFM is - * not faulted, or -1 if CFM is not enabled on 'port' + /* Checks the fault status of CFM configured on 'ofport'. Returns a + * bitmask of 'cfm_fault_reason's to indicate a CFM fault (generally + * indicating a connectivity problem). Returns zero if CFM is not faulted, + * and -1 if CFM is not enabled on 'port'. * * This function may be a null pointer if the ofproto implementation does * not support CFM. */ diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 473ae41b..7e94b219 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -2449,9 +2449,10 @@ ofproto_get_netflow_ids(const struct ofproto *ofproto, ofproto->ofproto_class->get_netflow_ids(ofproto, engine_type, engine_id); } -/* Checks the fault status of CFM for 'ofp_port' within 'ofproto'. Returns 1 - * if CFM is faulted (generally indiciating a connectivity problem), 0 if CFM - * is not faulted, and -1 if CFM is not enabled on 'ofp_port'. */ +/* Checks the fault status of CFM for 'ofp_port' within 'ofproto'. Returns a + * bitmask of 'cfm_fault_reason's to indicate a CFM fault (generally + * indicating a connectivity problem). Returns zero if CFM is not faulted, + * and -1 if CFM is not enabled on 'port'. */ int ofproto_port_get_cfm_fault(const struct ofproto *ofproto, uint16_t ofp_port) { diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index c175c58f..15fb632d 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -285,6 +285,7 @@ bridge_init(const char *remote) ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_statistics); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_status); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_fault); + ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_fault_status); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_remote_mpids); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_lacp_current); ovsdb_idl_omit(idl, &ovsrec_interface_col_external_ids); @@ -1554,10 +1555,23 @@ iface_refresh_cfm_stats(struct iface *iface) fault = ofproto_port_get_cfm_fault(iface->port->bridge->ofproto, iface->ofp_port); if (fault >= 0) { + const char *reasons[CFM_FAULT_N_REASONS]; bool fault_bool = fault; + size_t i, j; + + j = 0; + for (i = 0; i < CFM_FAULT_N_REASONS; i++) { + int reason = 1 << i; + if (fault & reason) { + reasons[j++] = cfm_fault_reason_to_str(reason); + } + } + ovsrec_interface_set_cfm_fault(cfg, &fault_bool, 1); + ovsrec_interface_set_cfm_fault_status(cfg, (char **) reasons, j); } else { ovsrec_interface_set_cfm_fault(cfg, NULL, 0); + ovsrec_interface_set_cfm_fault_status(cfg, NULL, 0); } error = ofproto_port_get_cfm_remote_mpids(iface->port->bridge->ofproto, @@ -3046,6 +3060,7 @@ iface_clear_db_record(const struct ovsrec_interface *if_cfg) ovsrec_interface_set_link_state(if_cfg, NULL); ovsrec_interface_set_mtu(if_cfg, NULL, 0); ovsrec_interface_set_cfm_fault(if_cfg, NULL, 0); + ovsrec_interface_set_cfm_fault_status(if_cfg, NULL, 0); ovsrec_interface_set_cfm_remote_mpids(if_cfg, NULL, 0); ovsrec_interface_set_lacp_current(if_cfg, NULL, 0); ovsrec_interface_set_statistics(if_cfg, NULL, NULL, 0); diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema index 6f2c458f..c260c43d 100644 --- a/vswitchd/vswitch.ovsschema +++ b/vswitchd/vswitch.ovsschema @@ -1,6 +1,6 @@ {"name": "Open_vSwitch", - "version": "6.5.0", - "cksum": "2847700438 16419", + "version": "6.6.0", + "cksum": "3676036878 16515", "tables": { "Open_vSwitch": { "columns": { @@ -205,6 +205,8 @@ "min": 0, "max": 1}, "ephemeral": true}, + "cfm_fault_status": { + "type": {"key": "string", "min": 0, "max": "unlimited"}}, "lacp_current": { "type": {"key": {"type": "boolean"}, "min": 0, "max": 1}, diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 7be78911..9e38d7f2 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -1699,6 +1699,43 @@

+ + Indicates a CFM fault was triggered due to a lack of CCMs received on + the . + + + + Indicates a CFM fault was triggered due to the reception of a CCM with + the RDI bit flagged. Endpoints set the RDI bit in their CCMs when they + are not receiving CCMs themselves. This typically indicates a + unidirectional connectivity failure. + + + + Indicates a CFM fault was triggered due to the reception of a CCM with + a MAID other than the one Open vSwitch uses. CFM broadcasts are tagged + with an identification number in addition to the MPID called the MAID. + Open vSwitch only supports receiving CCM broadcasts tagged with the + MAID it uses internally. + + + + Indicates a CFM fault was triggered due to the reception of a CCM + advertising the same MPID configured in the + column of this . This may indicate a loop in + the network. + + + + Indicates a CFM fault was triggered because the CFM module received + CCMs from more remote endpoints than it can keep track of. + + + + Indicates a CFM fault was manually triggered by an administrator using + an ovs-appctl command. + + When CFM is properly configured, Open vSwitch will occasionally receive CCM broadcasts. These broadcasts contain the MPID of the