From c75b7e39d973cc9f4869c84d48eeb3b66afb2971 Mon Sep 17 00:00:00 2001 From: Mehak Mahajan Date: Thu, 29 Mar 2012 14:34:51 -0700 Subject: [PATCH] Granular link health statistics for cfm. The changes display the cfm_health of an interface. The cfm_health is an exponential weighted moving average of the health of all remote_mpids. The value can vary from 0 to 100, 100 being very healthy and 0 being unhealthy. Feature #10363 Requested-by: Ethan Jackson Signed-off-by: Mehak Mahajan --- NEWS | 2 ++ lib/cfm.c | 69 ++++++++++++++++++++++++++++++++++++++ lib/cfm.h | 1 + ofproto/ofproto-dpif.c | 9 +++++ ofproto/ofproto-provider.h | 11 ++++++ ofproto/ofproto.c | 13 +++++++ ofproto/ofproto.h | 3 +- vswitchd/bridge.c | 11 ++++++ vswitchd/vswitch.ovsschema | 9 +++-- vswitchd/vswitch.xml | 21 ++++++++++++ 10 files changed, 146 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index a466f923..ed3fc888 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,8 @@ post-v1.6.0 - Added ability to configure dscp setting for manager and controller connections. By default, these connections have a DSCP value of Internetwork Control (0xc0). + - Added the granular link health statistics, 'cfm_health', to an + interface. v1.6.0 - xx xxx xxxx diff --git a/lib/cfm.c b/lib/cfm.c index 8b9e5bc5..ea39e27a 100644 --- a/lib/cfm.c +++ b/lib/cfm.c @@ -60,6 +60,7 @@ static const uint8_t eth_addr_ccm_x[6] = { #define CCM_MAID_LEN 48 #define CCM_OPCODE 1 /* CFM message opcode meaning CCM. */ #define CCM_RDI_MASK 0x80 +#define CFM_HEALTH_INTERVAL 6 struct ccm { uint8_t mdlevel_version; /* MD Level and Version */ uint8_t opcode; @@ -111,6 +112,12 @@ struct cfm { * avoid flapping. */ uint64_t *rmps_array; /* Cache of remote_mps. */ size_t rmps_array_len; /* Number of rmps in 'rmps_array'. */ + + int health; /* Percentage of the number of CCM frames + received. */ + int health_interval; /* Number of fault_intervals since health was + recomputed. 
*/ + }; /* Remote MPs represent foreign network entities that are configured to have @@ -124,6 +131,9 @@ struct remote_mp { receiving CCMs that it's expecting to. */ bool opup; /* Operational State. */ uint32_t seq; /* Most recently received sequence number. */ + uint8_t num_health_ccm; /* Number of received ccm frames every + CFM_HEALTH_INTERVAL * 'fault_interval'. */ + }; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 30); @@ -290,6 +300,7 @@ cfm_create(const char *name) hmap_insert(&all_cfms, &cfm->hmap_node, hash_string(cfm->name, 0)); cfm->remote_opup = true; cfm->fault_override = -1; + cfm->health = -1; return cfm; } @@ -332,6 +343,37 @@ cfm_run(struct cfm *cfm) sizeof *cfm->rmps_array); cfm->remote_opup = true; + if (cfm->health_interval == CFM_HEALTH_INTERVAL) { + /* Calculate the cfm health of the interface. If the number of + * remote_mpids of a cfm interface is > 1, the cfm health is + * undefined. If the number of remote_mpids is 1, the cfm health is + * the percentage of the ccm frames received in the + * (CFM_HEALTH_INTERVAL * 3.5)ms, else it is 0. */ + if (hmap_count(&cfm->remote_mps) > 1) { + cfm->health = -1; + } else if (hmap_is_empty(&cfm->remote_mps)) { + cfm->health = 0; + } else { + int exp_ccm_recvd; + + rmp = CONTAINER_OF(hmap_first(&cfm->remote_mps), + struct remote_mp, node); + exp_ccm_recvd = (CFM_HEALTH_INTERVAL * 7) / 2; + /* Calculate the percentage of healthy ccm frames received. + * Since the 'fault_interval' is (3.5 * cfm_interval), and + * 1 CCM packet must be received every cfm_interval, + * the 'remote_mpid' health reports the percentage of + * healthy CCM frames received every + * 'CFM_HEALTH_INTERVAL'th 'fault_interval'. 
*/ + cfm->health = (rmp->num_health_ccm * 100) / exp_ccm_recvd; + cfm->health = MIN(cfm->health, 100); + rmp->num_health_ccm = 0; + assert(cfm->health >= 0 && cfm->health <= 100); + } + cfm->health_interval = 0; + } + cfm->health_interval++; + HMAP_FOR_EACH_SAFE (rmp, rmp_next, node, &cfm->remote_mps) { if (!rmp->recv) { @@ -535,6 +577,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) uint64_t ccm_mpid; uint32_t ccm_seq; bool ccm_opdown; + bool fault = false; if (cfm->extended) { ccm_mpid = ntohll(ccm->mpid64); @@ -549,6 +592,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) VLOG_WARN_RL(&rl, "%s: received a CCM with an invalid interval" " (%"PRIu8") from RMP %"PRIu64, cfm->name, ccm_interval, ccm_mpid); + fault = true; } if (cfm->extended && ccm_interval == 0 @@ -556,6 +600,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) VLOG_WARN_RL(&rl, "%s: received a CCM with an invalid extended" " interval (%"PRIu16"ms) from RMP %"PRIu64, cfm->name, ccm_interval_ms_x, ccm_mpid); + fault = true; } rmp = lookup_remote_mp(cfm, ccm_mpid); @@ -569,6 +614,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) "%s: dropped CCM with MPID %"PRIu64" from MAC " ETH_ADDR_FMT, cfm->name, ccm_mpid, ETH_ADDR_ARGS(eth->eth_src)); + fault = true; } } @@ -576,16 +622,23 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) " (interval %"PRIu8") (RDI %s)", cfm->name, ccm_seq, ccm_mpid, ccm_interval, ccm_rdi ? 
"true" : "false"); + if (ccm_rdi) { + fault = true; + } if (rmp) { if (rmp->seq && ccm_seq != (rmp->seq + 1)) { VLOG_WARN_RL(&rl, "%s: (mpid %"PRIu64") detected sequence" " numbers which indicate possible connectivity" " problems (previous %"PRIu32") (current %"PRIu32 ")", cfm->name, ccm_mpid, rmp->seq, ccm_seq); + fault = true; } rmp->mpid = ccm_mpid; rmp->recv = true; + if (!fault) { + rmp->num_health_ccm++; + } rmp->seq = ccm_seq; rmp->rdi = ccm_rdi; rmp->opup = !ccm_opdown; @@ -605,6 +658,17 @@ cfm_get_fault(const struct cfm *cfm) return cfm->fault; } +/* Gets the health of 'cfm'. Returns an integer between 0 and 100 indicating + * the health of the link as a percentage of ccm frames received in + * CFM_HEALTH_INTERVAL * 'fault_interval' if there is only 1 remote_mpid, + * returns 0 if there are no remote_mpids, and returns -1 if there are more + * than 1 remote_mpids. */ +int +cfm_get_health(const struct cfm *cfm) +{ + return cfm->health; +} + /* Gets the operational state of 'cfm'. 'cfm' is considered operationally down * if it has received a CCM with the operationally down bit set from any of its * remote maintenance points. Returns true if 'cfm' is operationally up. False @@ -656,6 +720,11 @@ cfm_print_details(struct ds *ds, const struct cfm *cfm) ds_put_cstr(ds, "\n"); } + if (cfm->health == -1) { + ds_put_format(ds, "\taverage health: undefined\n"); + } else { + ds_put_format(ds, "\taverage health: %d\n", cfm->health); + } ds_put_format(ds, "\topstate: %s\n", cfm->opup ? "up" : "down"); ds_put_format(ds, "\tremote_opstate: %s\n", cfm->remote_opup ? 
"up" : "down"); diff --git a/lib/cfm.h b/lib/cfm.h index 2556a325..2b4f888f 100644 --- a/lib/cfm.h +++ b/lib/cfm.h @@ -69,6 +69,7 @@ bool cfm_configure(struct cfm *, const struct cfm_settings *); bool cfm_should_process_flow(const struct cfm *cfm, const struct flow *); void cfm_process_heartbeat(struct cfm *, const struct ofpbuf *packet); int cfm_get_fault(const struct cfm *); +int cfm_get_health(const struct cfm *); bool cfm_get_opup(const struct cfm *); void cfm_get_remote_mpids(const struct cfm *, const uint64_t **rmps, size_t *n_rmps); diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 51b847f2..a42d09eb 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1119,6 +1119,14 @@ get_cfm_remote_mpids(const struct ofport *ofport_, const uint64_t **rmps, return -1; } } + +static int +get_cfm_health(const struct ofport *ofport_) +{ + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + + return ofport->cfm ? cfm_get_health(ofport->cfm) : -1; +} /* Spanning Tree. */ @@ -6491,6 +6499,7 @@ const struct ofproto_class ofproto_dpif_class = { set_cfm, get_cfm_fault, get_cfm_remote_mpids, + get_cfm_health, set_stp, get_stp_status, set_stp_port, diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 26904ef0..7b0e478b 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -980,6 +980,17 @@ struct ofproto_class { int (*get_cfm_remote_mpids)(const struct ofport *ofport, const uint64_t **rmps, size_t *n_rmps); + /* Checks the health of CFM configured on 'ofport'. Returns an integer + * to indicate the health percentage of the 'ofport' which is an average of + * the health of all the remote_mps. Returns an integer between 0 and 100 + * where 0 means that the 'ofport' is very unhealthy and 100 means the + * 'ofport' is perfectly healthy. Returns -1 if CFM is not enabled on + * 'port' or if the number of remote_mpids is > 1. 
+ * + * This function may be a null pointer if the ofproto implementation does + * not support CFM. */ + int (*get_cfm_health)(const struct ofport *ofport); + /* Configures spanning tree protocol (STP) on 'ofproto' using the * settings defined in 's'. * diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index e7e04012..f9343069 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -2481,6 +2481,19 @@ ofproto_port_get_cfm_remote_mpids(const struct ofproto *ofproto, : -1); } +/* Checks the health of the CFM for 'ofp_port' within 'ofproto'. Returns an + * integer value between 0 and 100 to indicate the health of the port as a + * percentage which is the average of cfm health of all the remote_mpids or + * returns -1 if CFM is not enabled on 'ofport'. */ +int +ofproto_port_get_cfm_health(const struct ofproto *ofproto, uint16_t ofp_port) +{ + struct ofport *ofport = ofproto_get_port(ofproto, ofp_port); + return (ofport && ofproto->ofproto_class->get_cfm_health + ? ofproto->ofproto_class->get_cfm_health(ofport) + : -1); +} + static enum ofperr handle_aggregate_stats_request(struct ofconn *ofconn, const struct ofp_stats_msg *osm) diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 6172f291..c40f5d3a 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -348,7 +348,8 @@ int ofproto_port_get_cfm_fault(const struct ofproto *, uint16_t ofp_port); int ofproto_port_get_cfm_remote_mpids(const struct ofproto *, uint16_t ofp_port, const uint64_t **rmps, size_t *n_rmps); - +int ofproto_port_get_cfm_health(const struct ofproto *ofproto, + uint16_t ofp_port); void ofproto_get_ofproto_controller_info(const struct ofproto *, struct shash *); void ofproto_free_ofproto_controller_info(struct shash *); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index adc3b47c..cf1d279b 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -279,6 +279,7 @@ bridge_init(const char *remote) ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_fault); ovsdb_idl_omit_alert(idl, 
&ovsrec_interface_col_cfm_fault_status); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_remote_mpids); + ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_health); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_lacp_current); ovsdb_idl_omit(idl, &ovsrec_interface_col_external_ids); @@ -1547,6 +1548,7 @@ iface_refresh_cfm_stats(struct iface *iface) int fault, error; const uint64_t *rmps; size_t n_rmps; + int health; if (iface_is_synthetic(iface)) { return; @@ -1582,6 +1584,15 @@ iface_refresh_cfm_stats(struct iface *iface) } else { ovsrec_interface_set_cfm_remote_mpids(cfg, NULL, 0); } + + health = ofproto_port_get_cfm_health(iface->port->bridge->ofproto, + iface->ofp_port); + if (health >= 0) { + int64_t cfm_health = health; + ovsrec_interface_set_cfm_health(cfg, &cfm_health, 1); + } else { + ovsrec_interface_set_cfm_health(cfg, NULL, 0); + } } static void diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema index a3847e77..c7e1ac9e 100644 --- a/vswitchd/vswitch.ovsschema +++ b/vswitchd/vswitch.ovsschema @@ -1,6 +1,6 @@ {"name": "Open_vSwitch", - "version": "6.8.0", - "cksum": "4106006492 16485", + "version": "6.9.0", + "cksum": "617116616 16682", "tables": { "Open_vSwitch": { "columns": { @@ -197,6 +197,11 @@ "ephemeral": true}, "cfm_fault_status": { "type": {"key": "string", "min": 0, "max": "unlimited"}}, + "cfm_health": { + "type": {"key": {"type": "integer", + "minInteger": 0, + "maxInteger": 100}, + "min": 0, "max": 1}}, "lacp_current": { "type": {"key": {"type": "boolean"}, "min": 0, "max": 1}, diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index f3ea3387..03c8539d 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -1726,6 +1726,27 @@ an ovs-appctl command. + +

+ Indicates the health of the interface as a percentage of CCM frames + received over 21 <ref column="other_config" key="cfm_interval"/>s. + The health of an interface is undefined if it is communicating with + more than one <ref column="cfm_remote_mpids"/>. It reduces if + healthy heartbeats are not received at the expected rate, and + gradually improves as healthy heartbeats are received at the desired + rate. Every 21 <ref column="other_config" key="cfm_interval"/>s, the + health of the interface is refreshed.

+

+ As mentioned above, the faults can be triggered for several reasons. + The link health will deteriorate even if heartbeats are received but + they are reported to be unhealthy. An unhealthy heartbeat in this + context is a heartbeat that either has some fault set or is out + of sequence. The interface health can be 100 only on receiving + healthy heartbeats at the desired rate.

+
+ When CFM is properly configured, Open vSwitch will occasionally receive CCM broadcasts. These broadcasts contain the MPID of the -- 2.30.2