From 8210113e95b679acc5e08ac81c1b8580b7912ab6 Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Mon, 16 Jan 2012 16:42:08 -0800 Subject: [PATCH] cfm: Log more aggressively amidst packet loss. While debugging some issues today it became clear that it would be useful to log when the CFM fault status changes and when packets are lost. The CFM module logs pretty aggressively when in debug mode, but this can be chatty and most systems don't operate under this logging level for extended periods of time. This patch logs when CCMs are received which indicate reordering or packet loss and when the CFM fault status changed. Requested-by: Jacob Cherkas Signed-off-by: Ethan Jackson --- AUTHORS | 1 + lib/cfm.c | 29 +++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/AUTHORS b/AUTHORS index fbf83552..4479aebb 100644 --- a/AUTHORS +++ b/AUTHORS @@ -88,6 +88,7 @@ Gregor Schaffrath grsch@net.t-labs.tu-berlin.de Hassan Khan hassan.khan@seecs.edu.pk Hector Oron hector.oron@gmail.com Henrik Amren henrik@nicira.com +Jacob Cherkas jcherkas@nicira.com Jad Naous jnaous@gmail.com Jamal Hadi Salim hadi@cyberus.ca Jan Medved jmedved@juniper.net diff --git a/lib/cfm.c b/lib/cfm.c index d2995a58..1e05853d 100644 --- a/lib/cfm.c +++ b/lib/cfm.c @@ -117,9 +117,10 @@ struct remote_mp { bool rdi; /* Remote Defect Indicator. Indicates remote_mp isn't receiving CCMs that it's expecting to. */ bool opup; /* Operational State. */ + uint32_t seq; /* Most recently received sequence number. */ }; -static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 30); static struct hmap all_cfms = HMAP_INITIALIZER(&all_cfms); static unixctl_cb_func cfm_unixctl_show; @@ -280,6 +281,7 @@ cfm_run(struct cfm *cfm) if (timer_expired(&cfm->fault_timer)) { long long int interval = cfm_fault_interval(cfm); struct remote_mp *rmp, *rmp_next; + bool old_cfm_fault = cfm->fault; cfm->fault = cfm->unexpected_recv; cfm->unexpected_recv = false; @@ -324,6 +326,11 @@ cfm_run(struct cfm *cfm) cfm->fault = true; } + if (old_cfm_fault != cfm->fault) { + VLOG_INFO_RL(&rl, "%s: CFM fault status changed to %s", + cfm->name, cfm->fault ? "true" : "false"); + } + timer_set_duration(&cfm->fault_timer, interval); } } @@ -474,6 +481,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) struct remote_mp *rmp; uint64_t ccm_mpid; + uint32_t ccm_seq; bool ccm_opdown; if (cfm->extended) { @@ -483,6 +491,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) ccm_mpid = ntohs(ccm->mpid); ccm_opdown = false; } + ccm_seq = ntohl(ccm->seq); if (ccm_interval != cfm->ccm_interval) { VLOG_WARN_RL(&rl, "%s: received a CCM with an invalid interval" @@ -500,7 +509,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) rmp = lookup_remote_mp(cfm, ccm_mpid); if (!rmp) { if (hmap_count(&cfm->remote_mps) < CFM_MAX_RMPS) { - rmp = xmalloc(sizeof *rmp); + rmp = xzalloc(sizeof *rmp); hmap_insert(&cfm->remote_mps, &rmp->node, hash_mpid(ccm_mpid)); } else { cfm->unexpected_recv = true; @@ -511,16 +520,24 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) } } + VLOG_DBG("%s: received CCM (seq %"PRIu32") (mpid %"PRIu64")" + " (interval %"PRIu8") (RDI %s)", cfm->name, ccm_seq, + ccm_mpid, ccm_interval, ccm_rdi ? "true" : "false"); + if (rmp) { + if (rmp->seq && ccm_seq != (rmp->seq + 1)) { + VLOG_WARN_RL(&rl, "%s: (mpid %"PRIu64") detected sequence" + " numbers which indicate possible connectivity" + " problems (previous %"PRIu32") (current %"PRIu32 + ")", cfm->name, ccm_mpid, rmp->seq, ccm_seq); + } + rmp->mpid = ccm_mpid; rmp->recv = true; + rmp->seq = ccm_seq; rmp->rdi = ccm_rdi; rmp->opup = !ccm_opdown; } - - VLOG_DBG("%s: received CCM (seq %"PRIu32") (mpid %"PRIu64")" - " (interval %"PRIu8") (RDI %s)", cfm->name, ntohl(ccm->seq), - ccm_mpid, ccm_interval, ccm_rdi ? "true" : "false"); } } -- 2.30.2