While debugging some issues today it became clear that it would be
useful to log when the CFM fault status changes and when packets
are lost. The CFM module logs pretty aggressively when in debug
mode, but this can be chatty and most systems don't operate under
this logging level for extended periods of time. This patch logs
when CCMs are received which indicate reordering or packet loss and
when the CFM fault status changed.
Requested-by: Jacob Cherkas <jcherkas@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Hassan Khan hassan.khan@seecs.edu.pk
Hector Oron hector.oron@gmail.com
Henrik Amren henrik@nicira.com
Hassan Khan hassan.khan@seecs.edu.pk
Hector Oron hector.oron@gmail.com
Henrik Amren henrik@nicira.com
+Jacob Cherkas jcherkas@nicira.com
Jad Naous jnaous@gmail.com
Jamal Hadi Salim hadi@cyberus.ca
Jan Medved jmedved@juniper.net
Jad Naous jnaous@gmail.com
Jamal Hadi Salim hadi@cyberus.ca
Jan Medved jmedved@juniper.net
bool rdi; /* Remote Defect Indicator. Indicates remote_mp isn't
receiving CCMs that it's expecting to. */
bool opup; /* Operational State. */
bool rdi; /* Remote Defect Indicator. Indicates remote_mp isn't
receiving CCMs that it's expecting to. */
bool opup; /* Operational State. */
+ uint32_t seq; /* Most recently received sequence number. */
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 30);
static struct hmap all_cfms = HMAP_INITIALIZER(&all_cfms);
static unixctl_cb_func cfm_unixctl_show;
static struct hmap all_cfms = HMAP_INITIALIZER(&all_cfms);
static unixctl_cb_func cfm_unixctl_show;
if (timer_expired(&cfm->fault_timer)) {
long long int interval = cfm_fault_interval(cfm);
struct remote_mp *rmp, *rmp_next;
if (timer_expired(&cfm->fault_timer)) {
long long int interval = cfm_fault_interval(cfm);
struct remote_mp *rmp, *rmp_next;
+ bool old_cfm_fault = cfm->fault;
cfm->fault = cfm->unexpected_recv;
cfm->unexpected_recv = false;
cfm->fault = cfm->unexpected_recv;
cfm->unexpected_recv = false;
+ if (old_cfm_fault != cfm->fault) {
+ VLOG_INFO_RL(&rl, "%s: CFM fault status changed to %s",
+ cfm->name, cfm->fault ? "true" : "false");
+ }
+
timer_set_duration(&cfm->fault_timer, interval);
}
}
timer_set_duration(&cfm->fault_timer, interval);
}
}
struct remote_mp *rmp;
uint64_t ccm_mpid;
struct remote_mp *rmp;
uint64_t ccm_mpid;
bool ccm_opdown;
if (cfm->extended) {
bool ccm_opdown;
if (cfm->extended) {
ccm_mpid = ntohs(ccm->mpid);
ccm_opdown = false;
}
ccm_mpid = ntohs(ccm->mpid);
ccm_opdown = false;
}
+ ccm_seq = ntohl(ccm->seq);
if (ccm_interval != cfm->ccm_interval) {
VLOG_WARN_RL(&rl, "%s: received a CCM with an invalid interval"
if (ccm_interval != cfm->ccm_interval) {
VLOG_WARN_RL(&rl, "%s: received a CCM with an invalid interval"
rmp = lookup_remote_mp(cfm, ccm_mpid);
if (!rmp) {
if (hmap_count(&cfm->remote_mps) < CFM_MAX_RMPS) {
rmp = lookup_remote_mp(cfm, ccm_mpid);
if (!rmp) {
if (hmap_count(&cfm->remote_mps) < CFM_MAX_RMPS) {
- rmp = xmalloc(sizeof *rmp);
+ rmp = xzalloc(sizeof *rmp);
hmap_insert(&cfm->remote_mps, &rmp->node, hash_mpid(ccm_mpid));
} else {
cfm->unexpected_recv = true;
hmap_insert(&cfm->remote_mps, &rmp->node, hash_mpid(ccm_mpid));
} else {
cfm->unexpected_recv = true;
+ VLOG_DBG("%s: received CCM (seq %"PRIu32") (mpid %"PRIu64")"
+ " (interval %"PRIu8") (RDI %s)", cfm->name, ccm_seq,
+ ccm_mpid, ccm_interval, ccm_rdi ? "true" : "false");
+
+ if (rmp->seq && ccm_seq != (rmp->seq + 1)) {
+ VLOG_WARN_RL(&rl, "%s: (mpid %"PRIu64") detected sequence"
+ " numbers which indicate possible connectivity"
+ " problems (previous %"PRIu32") (current %"PRIu32
+ ")", cfm->name, ccm_mpid, rmp->seq, ccm_seq);
+ }
+
rmp->mpid = ccm_mpid;
rmp->recv = true;
rmp->mpid = ccm_mpid;
rmp->recv = true;
rmp->rdi = ccm_rdi;
rmp->opup = !ccm_opdown;
}
rmp->rdi = ccm_rdi;
rmp->opup = !ccm_opdown;
}
-
- VLOG_DBG("%s: received CCM (seq %"PRIu32") (mpid %"PRIu64")"
- " (interval %"PRIu8") (RDI %s)", cfm->name, ntohl(ccm->seq),
- ccm_mpid, ccm_interval, ccm_rdi ? "true" : "false");