From: Ben Pfaff Date: Mon, 6 Aug 2012 22:03:32 +0000 (-0700) Subject: rconn: Treat draining a message from the send queue as activity. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=133f2dc95454;p=openvswitch rconn: Treat draining a message from the send queue as activity. Until now, the rconn module has used messages received from the controller as the sole means to determine that the connection is up. This can interact badly with the OVS connection manager in ofproto, which stops reading and processing messages from the receive queue when there is a backlog in the send queue for a given connection (because reading and processes messages is the main cause of messages getting pushed onto the send queue). So, if a send queue backlog lasts more than twice the inactivity probe interval, then the connection drops, whether the controller is sending messages or not. Dumping a large flow table can trigger this behavior if the controller becomes temporarily busy or if the network between OVS and a controller is slow. The problem can easily repeat itself, since upon reconnection the controller will generally dump the flow table. This commit fixes the problem by expanding the definition of "activity" to include successfully sending an OpenFlow message that was previously queued. Bug #12789. Reported-by: Natasha Gude Signed-off-by: Ben Pfaff --- diff --git a/lib/rconn.c b/lib/rconn.c index a32f042e..eae6a7e8 100644 --- a/lib/rconn.c +++ b/lib/rconn.c @@ -80,7 +80,6 @@ struct rconn { int backoff; int max_backoff; time_t backoff_deadline; - time_t last_received; time_t last_connected; time_t last_disconnected; unsigned int packets_sent; @@ -105,11 +104,15 @@ struct rconn { time_t creation_time; unsigned long int total_time_connected; - /* Throughout this file, "probe" is shorthand for "inactivity probe". - * When nothing has been received from the peer for a while, we send out - * an echo request as an inactivity probe packet. We should receive back - * a response. */ + /* Throughout this file, "probe" is shorthand for "inactivity probe". When + * no activity has been observed from the peer for a while, we send out an + * echo request as an inactivity probe packet. We should receive back a + * response. + * + * "Activity" is defined as either receiving an OpenFlow message from the + * peer or successfully sending a message that had been in 'txq'. */ int probe_interval; /* Secs of inactivity before sending probe. */ + time_t last_activity; /* Last time we saw some activity. */ /* When we create a vconn we obtain these values, to save them past the end * of the vconn's lifetime. Otherwise, in-band control will only allow @@ -179,7 +182,6 @@ rconn_create(int probe_interval, int max_backoff, uint8_t dscp) rc->backoff = 0; rc->max_backoff = max_backoff ? max_backoff : 8; rc->backoff_deadline = TIME_MIN; - rc->last_received = time_now(); rc->last_connected = TIME_MIN; rc->last_disconnected = TIME_MIN; rc->seqno = 0; @@ -195,6 +197,8 @@ rconn_create(int probe_interval, int max_backoff, uint8_t dscp) rc->creation_time = time_now(); rc->total_time_connected = 0; + rc->last_activity = time_now(); + rconn_set_probe_interval(rc, probe_interval); rconn_set_dscp(rc, dscp); @@ -419,6 +423,7 @@ do_tx_work(struct rconn *rc) if (error) { break; } + rc->last_activity = time_now(); } if (list_is_empty(&rc->txq)) { poll_immediate_wake(); @@ -429,7 +434,7 @@ static unsigned int timeout_ACTIVE(const struct rconn *rc) { if (rc->probe_interval) { - unsigned int base = MAX(rc->last_received, rc->state_entered); + unsigned int base = MAX(rc->last_activity, rc->state_entered); unsigned int arg = base + rc->probe_interval - rc->state_entered; return arg; } @@ -440,7 +445,7 @@ static void run_ACTIVE(struct rconn *rc) { if (timed_out(rc)) { - unsigned int base = MAX(rc->last_received, rc->state_entered); + unsigned int base = MAX(rc->last_activity, rc->state_entered); VLOG_DBG("%s: idle %u seconds, sending inactivity probe", rc->name, (unsigned int) (time_now() - base)); @@ -543,7 +548,7 @@ rconn_recv(struct rconn *rc) rc->probably_admitted = true; rc->last_admitted = time_now(); } - rc->last_received = time_now(); + rc->last_activity = time_now(); rc->packets_received++; if (rc->state == S_IDLE) { state_transition(rc, S_ACTIVE);