From 5eda645e36a9b5dd72c275b7e400dfefcba281d3 Mon Sep 17 00:00:00 2001 From: Andrew Evans Date: Mon, 14 Mar 2011 13:10:02 -0700 Subject: [PATCH] ovsdb-server: Report time since last connect and disconnect for each manager. Only the time connected (if connected) or disconnected (if disconnected) is currently reported for each manager. Change to reporting both in seconds since the last connect and disconnect events respectively. An empty value indicates no previous connection or disconnection. This can help diagnose certain connectivity problems, e.g. flapping. Requested-by: Peter Balland Bug #4833. --- lib/reconnect.c | 42 ++++++++------ lib/reconnect.h | 26 ++++----- ovsdb/jsonrpc-server.c | 8 +-- ovsdb/jsonrpc-server.h | 3 +- ovsdb/ovsdb-server.c | 13 +++-- python/ovs/reconnect.py | 34 ++++++----- tests/reconnect.at | 124 +++++++++++++--------------------------- tests/test-reconnect.c | 41 +++++++------ tests/test-reconnect.py | 37 ++++++------ vswitchd/vswitch.xml | 16 ++++-- 10 files changed, 163 insertions(+), 181 deletions(-) diff --git a/lib/reconnect.c b/lib/reconnect.c index f8463bb0..c1690162 100644 --- a/lib/reconnect.c +++ b/lib/reconnect.c @@ -106,8 +106,8 @@ reconnect_create(long long int now) fsm->state_entered = now; fsm->backoff = 0; fsm->last_received = now; - fsm->last_connected = now; - fsm->last_disconnected = now; + fsm->last_connected = LLONG_MAX; + fsm->last_disconnected = LLONG_MAX; fsm->max_tries = UINT_MAX; fsm->creation_time = now; @@ -642,23 +642,26 @@ reconnect_is_connected(const struct reconnect *fsm) return is_connected_state(fsm->state); } -/* Returns the number of milliseconds for which 'fsm' has been continuously - * connected to its peer. (If 'fsm' is not currently connected, this is 0.) */ +/* Returns the number of milliseconds since 'fsm' last successfully connected + * to its peer (even if it has since disconnected). Returns UINT_MAX if never + * connected. */ unsigned int -reconnect_get_connection_duration(const struct reconnect *fsm, - long long int now) +reconnect_get_last_connect_elapsed(const struct reconnect *fsm, + long long int now) { - return reconnect_is_connected(fsm) ? now - fsm->last_connected : 0; + return fsm->last_connected == LLONG_MAX ? UINT_MAX + : now - fsm->last_connected; } -/* Returns the number of milliseconds for which 'fsm' has been continuously - * disconnected from its peer. (If 'fsm' is not currently connected, - * this is 0.) */ +/* Returns the number of milliseconds since 'fsm' last disconnected + * from its peer (even if it has since reconnected). Returns UINT_MAX if never + * disconnected. */ unsigned int -reconnect_get_disconnect_duration(const struct reconnect *fsm, - long long int now) +reconnect_get_last_disconnect_elapsed(const struct reconnect *fsm, + long long int now) { - return reconnect_is_connected(fsm) ? 0 : now - fsm->last_disconnected; + return fsm->last_disconnected == LLONG_MAX ? UINT_MAX + : now - fsm->last_disconnected; } /* Copies various statistics for 'fsm' into '*stats'. */ @@ -673,12 +676,13 @@ reconnect_get_stats(const struct reconnect *fsm, long long int now, stats->backoff = fsm->backoff; stats->seqno = fsm->seqno; stats->is_connected = reconnect_is_connected(fsm); - stats->current_connection_duration - = reconnect_get_connection_duration(fsm, now); - stats->current_disconnect_duration - = reconnect_get_disconnect_duration(fsm, now); - stats->total_connected_duration = (stats->current_connection_duration - + fsm->total_connected_duration); + stats->msec_since_connect + = reconnect_get_last_connect_elapsed(fsm, now); + stats->msec_since_disconnect + = reconnect_get_last_disconnect_elapsed(fsm, now); + stats->total_connected_duration = fsm->total_connected_duration + + (is_connected_state(fsm->state) + ? reconnect_get_last_connect_elapsed(fsm, now) : 0); stats->n_attempted_connections = fsm->n_attempted_connections; stats->n_successful_connections = fsm->n_successful_connections; stats->state = reconnect_state_name__(fsm->state); diff --git a/lib/reconnect.h b/lib/reconnect.h index 52f1001f..997a03f2 100644 --- a/lib/reconnect.h +++ b/lib/reconnect.h @@ -67,10 +67,10 @@ void reconnect_disable(struct reconnect *, long long int now); void reconnect_force_reconnect(struct reconnect *, long long int now); bool reconnect_is_connected(const struct reconnect *); -unsigned int reconnect_get_connection_duration(const struct reconnect *, - long long int now); -unsigned int reconnect_get_disconnect_duration(const struct reconnect *, - long long int now); +unsigned int reconnect_get_last_connect_elapsed(const struct reconnect *, + long long int now); +unsigned int reconnect_get_last_disconnect_elapsed(const struct reconnect *, + long long int now); void reconnect_disconnected(struct reconnect *, long long int now, int error); void reconnect_connecting(struct reconnect *, long long int now); @@ -92,18 +92,18 @@ int reconnect_timeout(struct reconnect *, long long int now); struct reconnect_stats { /* All times and durations in this structure are in milliseconds. */ - long long int creation_time; /* Time reconnect_create() called. */ - long long int last_received; /* Last call to reconnect_received(). */ - long long int last_connected; /* Last call to reconnect_connected(). */ + long long int creation_time; /* Time reconnect_create() called. */ + long long int last_received; /* Last call to reconnect_received(). */ + long long int last_connected; /* Last call to reconnect_connected(). */ long long int last_disconnected; /* Last call to reconnect_disconnected(). */ - int backoff; /* Current backoff duration. */ + int backoff; /* Current backoff duration. */ - unsigned int seqno; /* # of connections + # of disconnections. */ + unsigned int seqno; /* # of connections + # of disconnections. */ - bool is_connected; /* Currently connected? */ - unsigned int current_connection_duration; /* Time of current connection. */ - unsigned int current_disconnect_duration; /* Time disconnected (if disconnected). */ - unsigned int total_connected_duration; /* Sum of all connections. */ + bool is_connected; /* Currently connected? */ + unsigned int msec_since_connect; /* Time since last connect. */ + unsigned int msec_since_disconnect; /* Time since last disconnect. */ + unsigned int total_connected_duration; /* Sum of all connections. */ unsigned int n_attempted_connections; unsigned int n_successful_connections; diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 713c5182..fc9c6047 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -470,10 +470,10 @@ ovsdb_jsonrpc_session_get_status(const struct ovsdb_jsonrpc_remote *remote, jsonrpc_session_get_reconnect_stats(js, &rstats); status->state = rstats.state; - status->conn_secs = (rstats.is_connected - ? rstats.current_connection_duration - : rstats.current_disconnect_duration - ) / 1000; + status->sec_since_connect = rstats.msec_since_connect == UINT_MAX + ? UINT_MAX : rstats.msec_since_connect / 1000; + status->sec_since_disconnect = rstats.msec_since_disconnect == UINT_MAX + ? UINT_MAX : rstats.msec_since_disconnect / 1000; return; } diff --git a/ovsdb/jsonrpc-server.h b/ovsdb/jsonrpc-server.h index 691b55e2..dc786352 100644 --- a/ovsdb/jsonrpc-server.h +++ b/ovsdb/jsonrpc-server.h @@ -38,7 +38,8 @@ void ovsdb_jsonrpc_server_set_remotes(struct ovsdb_jsonrpc_server *, struct ovsdb_jsonrpc_remote_status { const char *state; int last_error; - unsigned int conn_secs; + unsigned int sec_since_connect; + unsigned int sec_since_disconnect; bool is_connected; }; void ovsdb_jsonrpc_server_get_remote_status( diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index 9eb58ebf..8d44d848 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -469,7 +469,7 @@ update_remote_row(const struct ovsdb_row *row, struct ovsdb_txn *txn, struct ovsdb_row *rw_row; const char *target; const struct ovsdb_jsonrpc_remote_status *status; - char *keys[3], *values[3]; + char *keys[4], *values[4]; size_t n = 0; /* Get the "target" (protocol/host/port) spec. */ @@ -495,9 +495,14 @@ update_remote_row(const struct ovsdb_row *row, struct ovsdb_txn *txn, keys[n] = xstrdup("state"); values[n++] = xstrdup(status->state); - keys[n] = xstrdup(status->is_connected ? "time_connected" - : "time_disconnected"); - values[n++] = xasprintf("%u", status->conn_secs); + if (status->sec_since_connect != UINT_MAX) { + keys[n] = xstrdup("sec_since_connect"); + values[n++] = xasprintf("%u", status->sec_since_connect); + } + if (status->sec_since_disconnect != UINT_MAX) { + keys[n] = xstrdup("sec_since_disconnect"); + values[n++] = xasprintf("%u", status->sec_since_disconnect); + } if (status->last_error) { keys[n] = xstrdup("last_error"); values[n++] = diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py index 3445245d..49c7a094 100644 --- a/python/ovs/reconnect.py +++ b/python/ovs/reconnect.py @@ -145,8 +145,8 @@ class Reconnect(object): self.state_entered = now self.backoff = 0 self.last_received = now - self.last_connected = now - self.last_disconnected = now + self.last_connected = None + self.last_disconnected = None self.max_tries = None self.creation_time = now @@ -529,23 +529,21 @@ class Reconnect(object): False otherwise.""" return self.state.is_connected - def get_connection_duration(self, now): - """Returns the number of milliseconds for which this FSM has been - continuously connected to its peer. (If this FSM is not currently - connected, this is 0.)""" - if self.is_connected(): + def get_last_connect_elapsed(self, now): + """Returns the number of milliseconds since 'fsm' was last connected + to its peer. Returns None if never connected.""" + if self.last_connected: return now - self.last_connected else: - return 0 + return None - def get_disconnect_duration(self, now): - """Returns the number of milliseconds for which this FSM has been - continuously disconnected from its peer. (If this FSM is not currently - connected, this is 0.)""" - if not self.is_connected(): + def get_last_disconnect_elapsed(self, now): + """Returns the number of milliseconds since 'fsm' was last disconnected + from its peer. Returns None if never disconnected.""" + if self.last_disconnected: return now - self.last_disconnected else: - return 0 + return None def get_stats(self, now): class Stats(object): @@ -558,10 +556,10 @@ class Reconnect(object): stats.backoff = self.backoff stats.seqno = self.seqno stats.is_connected = self.is_connected() - stats.current_connection_duration = self.get_connection_duration(now) - stats.current_disconnect_duration = self.get_disconnect_duration(now) - stats.total_connected_duration = (stats.current_connection_duration + - self.total_connected_duration) + stats.msec_since_connect = self.get_last_connect_elapsed(now) + stats.msec_since_disconnect = self.get_last_disconnect_elapsed(now) + stats.total_connected_duration = self.total_connected_duration + \ + (self.get_last_connect_elapsed(now) if self.is_connected() else 0) stats.n_attempted_connections = self.n_attempted_connections stats.n_successful_connections = self.n_successful_connections stats.state = self.state.name diff --git a/tests/reconnect.at b/tests/reconnect.at index 6a9ae1e8..bb37170e 100644 --- a/tests/reconnect.at +++ b/tests/reconnect.at @@ -59,8 +59,10 @@ run should connect connected in ACTIVE for 0 ms (0 ms backoff) + created 1000, last received 1000, last connected 1000 1 successful connections out of 1 attempts, seqno 1 - connected (0 ms), total 0 ms connected + connected + last connected 0 ms ago, connected 0 ms total # Send inactivity probe. timeout @@ -68,7 +70,6 @@ timeout ### t=6000 ### in ACTIVE for 5000 ms (0 ms backoff) - connected (5000 ms), total 5000 ms connected run should send probe in IDLE for 0 ms (0 ms backoff) @@ -79,13 +80,12 @@ timeout ### t=11000 ### in IDLE for 5000 ms (0 ms backoff) - connected (10000 ms), total 10000 ms connected run should disconnect disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 - not connected (0 ms), total 10000 ms connected + disconnected disconnected at 11000 ms (0 ms ago) ]) @@ -126,14 +126,13 @@ advance 500 ### t=1500 ### in CONNECTING for 500 ms (0 ms backoff) - disconnected for 500 ms run connected in ACTIVE for 0 ms (0 ms backoff) created 1000, last received 1000, last connected 1500 1 successful connections out of 1 attempts, seqno 1 - connected (0 ms), total 0 ms connected - disconnected for 0 ms + connected + last connected 0 ms ago, connected 0 ms total # Send inactivity probe. timeout @@ -141,7 +140,6 @@ timeout ### t=6500 ### in ACTIVE for 5000 ms (0 ms backoff) - connected (5000 ms), total 5000 ms connected run should send probe in IDLE for 0 ms (0 ms backoff) @@ -152,13 +150,12 @@ timeout ### t=11500 ### in IDLE for 5000 ms (0 ms backoff) - connected (10000 ms), total 10000 ms connected run should disconnect disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 - not connected (0 ms), total 10000 ms connected + disconnected disconnected at 11500 ms (0 ms ago) ]) @@ -239,7 +236,6 @@ timeout ### t=2000 ### in CONNECTING for 1000 ms (0 ms backoff) - disconnected for 1000 ms run should disconnect connect-failed @@ -252,7 +248,6 @@ timeout ### t=3000 ### in BACKOFF for 1000 ms (1000 ms backoff) - disconnected for 2000 ms run should connect @@ -264,7 +259,6 @@ timeout ### t=4000 ### in CONNECTING for 1000 ms (1000 ms backoff) - disconnected for 3000 ms run should disconnect connect-failed @@ -277,7 +271,6 @@ timeout ### t=6000 ### in BACKOFF for 2000 ms (2000 ms backoff) - disconnected for 5000 ms run should connect @@ -289,7 +282,6 @@ timeout ### t=8000 ### in CONNECTING for 2000 ms (2000 ms backoff) - disconnected for 7000 ms run should disconnect connect-failed @@ -302,7 +294,6 @@ timeout ### t=12000 ### in BACKOFF for 4000 ms (4000 ms backoff) - disconnected for 11000 ms run should connect @@ -314,7 +305,6 @@ timeout ### t=16000 ### in CONNECTING for 4000 ms (4000 ms backoff) - disconnected for 15000 ms run should disconnect connect-failed @@ -327,7 +317,6 @@ timeout ### t=24000 ### in BACKOFF for 8000 ms (8000 ms backoff) - disconnected for 23000 ms run should connect @@ -339,7 +328,6 @@ timeout ### t=32000 ### in CONNECTING for 8000 ms (8000 ms backoff) - disconnected for 31000 ms run should disconnect connect-failed @@ -352,7 +340,6 @@ timeout ### t=40000 ### in BACKOFF for 8000 ms (8000 ms backoff) - disconnected for 39000 ms run should connect @@ -364,7 +351,6 @@ timeout ### t=48000 ### in CONNECTING for 8000 ms (8000 ms backoff) - disconnected for 47000 ms run should disconnect connect-failed @@ -422,14 +408,15 @@ run should connect connected in ACTIVE for 0 ms (0 ms backoff) + created 1000, last received 1000, last connected 1000 1 successful connections out of 1 attempts, seqno 1 - connected (0 ms), total 0 ms connected + connected + last connected 0 ms ago, connected 0 ms total timeout advance 5000 ms ### t=6000 ### in ACTIVE for 5000 ms (0 ms backoff) - connected (5000 ms), total 5000 ms connected run should send probe in IDLE for 0 ms (0 ms backoff) @@ -438,13 +425,12 @@ timeout ### t=11000 ### in IDLE for 5000 ms (0 ms backoff) - connected (10000 ms), total 10000 ms connected run should disconnect disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 - not connected (0 ms), total 10000 ms connected + disconnected disconnected at 11000 ms (0 ms ago) # Back off for 1000 ms. @@ -453,7 +439,7 @@ timeout ### t=12000 ### in BACKOFF for 1000 ms (1000 ms backoff) - disconnected for 1000 ms + last connected 11000 ms ago, connected 10000 ms total run should connect @@ -464,14 +450,13 @@ connected in ACTIVE for 0 ms (1000 ms backoff) created 1000, last received 1000, last connected 12000 2 successful connections out of 2 attempts, seqno 3 - connected (0 ms), total 10000 ms connected - disconnected for 0 ms + connected + last connected 0 ms ago, connected 10000 ms total timeout advance 5000 ms ### t=17000 ### in ACTIVE for 5000 ms (1000 ms backoff) - connected (5000 ms), total 15000 ms connected run should send probe in IDLE for 0 ms (1000 ms backoff) @@ -480,13 +465,12 @@ timeout ### t=22000 ### in IDLE for 5000 ms (1000 ms backoff) - connected (10000 ms), total 20000 ms connected run should disconnect disconnected in BACKOFF for 0 ms (2000 ms backoff) 2 successful connections out of 2 attempts, seqno 4 - not connected (0 ms), total 20000 ms connected + disconnected disconnected at 22000 ms (0 ms ago) # Back off for 2000 ms. @@ -495,7 +479,7 @@ timeout ### t=24000 ### in BACKOFF for 2000 ms (2000 ms backoff) - disconnected for 2000 ms + last connected 12000 ms ago, connected 20000 ms total run should connect @@ -506,14 +490,13 @@ connected in ACTIVE for 0 ms (2000 ms backoff) created 1000, last received 1000, last connected 24000 3 successful connections out of 3 attempts, seqno 5 - connected (0 ms), total 20000 ms connected - disconnected for 0 ms + connected + last connected 0 ms ago, connected 20000 ms total timeout advance 5000 ms ### t=29000 ### in ACTIVE for 5000 ms (2000 ms backoff) - connected (5000 ms), total 25000 ms connected run should send probe in IDLE for 0 ms (2000 ms backoff) @@ -522,13 +505,12 @@ timeout ### t=34000 ### in IDLE for 5000 ms (2000 ms backoff) - connected (10000 ms), total 30000 ms connected run should disconnect disconnected in BACKOFF for 0 ms (4000 ms backoff) 3 successful connections out of 3 attempts, seqno 6 - not connected (0 ms), total 30000 ms connected + disconnected disconnected at 34000 ms (0 ms ago) # Back off for 4000 ms. @@ -537,7 +519,7 @@ timeout ### t=38000 ### in BACKOFF for 4000 ms (4000 ms backoff) - disconnected for 4000 ms + last connected 14000 ms ago, connected 30000 ms total ]) ###################################################################### @@ -595,7 +577,6 @@ timeout ### t=2000 ### in CONNECTING for 1000 ms (0 ms backoff) - disconnected for 1000 ms run should disconnect connect-failed @@ -608,7 +589,6 @@ timeout ### t=3000 ### in BACKOFF for 1000 ms (1000 ms backoff) - disconnected for 2000 ms run should connect @@ -620,7 +600,6 @@ timeout ### t=4000 ### in CONNECTING for 1000 ms (1000 ms backoff) - disconnected for 3000 ms run should disconnect connect-failed @@ -633,7 +612,6 @@ timeout ### t=6000 ### in BACKOFF for 2000 ms (2000 ms backoff) - disconnected for 5000 ms run should connect @@ -644,25 +622,23 @@ advance 500 ### t=6500 ### in CONNECTING for 500 ms (2000 ms backoff) - disconnected for 5500 ms run connected in ACTIVE for 0 ms (2000 ms backoff) created 1000, last received 1000, last connected 6500 1 successful connections out of 3 attempts, seqno 1 - connected (0 ms), total 0 ms connected - disconnected for 0 ms + connected + last connected 0 ms ago, connected 0 ms total # Connection drops after another 250 ms. advance 250 ### t=6750 ### in ACTIVE for 250 ms (2000 ms backoff) - connected (250 ms), total 250 ms connected disconnected in BACKOFF for 0 ms (4000 ms backoff) 1 successful connections out of 3 attempts, seqno 2 - not connected (0 ms), total 250 ms connected + disconnected disconnected at 6750 ms (0 ms ago) run @@ -672,7 +648,7 @@ timeout ### t=10750 ### in BACKOFF for 4000 ms (4000 ms backoff) - disconnected for 4000 ms + last connected 4250 ms ago, connected 250 ms total run should connect ]) @@ -743,7 +719,6 @@ timeout ### t=2000 ### in CONNECTING for 1000 ms (0 ms backoff) - disconnected for 1000 ms run should disconnect connect-failed @@ -756,7 +731,6 @@ timeout ### t=3000 ### in BACKOFF for 1000 ms (1000 ms backoff) - disconnected for 2000 ms run should connect @@ -768,7 +742,6 @@ timeout ### t=4000 ### in CONNECTING for 1000 ms (1000 ms backoff) - disconnected for 3000 ms run should disconnect connect-failed @@ -781,7 +754,6 @@ timeout ### t=6000 ### in BACKOFF for 2000 ms (2000 ms backoff) - disconnected for 5000 ms run should connect @@ -792,21 +764,19 @@ advance 500 ### t=6500 ### in CONNECTING for 500 ms (2000 ms backoff) - disconnected for 5500 ms run connected in ACTIVE for 0 ms (2000 ms backoff) created 1000, last received 1000, last connected 6500 1 successful connections out of 3 attempts, seqno 1 - connected (0 ms), total 0 ms connected - disconnected for 0 ms + connected + last connected 0 ms ago, connected 0 ms total # Connection receives 3 chunks of data spaced 250 ms apart. advance 250 ### t=6750 ### in ACTIVE for 250 ms (2000 ms backoff) - connected (250 ms), total 250 ms connected run received created 1000, last received 6750, last connected 6500 @@ -814,7 +784,6 @@ advance 250 ### t=7000 ### in ACTIVE for 500 ms (2000 ms backoff) - connected (500 ms), total 500 ms connected run received created 1000, last received 7000, last connected 6500 @@ -822,7 +791,6 @@ advance 250 ### t=7250 ### in ACTIVE for 750 ms (2000 ms backoff) - connected (750 ms), total 750 ms connected run received created 1000, last received 7250, last connected 6500 @@ -831,7 +799,7 @@ received disconnected in BACKOFF for 0 ms (4000 ms backoff) 1 successful connections out of 3 attempts, seqno 2 - not connected (0 ms), total 750 ms connected + disconnected disconnected at 7250 ms (0 ms ago) run @@ -841,7 +809,7 @@ timeout ### t=11250 ### in BACKOFF for 4000 ms (4000 ms backoff) - disconnected for 4000 ms + last connected 4750 ms ago, connected 750 ms total run should connect ]) @@ -912,7 +880,6 @@ timeout ### t=2000 ### in CONNECTING for 1000 ms (0 ms backoff) - disconnected for 1000 ms run should disconnect connect-failed @@ -925,7 +892,6 @@ timeout ### t=3000 ### in BACKOFF for 1000 ms (1000 ms backoff) - disconnected for 2000 ms run should connect @@ -937,7 +903,6 @@ timeout ### t=4000 ### in CONNECTING for 1000 ms (1000 ms backoff) - disconnected for 3000 ms run should disconnect connect-failed @@ -950,7 +915,6 @@ timeout ### t=6000 ### in BACKOFF for 2000 ms (2000 ms backoff) - disconnected for 5000 ms run should connect @@ -961,21 +925,19 @@ advance 500 ### t=6500 ### in CONNECTING for 500 ms (2000 ms backoff) - disconnected for 5500 ms run connected in ACTIVE for 0 ms (2000 ms backoff) created 1000, last received 1000, last connected 6500 1 successful connections out of 3 attempts, seqno 1 - connected (0 ms), total 0 ms connected - disconnected for 0 ms + connected + last connected 0 ms ago, connected 0 ms total # Connection receives 3 chunks of data spaced 2000 ms apart. advance 2000 ### t=8500 ### in ACTIVE for 2000 ms (2000 ms backoff) - connected (2000 ms), total 2000 ms connected run received created 1000, last received 8500, last connected 6500 @@ -983,7 +945,6 @@ advance 2000 ### t=10500 ### in ACTIVE for 4000 ms (2000 ms backoff) - connected (4000 ms), total 4000 ms connected run received created 1000, last received 10500, last connected 6500 @@ -991,7 +952,6 @@ advance 2000 ### t=12500 ### in ACTIVE for 6000 ms (2000 ms backoff) - connected (6000 ms), total 6000 ms connected run received created 1000, last received 12500, last connected 6500 @@ -1000,7 +960,7 @@ received disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 3 attempts, seqno 2 - not connected (0 ms), total 6000 ms connected + disconnected disconnected at 12500 ms (0 ms ago) run @@ -1010,7 +970,7 @@ timeout ### t=13500 ### in BACKOFF for 1000 ms (1000 ms backoff) - disconnected for 1000 ms + last connected 7000 ms ago, connected 6000 ms total run should connect ]) @@ -1053,7 +1013,6 @@ timeout ### t=2000 ### in BACKOFF for 1000 ms (1000 ms backoff) - disconnected for 1000 ms # Connection fails quickly again. run @@ -1069,7 +1028,6 @@ timeout ### t=4000 ### in BACKOFF for 2000 ms (2000 ms backoff) - disconnected for 3000 ms ]) ###################################################################### @@ -1102,8 +1060,10 @@ run should connect connected in ACTIVE for 0 ms (0 ms backoff) + created 1000, last received 1000, last connected 1000 1 successful connections out of 1 attempts, seqno 1 - connected (0 ms), total 0 ms connected + connected + last connected 0 ms ago, connected 0 ms total # Send inactivity probe. timeout @@ -1111,7 +1071,6 @@ timeout ### t=6000 ### in ACTIVE for 5000 ms (0 ms backoff) - connected (5000 ms), total 5000 ms connected run should send probe in IDLE for 0 ms (0 ms backoff) @@ -1122,13 +1081,12 @@ timeout ### t=11000 ### in IDLE for 5000 ms (0 ms backoff) - connected (10000 ms), total 10000 ms connected run should disconnect disconnected in VOID for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 - not connected (0 ms), total 10000 ms connected + disconnected disconnected at 11000 ms (0 ms ago) ]) @@ -1212,7 +1170,6 @@ timeout ### t=2000 ### in BACKOFF for 1000 ms (1000 ms backoff) - disconnected for 1000 ms run should connect listening @@ -1223,15 +1180,14 @@ connected in ACTIVE for 0 ms (1000 ms backoff) created 1000, last received 1000, last connected 2000 1 successful connections out of 1 attempts, seqno 1 - connected (0 ms), total 0 ms connected - disconnected for 0 ms + connected + last connected 0 ms ago, connected 0 ms total received created 1000, last received 2000, last connected 2000 advance 1000 ### t=3000 ### in ACTIVE for 1000 ms (1000 ms backoff) - connected (1000 ms), total 1000 ms connected received created 1000, last received 3000, last connected 2000 @@ -1241,7 +1197,6 @@ timeout ### t=8000 ### in ACTIVE for 6000 ms (1000 ms backoff) - connected (6000 ms), total 6000 ms connected run should send probe in IDLE for 0 ms (1000 ms backoff) @@ -1250,13 +1205,12 @@ timeout ### t=13000 ### in IDLE for 5000 ms (1000 ms backoff) - connected (11000 ms), total 11000 ms connected run should disconnect disconnected in BACKOFF for 0 ms (0 ms backoff) 1 successful connections out of 1 attempts, seqno 2 - not connected (0 ms), total 11000 ms connected + disconnected disconnected at 13000 ms (0 ms ago) # Start listening again. diff --git a/tests/test-reconnect.c b/tests/test-reconnect.c index a519d352..fae0f178 100644 --- a/tests/test-reconnect.c +++ b/tests/test-reconnect.c @@ -35,7 +35,8 @@ static int now; static const struct command commands[]; static void diff_stats(const struct reconnect_stats *old, - const struct reconnect_stats *new); + const struct reconnect_stats *new, + int delta); int main(void) @@ -74,16 +75,17 @@ main(void) if (old_time != now) { printf("\n### t=%d ###\n", now); - old_time = now; } reconnect_get_stats(reconnect, now, &cur); - diff_stats(&prev, &cur); + diff_stats(&prev, &cur, now - old_time); prev = cur; if (reconnect_get_max_tries(reconnect) != old_max_tries) { old_max_tries = reconnect_get_max_tries(reconnect); printf(" %u tries left\n", old_max_tries); } + + old_time = now; } return 0; @@ -208,7 +210,8 @@ do_set_max_tries(int argc OVS_UNUSED, char *argv[]) static void diff_stats(const struct reconnect_stats *old, - const struct reconnect_stats *new) + const struct reconnect_stats *new, + int delta) { if (old->state != new->state || old->state_elapsed != new->state_elapsed @@ -229,20 +232,25 @@ diff_stats(const struct reconnect_stats *old, new->n_successful_connections, new->n_attempted_connections, new->seqno); } - if (old->is_connected != new->is_connected - || old->current_connection_duration != new->current_connection_duration - || old->total_connected_duration != new->total_connected_duration) { - printf(" %sconnected (%u ms), total %u ms connected\n", - new->is_connected ? "" : "not ", - new->current_connection_duration, - new->total_connected_duration); + if (old->is_connected != new->is_connected) { + printf(" %sconnected\n", new->is_connected ? "" : "dis"); } - if (old->last_disconnected != new->last_disconnected) { - printf(" disconnected at %llu ms (%u ms ago)\n", - new->last_disconnected, new->current_disconnect_duration); + if (old->last_connected != new->last_connected + || (old->msec_since_connect != new->msec_since_connect - delta + && !(old->msec_since_connect == UINT_MAX + && new->msec_since_connect == UINT_MAX)) + || (old->total_connected_duration != new->total_connected_duration - delta + && !(old->total_connected_duration == 0 + && new->total_connected_duration == 0))) { + printf(" last connected %u ms ago, connected %u ms total\n", + new->msec_since_connect, new->total_connected_duration); } - if (old->current_disconnect_duration != new->current_disconnect_duration) { - printf(" disconnected for %u ms\n", new->current_disconnect_duration); + if (old->last_disconnected != new->last_disconnected + || (old->msec_since_disconnect != new->msec_since_disconnect - delta + && !(old->msec_since_disconnect == UINT_MAX + && new->msec_since_disconnect == UINT_MAX))) { + printf(" disconnected at %llu ms (%u ms ago)\n", + new->last_disconnected, new->msec_since_disconnect); } } @@ -282,4 +290,3 @@ static const struct command commands[] = { { "listen-error", 1, 1, do_listen_error }, { NULL, 0, 0, NULL }, }; - diff --git a/tests/test-reconnect.py b/tests/test-reconnect.py index 0c467564..a4cc9b7d 100644 --- a/tests/test-reconnect.py +++ b/tests/test-reconnect.py @@ -86,7 +86,7 @@ def do_timeout(arg): def do_set_max_tries(arg): r.set_max_tries(int(arg)) -def diff_stats(old, new): +def diff_stats(old, new, delta): if (old.state != new.state or old.state_elapsed != new.state_elapsed or old.backoff != new.backoff): @@ -106,23 +106,27 @@ def diff_stats(old, new): % (new.n_successful_connections, new.n_attempted_connections, new.seqno)) - if (old.is_connected != new.is_connected or - old.current_connection_duration != new.current_connection_duration or - old.total_connected_duration != new.total_connected_duration): + if (old.is_connected != new.is_connected): if new.is_connected: negate = "" else: - negate = "not " - print(" %sconnected (%d ms), total %d ms connected" - % (negate, new.current_connection_duration, - new.total_connected_duration)) - - if (old.last_disconnected != new.last_disconnected): + negate = "dis" + print(" %sconnected" % negate) + + if (old.last_connected != new.last_connected or + (new.msec_since_connect != None and + old.msec_since_connect != new.msec_since_connect - delta) or + (old.total_connected_duration != new.total_connected_duration - delta and + not (old.total_connected_duration == 0 and + new.total_connected_duration == 0))): + print(" last connected %d ms ago, connected %d ms total" + % (new.msec_since_connect, new.total_connected_duration)) + + if (old.last_disconnected != new.last_disconnected or + (new.msec_since_disconnect != None and + old.msec_since_disconnect != new.msec_since_disconnect - delta)): print(" disconnected at %d ms (%d ms ago)" - % (new.last_disconnected, new.current_disconnect_duration)) - - if (old.current_disconnect_duration != new.current_disconnect_duration): - print(" disconnected for %d ms" % (new.current_disconnect_duration)) + % (new.last_disconnected, new.msec_since_disconnect)) def do_set_passive(arg): r.set_passive(True, now) @@ -187,15 +191,16 @@ def main(): if old_time != now: print print "### t=%d ###" % now - old_time = now cur = r.get_stats(now) - diff_stats(prev, cur) + diff_stats(prev, cur, now - old_time) prev = cur if r.get_max_tries() != old_max_tries: old_max_tries = r.get_max_tries() print " %d tries left" % old_max_tries + old_time = now + if __name__ == '__main__': main() diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 89354dae..b084884c 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2097,15 +2097,23 @@
The state of the connection to the manager. Possible values are: VOID (connection is disabled), BACKOFF (attempting to reconnect at an increasing - period), CONNECT_IN_PROGRESS (attempting to connect), + period), CONNECTING (attempting to connect), ACTIVE (connected, remote host responsive), and - IDLE (remote host unresponsive, disconnecting). These + IDLE (remote host idle, sending keep-alive). These values may change in the future. They are provided only for human consumption.
-
time_in_state
-
Milliseconds since the state key changed.
+
sec_since_connect
+
The amount of time since this manager last successfully connected + to the database (in seconds). Value is empty if manager has never + successfully connected.
+
+
+
sec_since_disconnect
+
The amount of time since this manager last disconnected from the + database (in seconds). Value is empty if manager has never + disconnected.
-- 2.30.2