From 4fdfe5ccf84c473ad98300cb9050889b033768df Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 31 Oct 2011 09:15:14 -0700 Subject: [PATCH] ovsdb-idl: Prevent occasional hang when multiple database clients race. When a client of the IDL tries to commit a read-modify-write transaction but the database has changed in the meantime, the IDL tells its client to wait for the IDL to change and then try the transaction again by returning TXN_TRY_AGAIN. The "wait for the IDL to change" part is important because there's no point in retrying the transaction before the IDL has received the database updates (the transaction would fail in the same way all over again). However, the logic was incomplete: the database update can be received *before* the reply to the transaction RPC (I think that in the current ovsdb-server implementation this will always happen, in fact). When this happens, the right thing to do is to retry the transaction immediately; if we wait, then we're waiting for an additional change to the database that may never come, causing an indefinite hang. This commit therefore breaks the "try again" IDL commit status code into two, one that means "try again immediately" and another that means "wait for a change then try again". When an update is processed after a transaction is committed but before the reply is received, the "try again now" tells the IDL client not to wait for another database change before retrying its transaction. Bug #5980. Reported-by: Ram Jothikumar Reproduced-by: Alex Yip --- lib/ovsdb-idl.c | 20 ++++++++++++++------ lib/ovsdb-idl.h | 6 ++++-- python/ovs/db/idl.py | 21 +++++++++++++++------ tests/ovsdb-idl.at | 2 +- utilities/ovs-vsctl.c | 26 +++++++++++++++++--------- 5 files changed, 51 insertions(+), 24 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 56b43286..439a40a1 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -91,6 +91,7 @@ struct ovsdb_idl_txn { char *error; bool dry_run; struct ds comment; + unsigned int commit_seqno; /* Increments. */ char *inc_table; @@ -1173,8 +1174,10 @@ ovsdb_idl_txn_status_to_string(enum ovsdb_idl_txn_status status) return "aborted"; case TXN_SUCCESS: return "success"; - case TXN_TRY_AGAIN: - return "try again"; + case TXN_AGAIN_WAIT: + return "wait then try again"; + case TXN_AGAIN_NOW: + return "try again now"; case TXN_NOT_LOCKED: return "not locked"; case TXN_ERROR: @@ -1197,6 +1200,7 @@ ovsdb_idl_txn_create(struct ovsdb_idl *idl) txn->error = NULL; txn->dry_run = false; ds_init(&txn->comment); + txn->commit_seqno = txn->idl->change_seqno; txn->inc_table = NULL; txn->inc_column = NULL; @@ -1587,7 +1591,7 @@ ovsdb_idl_txn_commit(struct ovsdb_idl_txn *txn) json_hash(txn->request_id, 0)); txn->status = TXN_INCOMPLETE; } else { - txn->status = TXN_TRY_AGAIN; + txn->status = TXN_AGAIN_WAIT; } ovsdb_idl_txn_disassemble(txn); @@ -1762,7 +1766,9 @@ ovsdb_idl_txn_write(const struct ovsdb_idl_row *row_, * prerequisite to completing the transaction. That is, if 'column' in 'row_' * changed (or if 'row_' was deleted) between the time that the IDL originally * read its contents and the time that the transaction commits, then the - * transaction aborts and ovsdb_idl_txn_commit() returns TXN_TRY_AGAIN. + * transaction aborts and ovsdb_idl_txn_commit() returns TXN_AGAIN_WAIT or + * TXN_AGAIN_NOW (depending on whether the database change has already been + * received). * * The intention is that, to ensure that no transaction commits based on dirty * reads, an application should call ovsdb_idl_txn_verify() on each data item @@ -1890,7 +1896,7 @@ ovsdb_idl_txn_abort_all(struct ovsdb_idl *idl) struct ovsdb_idl_txn *txn; HMAP_FOR_EACH (txn, hmap_node, &idl->outstanding_txns) { - ovsdb_idl_txn_complete(txn, TXN_TRY_AGAIN); + ovsdb_idl_txn_complete(txn, TXN_AGAIN_WAIT); } } @@ -2091,7 +2097,9 @@ ovsdb_idl_txn_process_reply(struct ovsdb_idl *idl, status = (hard_errors ? TXN_ERROR : lock_errors ? TXN_NOT_LOCKED - : soft_errors ? TXN_TRY_AGAIN + : soft_errors ? (txn->commit_seqno == idl->change_seqno + ? TXN_AGAIN_WAIT + : TXN_AGAIN_NOW) : TXN_SUCCESS); } diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h index e7ae6f1d..320a1efc 100644 --- a/lib/ovsdb-idl.h +++ b/lib/ovsdb-idl.h @@ -120,9 +120,11 @@ enum ovsdb_idl_txn_status { TXN_INCOMPLETE, /* Commit in progress, please wait. */ TXN_ABORTED, /* ovsdb_idl_txn_abort() called. */ TXN_SUCCESS, /* Commit successful. */ - TXN_TRY_AGAIN, /* Commit failed because a "verify" operation + TXN_AGAIN_WAIT, /* Commit failed because a "verify" operation * reported an inconsistency, due to a network - * problem, or other transient failure. */ + * problem, or other transient failure. Wait + * for a change, then try again. */ + TXN_AGAIN_NOW, /* Same as above but try again immediately. */ TXN_NOT_LOCKED, /* Server hasn't given us the lock yet. */ TXN_ERROR /* Commit failed due to a hard error. */ }; diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py index cd72fe4d..6150a02e 100644 --- a/python/ovs/db/idl.py +++ b/python/ovs/db/idl.py @@ -441,7 +441,7 @@ class Idl: def __txn_abort_all(self): while self._outstanding_txns: txn = self._outstanding_txns.popitem()[1] - txn._status = Transaction.TRY_AGAIN + txn._status = Transaction.AGAIN_WAIT def __txn_process_reply(self, msg): txn = self._outstanding_txns.pop(msg.id, None) @@ -561,7 +561,9 @@ class Row(object): if 'column_name' changed in this row (or if this row was deleted) between the time that the IDL originally read its contents and the time that the transaction commits, then the transaction aborts and - Transaction.commit() returns Transaction.TRY_AGAIN. + Transaction.commit() returns Transaction.AGAIN_WAIT or + Transaction.AGAIN_NOW (depending on whether the database change has + already been received). The intention is that, to ensure that no transaction commits based on dirty reads, an application should call Row.verify() on each data item @@ -620,9 +622,12 @@ class Transaction(object): INCOMPLETE = "incomplete" # Commit in progress, please wait. ABORTED = "aborted" # ovsdb_idl_txn_abort() called. SUCCESS = "success" # Commit successful. - TRY_AGAIN = "try again" # Commit failed because a "verify" operation + AGAIN_WAIT = "wait then try again" + # Commit failed because a "verify" operation # reported an inconsistency, due to a network - # problem, or other transient failure. + # problem, or other transient failure. Wait + # for a change, then try again. + AGAIN_NOW = "try again now" # Same as AGAIN_WAIT but try again right away. NOT_LOCKED = "not locked" # Server hasn't given us the lock yet. ERROR = "error" # Commit failed due to a hard error. @@ -657,6 +662,7 @@ class Transaction(object): self._status = Transaction.UNCOMMITTED self._error = None self._comments = [] + self._commit_seqno = self.idl.change_seqno self._inc_table = None self._inc_column = None @@ -827,7 +833,7 @@ class Transaction(object): self.idl._outstanding_txns[self._request_id] = self self._status = Transaction.INCOMPLETE else: - self._status = Transaction.TRY_AGAIN + self._status = Transaction.AGAIN_WAIT self.__disassemble() return self._status @@ -982,7 +988,10 @@ class Transaction(object): elif lock_errors: self._status = Transaction.NOT_LOCKED elif soft_errors: - self._status = Transaction.TRY_AGAIN + if self._commit_seqno == self.idl.change_seqno: + self._status = Transaction.AGAIN_WAIT + else: + self._status = Transaction.AGAIN_NOW else: self._status = Transaction.SUCCESS diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index cce6197e..a8ce3ca5 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -221,7 +221,7 @@ OVSDB_CHECK_IDL([simple idl, handling verification failure], 000: i=1 r=2 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 001: commit, status=success 002: {"error":null,"result":[{"count":1}]} -003: commit, status=try again +003: commit, status=try again now 004: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: i=1 r=5 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 005: commit, status=success diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c index a8b10118..1f0f485e 100644 --- a/utilities/ovs-vsctl.c +++ b/utilities/ovs-vsctl.c @@ -138,9 +138,9 @@ static void parse_command(int argc, char *argv[], struct vsctl_command *); static const struct vsctl_command_syntax *find_command(const char *name); static void run_prerequisites(struct vsctl_command[], size_t n_commands, struct ovsdb_idl *); -static void do_vsctl(const char *args, - struct vsctl_command *, size_t n_commands, - struct ovsdb_idl *); +static enum ovsdb_idl_txn_status do_vsctl(const char *args, + struct vsctl_command *, size_t n, + struct ovsdb_idl *); static const struct vsctl_table_class *get_table(const char *table_name); static void set_column(const struct vsctl_table_class *, @@ -156,6 +156,7 @@ int main(int argc, char *argv[]) { extern struct vlog_module VLM_reconnect; + enum ovsdb_idl_txn_status status; struct ovsdb_idl *idl; struct vsctl_command *commands; size_t n_commands; @@ -184,13 +185,16 @@ main(int argc, char *argv[]) run_prerequisites(commands, n_commands, idl); /* Now execute the commands. */ + status = TXN_AGAIN_WAIT; for (;;) { - if (ovsdb_idl_run(idl)) { - do_vsctl(args, commands, n_commands, idl); + if (ovsdb_idl_run(idl) || status == TXN_AGAIN_NOW) { + status = do_vsctl(args, commands, n_commands, idl); } - ovsdb_idl_wait(idl); - poll_block(); + if (status != TXN_AGAIN_NOW) { + ovsdb_idl_wait(idl); + poll_block(); + } } } @@ -3578,7 +3582,7 @@ run_prerequisites(struct vsctl_command *commands, size_t n_commands, } } -static void +static enum ovsdb_idl_txn_status do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands, struct ovsdb_idl *idl) { @@ -3625,6 +3629,7 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands, vsctl_context_done(&ctx, c); if (ctx.try_again) { + status = TXN_AGAIN_WAIT; goto try_again; } } @@ -3681,7 +3686,8 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands, case TXN_SUCCESS: break; - case TXN_TRY_AGAIN: + case TXN_AGAIN_WAIT: + case TXN_AGAIN_NOW: goto try_again; case TXN_ERROR: @@ -3765,6 +3771,8 @@ try_again: free(c->table); } free(error); + + return status; } static const struct vsctl_command_syntax all_commands[] = { -- 2.30.2