From: Ben Pfaff Date: Wed, 24 Dec 2008 19:01:37 +0000 (-0800) Subject: vswitchd: Automatically restart secchan if it dies. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3b1832f59931c0c1d22e0f4d2557ff58b6a4261d;p=openvswitch vswitchd: Automatically restart secchan if it dies. --- diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 35bd3d7e..6b782bdb 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -99,13 +99,22 @@ struct port { struct bridge { struct list node; /* Node in global list of bridges. */ char *name; /* User-specified arbitrary name. */ - struct process *secchan; /* The "secchan" subprocess. */ - struct rconn *rconn; /* Connection to secchan subprocess. */ int txqlen; /* # of messages queued to send on 'rconn'. */ struct mac_learning *ml; /* MAC learning table, or null not to learn. */ int flow_idle_time; /* Idle time for flows we set up. */ bool sent_config; /* Successfully sent config request? */ + /* Secure channel. */ + enum { + SC_UNSTARTED, /* Not yet started. */ + SC_RUNNING, /* Started. */ + SC_DYING, /* 'rconn' is closed but process not dead. */ + SC_DEAD /* Died too many times, giving up. */ + } sc_state; + int sc_retries; /* Number of times secchan restarted. */ + struct process *secchan; /* The "secchan" subprocess. */ + struct rconn *rconn; /* Connection to secchan subprocess. */ + /* Kernel datapath information. */ int dp_idx; /* Kernel datapath index. */ struct iface *ifaces[DP_MAX_PORTS]; /* Index by kernel datapath port no. */ @@ -302,7 +311,9 @@ bridge_wait(void) if (!bridge_is_backlogged(br)) { rconn_recv_wait(br->rconn); } - process_wait(br->secchan); + if (br->secchan) { + process_wait(br->secchan); + } if (br->ml) { mac_learning_wait(br->ml); } @@ -313,24 +324,29 @@ bridge_wait(void) static int allocate_dp_idx(void); static void sanitize_opp(struct ofp_phy_port *opp); +static void run_secchan(struct bridge *); +static void start_secchan(struct bridge *); static struct bridge * bridge_create(const char *name) { struct bridge *br; - struct stat s; - struct svec argv; - char *dp_name; - int sockets[2]; int retval; assert(!bridge_lookup(name)); br = xcalloc(1, sizeof *br); list_push_back(&all_bridges, &br->node); br->name = xstrdup(name); + br->txqlen = 0; br->ml = mac_learning_create(); br->flow_idle_time = 5; br->sent_config = false; + + br->sc_state = SC_UNSTARTED; + br->sc_retries = 0; + br->secchan = NULL; + br->rconn = rconn_create(30, 1); + br->ft = ft_create(); /* Create kernel datapath. */ @@ -356,22 +372,93 @@ bridge_create(const char *name) break; } - VLOG_WARN("created bridge %s on datapath nl:%d", - br->name, br->dp_idx); + VLOG_WARN("created bridge %s on datapath nl:%d", br->name, br->dp_idx); + + return br; +} + +static void +log_secchan_died(enum vlog_level level, struct bridge *br, bool expected) +{ + char *status = process_status_msg(process_status(br->secchan)); + vlog(THIS_MODULE, level, "%s: secchan subprocess with pid %ld died%s (%s)", + br->name, (long int) process_pid(br->secchan), + expected ? "" : " unexpectedly", status); + free(status); +} + +static void +run_secchan(struct bridge *br) +{ + switch (br->sc_state) { + case SC_UNSTARTED: + start_secchan(br); + break; + + case SC_RUNNING: + if (process_exited(br->secchan)) { + log_secchan_died(VLL_ERR, br, false); + br->sc_state = SC_UNSTARTED; + } else if (!rconn_is_alive(br->rconn)) { + VLOG_ERR("%s: connection to secchan closed unexpectedly, " + "killing secchan", br->name); + process_kill(br->secchan, SIGTERM); + br->sc_state = SC_DYING; + } + break; + + case SC_DYING: + if (process_exited(br->secchan)) { + log_secchan_died(VLL_WARN, br, true); + br->sc_state = SC_UNSTARTED; + } + break; + + case SC_DEAD: + /* Nothing to do. */ + break; + } +} + +static void +start_secchan(struct bridge *br) +{ + struct svec argv; + int sockets[2]; + struct stat s; + char *dp_name; + int retval; + + if (br->sc_retries >= 5) { + VLOG_ERR("%s: restarted secchan maximum number of %d times, disabling", + br->name, br->sc_retries); + br->sc_state = SC_DEAD; + return; + } + br->sc_retries++; + + /* Clean up vestiges of any previous secchan. */ + rconn_disconnect(br->rconn); + if (br->secchan) { + process_destroy(br->secchan); + br->secchan = NULL; + } /* Create socketpair for communicating with secchan subprocess. */ if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sockets)) { - VLOG_ERR("failed to create socket pair: %s", strerror(errno)); - /* XXX */ + VLOG_ERR("%s: failed to create socket pair: %s", + br->name, strerror(errno)); + goto error; } set_nonblocking(sockets[0]); set_nonblocking(sockets[1]); + /* Connect to our end of the socketpair. */ dp_name = xasprintf("fd:%d", sockets[0]); - br->rconn = rconn_new(dp_name, 30, 1); + rconn_connect(br->rconn, dp_name); free(dp_name); - /* Start secchan subprocess. */ + /* Assemble command-line arguments. */ svec_init(&argv); svec_add(&argv, "secchan"); svec_add(&argv, "--out-of-band"); @@ -392,15 +479,20 @@ bridge_create(const char *name) svec_add_nocopy(&argv, xasprintf("fd:%d", sockets[1])); svec_terminate(&argv); + /* Start secchan. */ retval = process_start(argv.names, &sockets[1], 1, &br->secchan); close(sockets[1]); if (retval) { - VLOG_ERR("failed to start secchan for datapath nl:%d: %s", - br->dp_idx, strerror(retval)); - bridge_destroy(br); - return NULL; + VLOG_ERR("%s: failed to start secchan for datapath nl:%d: %s", + br->name, br->dp_idx, strerror(retval)); + goto error; } - return br; + br->sc_state = SC_RUNNING; + br->sent_config = false; + return; + +error: + br->sc_state = SC_UNSTARTED; } static void @@ -525,19 +617,7 @@ bridge_run_one(struct bridge *br) } } } - - if (process_exited(br->secchan)) { - int status = process_status(br->secchan); - VLOG_ERR("%s: secchan subprocess with pid %ld died unexpectedly (%s)", - br->name, (long int) process_pid(br->secchan), - process_status_msg(status)); - bridge_destroy(br); - /* XXX restart */ - } else if (!rconn_is_alive(br->rconn)) { - VLOG_ERR("%s: connection to secchan closed unexpectedly", br->name); - bridge_destroy(br); - /* XXX kill and restart secchan */ - } + run_secchan(br); } static void @@ -573,6 +653,12 @@ bridge_reconfigure_one(struct bridge *br) for (i = 0; i < br->n_ports; i++) { port_reconfigure(br->ports[i]); } + + /* Revive secchan if it's dead. */ + if (br->sc_state == SC_DEAD) { + br->sc_retries = 0; + br->sc_state = SC_UNSTARTED; + } } static void