From: Ben Pfaff Date: Fri, 6 Mar 2009 22:34:46 +0000 (-0800) Subject: Keep secchan and vswitchd from consuming 100% CPU when a datapath is deleted. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c583210b82600f7c12dd7ce28af4b27f3094313a;p=openvswitch Keep secchan and vswitchd from consuming 100% CPU when a datapath is deleted. Before this commit, "dpctl deldp x" would cause secchan or vswitchd to consume 100% CPU if they were responsible for the given datapath. This fixes the problem. --- diff --git a/secchan/main.c b/secchan/main.c index 43779617..f42fab2a 100644 --- a/secchan/main.c +++ b/secchan/main.c @@ -203,10 +203,11 @@ main(int argc, char *argv[]) reconfigure(ofproto, s.br_name); } - /* Do work. */ - ofproto_run(ofproto); + error = ofproto_run(ofproto); + if (error) { + ofp_fatal(error, "unrecoverable datapath error"); + } - /* Wait for something to happen. */ ofproto_wait(ofproto); signal_wait(sighup); poll_block(); diff --git a/secchan/ofproto.c b/secchan/ofproto.c index 9092723c..ef0f3d6b 100644 --- a/secchan/ofproto.c +++ b/secchan/ofproto.c @@ -605,7 +605,7 @@ ofproto_destroy(struct ofproto *p) free(p); } -void +int ofproto_run(struct ofproto *p) { struct ofconn *ofconn, *next_ofconn; @@ -619,6 +619,15 @@ ofproto_run(struct ofproto *p) error = dpif_recv(&p->dpif, &buf); if (error) { + if (error == ENODEV) { + /* Someone destroyed the datapath behind our back. The caller + * better destroy us and give up, because we're just going to + * spin from here on out. */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_ERR_RL(&rl, "dp%u: datapath was destroyed externally", + p->dpif.minor); + return ENODEV; + } break; } @@ -693,6 +702,8 @@ ofproto_run(struct ofproto *p) classifier_for_each(&p->cls, CLS_INC_EXACT, revalidate_subrule_cb, p); p->need_revalidate = false; } + + return 0; } void diff --git a/secchan/ofproto.h b/secchan/ofproto.h index 7a92e1c8..54bca7ad 100644 --- a/secchan/ofproto.h +++ b/secchan/ofproto.h @@ -44,7 +44,7 @@ struct svec; int ofproto_create(const char *datapath, struct ofproto **ofprotop); void ofproto_destroy(struct ofproto *); -void ofproto_run(struct ofproto *); +int ofproto_run(struct ofproto *); void ofproto_wait(struct ofproto *); bool ofproto_is_alive(const struct ofproto *); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 0d1aedba..91321886 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -201,7 +201,7 @@ static struct bridge *bridge_create(const char *name); static void bridge_destroy(struct bridge *); static struct bridge *bridge_lookup(const char *name); static int if_up(const char *netdev_name); -static void bridge_run_one(struct bridge *); +static int bridge_run_one(struct bridge *); static void bridge_reconfigure_one(struct bridge *); static void bridge_get_all_ifaces(const struct bridge *, struct svec *ifaces); static bool bridge_is_backlogged(const struct bridge *); @@ -532,14 +532,25 @@ bridge_pick_local_hw_addr(struct bridge *br, struct iface *local_iface) bridge_set_local_hw_addr(br, local_iface, ea); } -void +int bridge_run(void) { struct bridge *br, *next; + int retval; + retval = 0; LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) { - bridge_run_one(br); + int error = bridge_run_one(br); + if (error) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_ERR_RL(&rl, "bridge %s: datapath was destroyed externally, " + "forcing reconfiguration", br->name); + if (!retval) { + retval = error; + } + } } + return retval; } void @@ -765,14 +776,14 @@ send_features_request(struct bridge *br) } } -static void +static int bridge_run_one(struct bridge *br) { int iteration; + int error; if (br->controller) { - ofproto_run(br->ofproto); - return; + return ofproto_run(br->ofproto); } rconn_run(br->rconn); @@ -807,7 +818,7 @@ bridge_run_one(struct bridge *br) /* Start or restart switch if necessary. */ connect_ofproto(br); - ofproto_run(br->ofproto); + error = ofproto_run(br->ofproto); /* Now revalidate any flows that need it. */ if (br->flush || !tag_set_is_empty(&br->revalidate_set)) { @@ -822,6 +833,8 @@ bridge_run_one(struct bridge *br) } tag_set_init(&br->revalidate_set); br->flush = false; + + return error; } static void diff --git a/vswitchd/bridge.h b/vswitchd/bridge.h index 1e484818..f8afeb82 100644 --- a/vswitchd/bridge.h +++ b/vswitchd/bridge.h @@ -32,7 +32,7 @@ void bridge_init(void); void bridge_reconfigure(void); -void bridge_run(void); +int bridge_run(void); void bridge_wait(void); bool bridge_exists(const char *); diff --git a/vswitchd/vswitchd.c b/vswitchd/vswitchd.c index 1890b091..c9e4bbc0 100644 --- a/vswitchd/vswitchd.c +++ b/vswitchd/vswitchd.c @@ -67,6 +67,7 @@ int main(int argc, char *argv[]) { struct signal *sighup; + bool need_reconfigure; int retval; set_program_name(argv[0]); @@ -90,12 +91,16 @@ main(int argc, char *argv[]) cfg_read(); bridge_init(); + need_reconfigure = false; for (;;) { - if (signal_poll(sighup)) { + if (need_reconfigure || signal_poll(sighup)) { + need_reconfigure = false; vlog_reopen_log_file(); reconfigure(); } - bridge_run(); + if (bridge_run()) { + need_reconfigure = true; + } if (brc_enabled) { brc_run();