int error = vconn_recv(rc->vconn, &buffer);
if (!error) {
copy_to_monitor(rc, buffer);
- if (is_admitted_msg(buffer)
+ if (rc->probably_admitted || is_admitted_msg(buffer)
|| time_now() - rc->last_connected >= 30) {
rc->probably_admitted = true;
rc->last_admitted = time_now();
return is_connected_state(rconn->state);
}
-/* Returns 0 if 'rconn' is connected. Otherwise, if 'rconn' is in a "failure
- * mode" (that is, it is not connected), returns the number of seconds that it
- * has been in failure mode, ignoring any times that it connected but the
- * controller's admission control policy caused it to be quickly
- * disconnected. */
+/* Returns true if 'rconn' is connected and thought to have been accepted by
+ * the peer's admission-control policy. */
+bool
+rconn_is_admitted(const struct rconn *rconn)
+{
+ return (rconn_is_connected(rconn)
+ && rconn->last_admitted >= rconn->last_connected);
+}
+
+/* Returns 0 if 'rconn' is currently connected and considered to have been
+ * accepted by the peer's admission-control policy, otherwise the number of
+ * seconds since 'rconn' was last in such a state. */
int
rconn_failure_duration(const struct rconn *rconn)
{
- return rconn_is_connected(rconn) ? 0 : time_now() - rconn->last_admitted;
+ return rconn_is_admitted(rconn) ? 0 : time_now() - rconn->last_admitted;
}
/* Returns the IP address of the peer, or 0 if the peer's IP address is not
const char *rconn_get_name(const struct rconn *);
bool rconn_is_alive(const struct rconn *);
bool rconn_is_connected(const struct rconn *);
+bool rconn_is_admitted(const struct rconn *);
int rconn_failure_duration(const struct rconn *);
bool rconn_is_connectivity_questionable(struct rconn *);
#include "flow.h"
#include "mac-learning.h"
#include "odp-util.h"
+#include "ofpbuf.h"
#include "ofproto.h"
+#include "pktbuf.h"
+#include "poll-loop.h"
#include "rconn.h"
#include "status.h"
#include "timeval.h"
+#include "vconn.h"
#define THIS_MODULE VLM_fail_open
#include "vlog.h"
+/*
+ * Fail-open mode.
+ *
+ * In fail-open mode, the switch detects when the controller cannot be
+ * contacted or when the controller is dropping switch connections because the
+ * switch does not pass its admission control policy. In those situations the
+ * switch sets up flows itself using the "normal" action.
+ *
+ * There is a little subtlety to implementation, to properly handle the case
+ * where the controller allows switch connections but drops them a few seconds
+ * later for admission control reasons. Because of this case, we don't want to
+ * just stop setting up flows when we connect to the controller: if we did,
+ * then new flow setup and existing flows would stop during the duration of
+ * connection to the controller, and thus the whole network would go down for
+ * that period of time.
+ *
+ * So, instead, we add some special caseswhen we are connected to a controller,
+ * but not yet sure that it has admitted us:
+ *
+ * - We set up flows immediately ourselves, but simultaneously send out an
+ * OFPT_PACKET_IN to the controller. We put a special bogus buffer-id in
+ * these OFPT_PACKET_IN messages so that duplicate packets don't get sent
+ * out to the network when the controller replies.
+ *
+ * - We also send out OFPT_PACKET_IN messages for totally bogus packets
+ * every so often, in case no real new flows are arriving in the network.
+ *
+ * - We don't flush the flow table at the time we connect, because this
+ * could cause network stuttering in a switch with lots of flows or very
+ * high-bandwidth flows by suddenly throwing lots of packets down to
+ * userspace.
+ */
+
struct fail_open {
struct ofproto *ofproto;
struct rconn *controller;
int trigger_duration;
int last_disconn_secs;
struct status_category *ss_cat;
+ long long int next_bogus_packet_in;
+ struct rconn_packet_counter *bogus_packet_counter;
};
-/* Causes the switch to enter or leave fail-open mode, if appropriate. */
-void
-fail_open_run(struct fail_open *fo)
+/* Returns true if 'fo' should be in fail-open mode, otherwise false. */
+static inline bool
+should_fail_open(const struct fail_open *fo)
{
- int disconn_secs = rconn_failure_duration(fo->controller);
- bool open = disconn_secs >= fo->trigger_duration;
- if (open != (fo->last_disconn_secs != 0)) {
- if (!open) {
- flow_t flow;
+ return rconn_failure_duration(fo->controller) >= fo->trigger_duration;
+}
+
+/* Returns true if 'fo' is currently in fail-open mode, otherwise false. */
+bool
+fail_open_is_active(const struct fail_open *fo)
+{
+ return fo->last_disconn_secs != 0;
+}
- VLOG_WARN("No longer in fail-open mode");
- fo->last_disconn_secs = 0;
+static void
+send_bogus_packet_in(struct fail_open *fo)
+{
+ uint8_t mac[ETH_ADDR_LEN];
+ struct ofpbuf *opi;
+ struct ofpbuf b;
- memset(&flow, 0, sizeof flow);
- ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, 70000);
- } else {
+ /* Compose ofp_packet_in. */
+ ofpbuf_init(&b, 128);
+ eth_addr_random(mac);
+ compose_benign_packet(&b, "Open vSwitch Controller Probe", 0xa033, mac);
+ opi = make_packet_in(pktbuf_get_null(), OFPP_LOCAL, OFPR_NO_MATCH, &b, 64);
+ ofpbuf_uninit(&b);
+
+ /* Send. */
+ rconn_send_with_limit(fo->controller, opi, fo->bogus_packet_counter, 1);
+}
+
+/* Enter fail-open mode if we should be in it. Handle reconnecting to a
+ * controller from fail-open mode. */
+void
+fail_open_run(struct fail_open *fo)
+{
+ /* Enter fail-open mode if 'fo' is not in it but should be. */
+ if (should_fail_open(fo)) {
+ int disconn_secs = rconn_failure_duration(fo->controller);
+ if (!fail_open_is_active(fo)) {
VLOG_WARN("Could not connect to controller (or switch failed "
"controller's post-connection admission control "
"policy) for %d seconds, failing open", disconn_secs);
* fail-open rule from fail_open_flushed() when
* ofproto_flush_flows() calls back to us. */
ofproto_flush_flows(fo->ofproto);
+ } else if (disconn_secs > fo->last_disconn_secs + 60) {
+ VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
+ "from controller", disconn_secs);
+ fo->last_disconn_secs = disconn_secs;
}
- } else if (open && disconn_secs > fo->last_disconn_secs + 60) {
- VLOG_INFO("Still in fail-open mode after %d seconds disconnected "
- "from controller", disconn_secs);
- fo->last_disconn_secs = disconn_secs;
}
+
+ /* Schedule a bogus packet-in if we're connected and in fail-open. */
+ if (fail_open_is_active(fo)) {
+ if (rconn_is_connected(fo->controller)) {
+ bool expired = time_msec() >= fo->next_bogus_packet_in;
+ if (expired) {
+ send_bogus_packet_in(fo);
+ }
+ if (expired || fo->next_bogus_packet_in == LLONG_MAX) {
+ fo->next_bogus_packet_in = time_msec() + 2000;
+ }
+ } else {
+ fo->next_bogus_packet_in = LLONG_MAX;
+ }
+ }
+
}
+/* If 'fo' is currently in fail-open mode and its rconn has connected to the
+ * controller, exits fail open mode. */
void
-fail_open_wait(struct fail_open *fo UNUSED)
+fail_open_maybe_recover(struct fail_open *fo)
{
- /* Nothing to do. */
+ if (fail_open_is_active(fo) && rconn_is_admitted(fo->controller)) {
+ flow_t flow;
+
+ VLOG_WARN("No longer in fail-open mode");
+ fo->last_disconn_secs = 0;
+ fo->next_bogus_packet_in = LLONG_MAX;
+
+ memset(&flow, 0, sizeof flow);
+ ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY);
+ }
+}
+
+void
+fail_open_wait(struct fail_open *fo)
+{
+ if (fo->next_bogus_packet_in != LLONG_MAX) {
+ poll_timer_wait(fo->next_bogus_packet_in - time_msec());
+ }
}
void
action.output.len = htons(sizeof action);
action.output.port = htons(OFPP_NORMAL);
memset(&flow, 0, sizeof flow);
- ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, 70000,
+ ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY,
&action, 1, 0);
}
}
fo->last_disconn_secs = 0;
fo->ss_cat = switch_status_register(switch_status, "fail-open",
fail_open_status_cb, fo);
+ fo->next_bogus_packet_in = LLONG_MAX;
+ fo->bogus_packet_counter = rconn_packet_counter_create();
return fo;
}
if (fo) {
/* We don't own fo->controller. */
switch_status_unregister(fo->ss_cat);
+ rconn_packet_counter_destroy(fo->bogus_packet_counter);
free(fo);
}
}
struct rconn;
struct switch_status;
+/* Priority of the rule added by the fail-open subsystem when a switch enters
+ * fail-open mode. This priority value uniquely identifies a fail-open flow
+ * (OpenFlow priorities max out at 65535 and nothing else in Open vSwitch
+ * creates flows with this priority). */
+#define FAIL_OPEN_PRIORITY 70000
+
struct fail_open *fail_open_create(struct ofproto *, int trigger_duration,
struct switch_status *,
struct rconn *controller);
void fail_open_set_trigger_duration(struct fail_open *, int trigger_duration);
void fail_open_destroy(struct fail_open *);
void fail_open_wait(struct fail_open *);
+bool fail_open_is_active(const struct fail_open *);
void fail_open_run(struct fail_open *);
+void fail_open_maybe_recover(struct fail_open *);
void fail_open_flushed(struct fail_open *);
#endif /* fail-open.h */
}
}
}
- if (p->fail_open) {
- fail_open_run(p->fail_open);
- }
pinsched_run(p->miss_sched, send_packet_in_miss, p);
pinsched_run(p->action_sched, send_packet_in_action, p);
if (p->executer) {
ofconn_run(ofconn, p);
}
+ /* Fail-open maintenance. Do this after processing the ofconns since
+ * fail-open checks the status of the controller rconn. */
+ if (p->fail_open) {
+ fail_open_run(p->fail_open);
+ }
+
for (i = 0; i < p->n_listeners; i++) {
struct vconn *vconn;
int retval;
if (!of_msg) {
break;
}
+ if (p->fail_open) {
+ fail_open_maybe_recover(p->fail_open);
+ }
handle_openflow(ofconn, p, of_msg);
ofpbuf_delete(of_msg);
}
if (opo->buffer_id != htonl(UINT32_MAX)) {
error = pktbuf_retrieve(ofconn->pktbuf, ntohl(opo->buffer_id),
&buffer, &in_port);
- if (error) {
+ if (error || !buffer) {
return error;
}
payload = *buffer;
rule_execute(p, rule, &payload, &flow);
rule_reinstall(p, rule);
- ofpbuf_delete(packet);
+
+ if (rule->super && rule->super->cr.priority == FAIL_OPEN_PRIORITY
+ && rconn_is_connected(p->controller->rconn)) {
+ /*
+ * Extra-special case for fail-open mode.
+ *
+ * We are in fail-open mode and the packet matched the fail-open rule,
+ * but we are connected to a controller too. We should send the packet
+ * up to the controller in the hope that it will try to set up a flow
+ * and thereby allow us to exit fail-open.
+ *
+ * See the top-level comment in fail-open.c for more information.
+ */
+ pinsched_send(p->miss_sched, in_port, packet, send_packet_in_miss, p);
+ } else {
+ ofpbuf_delete(packet);
+ }
}
\f
static void
send_packet_in_miss(struct ofpbuf *packet, void *p_)
{
struct ofproto *p = p_;
+ bool in_fail_open = p->fail_open && fail_open_is_active(p->fail_open);
struct ofconn *ofconn;
struct ofpbuf payload;
struct odp_msg *msg;
payload.size = msg->length - sizeof *msg;
LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
if (ofconn->miss_send_len) {
- uint32_t buffer_id = pktbuf_save(ofconn->pktbuf, &payload,
- msg->port);
+ struct pktbuf *pb = ofconn->pktbuf;
+ uint32_t buffer_id = (in_fail_open
+ ? pktbuf_get_null()
+ : pktbuf_save(pb, &payload, msg->port));
int send_len = (buffer_id != UINT32_MAX ? ofconn->miss_send_len
: UINT32_MAX);
do_send_packet_in(ofconn, buffer_id, packet, send_len);
struct pktbuf {
struct packet packets[PKTBUF_CNT];
unsigned int buffer_idx;
+ unsigned int null_idx;
};
int
}
}
+static unsigned int
+make_id(unsigned int buffer_idx, unsigned int cookie)
+{
+ return buffer_idx | (cookie << PKTBUF_BITS);
+}
+
+/* Attempts to allocate an OpenFlow packet buffer id within 'pb'. The packet
+ * buffer will store a copy of 'buffer' and the port number 'in_port', which
+ * should be the datapath port number on which 'buffer' was received.
+ *
+ * If successful, returns the packet buffer id (a number other than
+ * UINT32_MAX). pktbuf_retrieve() can later be used to retrieve the buffer and
+ * its input port number (buffers do expire after a time, so this is not
+ * guaranteed to be true forever). On failure, returns UINT32_MAX.
+ *
+ * The caller retains ownership of 'buffer'. */
uint32_t
pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
{
p->buffer = ofpbuf_clone(buffer);
p->timeout = time_msec() + OVERWRITE_MSECS;
p->in_port = in_port;
- return (p - pb->packets) | (p->cookie << PKTBUF_BITS);
+ return make_id(p - pb->packets, p->cookie);
+}
+
+/*
+ * Allocates and returns a "null" packet buffer id. The returned packet buffer
+ * id is considered valid by pktbuf_retrieve(), but it is not associated with
+ * actual buffered data.
+ *
+ * This function is always successful.
+ *
+ * This is useful in one special case: with the current OpenFlow design, the
+ * "fail-open" code cannot always know whether a connection to a controller is
+ * actually valid until it receives a OFPT_PACKET_OUT or OFPT_FLOW_MOD request,
+ * but at that point the packet in question has already been forwarded (since
+ * we are still in "fail-open" mode). If the packet was buffered in the usual
+ * way, then the OFPT_PACKET_OUT or OFPT_FLOW_MOD would cause a duplicate
+ * packet in the network. Null packet buffer ids identify such a packet that
+ * has already been forwarded, so that Open vSwitch can quietly ignore the
+ * request to re-send it. (After that happens, the switch exits fail-open
+ * mode.)
+ *
+ * See the top-level comment in fail-open.c for an overview.
+ */
+uint32_t
+pktbuf_get_null(void)
+{
+ return make_id(0, COOKIE_MAX);
}
+/* Attempts to retrieve a saved packet with the given 'id' from 'pb'. Returns
+ * 0 if successful, otherwise an OpenFlow error code constructed with
+ * ofp_mkerr().
+ *
+ * On success, ordinarily stores the buffered packet in '*bufferp' and the
+ * datapath port number on which the packet was received in '*in_port'. The
+ * caller becomes responsible for freeing the buffer. However, if 'id'
+ * identifies a "null" packet buffer (created with pktbuf_get_null()), stores
+ * NULL in '*bufferp' and -1 in '*in_port'.
+ *
+ * On failure, stores NULL in in '*bufferp' and -1 in '*in_port'. */
int
pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp,
uint16_t *in_port)
VLOG_WARN_RL(&rl, "attempt to reuse buffer %08"PRIx32, id);
error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BUFFER_EMPTY);
}
- } else {
+ } else if (id >> PKTBUF_BITS != COOKIE_MAX) {
COVERAGE_INC(pktbuf_bad_cookie);
VLOG_WARN_RL(&rl, "cookie mismatch: %08"PRIx32" != %08"PRIx32,
id, (id & PKTBUF_MASK) | (p->cookie << PKTBUF_BITS));
error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_COOKIE);
+ } else {
+ COVERAGE_INC(pktbuf_null_cookie);
+ VLOG_INFO_RL(&rl, "Received null cookie %08"PRIx32" (this is normal "
+ "if the switch was recently in fail-open mode)", id);
+ error = 0;
}
*bufferp = NULL;
*in_port = -1;
struct pktbuf *pktbuf_create(void);
void pktbuf_destroy(struct pktbuf *);
uint32_t pktbuf_save(struct pktbuf *, struct ofpbuf *buffer, uint16_t in_port);
+uint32_t pktbuf_get_null(void);
int pktbuf_retrieve(struct pktbuf *, uint32_t id, struct ofpbuf **bufferp,
uint16_t *in_port);
void pktbuf_discard(struct pktbuf *, uint32_t id);
This option is most useful for debugging. It reduces switching
performance, so it should not be used in production.
+.IP "\fB--mute\fR"
+Prevents ovs\-controller from replying to any OpenFlow messages sent
+to it by switches.
+.IP
+This option is only for debugging the Open vSwitch implementation of
+``fail open'' mode. It must not be used in production.
+
.so lib/daemon.man
.so lib/vlog.man
.so lib/common.man
/* --max-idle: Maximum idle time, in seconds, before flows expire. */
static int max_idle = 60;
+/* --mute: If true, accept connections from switches but do not reply to any
+ * of their messages (for debugging fail-open mode). */
+static bool mute = false;
+
static int do_switching(struct switch_ *);
static void new_switch(struct switch_ *, struct vconn *, const char *name);
static void parse_options(int argc, char *argv[]);
msg = rconn_recv(sw->rconn);
if (msg) {
- lswitch_process_packet(sw->lswitch, sw->rconn, msg);
+ if (!mute) {
+ lswitch_process_packet(sw->lswitch, sw->rconn, msg);
+ }
ofpbuf_delete(msg);
}
rconn_run(sw->rconn);
enum {
OPT_MAX_IDLE = UCHAR_MAX + 1,
OPT_PEER_CA_CERT,
+ OPT_MUTE,
VLOG_OPTION_ENUMS
};
static struct option long_options[] = {
{"hub", no_argument, 0, 'H'},
{"noflow", no_argument, 0, 'n'},
{"max-idle", required_argument, 0, OPT_MAX_IDLE},
+ {"mute", no_argument, 0, OPT_MUTE},
{"help", no_argument, 0, 'h'},
{"version", no_argument, 0, 'V'},
DAEMON_LONG_OPTIONS,
setup_flows = false;
break;
+ case OPT_MUTE:
+ mute = true;
+ break;
+
case OPT_MAX_IDLE:
if (!strcmp(optarg, "permanent")) {
max_idle = OFP_FLOW_PERMANENT;