/*
* Copyright (c) 2009, 2010 Nicira Networks.
+ * Copyright (c) 2010 Jean Tourrilhes - HP-Labs.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
struct hmap_node hmap_node; /* In struct ofproto's "controllers" map. */
struct discovery *discovery; /* Controller discovery object, if enabled. */
struct status_category *ss; /* Switch status category. */
+ enum ofproto_band band; /* In-band or out-of-band? */
};
/* We use OFPR_NO_MATCH and OFPR_ACTION as indexes into struct ofconn's
static void ofconn_destroy(struct ofconn *);
static void ofconn_run(struct ofconn *, struct ofproto *);
static void ofconn_wait(struct ofconn *);
+static bool ofconn_receives_async_msgs(const struct ofconn *);
+
static void queue_tx(struct ofpbuf *msg, const struct ofconn *ofconn,
struct rconn_packet_counter *counter);
/* Configuration. */
struct switch_status *switch_status;
- struct in_band *in_band;
struct fail_open *fail_open;
struct netflow *netflow;
struct ofproto_sflow *sflow;
+ /* In-band control. */
+ struct in_band *in_band;
+ long long int next_in_band_update;
+ struct sockaddr_in *extra_in_band_remotes;
+ size_t n_extra_remotes;
+
/* Flow table. */
struct classifier cls;
bool need_revalidate;
int probe_interval;
int i;
+ ofconn->band = (is_in_band_controller(c)
+ ? OFPROTO_IN_BAND : OFPROTO_OUT_OF_BAND);
+
rconn_set_max_backoff(ofconn->rconn, c->max_backoff);
probe_interval = c->probe_interval ? MAX(c->probe_interval, 5) : 0;
return NULL;
}
+/* Recomputes the complete set of remote addresses that in-band control must
+ * guarantee access to: one entry per connected primary controller plus all of
+ * 'ofproto''s configured extra remotes. Creates, updates, or destroys the
+ * in-band object accordingly and schedules the next periodic refresh. */
+static void
+update_in_band_remotes(struct ofproto *ofproto)
+{
+ const struct ofconn *ofconn;
+ struct sockaddr_in *addrs;
+ size_t max_addrs, n_addrs;
+ bool discovery;
+ size_t i;
+
+ /* Allocate enough memory for as many remotes as we could possibly have. */
+ max_addrs = ofproto->n_extra_remotes + hmap_count(&ofproto->controllers);
+ addrs = xmalloc(max_addrs * sizeof *addrs);
+ n_addrs = 0;
+
+ /* Add all the remotes. */
+ discovery = false;
+ HMAP_FOR_EACH (ofconn, struct ofconn, hmap_node, &ofproto->controllers) {
+ struct sockaddr_in *sin = &addrs[n_addrs];
+
+ /* A zero IP presumably means the rconn has no remote yet (not
+ * connected, or address still being discovered), so skip it.
+ * NOTE(review): sin_family is never initialized here nor for the
+ * extra remotes below -- assumes in_band_set_remotes() ignores it;
+ * confirm. */
+ sin->sin_addr.s_addr = rconn_get_remote_ip(ofconn->rconn);
+ if (sin->sin_addr.s_addr) {
+ sin->sin_port = rconn_get_remote_port(ofconn->rconn);
+ n_addrs++;
+ }
+ if (ofconn->discovery) {
+ discovery = true;
+ }
+ }
+ for (i = 0; i < ofproto->n_extra_remotes; i++) {
+ addrs[n_addrs++] = ofproto->extra_in_band_remotes[i];
+ }
+
+ /* Create or update or destroy in-band.
+ *
+ * Ordinarily we only enable in-band if there's at least one remote
+ * address, but discovery needs the in-band rules for DHCP to be installed
+ * even before we know any remote addresses. */
+ if (n_addrs || discovery) {
+ if (!ofproto->in_band) {
+ in_band_create(ofproto, ofproto->dpif, ofproto->switch_status,
+ &ofproto->in_band);
+ }
+ in_band_set_remotes(ofproto->in_band, addrs, n_addrs);
+ /* Refresh again in 1 second, since controller IPs can change (e.g.
+ * discovery, DNS re-resolution by the rconn). */
+ ofproto->next_in_band_update = time_msec() + 1000;
+ } else {
+ /* Presumably in_band_destroy() tolerates a null argument, since
+ * ofproto->in_band may never have been created -- TODO confirm. */
+ in_band_destroy(ofproto->in_band);
+ ofproto->in_band = NULL;
+ }
+
+ /* Clean up. */
+ free(addrs);
+}
+
void
ofproto_set_controllers(struct ofproto *p,
const struct ofproto_controller *controllers,
size_t n_controllers)
{
struct shash new_controllers;
- struct rconn **in_band_rconns;
enum ofproto_fail_mode fail_mode;
struct ofconn *ofconn, *next;
bool ss_exists;
- size_t n_in_band;
size_t i;
shash_init(&new_controllers);
}
}
- in_band_rconns = xmalloc(n_controllers * sizeof *in_band_rconns);
- n_in_band = 0;
fail_mode = OFPROTO_FAIL_STANDALONE;
ss_exists = false;
HMAP_FOR_EACH_SAFE (ofconn, next, struct ofconn, hmap_node,
ofconn_destroy(ofconn);
} else {
update_controller(ofconn, c);
-
if (ofconn->ss) {
ss_exists = true;
}
- if (is_in_band_controller(c)) {
- in_band_rconns[n_in_band++] = ofconn->rconn;
- }
-
if (c->fail == OFPROTO_FAIL_SECURE) {
fail_mode = OFPROTO_FAIL_SECURE;
}
}
shash_destroy(&new_controllers);
- if (n_in_band) {
- if (!p->in_band) {
- in_band_create(p, p->dpif, p->switch_status, &p->in_band);
- }
- if (p->in_band) {
- in_band_set_remotes(p->in_band, in_band_rconns, n_in_band);
- }
- } else {
- in_band_destroy(p->in_band);
- p->in_band = NULL;
- }
- free(in_band_rconns);
+ update_in_band_remotes(p);
if (!hmap_is_empty(&p->controllers)
&& fail_mode == OFPROTO_FAIL_STANDALONE) {
}
}
+/* Returns true if the 'n' addresses in 'extras' differ from the extra
+ * in-band remotes currently recorded in 'ofproto', either in count or in any
+ * element's IP address or port. The comparison is positional, so the same
+ * set of addresses in a different order also counts as changed. */
+static bool
+any_extras_changed(const struct ofproto *ofproto,
+ const struct sockaddr_in *extras, size_t n)
+{
+ size_t i;
+
+ if (n != ofproto->n_extra_remotes) {
+ return true;
+ }
+
+ for (i = 0; i < n; i++) {
+ const struct sockaddr_in *old = &ofproto->extra_in_band_remotes[i];
+ const struct sockaddr_in *new = &extras[i];
+
+ /* Only address and port are compared; other sockaddr_in fields are
+ * presumably never meaningful here -- see update_in_band_remotes(). */
+ if (old->sin_addr.s_addr != new->sin_addr.s_addr ||
+ old->sin_port != new->sin_port) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Sets the 'n' TCP port addresses in 'extras' as ones to which 'ofproto''s
+ * in-band control should guarantee access, in the same way that in-band
+ * control guarantees access to OpenFlow controllers.
+ *
+ * The addresses are copied, so the caller retains ownership of 'extras'.
+ * A no-op if the new set is identical to the current one, which avoids
+ * needlessly churning the in-band flows. */
+void
+ofproto_set_extra_in_band_remotes(struct ofproto *ofproto,
+ const struct sockaddr_in *extras, size_t n)
+{
+ if (!any_extras_changed(ofproto, extras, n)) {
+ return;
+ }
+
+ /* NOTE(review): assumes xmemdup() tolerates n == 0 (possibly with a null
+ * 'extras') -- confirm against its definition. */
+ free(ofproto->extra_in_band_remotes);
+ ofproto->n_extra_remotes = n;
+ ofproto->extra_in_band_remotes = xmemdup(extras, n * sizeof *extras);
+
+ update_in_band_remotes(ofproto);
+}
+
void
ofproto_set_desc(struct ofproto *p,
const char *mfr_desc, const char *hw_desc,
in_band_destroy(p->in_band);
p->in_band = NULL;
+ free(p->extra_in_band_remotes);
ofproto_flush_flows(p);
classifier_destroy(&p->cls);
}
}
+/* Returns a "preference level" for snooping 'ofconn'. A higher return value
+ * means that 'ofconn' is more interesting for monitoring than a lower return
+ * value: masters rank above "other" role connections, which rank above
+ * slaves. Unknown roles rank lowest of all. */
+static int
+snoop_preference(const struct ofconn *ofconn)
+{
+ switch (ofconn->role) {
+ case NX_ROLE_MASTER:
+ return 3;
+ case NX_ROLE_OTHER:
+ return 2;
+ case NX_ROLE_SLAVE:
+ return 1;
+ default:
+ /* Shouldn't happen: role is only ever assigned NX_ROLE_* values. */
+ return 0;
+ }
+}
+
/* One of ofproto's "snoop" pvconns has accepted a new connection on 'vconn'.
 * Connects this vconn to a controller. */
static void
add_snooper(struct ofproto *ofproto, struct vconn *vconn)
{
- struct ofconn *ofconn;
+ struct ofconn *ofconn, *best;
- /* Arbitrarily pick the first controller in the list for monitoring. We
- * could do something smarter or more flexible later, if it ever proves
- * useful. */
+ /* Pick the most interesting controller connection for monitoring, as
+ * ranked by snoop_preference() (master > other > slave). */
+ best = NULL;
 LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
- if (ofconn->type == OFCONN_CONTROLLER) {
- rconn_add_monitor(ofconn->rconn, vconn);
- return;
+ if (ofconn->type == OFCONN_CONTROLLER
+ && (!best || snoop_preference(ofconn) > snoop_preference(best))) {
+ best = ofconn;
 }
+ }
+ if (best) {
+ rconn_add_monitor(best->rconn, vconn);
+ } else {
+ /* No controller to mirror traffic from, so drop the snoop client. */
+ VLOG_INFO_RL(&rl, "no controller connection to snoop");
+ vconn_close(vconn);
 }
- VLOG_INFO_RL(&rl, "no controller connection to monitor");
- vconn_close(vconn);
}
int
}
if (p->in_band) {
+ if (time_msec() >= p->next_in_band_update) {
+ update_in_band_remotes(p);
+ }
in_band_run(p->in_band);
}
ofconn_wait(ofconn);
}
if (p->in_band) {
+ poll_timer_wait(p->next_in_band_update - time_msec());
in_band_wait(p->in_band);
}
if (p->fail_open) {
struct ofp_port_status *ops;
struct ofpbuf *b;
- if (ofconn->role == NX_ROLE_SLAVE) {
+ if (!ofconn_receives_async_msgs(ofconn)) {
continue;
}
COVERAGE_INC(ofproto_ofconn_stuck);
}
}
+
+/* Returns true if 'ofconn' should receive asynchronous messages (packet-ins,
+ * flow-removed and port-status notifications, etc.). */
+static bool
+ofconn_receives_async_msgs(const struct ofconn *ofconn)
+{
+ if (ofconn->type == OFCONN_CONTROLLER) {
+ /* Ordinary controllers always get asynchronous messages unless they
+ * have configured themselves as "slaves". */
+ return ofconn->role != NX_ROLE_SLAVE;
+ } else {
+ /* Transient connections don't get asynchronous messages unless they
+ * have explicitly asked for them by setting a nonzero miss send
+ * length. */
+ return ofconn->miss_send_len > 0;
+ }
+}
\f
/* Caller is responsible for initializing the 'cr' member of the returned
* rule. */
const struct ofp_action_output *oao)
{
union odp_action *a = odp_actions_add(actions, ODPAT_CONTROLLER);
- a->controller.arg = oao->max_len ? ntohs(oao->max_len) : UINT32_MAX;
+ a->controller.arg = ntohs(oao->max_len);
}
struct action_xlate_ctx {
static void
append_port_stat(struct ofport *port, uint16_t port_no, struct ofconn *ofconn,
- struct ofpbuf *msg)
+ struct ofpbuf **msgp)
{
struct netdev_stats stats;
struct ofp_port_stats *ops;
* netdev_get_stats() will log errors. */
netdev_get_stats(port->netdev, &stats);
- ops = append_stats_reply(sizeof *ops, ofconn, &msg);
+ ops = append_stats_reply(sizeof *ops, ofconn, msgp);
ops->port_no = htons(odp_port_to_ofp_port(port_no));
memset(ops->pad, 0, sizeof ops->pad);
ops->rx_packets = htonll(stats.rx_packets);
port = port_array_get(&p->ports,
ofp_port_to_odp_port(ntohs(psr->port_no)));
if (port) {
- append_port_stat(port, ntohs(psr->port_no), ofconn, msg);
+ append_port_stat(port, ntohs(psr->port_no), ofconn, &msg);
}
} else {
PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) {
- append_port_stat(port, port_no, ofconn, msg);
+ append_port_stat(port, port_no, ofconn, &msg);
}
}
uint32_t role;
if (ntohs(msg->header.length) != sizeof *nrr) {
- VLOG_WARN_RL(&rl, "received role request of length %zu (expected %zu)",
+ VLOG_WARN_RL(&rl, "received role request of length %u (expected %zu)",
ntohs(msg->header.length), sizeof *nrr);
return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN);
}
struct nicira_header *nh;
if (ntohs(ovh->header.length) < sizeof(struct ofp_vendor_header)) {
- VLOG_WARN_RL(&rl, "received vendor message of length %zu "
+ VLOG_WARN_RL(&rl, "received vendor message of length %u "
"(expected at least %zu)",
ntohs(ovh->header.length), sizeof(struct ofp_vendor_header));
return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN);
return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_VENDOR);
}
if (ntohs(ovh->header.length) < sizeof(struct nicira_header)) {
- VLOG_WARN_RL(&rl, "received Nicira vendor message of length %zu "
+ VLOG_WARN_RL(&rl, "received Nicira vendor message of length %u "
"(expected at least %zu)",
ntohs(ovh->header.length), sizeof(struct nicira_header));
return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN);
prev = NULL;
LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
if (rule->send_flow_removed && rconn_is_connected(ofconn->rconn)
- && ofconn->role != NX_ROLE_SLAVE) {
+ && ofconn_receives_async_msgs(ofconn)) {
if (prev) {
queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter);
} else {
free(flows);
}
+/* pinsched callback for sending 'packet' on 'ofconn'. Hands 'packet' to the
+ * rconn, which presumably takes ownership of it (queuing or freeing it);
+ * the limit of 100 bounds the number of queued packet-ins per connection. */
static void
do_send_packet_in(struct ofpbuf *packet, void *ofconn_)
{
struct ofconn *ofconn = ofconn_;
+
+ rconn_send_with_limit(ofconn->rconn, packet,
+ ofconn->packet_in_counter, 100);
+}
+
+/* Takes 'packet', which has been converted with do_convert_to_packet_in(), and
+ * finalizes its content for sending on 'ofconn', and passes it to 'ofconn''s
+ * packet scheduler for sending.
+ *
+ * 'max_len' specifies the maximum number of bytes of the packet to send on
+ * 'ofconn' (INT_MAX specifies no limit).
+ *
+ * If 'clone' is true, the caller retains ownership of 'packet'. Otherwise,
+ * ownership is transferred to this function. */
+static void
+schedule_packet_in(struct ofconn *ofconn, struct ofpbuf *packet, int max_len,
+ bool clone)
+{
struct ofproto *ofproto = ofconn->ofproto;
- struct odp_msg *msg = packet->data;
- struct ofpbuf payload;
- struct ofpbuf *opi;
+ struct ofp_packet_in *opi = packet->data;
+ uint16_t in_port = ofp_port_to_odp_port(ntohs(opi->in_port));
+ int send_len, trim_size;
uint32_t buffer_id;
- int send_len;
- /* Extract packet payload from 'msg'. */
- payload.data = msg + 1;
- payload.size = msg->length - sizeof *msg;
+ /* Get buffer. Packets sent by OFPR_ACTION are never buffered; during
+ * fail-open a null buffer id is used so controllers can't pin buffers. */
+ if (opi->reason == OFPR_ACTION) {
+ buffer_id = UINT32_MAX;
+ } else if (ofproto->fail_open && fail_open_is_active(ofproto->fail_open)) {
+ buffer_id = pktbuf_get_null();
+ } else if (!ofconn->pktbuf) {
+ buffer_id = UINT32_MAX;
+ } else {
+ struct ofpbuf payload;
+ payload.data = opi->data;
+ payload.size = packet->size - offsetof(struct ofp_packet_in, data);
+ buffer_id = pktbuf_save(ofconn->pktbuf, &payload, in_port);
+ }
+
+ /* Figure out how much of the packet to send. When the packet is
+ * buffered, the controller can retrieve the rest, so honor the
+ * configured miss_send_len cap. */
+ send_len = ntohs(opi->total_len);
+ if (buffer_id != UINT32_MAX) {
+ send_len = MIN(send_len, ofconn->miss_send_len);
+ }
+ send_len = MIN(send_len, max_len);
+
+ /* Adjust packet length and clone if necessary.
+ * NOTE(review): assumes 'packet' actually contains at least 'send_len'
+ * bytes of payload (i.e. total_len never exceeds the captured data when
+ * the caps above don't kick in) -- confirm against dpif_recv(). */
+ trim_size = offsetof(struct ofp_packet_in, data) + send_len;
+ if (clone) {
+ packet = ofpbuf_clone_data(packet->data, trim_size);
+ opi = packet->data;
+ } else {
+ packet->size = trim_size;
+ }
- /* Construct packet-in message. */
- send_len = INT_MAX;
+ /* Update packet headers. */
+ opi->buffer_id = htonl(buffer_id);
+ update_openflow_length(packet);
+
+ /* Hand over to packet scheduler. It might immediately call into
+ * do_send_packet_in() or it might buffer it for a while (until a later
+ * call to pinsched_run()). */
+ pinsched_send(ofconn->schedulers[opi->reason], in_port,
+ packet, do_send_packet_in, ofconn);
+}
+
+/* Replace struct odp_msg header in 'packet' by equivalent struct
+ * ofp_packet_in. The odp_msg must have sufficient headroom to do so (e.g. as
+ * returned by dpif_recv()).
+ *
+ * The conversion is not complete: the caller still needs to trim any unneeded
+ * payload off the end of the buffer, set the length in the OpenFlow header,
+ * and set buffer_id. Those require us to know the controller settings and so
+ * must be done on a per-controller basis (see schedule_packet_in()).
+ *
+ * Returns the maximum number of bytes of the packet that should be sent to
+ * the controller (INT_MAX if no limit). */
+static int
+do_convert_to_packet_in(struct ofpbuf *packet)
+{
+ struct odp_msg *msg = packet->data;
+ struct ofp_packet_in *opi;
+ uint8_t reason;
+ uint16_t total_len;
+ uint16_t in_port;
+ int max_len;
+
+ /* Extract relevant header fields from the odp_msg before it is
+ * overwritten below. For an explicit "send to controller" action,
+ * msg->arg carries the action's max_len; misses have no limit. */
 if (msg->type == _ODPL_ACTION_NR) {
- buffer_id = UINT32_MAX;
+ reason = OFPR_ACTION;
+ max_len = msg->arg;
 } else {
- if (ofproto->fail_open && fail_open_is_active(ofproto->fail_open)) {
- buffer_id = pktbuf_get_null();
- } else {
- buffer_id = pktbuf_save(ofconn->pktbuf, &payload, msg->port);
- }
- if (buffer_id != UINT32_MAX) {
- send_len = ofconn->miss_send_len;
- }
+ reason = OFPR_NO_MATCH;
+ max_len = INT_MAX;
 }
- opi = make_packet_in(buffer_id, odp_port_to_ofp_port(msg->port),
- msg->type, &payload, send_len);
+ total_len = msg->length - sizeof *msg;
+ in_port = odp_port_to_ofp_port(msg->port);
- /* Send. */
- rconn_send_with_limit(ofconn->rconn, opi, ofconn->packet_in_counter, 100);
+ /* Repurpose packet buffer by overwriting header in place: drop the
+ * odp_msg header, then push a zeroed ofp_packet_in header into the
+ * buffer's headroom. */
+ ofpbuf_pull(packet, sizeof(struct odp_msg));
+ opi = ofpbuf_push_zeros(packet, offsetof(struct ofp_packet_in, data));
+ opi->header.version = OFP_VERSION;
+ opi->header.type = OFPT_PACKET_IN;
+ opi->total_len = htons(total_len);
+ opi->in_port = htons(in_port);
+ opi->reason = reason;
- ofpbuf_delete(packet);
+ return max_len;
}
+/* Given 'packet' containing an odp_msg of type _ODPL_ACTION_NR or
+ * _ODPL_MISS_NR, sends an OFPT_PACKET_IN message to each OpenFlow controller
+ * as necessary according to their individual configurations.
+ *
+ * 'packet' must have sufficient headroom to convert it into a struct
+ * ofp_packet_in (e.g. as returned by dpif_recv()).
+ *
+ * Takes ownership of 'packet'. */
static void
send_packet_in(struct ofproto *ofproto, struct ofpbuf *packet)
{
- struct odp_msg *msg = packet->data;
struct ofconn *ofconn, *prev;
+ int max_len;
- assert(msg->type == _ODPL_MISS_NR || msg->type == _ODPL_ACTION_NR);
+ max_len = do_convert_to_packet_in(packet);
prev = NULL;
LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
- if (ofconn->role != NX_ROLE_SLAVE) {
+ if (ofconn_receives_async_msgs(ofconn)) {
if (prev) {
- pinsched_send(prev->schedulers[msg->type], msg->port,
- ofpbuf_clone(packet), do_send_packet_in, prev);
+ schedule_packet_in(prev, packet, max_len, true);
}
prev = ofconn;
}
}
if (prev) {
- pinsched_send(prev->schedulers[msg->type], msg->port,
- packet, do_send_packet_in, prev);
+ schedule_packet_in(prev, packet, max_len, false);
} else {
ofpbuf_delete(packet);
}