/*
* Copyright (c) 2009, 2010 Nicira Networks.
+ * Copyright (c) 2010 Jean Tourrilhes - HP-Labs.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "ofproto.h"
#include <errno.h>
#include <inttypes.h>
+#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>
#include <stdbool.h>
static void ofconn_destroy(struct ofconn *);
static void ofconn_run(struct ofconn *, struct ofproto *);
static void ofconn_wait(struct ofconn *);
+static bool ofconn_receives_async_msgs(const struct ofconn *);
+static char *ofconn_make_name(const struct ofproto *, const char *target);
+
static void queue_tx(struct ofpbuf *msg, const struct ofconn *ofconn,
struct rconn_packet_counter *counter);
if (discovery) {
ofconn->discovery = discovery;
} else {
- rconn_connect(ofconn->rconn, c->target);
+ char *name = ofconn_make_name(ofproto, c->target);
+ rconn_connect(ofconn->rconn, c->target, name);
+ free(name);
}
hmap_insert(&ofproto->controllers, &ofconn->hmap_node,
hash_string(c->target, 0));
static const char *
ofconn_get_target(const struct ofconn *ofconn)
{
- return ofconn->discovery ? "discover" : rconn_get_name(ofconn->rconn);
+ return ofconn->discovery ? "discover" : rconn_get_target(ofconn->rconn);
}
static struct ofconn *
HMAP_FOR_EACH (ofconn, struct ofconn, hmap_node, &ofproto->controllers) {
struct sockaddr_in *sin = &addrs[n_addrs];
+ if (ofconn->band == OFPROTO_OUT_OF_BAND) {
+ continue;
+ }
+
sin->sin_addr.s_addr = rconn_get_remote_ip(ofconn->rconn);
if (sin->sin_addr.s_addr) {
sin->sin_port = rconn_get_remote_port(ofconn->rconn);
in_band_create(ofproto, ofproto->dpif, ofproto->switch_status,
&ofproto->in_band);
}
- in_band_set_remotes(ofproto->in_band, addrs, n_addrs);
+ if (ofproto->in_band) {
+ in_band_set_remotes(ofproto->in_band, addrs, n_addrs);
+ }
ofproto->next_in_band_update = time_msec() + 1000;
} else {
in_band_destroy(ofproto->in_band);
}
}
+/* Returns a "preference level" for snooping 'ofconn'. A higher return value
+ * means that 'ofconn' is more interesting for monitoring than a lower return
+ * value.
+ *
+ * The level depends only on 'ofconn''s role: master connections rank highest,
+ * then connections in the "other" role, then slaves. A role that matches
+ * none of these ranks below all of them. */
+static int
+snoop_preference(const struct ofconn *ofconn)
+{
+ switch (ofconn->role) {
+ case NX_ROLE_MASTER:
+ return 3;
+ case NX_ROLE_OTHER:
+ return 2;
+ case NX_ROLE_SLAVE:
+ return 1;
+ default:
+ /* Shouldn't happen. */
+ return 0;
+ }
+}
+
/* One of ofproto's "snoop" pvconns has accepted a new connection on 'vconn'.
* Connects this vconn to a controller. */
static void
add_snooper(struct ofproto *ofproto, struct vconn *vconn)
{
- struct ofconn *ofconn;
+ struct ofconn *ofconn, *best;
- /* Arbitrarily pick the first controller in the list for monitoring. We
- * could do something smarter or more flexible later, if it ever proves
- * useful. */
+ /* Pick a controller for monitoring: the OFCONN_CONTROLLER connection with
+ * the highest snoop_preference(). Among equally preferred connections the
+ * first one found in 'all_conns' wins, because a later candidate replaces
+ * 'best' only when its preference is strictly greater. */
+ best = NULL;
LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
- if (ofconn->type == OFCONN_CONTROLLER) {
- rconn_add_monitor(ofconn->rconn, vconn);
- return;
+ if (ofconn->type == OFCONN_CONTROLLER
+ && (!best || snoop_preference(ofconn) > snoop_preference(best))) {
+ best = ofconn;
}
+ }
+ if (best) {
+ rconn_add_monitor(best->rconn, vconn);
+ } else {
+ VLOG_INFO_RL(&rl, "no controller connection to snoop");
+ vconn_close(vconn);
}
- VLOG_INFO_RL(&rl, "no controller connection to monitor");
- vconn_close(vconn);
}
int
retval = pvconn_accept(p->listeners[i], OFP_VERSION, &vconn);
if (!retval) {
- ofconn_create(p, rconn_new_from_vconn("passive", vconn),
- OFCONN_TRANSIENT);
+ struct rconn *rconn;
+ char *name;
+
+ rconn = rconn_create(60, 0);
+ name = ofconn_make_name(p, vconn_get_name(vconn));
+ rconn_connect_unreliably(rconn, vconn, name);
+ free(name);
+
+ ofconn_create(p, rconn, OFCONN_TRANSIENT);
} else if (retval != EAGAIN) {
VLOG_WARN_RL(&rl, "accept failed (%s)", strerror(retval));
}
ofconn_wait(ofconn);
}
if (p->in_band) {
- poll_timer_wait(p->next_in_band_update - time_msec());
+ poll_timer_wait_until(p->next_in_band_update);
in_band_wait(p->in_band);
}
if (p->fail_open) {
VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
poll_immediate_wake();
} else if (p->next_expiration != LLONG_MAX) {
- poll_timer_wait(p->next_expiration - time_msec());
+ poll_timer_wait_until(p->next_expiration);
}
for (i = 0; i < p->n_listeners; i++) {
pvconn_wait(p->listeners[i]);
memset(&netdev_options, 0, sizeof netdev_options);
netdev_options.name = odp_port->devname;
netdev_options.ethertype = NETDEV_ETH_TYPE_NONE;
- netdev_options.may_open = true;
error = netdev_open(&netdev_options, &netdev);
if (error) {
struct ofp_port_status *ops;
struct ofpbuf *b;
- if (ofconn->role == NX_ROLE_SLAVE) {
+ if (!ofconn_receives_async_msgs(ofconn)) {
continue;
}
}
if (discovery_run(ofconn->discovery, &controller_name)) {
if (controller_name) {
- rconn_connect(ofconn->rconn, controller_name);
+ char *ofconn_name = ofconn_make_name(p, controller_name);
+ rconn_connect(ofconn->rconn, controller_name, ofconn_name);
+ free(ofconn_name);
} else {
rconn_disconnect(ofconn->rconn);
}
COVERAGE_INC(ofproto_ofconn_stuck);
}
}
+
+/* Returns true if 'ofconn' should receive asynchronous messages, false
+ * otherwise.
+ *
+ * For a connection of type OFCONN_CONTROLLER the answer is decided by its
+ * role; for every other connection type it is decided by its miss send
+ * length. */
+static bool
+ofconn_receives_async_msgs(const struct ofconn *ofconn)
+{
+ if (ofconn->type == OFCONN_CONTROLLER) {
+ /* Ordinary controllers always get asynchronous messages unless they
+ * have configured themselves as "slaves". */
+ return ofconn->role != NX_ROLE_SLAVE;
+ } else {
+ /* Transient connections don't get asynchronous messages unless they
+ * have explicitly asked for them by setting a nonzero miss send
+ * length. */
+ return ofconn->miss_send_len > 0;
+ }
+}
+
+/* Returns a human-readable name for an OpenFlow connection between 'ofproto'
+ * and 'target', suitable for use in log messages for identifying the
+ * connection.
+ *
+ * The name has the form "BASE<->TARGET", where BASE is the string returned
+ * by dpif_base_name() for 'ofproto''s dpif and TARGET is 'target' verbatim.
+ *
+ * The name is dynamically allocated. The caller should free it (with free())
+ * when it is no longer needed. */
+static char *
+ofconn_make_name(const struct ofproto *ofproto, const char *target)
+{
+ return xasprintf("%s<->%s", dpif_base_name(ofproto->dpif), target);
+}
\f
/* Caller is responsible for initializing the 'cr' member of the returned
* rule. */
const struct ofp_action_output *oao)
{
union odp_action *a = odp_actions_add(actions, ODPAT_CONTROLLER);
- a->controller.arg = oao->max_len ? ntohs(oao->max_len) : UINT32_MAX;
+ a->controller.arg = ntohs(oao->max_len);
}
struct action_xlate_ctx {
static void
append_port_stat(struct ofport *port, uint16_t port_no, struct ofconn *ofconn,
- struct ofpbuf *msg)
+ struct ofpbuf **msgp)
{
struct netdev_stats stats;
struct ofp_port_stats *ops;
* netdev_get_stats() will log errors. */
netdev_get_stats(port->netdev, &stats);
- ops = append_stats_reply(sizeof *ops, ofconn, &msg);
+ ops = append_stats_reply(sizeof *ops, ofconn, msgp);
ops->port_no = htons(odp_port_to_ofp_port(port_no));
memset(ops->pad, 0, sizeof ops->pad);
ops->rx_packets = htonll(stats.rx_packets);
port = port_array_get(&p->ports,
ofp_port_to_odp_port(ntohs(psr->port_no)));
if (port) {
- append_port_stat(port, ntohs(psr->port_no), ofconn, msg);
+ append_port_stat(port, ntohs(psr->port_no), ofconn, &msg);
}
} else {
PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) {
- append_port_stat(port, port_no, ofconn, msg);
+ append_port_stat(port, port_no, ofconn, &msg);
}
}
uint32_t role;
if (ntohs(msg->header.length) != sizeof *nrr) {
- VLOG_WARN_RL(&rl, "received role request of length %zu (expected %zu)",
+ VLOG_WARN_RL(&rl, "received role request of length %u (expected %zu)",
ntohs(msg->header.length), sizeof *nrr);
return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN);
}
struct nicira_header *nh;
if (ntohs(ovh->header.length) < sizeof(struct ofp_vendor_header)) {
- VLOG_WARN_RL(&rl, "received vendor message of length %zu "
+ VLOG_WARN_RL(&rl, "received vendor message of length %u "
"(expected at least %zu)",
ntohs(ovh->header.length), sizeof(struct ofp_vendor_header));
return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN);
return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_VENDOR);
}
if (ntohs(ovh->header.length) < sizeof(struct nicira_header)) {
- VLOG_WARN_RL(&rl, "received Nicira vendor message of length %zu "
+ VLOG_WARN_RL(&rl, "received Nicira vendor message of length %u "
"(expected at least %zu)",
ntohs(ovh->header.length), sizeof(struct nicira_header));
return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN);
prev = NULL;
LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
if (rule->send_flow_removed && rconn_is_connected(ofconn->rconn)
- && ofconn->role != NX_ROLE_SLAVE) {
+ && ofconn_receives_async_msgs(ofconn)) {
if (prev) {
queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter);
} else {
free(flows);
}
+/* pinsched callback for sending 'packet' on 'ofconn'.
+ *
+ * 'ofconn_' is the struct ofconn that was passed to pinsched_send() together
+ * with this callback. 'packet' is handed to rconn_send_with_limit() on that
+ * connection's rconn, using the connection's packet-in counter and 100 as
+ * the limit argument. */
static void
do_send_packet_in(struct ofpbuf *packet, void *ofconn_)
{
struct ofconn *ofconn = ofconn_;
+
+ rconn_send_with_limit(ofconn->rconn, packet,
+ ofconn->packet_in_counter, 100);
+}
+
+/* Takes 'packet', which has been converted with do_convert_to_packet_in(),
+ * finalizes its content for sending on 'ofconn', and passes it to 'ofconn''s
+ * packet scheduler for sending.
+ *
+ * 'max_len' specifies the maximum number of bytes of the packet to send on
+ * 'ofconn' (INT_MAX specifies no limit).
+ *
+ * If 'clone' is true, the caller retains ownership of 'packet'. Otherwise,
+ * ownership is transferred to this function. */
+static void
+schedule_packet_in(struct ofconn *ofconn, struct ofpbuf *packet, int max_len,
+ bool clone)
+{
struct ofproto *ofproto = ofconn->ofproto;
- struct odp_msg *msg = packet->data;
- struct ofpbuf payload;
- struct ofpbuf *opi;
+ struct ofp_packet_in *opi = packet->data;
+ uint16_t in_port = ofp_port_to_odp_port(ntohs(opi->in_port));
+ int send_len, trim_size;
uint32_t buffer_id;
- int send_len;
- /* Extract packet payload from 'msg'. */
- payload.data = msg + 1;
- payload.size = msg->length - sizeof *msg;
+ /* Get buffer. A packet whose reason is OFPR_ACTION, and any packet on a
+ * connection that has no pktbuf, gets buffer id UINT32_MAX. While
+ * fail-open is active the id returned by pktbuf_get_null() is used.
+ * Otherwise the bytes following the packet-in header are saved in the
+ * connection's pktbuf and the id returned by pktbuf_save() is used. */
+ if (opi->reason == OFPR_ACTION) {
+ buffer_id = UINT32_MAX;
+ } else if (ofproto->fail_open && fail_open_is_active(ofproto->fail_open)) {
+ buffer_id = pktbuf_get_null();
+ } else if (!ofconn->pktbuf) {
+ buffer_id = UINT32_MAX;
+ } else {
+ struct ofpbuf payload;
+ payload.data = opi->data;
+ payload.size = packet->size - offsetof(struct ofp_packet_in, data);
+ buffer_id = pktbuf_save(ofconn->pktbuf, &payload, in_port);
+ }
+
+ /* Figure out how much of the packet to send. Start from the total length
+ * recorded in the header, cap it at the connection's miss send length when
+ * the buffer id is anything other than UINT32_MAX, then apply the caller's
+ * 'max_len'. */
+ send_len = ntohs(opi->total_len);
+ if (buffer_id != UINT32_MAX) {
+ send_len = MIN(send_len, ofconn->miss_send_len);
+ }
+ send_len = MIN(send_len, max_len);
+
+ /* Adjust packet length and clone if necessary. When cloning, only the
+ * header plus the 'send_len' bytes to be sent are copied, and 'opi' is
+ * repointed at the copy so that the header updates below are applied to
+ * the copy rather than to the caller's 'packet'. */
+ trim_size = offsetof(struct ofp_packet_in, data) + send_len;
+ if (clone) {
+ packet = ofpbuf_clone_data(packet->data, trim_size);
+ opi = packet->data;
+ } else {
+ packet->size = trim_size;
+ }
- /* Construct packet-in message. */
- send_len = INT_MAX;
+ /* Update packet headers. */
+ opi->buffer_id = htonl(buffer_id);
+ update_openflow_length(packet);
+
+ /* Hand over to packet scheduler. It might immediately call into
+ * do_send_packet_in() or it might buffer it for a while (until a later
+ * call to pinsched_run()). */
+ pinsched_send(ofconn->schedulers[opi->reason], in_port,
+ packet, do_send_packet_in, ofconn);
+}
+
+/* Replaces the struct odp_msg header in 'packet' with an equivalent struct
+ * ofp_packet_in. The odp_msg must have sufficient headroom to do so (e.g. as
+ * returned by dpif_recv()).
+ *
+ * The conversion is not complete: the caller still needs to trim any unneeded
+ * payload off the end of the buffer, set the length in the OpenFlow header,
+ * and set buffer_id. Those require us to know the controller settings and so
+ * must be done on a per-controller basis.
+ *
+ * Returns the maximum number of bytes of the packet that should be sent to
+ * the controller (INT_MAX if no limit). */
+static int
+do_convert_to_packet_in(struct ofpbuf *packet)
+{
+ struct odp_msg *msg = packet->data;
+ struct ofp_packet_in *opi;
+ uint8_t reason;
+ uint16_t total_len;
+ uint16_t in_port;
+ int max_len;
+
+ /* Extract relevant header fields. A message of type _ODPL_ACTION_NR is
+ * reported with reason OFPR_ACTION and takes its length limit from the
+ * message's 'arg' member; any other type is reported with reason
+ * OFPR_NO_MATCH and no length limit. */
if (msg->type == _ODPL_ACTION_NR) {
- buffer_id = UINT32_MAX;
+ reason = OFPR_ACTION;
+ max_len = msg->arg;
} else {
- if (ofproto->fail_open && fail_open_is_active(ofproto->fail_open)) {
- buffer_id = pktbuf_get_null();
- } else {
- buffer_id = pktbuf_save(ofconn->pktbuf, &payload, msg->port);
- }
- if (buffer_id != UINT32_MAX) {
- send_len = ofconn->miss_send_len;
- }
+ reason = OFPR_NO_MATCH;
+ max_len = INT_MAX;
}
- opi = make_packet_in(buffer_id, odp_port_to_ofp_port(msg->port),
- msg->type, &payload, send_len);
+ total_len = msg->length - sizeof *msg;
+ in_port = odp_port_to_ofp_port(msg->port);
- /* Send. */
- rconn_send_with_limit(ofconn->rconn, opi, ofconn->packet_in_counter, 100);
+ /* Repurpose packet buffer by overwriting header: pull the odp_msg header
+ * off the front, then push a zero-filled ofp_packet_in header (everything
+ * up to its 'data' member) in its place. The fields read from 'msg' above
+ * were copied into locals first because 'msg' points into this buffer. */
+ ofpbuf_pull(packet, sizeof(struct odp_msg));
+ opi = ofpbuf_push_zeros(packet, offsetof(struct ofp_packet_in, data));
+ opi->header.version = OFP_VERSION;
+ opi->header.type = OFPT_PACKET_IN;
+ opi->total_len = htons(total_len);
+ opi->in_port = htons(in_port);
+ opi->reason = reason;
- ofpbuf_delete(packet);
+ return max_len;
}
+/* Given 'packet' containing an odp_msg of type _ODPL_ACTION_NR or
+ * _ODPL_MISS_NR, sends an OFPT_PACKET_IN message to each OpenFlow controller
+ * as necessary according to their individual configurations.
+ *
+ * 'packet' must have sufficient headroom to convert it into a struct
+ * ofp_packet_in (e.g. as returned by dpif_recv()).
+ *
+ * Takes ownership of 'packet'. */
static void
send_packet_in(struct ofproto *ofproto, struct ofpbuf *packet)
{
- struct odp_msg *msg = packet->data;
struct ofconn *ofconn, *prev;
+ int max_len;
- assert(msg->type == _ODPL_MISS_NR || msg->type == _ODPL_ACTION_NR);
+ max_len = do_convert_to_packet_in(packet);
prev = NULL;
LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
- if (ofconn->role != NX_ROLE_SLAVE) {
+ if (ofconn_receives_async_msgs(ofconn)) {
if (prev) {
- pinsched_send(prev->schedulers[msg->type], msg->port,
- ofpbuf_clone(packet), do_send_packet_in, prev);
+ schedule_packet_in(prev, packet, max_len, true);
}
prev = ofconn;
}
}
if (prev) {
- pinsched_send(prev->schedulers[msg->type], msg->port,
- packet, do_send_packet_in, prev);
+ schedule_packet_in(prev, packet, max_len, false);
} else {
ofpbuf_delete(packet);
}
/* Learn source MAC (but don't try to learn from revalidation). */
if (packet != NULL) {
tag_type rev_tag = mac_learning_learn(ofproto->ml, flow->dl_src,
- 0, flow->in_port);
+ 0, flow->in_port,
+ GRAT_ARP_LOCK_NONE);
if (rev_tag) {
/* The log messages here could actually be useful in debugging,
* so keep the rate limit relatively high. */
}
/* Determine output port. */
- out_port = mac_learning_lookup_tag(ofproto->ml, flow->dl_dst, 0, tags);
+ out_port = mac_learning_lookup_tag(ofproto->ml, flow->dl_dst, 0, tags,
+ NULL);
if (out_port < 0) {
add_output_group_action(actions, DP_GROUP_FLOOD, nf_output_iface);
} else if (out_port != flow->in_port) {