#include "dirs.h"
#include "dpif.h"
#include "flow.h"
+#include "hash.h"
#include "list.h"
#include "mac-learning.h"
#include "netdev.h"
int dp_ifidx; /* Index within kernel datapath. */
};
+/* Mask applied to the hash of a flow's Ethernet source address to pick a slot
+ * in a port's 'bond_hash' array, which holds (BOND_MASK + 1) slots. */
+#define BOND_MASK 0xff
+/* One slot of a bonded port's 'bond_hash' array.  It records which of the
+ * port's interfaces is used for flows whose source address hashes to this
+ * slot. */
+struct bond_entry {
+ int iface_idx; /* Index of assigned iface, or -1 if none. */
+};
+
+#define FLOOD_PORT ((struct port *) 1) /* The 'flood' output port. */
struct port {
struct bridge *bridge;
size_t port_idx;
* A bridge port for bonding has at least 2 interfaces. */
struct iface **ifaces;
size_t n_ifaces, allocated_ifaces;
+
+ /* Bonding info. */
+ struct bond_entry *bond_hash; /* An array of (BOND_MASK + 1) elements. */
+ int active_iface;
};
#define DP_MAX_PORTS 255
uint16_t dp_ifidx;
};
+/* Selects the interface of 'port' on which 'flow' should be sent.
+ *
+ * A port with a single interface always uses that interface.  On a port with
+ * several interfaces, the Ethernet source address of 'flow' is hashed into one
+ * of the (BOND_MASK + 1) slots of 'port->bond_hash'.  A slot that has no
+ * interface assigned, or whose assignment is out of range for the port's
+ * current number of interfaces, is assigned one round-robin; the assignment is
+ * stored in the slot and reused by later flows that hash to it.
+ *
+ * 'port' must have at least one interface. */
+static struct iface *
+choose_output_iface(const struct port *port, const struct flow *flow)
+{
+    assert(port->n_ifaces);
+    if (port->n_ifaces == 1) {
+        return port->ifaces[0];
+    } else {
+        size_t h = hash_fnv(flow->dl_src, sizeof flow->dl_src, HASH_FNV_BASIS);
+        struct bond_entry *e = &port->bond_hash[h & BOND_MASK];
+
+        /* The sign test comes first, so the cast only ever sees a
+         * nonnegative index. */
+        if (e->iface_idx < 0 || (size_t) e->iface_idx >= port->n_ifaces) {
+            /* XXX select interface properly */
+
+            /* Shared by every port.  Unsigned so that the counter wraps
+             * around instead of running into signed overflow, which is
+             * undefined behavior. */
+            static unsigned int count = 0;
+            e->iface_idx = count++ % port->n_ifaces;
+        }
+        return port->ifaces[e->iface_idx];
+    }
+}
+
static void
set_output(struct output *p, const struct flow *flow,
const struct port *in_port, const struct port *out_port)
p->vlan = (out_port->vlan ? OFP_VLAN_NONE
: in_port->vlan ? in_port->vlan
: ntohs(flow->dl_vlan));
- p->dp_ifidx = out_port->ifaces[0]->dp_ifidx;
+ p->dp_ifidx = choose_output_iface(out_port, flow)->dp_ifidx;
}
static void *
}
n_outs = 0;
- if (out_port) {
- /* Unicast. */
- set_output(&outs[n_outs++], flow, in_port, out_port);
- } else {
+ if (out_port == FLOOD_PORT) {
/* Flood. */
size_t i;
set_output(&outs[n_outs++], flow, in_port, op);
}
}
+ } else if (out_port) {
+ /* Unicast. */
+ set_output(&outs[n_outs++], flow, in_port, out_port);
}
actions_ofs = buf->size;
{
struct ofpbuf *fbuf = NULL;
struct ofpbuf *pbuf = NULL;
+
void *actions = NULL;
- size_t actions_len = 0;
+ size_t actions_len = sizeof(struct ofp_action_header) * 4; /* Estimated. */
if (setup_flow) {
- fbuf = make_add_flow(flow, buffer_id, br->flow_idle_time,
- sizeof(struct ofp_action_header) * 4);
+ fbuf = make_add_flow(flow, buffer_id, br->flow_idle_time, actions_len);
put_actions(br, flow, vlan, in_port, out_port, fbuf,
&actions, &actions_len);
update_openflow_length(fbuf);
opo = put_openflow(sizeof *opo, OFPT_PACKET_OUT, pbuf);
opo->buffer_id = htonl(buffer_id);
opo->in_port = htons(in_ifidx);
- opo->actions_len = htons(actions_len);
put_actions(br, flow, vlan, in_port, out_port, pbuf,
&actions, &actions_len);
+ opo = pbuf->data;
+ opo->actions_len = htons(actions_len);
if (buffer_id == UINT32_MAX) {
ofpbuf_put(pbuf, pkt_data, pkt_len);
}
}
}
+/* Returns true only if all of the following hold: 'flow' has Ethernet type
+ * ARP, its Ethernet destination is the broadcast address, 'pkt' contains at
+ * least sizeof(struct arp_eth_header) bytes, and the 'ar_op' field read from
+ * the start of 'pkt->data' equals ARP_OP_REQUEST.  Otherwise returns false.
+ *
+ * NOTE(review): the function name and the comment at its call site both say
+ * "reply", but the opcode tested here is ARP_OP_REQUEST -- confirm which one
+ * is intended.
+ *
+ * NOTE(review): 'ar_op' is compared without htons(), unlike the 'dl_type'
+ * comparison.  Presumably 'ar_op' is a network-byte-order wire field and
+ * ARP_OP_REQUEST a host-order constant -- confirm against their definitions.
+ *
+ * NOTE(review): 'pkt->data' is cast directly to the ARP header.  The caller
+ * points 'pkt->data' at the packet-in payload, which presumably starts with
+ * the Ethernet header rather than the ARP header -- confirm the offset. */
+static bool
+is_bcast_arp_reply(const struct flow *flow, const struct ofpbuf *pkt)
+{
+ return (flow->dl_type == htons(ETH_TYPE_ARP)
+ && eth_addr_is_broadcast(flow->dl_dst)
+ && pkt->size >= sizeof(struct arp_eth_header)
+ && ((struct arp_eth_header *) pkt->data)->ar_op == ARP_OP_REQUEST);
+}
+
static void
process_packet_in(struct bridge *br, void *opi_)
{
struct ofp_packet_in *opi = opi_;
uint16_t in_ifidx = ntohs(opi->in_port);
- uint16_t out_ifidx;
struct ofpbuf pkt;
struct flow flow;
- struct iface *ifa;
- struct port *in_port, *out_port;
+ struct iface *in_iface;
+ struct port *in_port;
+ struct port *out_port = NULL; /* By default, drop the packet/flow. */
int vlan;
+ /* Validate OpenFlow message. */
if (check_ofp_message_array(&opi->header, OFPT_PACKET_IN,
offsetof(struct ofp_packet_in, data),
1, &pkt.size)) {
return;
}
+ /* Extract flow data from 'opi' into 'flow'. */
+ pkt.data = opi->data;
+ flow_extract(&pkt, in_ifidx, &flow);
+
/* Find the interface and port structure for the received packet. */
- if (in_ifidx < 0 || in_ifidx >= ARRAY_SIZE(br->ifaces)
+ if (in_ifidx < 0
+ || in_ifidx >= ARRAY_SIZE(br->ifaces)
|| !br->ifaces[in_ifidx]) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "bridge %s: received packet on unknown "
"interface %"PRIu16, br->name, in_ifidx);
- goto drop;
+ queue_tx(br, make_add_flow(&flow, ntohl(opi->buffer_id),
+ br->flow_idle_time, 0));
+ return;
}
- ifa = br->ifaces[in_ifidx];
- in_port = ifa->port;
-
- /* Extract flow data from 'opi' into 'flow'. */
- pkt.data = opi->data;
- flow_extract(&pkt, in_ifidx, &flow);
+ in_iface = br->ifaces[in_ifidx];
+ in_port = in_iface->port;
/* Figure out what VLAN this packet belongs to.
*
"VLAN %"PRIu16,
br->name, ntohs(flow.dl_vlan),
in_port->name, in_port->vlan);
- goto drop;
+ goto done;
}
vlan = in_port->vlan;
}
+ /* Drop multicast and broadcast packets on inactive bonded interfaces, to
+ * avoid receiving duplicates. */
+ if (in_port->n_ifaces > 0
+ && in_port->active_iface != in_iface->port_ifidx
+ && eth_addr_is_multicast(flow.dl_dst)) {
+ goto done;
+ }
+
/* MAC learning. */
- out_port = NULL;
+ out_port = FLOOD_PORT;
if (br->ml) {
uint16_t out_port_idx;
+ bool may_learn;
+
/* XXX flush learning table entries when port indexes change due to
* reconfiguration */
- if (mac_learning_learn(br->ml, flow.dl_src, vlan, in_port->port_idx)) {
+
+ /* If the packet arrived on a bonded port, don't learn from it unless
+ * we haven't learned any port at all for that address (because we
+ * probably sent the packet on one bonded interface and got it back on
+ * the other). */
+ if (in_port->n_ifaces > 1) {
+ uint16_t src_idx = mac_learning_lookup(br->ml, flow.dl_src, vlan);
+ may_learn = src_idx == OFPP_FLOOD || src_idx == in_port->port_idx;
+
+ /* Broadcast ARP replies are an exception to this rule: the host
+ * has moved to another switch. */
+ if (!may_learn && is_bcast_arp_reply(&flow, &pkt)) {
+ /* OpenFlow can't tell ARP requests from replies so we can't
+ * set up a flow. Fortunately these packets should be rare. */
+ may_learn = true;
+ }
+ } else {
+ may_learn = true;
+ }
+
+ /* Learn source MAC. */
+ if (may_learn &&
+ mac_learning_learn(br->ml, flow.dl_src, vlan, in_port->port_idx)) {
/* The log messages here could actually be useful in debugging, so
* keep the rate limit relatively high. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
br->name, ETH_ADDR_ARGS(flow.dl_src),
in_port->name, vlan);
}
+
+ /* Determine output port. */
out_port_idx = mac_learning_lookup(br->ml, flow.dl_dst, vlan);
if (out_port_idx < br->n_ports) {
out_port = br->ports[out_port_idx];
}
}
- /* Send it out. */
- out_ifidx = out_port ? out_port->ifaces[0]->dp_ifidx : OFPP_FLOOD;
- if (in_port != out_port) {
- /* Add a new flow. */
- send_packets(br, &flow, ntohl(opi->buffer_id), vlan,
- in_ifidx, pkt.data, pkt.size, in_port, out_port,
- br->flow_idle_time >= 0);
- } else {
- /* Don't send out packets on their input ports. */
- goto drop;
+ /* Don't send packets out their input ports. */
+ if (in_port == out_port) {
+ out_port = NULL;
}
- return;
-drop:
- if (br->flow_idle_time >= 0) {
- /* Set up a flow to drop packets. */
- queue_tx(br, make_add_flow(&flow, ntohl(opi->buffer_id),
- br->flow_idle_time, 0));
- } else {
- /* Just drop the packet, since we don't set up flows at all.
- * XXX we should send a packet_out with no actions if buffer_id !=
- * UINT32_MAX, to avoid clogging the kernel buffers. */
- }
+ /*
+ * Add a new flow.
+ *
+ * We send out only a single packet, instead of setting up a flow, if:
+ *
+ * - Flows are disabled entirely; or
+ *
+ * - The packet is an ARP directed to broadcast that arrived on a bonded
+ * interface. In such a situation ARP requests and replies must be
+ * handled differently, but OpenFlow unfortunately can't distinguish
+ * them.
+ */
+done:
+ send_packets(br, &flow, ntohl(opi->buffer_id), vlan,
+ in_ifidx, pkt.data, pkt.size, in_port, out_port,
+ (br->flow_idle_time >= 0
+ && (in_port->n_ifaces < 2
+ || flow.dl_type != htons(ETH_TYPE_ARP)
+ || !eth_addr_is_broadcast(flow.dl_dst))));
}
static void
free(port);
}
}
+
+/* Brings the bonding state of 'port' in line with its current number of
+ * interfaces.
+ *
+ * With fewer than two interfaces the port is not bonded, so its hash table is
+ * released.  With two or more, a hash table is created if the port does not
+ * have one yet: every slot starts out unassigned (-1) and interface 0 becomes
+ * the active interface.  An existing hash table is left untouched. */
+static void
+port_update_bonding(struct port *port)
+{
+    size_t slot;
+
+    if (port->n_ifaces < 2) {
+        /* Not a bonded port. */
+        free(port->bond_hash);
+        port->bond_hash = NULL;
+        return;
+    }
+
+    if (port->bond_hash) {
+        /* Already set up; keep the existing assignments. */
+        return;
+    }
+
+    port->bond_hash = xcalloc(BOND_MASK + 1, sizeof *port->bond_hash);
+    for (slot = 0; slot < BOND_MASK + 1; slot++) {
+        port->bond_hash[slot].iface_idx = -1;
+    }
+    port->active_iface = 0;
+}
\f
/* Interface functions. */
port->ifaces[port->n_ifaces++] = iface;
VLOG_DBG("attached network device %s to port %s", iface->name, port->name);
+
+ port_update_bonding(port);
}
static void
port->ifaces[iface->port_ifidx] = port->ifaces[--port->n_ifaces];
free(iface->name);
free(iface);
+
+ port_update_bonding(port);
}
}