flow_extract(&pkt, 0, in_port, &flow);
if (may_learn(sw, in_port) && sw->ml) {
- if (mac_learning_learn(sw->ml, flow.dl_src, 0, in_port)) {
+ if (mac_learning_learn(sw->ml, flow.dl_src, 0, in_port,
+ GRAT_ARP_LOCK_NONE)) {
VLOG_DBG_RL(&rl, "%016llx: learned that "ETH_ADDR_FMT" is on "
"port %"PRIu16, sw->datapath_id,
ETH_ADDR_ARGS(flow.dl_src), in_port);
}
if (sw->ml) {
- int learned_port = mac_learning_lookup(sw->ml, flow.dl_dst, 0);
+ int learned_port = mac_learning_lookup(sw->ml, flow.dl_dst, 0, NULL);
if (learned_port >= 0 && may_send(sw, learned_port)) {
out_port = learned_port;
}
* that now need revalidation.
*
* The 'vlan' parameter is used to maintain separate per-VLAN learning tables.
- * Specify 0 if this behavior is undesirable. */
+ * Specify 0 if this behavior is undesirable.
+ *
+ * 'lock_type' specifies whether the entry should be locked or existing locks
+ * are check. */
tag_type
mac_learning_learn(struct mac_learning *ml,
const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan,
- uint16_t src_port)
+ uint16_t src_port, enum grat_arp_lock_type lock_type)
{
struct mac_entry *e;
struct list *bucket;
e->port = -1;
e->vlan = vlan;
e->tag = make_unknown_mac_tag(ml, src_mac, vlan);
+ e->grat_arp_lock = TIME_MIN;
}
- /* Make the entry most-recently-used. */
- list_remove(&e->lru_node);
- list_push_back(&ml->lrus, &e->lru_node);
- e->expires = time_now() + MAC_ENTRY_IDLE_TIME;
-
- /* Did we learn something? */
- if (e->port != src_port) {
- tag_type old_tag = e->tag;
- e->port = src_port;
- e->tag = tag_create_random();
- COVERAGE_INC(mac_learning_learned);
- return old_tag;
+ if (lock_type != GRAT_ARP_LOCK_CHECK || time_now() >= e->grat_arp_lock) {
+ /* Make the entry most-recently-used. */
+ list_remove(&e->lru_node);
+ list_push_back(&ml->lrus, &e->lru_node);
+ e->expires = time_now() + MAC_ENTRY_IDLE_TIME;
+ if (lock_type == GRAT_ARP_LOCK_SET) {
+ e->grat_arp_lock = time_now() + MAC_GRAT_ARP_LOCK_TIME;
+ }
+
+ /* Did we learn something? */
+ if (e->port != src_port) {
+ tag_type old_tag = e->tag;
+ e->port = src_port;
+ e->tag = tag_create_random();
+ COVERAGE_INC(mac_learning_learned);
+ return old_tag;
+ }
}
+
return 0;
}
/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml'. Returns the port on which a
- * frame destined for 'dst' should be sent, -1 if unknown. */
+ * frame destined for 'dst' should be sent, -1 if unknown. 'is_grat_arp_locked'
+ * is an optional parameter that returns whether the entry is currently
+ * locked. */
int
mac_learning_lookup(const struct mac_learning *ml,
- const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan)
+ const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan,
+ bool *is_grat_arp_locked)
{
tag_type tag = 0;
- return mac_learning_lookup_tag(ml, dst, vlan, &tag);
+ return mac_learning_lookup_tag(ml, dst, vlan, &tag, is_grat_arp_locked);
}
/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml'. Returns the port on which a
*
* Adds to '*tag' (which the caller must have initialized) the tag that should
* be attached to any flow created based on the return value, if any, to allow
- * those flows to be revalidated when the MAC learning entry changes. */
+ * those flows to be revalidated when the MAC learning entry changes.
+ *
+ * 'is_grat_arp_locked' is an optional parameter that returns whether the entry
+ * is currently locked.*/
int
mac_learning_lookup_tag(const struct mac_learning *ml,
const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan,
- tag_type *tag)
+ tag_type *tag, bool *is_grat_arp_locked)
{
if (eth_addr_is_multicast(dst) || !is_learning_vlan(ml, vlan)) {
return -1;
dst, vlan);
if (e) {
*tag |= e->tag;
+
+ if (is_grat_arp_locked) {
+ *is_grat_arp_locked = time_now() < e->grat_arp_lock;
+ }
+
return e->port;
} else {
*tag |= make_unknown_mac_tag(ml, dst, vlan);
/* Time, in seconds, before expiring a mac_entry due to inactivity. */
#define MAC_ENTRY_IDLE_TIME 60
+/* Time, in seconds, to lock an entry updated by a gratuitous ARP to avoid
+ * relearning based on a reflection from a bond slave. */
+#define MAC_GRAT_ARP_LOCK_TIME 5
+
+enum grat_arp_lock_type {
+ GRAT_ARP_LOCK_NONE,
+ GRAT_ARP_LOCK_SET,
+ GRAT_ARP_LOCK_CHECK
+};
+
/* A MAC learning table entry. */
struct mac_entry {
struct list hash_node; /* Element in a mac_learning 'table' list. */
struct list lru_node; /* Element in 'lrus' or 'free' list. */
time_t expires; /* Expiration time. */
+ time_t grat_arp_lock; /* Gratuitous ARP lock expiration time. */
uint8_t mac[ETH_ADDR_LEN]; /* Known MAC address. */
uint16_t vlan; /* VLAN tag. */
int port; /* Port on which MAC was most recently seen. */
unsigned long *bitmap);
tag_type mac_learning_learn(struct mac_learning *,
const uint8_t src[ETH_ADDR_LEN], uint16_t vlan,
- uint16_t src_port);
+ uint16_t src_port, enum grat_arp_lock_type
+ lock_type);
int mac_learning_lookup(const struct mac_learning *,
- const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan);
+ const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan,
+ bool *is_grat_arp_locked);
int mac_learning_lookup_tag(const struct mac_learning *,
const uint8_t dst[ETH_ADDR_LEN],
- uint16_t vlan, tag_type *tag);
+ uint16_t vlan, tag_type *tag,
+ bool *is_grat_arp_locked);
void mac_learning_flush(struct mac_learning *);
void mac_learning_run(struct mac_learning *, struct tag_set *);
void mac_learning_wait(struct mac_learning *);
/* Learn source MAC (but don't try to learn from revalidation). */
if (packet != NULL) {
tag_type rev_tag = mac_learning_learn(ofproto->ml, flow->dl_src,
- 0, flow->in_port);
+ 0, flow->in_port,
+ GRAT_ARP_LOCK_NONE);
if (rev_tag) {
/* The log messages here could actually be useful in debugging,
* so keep the rate limit relatively high. */
}
/* Determine output port. */
- out_port = mac_learning_lookup_tag(ofproto->ml, flow->dl_dst, 0, tags);
+ out_port = mac_learning_lookup_tag(ofproto->ml, flow->dl_dst, 0, tags,
+ NULL);
if (out_port < 0) {
add_output_group_action(actions, DP_GROUP_FLOOD, nf_output_iface);
} else if (out_port != flow->in_port) {
return vlan;
}
+/* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
+ * migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to
+ * indicate this; newer upstream kernels use gratuitous ARP requests. */
+static bool
+is_gratuitous_arp(const flow_t *flow)
+{
+ return (flow->dl_type == htons(ETH_TYPE_ARP)
+ && eth_addr_is_broadcast(flow->dl_dst)
+ && (flow->nw_proto == ARP_OP_REPLY
+ || (flow->nw_proto == ARP_OP_REQUEST
+ && flow->nw_src == flow->nw_dst)));
+}
+
static void
update_learning_table(struct bridge *br, const flow_t *flow, int vlan,
struct port *in_port)
{
- tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src,
- vlan, in_port->port_idx);
+ enum grat_arp_lock_type lock_type;
+ tag_type rev_tag;
+
+ /* We don't want to learn from gratuitous ARP packets that are reflected
+ * back over bond slaves so we lock the learning table. */
+ lock_type = !is_gratuitous_arp(flow) ? GRAT_ARP_LOCK_NONE :
+ (in_port->n_ifaces == 1) ? GRAT_ARP_LOCK_SET :
+ GRAT_ARP_LOCK_CHECK;
+
+ rev_tag = mac_learning_learn(br->ml, flow->dl_src, vlan, in_port->port_idx,
+ lock_type);
if (rev_tag) {
/* The log messages here could actually be useful in debugging,
* so keep the rate limit relatively high. */
}
}
-/* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
- * migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to
- * indicate this; newer upstream kernels use gratuitous ARP requests. */
-static bool
-is_gratuitous_arp(const flow_t *flow)
-{
- return (flow->dl_type == htons(ETH_TYPE_ARP)
- && eth_addr_is_broadcast(flow->dl_dst)
- && (flow->nw_proto == ARP_OP_REPLY
- || (flow->nw_proto == ARP_OP_REQUEST
- && flow->nw_src == flow->nw_dst)));
-}
-
/* Determines whether packets in 'flow' within 'br' should be forwarded or
* dropped. Returns true if they may be forwarded, false if they should be
* dropped.
/* Packets received on bonds need special attention to avoid duplicates. */
if (in_port->n_ifaces > 1) {
int src_idx;
+ bool is_grat_arp_locked;
if (eth_addr_is_multicast(flow->dl_dst)) {
*tags |= in_port->active_iface_tag;
/* Drop all packets for which we have learned a different input
* port, because we probably sent the packet on one slave and got
* it back on the other. Gratuitous ARP packets are an exception
- * to this rule: the host has moved to another switch. */
- src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
+ * to this rule: the host has moved to another switch. The exception
+ * to the exception is if we locked the learning table to avoid
+ * reflections on bond slaves. If this is the case, just drop the
+ * packet now. */
+ src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan,
+ &is_grat_arp_locked);
if (src_idx != -1 && src_idx != in_port->port_idx &&
- !is_gratuitous_arp(flow)) {
+ (!is_gratuitous_arp(flow) || is_grat_arp_locked)) {
return false;
}
}
}
/* Determine output port. */
- out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, tags);
+ out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, tags,
+ NULL);
if (out_port_idx >= 0 && out_port_idx < br->n_ports) {
out_port = br->ports[out_port_idx];
} else if (!packet && !eth_addr_is_multicast(flow->dl_dst)) {