From: Ben Pfaff Date: Fri, 16 Dec 2011 18:02:51 +0000 (-0800) Subject: ofproto-dpif: Flush MACs for deleted ports from every bridge. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b44a10b74a1147029f0bdf7d14cd059ba2dfb454;p=openvswitch ofproto-dpif: Flush MACs for deleted ports from every bridge. Consider this scenario: two hypervisors HV-1 and HV-2, connected to a common physical network over SLB bonds. Two virtual machines VM-1 and VM-2 are both running on HV-1. Patch ports are in use, so that each VM is not connected to a bridge with a physical Ethernet port but is actually one virtual "hop" away across a patch port to a second OVS bridge. VM-2 is running a "ping" process directed at VM-1. Now migrate VM-1 to HV-2. Suppose that VM-1 fails to send out a gratuitous ARP following migration, or that the gratuitous ARPs are lost, e.g. because they are sent before the OpenFlow controller manages to populate the flow table with rules to allow the VM's traffic Now we are in a situation where HV-1 has learned that VM-1 is local and HV-2 has learned that VM-1 is on its bond; both are wrong. One would expect the problem to resolve itself as soon the VM-1 sends out its first packet. However, SLB bonds (for important reasons documented in vswitchd/INTERNALS) are very reluctant to learn that a currently local MAC is actually on the bond: the only ways to learn that the MAC is on the bond are to receive a gratuitous ARP (which we won't, since they were dropped) or for the MAC learning entry to expire after 60 seconds. This means that VM-1 can send out as much ordinary traffic as it wants (even ARP requests and other broadcasts) but HV-1 will drop all of it at the physical Ethernet since it believes that VM-1 is local. (In an ordinary setup with a single bridge, HV-1 would have unlearned the address for VM-1 when VM-1's port was deleted, but that didn't happen because HV-1 only learned that VM-1 was on the patch port that leads to the integration bridge. The patch port didn't get deleted.) HV-2 does quickly learn that VM-1 is now local. SLB bonds are only reluctant to learn that something they think is local is actually on the bond, not the reverse. This commit attempts to work around the problem by flushing the MAC associated with a port from *every* bridge when a port is deleted. This commit demonstrates yet another good reason not to use SLB bonds. Build and unit tested only. Bug #7978. Bug #7687. Signed-off-by: Ben Pfaff --- diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index e68bec35..020d4aca 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -477,6 +477,7 @@ struct table_dpif { }; struct ofproto_dpif { + struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */ struct ofproto up; struct dpif *dpif; int max_ports; @@ -524,6 +525,9 @@ struct ofproto_dpif { * for debugging the asynchronous flow_mod implementation.) */ static bool clogged; +/* All existing ofproto_dpif instances, indexed by ->up.name. */ +static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs); + static void ofproto_dpif_unixctl_init(void); static struct ofproto_dpif * @@ -669,6 +673,9 @@ construct(struct ofproto *ofproto_, int *n_tablesp) hmap_init(&ofproto->vlandev_map); hmap_init(&ofproto->realdev_vid_map); + hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node, + hash_string(ofproto->up.name, 0)); + *n_tablesp = N_TABLES; memset(&ofproto->stats, 0, sizeof ofproto->stats); return 0; @@ -694,6 +701,7 @@ destruct(struct ofproto *ofproto_) struct classifier *table; int i; + hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node); complete_operations(ofproto); OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { @@ -1389,10 +1397,17 @@ set_queues(struct ofport *ofport_, /* Bundles. */ -/* Expires all MAC learning entries associated with 'port' and forces ofproto - * to revalidate every flow. */ +/* Expires all MAC learning entries associated with 'bundle' and forces its + * ofproto to revalidate every flow. + * + * Normally MAC learning entries are removed only from the ofproto associated + * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries + * are removed from every ofproto. When patch ports and SLB bonds are in use + * and a VM migration happens and the gratuitous ARPs are somehow lost, this + * avoids a MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate + * with the host from which it migrated. */ static void -bundle_flush_macs(struct ofbundle *bundle) +bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos) { struct ofproto_dpif *ofproto = bundle->ofproto; struct mac_learning *ml = ofproto->ml; @@ -1401,6 +1416,23 @@ bundle_flush_macs(struct ofbundle *bundle) ofproto->need_revalidate = true; LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) { if (mac->port.p == bundle) { + if (all_ofprotos) { + struct ofproto_dpif *o; + + HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + if (o != ofproto) { + struct mac_entry *e; + + e = mac_learning_lookup(o->ml, mac->mac, mac->vlan, + NULL); + if (e) { + tag_set_add(&o->revalidate_set, e->tag); + mac_learning_expire(o->ml, e); + } + } + } + } + mac_learning_expire(ml, mac); } } @@ -1534,7 +1566,7 @@ bundle_destroy(struct ofbundle *bundle) bundle_del_port(port); } - bundle_flush_macs(bundle); + bundle_flush_macs(bundle, true); hmap_remove(&ofproto->bundles, &bundle->hmap_node); free(bundle->name); free(bundle->trunks); @@ -1722,7 +1754,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, /* If we changed something that would affect MAC learning, un-learn * everything on this port and force flow revalidation. */ if (need_flush) { - bundle_flush_macs(bundle); + bundle_flush_macs(bundle, false); } return 0; @@ -5511,10 +5543,15 @@ send_netflow_active_timeouts(struct ofproto_dpif *ofproto) static struct ofproto_dpif * ofproto_dpif_lookup(const char *name) { - struct ofproto *ofproto = ofproto_lookup(name); - return (ofproto && ofproto->ofproto_class == &ofproto_dpif_class - ? ofproto_dpif_cast(ofproto) - : NULL); + struct ofproto_dpif *ofproto; + + HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node, + hash_string(name, 0), &all_ofproto_dpifs) { + if (!strcmp(ofproto->up.name, name)) { + return ofproto; + } + } + return NULL; } static void diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index ce7e9f66..a6558742 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -653,3 +653,127 @@ AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual" br0=$br0 p1=$p1 p2=$p2], OVS_VSWITCHD_STOP AT_CLEANUP + +m4_define([OFPROTO_TRACE], + [flow="$2" + AT_CHECK([ovs-appctl ofproto/trace $1 "$flow" $3], [0], [stdout]) + actual=`tail -1 stdout | sed 's/Datapath actions: //'` + expected="$4" + AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected" $5], + [0], [stdout]) + mv stdout expout + AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual" $5], + [0], [expout])]) + +AT_SETUP([ofproto-dpif - MAC learning]) +OVS_VSWITCHD_START( + [set bridge br0 fail-mode=standalone -- \ + add-port br0 p1 -- set Interface p1 type=dummy -- \ + add-port br0 p2 -- set Interface p2 type=dummy -- \ + add-port br0 p3 -- set Interface p3 type=dummy]) + +AT_CHECK( + [ovs-vsctl \ + -- get Interface p1 ofport \ + -- get Interface p2 ofport \ + -- get Interface p3 ofport], + [0], [stdout]) +set `cat stdout` +br0=0 p1=$1 p2=$2 p3=$3 +arp='eth_type(0x0806),arp(sip=192.168.0.1,tip=192.168.0.2,op=1,sha=50:54:00:00:00:05,tha=00:00:00:00:00:00)' + +# Trace an ARP packet arriving on p3, to create a MAC learning entry. +OFPROTO_TRACE( + [br0], + [in_port($p3),eth(src=50:54:00:00:00:05,dst=ff:ff:ff:ff:ff:ff),$arp], + [-generate], + [$br0,$p1,$p2], + [br0=$br0 p1=$p1 p2=$p2 p3=$p3]) + +# Check for the MAC learning entry. +AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]$/?/'], [0], [dnl + port VLAN MAC Age + $p3 0 50:54:00:00:00:05 ? +]) + +# Trace a packet arrival destined for the learned MAC. +# (This will also learn a MAC.) +OFPROTO_TRACE( + [br0], + [in_port($p1),eth(src=50:54:00:00:00:06,dst=50:54:00:00:00:05),$arp], + [-generate], + [$p3], + [br0=$br0 p1=$p1 p2=$p2 p3=$p3]) + +# Check for both MAC learning entries. +AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]$/?/'], [0], [dnl + port VLAN MAC Age + $p3 0 50:54:00:00:00:05 ? + $p1 0 50:54:00:00:00:06 ? +]) + +# Trace a packet arrival that updates the first learned MAC entry. +OFPROTO_TRACE( + [br0], + [in_port($p2),eth(src=50:54:00:00:00:05,dst=ff:ff:ff:ff:ff:ff),$arp], + [-generate], + [$br0,$p1,$p3], + [br0=$br0 p1=$p1 p2=$p2 p3=$p3]) + +# Check that the MAC learning entry was updated. +AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]$/?/'], [0], [dnl + port VLAN MAC Age + $p1 0 50:54:00:00:00:06 ? + $p2 0 50:54:00:00:00:05 ? +]) + +# Add another bridge. +AT_CHECK( + [ovs-vsctl \ + -- add-br br1 \ + -- set bridge br1 datapath-type=dummy \ + -- add-port br1 p4 -- set interface p4 type=dummy \ + -- add-port br1 p5 -- set interface p5 type=dummy]) +AT_CHECK( + [ovs-vsctl \ + -- get Interface p4 ofport \ + -- get Interface p5 ofport], + [0], [stdout]) +set `cat stdout` +br1=0 p4=$1 p5=$2 + +# Trace some packet arrivals in br1 to create MAC learning entries there too. +OFPROTO_TRACE( + [br1], + [in_port($p4),eth(src=50:54:00:00:00:06,dst=ff:ff:ff:ff:ff:ff),$arp], + [-generate], + [$br1,$p5], + [br1=$br1 p4=$p4 p5=$p5]) +OFPROTO_TRACE( + [br1], + [in_port($p5),eth(src=50:54:00:00:00:07,dst=ff:ff:ff:ff:ff:ff),$arp], + [-generate], + [$br1,$p4], + [br1=$br1 p4=$p4 p5=$p5]) + +# Check that the MAC learning entries were added. +AT_CHECK_UNQUOTED([ovs-appctl fdb/show br1 | sed 's/[[0-9]]$/?/'], [0], [dnl + port VLAN MAC Age + $p4 0 50:54:00:00:00:06 ? + $p5 0 50:54:00:00:00:07 ? +]) + +# Delete port p1 and see that its MAC learning entry disappeared, and +# that the MAC learning entry for the same MAC was also deleted from br1. +AT_CHECK([ovs-vsctl del-port p1]) +AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]$/?/'], [0], [dnl + port VLAN MAC Age + $p2 0 50:54:00:00:00:05 ? +]) +AT_CHECK_UNQUOTED([ovs-appctl fdb/show br1 | sed 's/[[0-9]]$/?/'], [0], [dnl + port VLAN MAC Age + $p5 0 50:54:00:00:00:07 ? +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP