X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=vswitchd%2Fbridge.c;h=9097e6b213c7e274a1a02e811c57ae4ffdec2ac1;hb=f19f25a44b30f090d5cad0c51414998c4ddbae6a;hp=e20d407387785f5f6cb999ef0e7b2d092cbc893e;hpb=3e9c481c70473b5faa29c72a1e609a31ddb6b990;p=openvswitch diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index e20d4073..9097e6b2 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -157,19 +158,11 @@ struct bridge { struct list node; /* Node in global list of bridges. */ char *name; /* User-specified arbitrary name. */ struct mac_learning *ml; /* MAC learning table. */ - bool sent_config_request; /* Successfully sent config request? */ uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */ /* OpenFlow switch processing. */ struct ofproto *ofproto; /* OpenFlow switch. */ - /* Description strings. */ - char *mfr_desc; /* Manufacturer. */ - char *hw_desc; /* Hardware. */ - char *sw_desc; /* Software version. */ - char *serial_desc; /* Serial number. */ - char *dp_desc; /* Datapath description. */ - /* Kernel datapath information. */ struct dpif *dpif; /* Datapath. */ struct port_array ifaces; /* Indexed by kernel datapath port number. */ @@ -186,9 +179,6 @@ struct bridge { /* Flow tracking. */ bool flush; - /* Flow statistics gathering. */ - time_t next_stats_request; - /* Port mirroring. */ struct mirror *mirrors[MAX_MIRRORS]; @@ -395,10 +385,6 @@ set_up_iface(const struct ovsrec_interface *iface_cfg, struct iface *iface, } netdev_options.args = &options; netdev_options.ethertype = NETDEV_ETH_TYPE_NONE; - netdev_options.may_create = true; - if (iface_is_internal(iface->port->bridge, iface_cfg->name)) { - netdev_options.may_open = true; - } error = netdev_open(&netdev_options, &iface->netdev); @@ -1205,7 +1191,6 @@ bridge_create(const struct ovsrec_bridge *br_cfg) br->name = xstrdup(br_cfg->name); br->cfg = br_cfg; br->ml = mac_learning_create(); - br->sent_config_request = false; eth_addr_nicira_random(br->default_ea); port_array_init(&br->ifaces); @@ -1343,75 +1328,6 @@ bridge_get_controllers(const struct ovsrec_open_vswitch *ovs_cfg, return n_controllers; } -static void -bridge_update_desc(struct bridge *br OVS_UNUSED) -{ -#if 0 - bool changed = false; - const char *desc; - - desc = cfg_get_string(0, "bridge.%s.mfr-desc", br->name); - if (desc != br->mfr_desc) { - free(br->mfr_desc); - if (desc) { - br->mfr_desc = xstrdup(desc); - } else { - br->mfr_desc = xstrdup(DEFAULT_MFR_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.hw-desc", br->name); - if (desc != br->hw_desc) { - free(br->hw_desc); - if (desc) { - br->hw_desc = xstrdup(desc); - } else { - br->hw_desc = xstrdup(DEFAULT_HW_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.sw-desc", br->name); - if (desc != br->sw_desc) { - free(br->sw_desc); - if (desc) { - br->sw_desc = xstrdup(desc); - } else { - br->sw_desc = xstrdup(DEFAULT_SW_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.serial-desc", br->name); - if (desc != br->serial_desc) { - free(br->serial_desc); - if (desc) { - br->serial_desc = xstrdup(desc); - } else { - br->serial_desc = xstrdup(DEFAULT_SERIAL_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.dp-desc", br->name); - if (desc != br->dp_desc) { - free(br->dp_desc); - if (desc) { - br->dp_desc = xstrdup(desc); - } else { - br->dp_desc = xstrdup(DEFAULT_DP_DESC); - } - changed = true; - } - - if (changed) { - ofproto_set_desc(br->ofproto, br->mfr_desc, br->hw_desc, - br->sw_desc, br->serial_desc, br->dp_desc); - } -#endif -} - static void bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, struct bridge *br) @@ -1492,50 +1408,7 @@ bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, * versa. (XXX Should we delete all flows if we are switching from one * controller to another?) */ -#if 0 - /* Configure OpenFlow management listeners. */ - svec_init(&listeners); - cfg_get_all_strings(&listeners, "bridge.%s.openflow.listeners", br->name); - if (!listeners.n) { - svec_add_nocopy(&listeners, xasprintf("punix:%s/%s.mgmt", - ovs_rundir, br->name)); - } else if (listeners.n == 1 && !strcmp(listeners.names[0], "none")) { - svec_clear(&listeners); - } - svec_sort_unique(&listeners); - - svec_init(&old_listeners); - ofproto_get_listeners(br->ofproto, &old_listeners); - svec_sort_unique(&old_listeners); - - if (!svec_equal(&listeners, &old_listeners)) { - ofproto_set_listeners(br->ofproto, &listeners); - } - svec_destroy(&listeners); - svec_destroy(&old_listeners); - - /* Configure OpenFlow controller connection snooping. */ - svec_init(&snoops); - cfg_get_all_strings(&snoops, "bridge.%s.openflow.snoops", br->name); - if (!snoops.n) { - svec_add_nocopy(&snoops, xasprintf("punix:%s/%s.snoop", - ovs_rundir, br->name)); - } else if (snoops.n == 1 && !strcmp(snoops.names[0], "none")) { - svec_clear(&snoops); - } - svec_sort_unique(&snoops); - - svec_init(&old_snoops); - ofproto_get_snoops(br->ofproto, &old_snoops); - svec_sort_unique(&old_snoops); - - if (!svec_equal(&snoops, &old_snoops)) { - ofproto_set_snoops(br->ofproto, &snoops); - } - svec_destroy(&snoops); - svec_destroy(&old_snoops); -#else - /* Default listener. */ + /* Configure OpenFlow management listener. */ svec_init(&listeners); svec_add_nocopy(&listeners, xasprintf("punix:%s/%s.mgmt", ovs_rundir, br->name)); @@ -1547,7 +1420,7 @@ bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, svec_destroy(&listeners); svec_destroy(&old_listeners); - /* Default snoop. */ + /* Configure OpenFlow controller connection snooping. */ svec_init(&snoops); svec_add_nocopy(&snoops, xasprintf("punix:%s/%s.snoop", ovs_rundir, br->name)); @@ -1558,11 +1431,8 @@ bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, } svec_destroy(&snoops); svec_destroy(&old_snoops); -#endif mirror_reconfigure(br); - - bridge_update_desc(br); } static void @@ -1930,10 +1800,19 @@ bond_update_fake_iface_stats(struct port *port) struct netdev_stats slave_stats; if (!netdev_get_stats(port->ifaces[i]->netdev, &slave_stats)) { - bond_stats.rx_packets += slave_stats.rx_packets; - bond_stats.rx_bytes += slave_stats.rx_bytes; - bond_stats.tx_packets += slave_stats.tx_packets; - bond_stats.tx_bytes += slave_stats.tx_bytes; + /* XXX: We swap the stats here because they are swapped back when + * reported by the internal device. The reason for this is + * internal devices normally represent packets going into the system + * but when used as fake bond device they represent packets leaving + * the system. We really should do this in the internal device + * itself because changing it here reverses the counts from the + * perspective of the switch. However, the internal device doesn't + * know what type of device it represents so we have to do it here + * for now. */ + bond_stats.tx_packets += slave_stats.rx_packets; + bond_stats.tx_bytes += slave_stats.rx_bytes; + bond_stats.rx_packets += slave_stats.tx_packets; + bond_stats.rx_bytes += slave_stats.tx_bytes; } } @@ -1986,11 +1865,11 @@ bond_wait(struct bridge *br) for (j = 0; j < port->n_ifaces; j++) { struct iface *iface = port->ifaces[j]; if (iface->delay_expires != LLONG_MAX) { - poll_timer_wait(iface->delay_expires - time_msec()); + poll_timer_wait_until(iface->delay_expires); } } if (port->bond_fake_iface) { - poll_timer_wait(port->bond_next_fake_iface_update - time_msec()); + poll_timer_wait_until(port->bond_next_fake_iface_update); } } } @@ -2249,12 +2128,34 @@ static int flow_get_vlan(struct bridge *br, const flow_t *flow, return vlan; } +/* A VM broadcasts a gratuitous ARP to indicate that it has resumed after + * migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to + * indicate this; newer upstream kernels use gratuitous ARP requests. */ +static bool +is_gratuitous_arp(const flow_t *flow) +{ + return (flow->dl_type == htons(ETH_TYPE_ARP) + && eth_addr_is_broadcast(flow->dl_dst) + && (flow->nw_proto == ARP_OP_REPLY + || (flow->nw_proto == ARP_OP_REQUEST + && flow->nw_src == flow->nw_dst))); +} + static void update_learning_table(struct bridge *br, const flow_t *flow, int vlan, struct port *in_port) { - tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src, - vlan, in_port->port_idx); + enum grat_arp_lock_type lock_type; + tag_type rev_tag; + + /* We don't want to learn from gratuitous ARP packets that are reflected + * back over bond slaves so we lock the learning table. */ + lock_type = !is_gratuitous_arp(flow) ? GRAT_ARP_LOCK_NONE : + (in_port->n_ifaces == 1) ? GRAT_ARP_LOCK_SET : + GRAT_ARP_LOCK_CHECK; + + rev_tag = mac_learning_learn(br->ml, flow->dl_src, vlan, in_port->port_idx, + lock_type); if (rev_tag) { /* The log messages here could actually be useful in debugging, * so keep the rate limit relatively high. */ @@ -2268,14 +2169,6 @@ update_learning_table(struct bridge *br, const flow_t *flow, int vlan, } } -static bool -is_bcast_arp_reply(const flow_t *flow) -{ - return (flow->dl_type == htons(ETH_TYPE_ARP) - && flow->nw_proto == ARP_OP_REPLY - && eth_addr_is_broadcast(flow->dl_dst)); -} - /* Determines whether packets in 'flow' within 'br' should be forwarded or * dropped. Returns true if they may be forwarded, false if they should be * dropped. @@ -2354,6 +2247,7 @@ is_admissible(struct bridge *br, const flow_t *flow, bool have_packet, /* Packets received on bonds need special attention to avoid duplicates. */ if (in_port->n_ifaces > 1) { int src_idx; + bool is_grat_arp_locked; if (eth_addr_is_multicast(flow->dl_dst)) { *tags |= in_port->active_iface_tag; @@ -2365,11 +2259,15 @@ is_admissible(struct bridge *br, const flow_t *flow, bool have_packet, /* Drop all packets for which we have learned a different input * port, because we probably sent the packet on one slave and got - * it back on the other. Broadcast ARP replies are an exception - * to this rule: the host has moved to another switch. */ - src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); + * it back on the other. Gratuitous ARP packets are an exception + * to this rule: the host has moved to another switch. The exception + * to the exception is if we locked the learning table to avoid + * reflections on bond slaves. If this is the case, just drop the + * packet now. */ + src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan, + &is_grat_arp_locked); if (src_idx != -1 && src_idx != in_port->port_idx && - !is_bcast_arp_reply(flow)) { + (!is_gratuitous_arp(flow) || is_grat_arp_locked)) { return false; } } @@ -2402,7 +2300,8 @@ process_flow(struct bridge *br, const flow_t *flow, } /* Determine output port. */ - out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, tags); + out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, tags, + NULL); if (out_port_idx >= 0 && out_port_idx < br->n_ports) { out_port = br->ports[out_port_idx]; } else if (!packet && !eth_addr_is_multicast(flow->dl_dst)) { @@ -3391,8 +3290,6 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg) } bitmap_free(port->trunks); port->trunks = trunks; - - shash_destroy(&new_ifaces); } static void @@ -3615,6 +3512,8 @@ iface_create(struct port *port, const struct ovsrec_interface *if_cfg) iface->netdev = NULL; iface->cfg = if_cfg; + shash_add_assert(&br->iface_by_name, iface->name, iface); + /* Attempt to create the network interface in case it doesn't exist yet. */ if (!iface_is_internal(br, iface->name)) { error = set_up_iface(if_cfg, iface, true); @@ -3622,14 +3521,13 @@ iface_create(struct port *port, const struct ovsrec_interface *if_cfg) VLOG_WARN("could not create iface %s: %s", iface->name, strerror(error)); + shash_find_and_delete_assert(&br->iface_by_name, iface->name); free(iface->name); free(iface); return NULL; } } - shash_add_assert(&br->iface_by_name, iface->name, iface); - if (port->n_ifaces >= port->allocated_ifaces) { port->ifaces = x2nrealloc(port->ifaces, &port->allocated_ifaces, sizeof *port->ifaces);