X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=vswitchd%2Fbridge.c;h=605888c61ebb51d6d2252f7150c4f4227a1e7b72;hb=cc020c766e6a691e6e0deed1d9843cdced59445c;hp=24db5b5e6fe7b49e96c4ee66389c0e97968f8dfe;hpb=4a1ee6ae821f73c877af1548d6db9a0e04ad8eaf;p=openvswitch diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 24db5b5e..605888c6 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -122,7 +123,8 @@ struct port { struct bridge *bridge; size_t port_idx; int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */ - unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1. */ + unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1. + * NULL if all VLANs are trunked. */ char *name; /* An ordinary bridge port has 1 interface. @@ -156,19 +158,11 @@ struct bridge { struct list node; /* Node in global list of bridges. */ char *name; /* User-specified arbitrary name. */ struct mac_learning *ml; /* MAC learning table. */ - bool sent_config_request; /* Successfully sent config request? */ uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */ /* OpenFlow switch processing. */ struct ofproto *ofproto; /* OpenFlow switch. */ - /* Description strings. */ - char *mfr_desc; /* Manufacturer. */ - char *hw_desc; /* Hardware. */ - char *sw_desc; /* Software version. */ - char *serial_desc; /* Serial number. */ - char *dp_desc; /* Datapath description. */ - /* Kernel datapath information. */ struct dpif *dpif; /* Datapath. */ struct port_array ifaces; /* Indexed by kernel datapath port number. */ @@ -177,6 +171,7 @@ struct bridge { struct port **ports; size_t n_ports, allocated_ports; struct shash iface_by_name; /* "struct iface"s indexed by name. */ + struct shash port_by_name; /* "struct port"s indexed by name. */ /* Bonding. */ bool has_bonded_ports; @@ -184,9 +179,6 @@ struct bridge { /* Flow tracking. */ bool flush; - /* Flow statistics gathering. 
*/ - time_t next_stats_request; - /* Port mirroring. */ struct mirror *mirrors[MAX_MIRRORS]; @@ -261,39 +253,13 @@ static struct iface *iface_from_dp_ifidx(const struct bridge *, uint16_t dp_ifidx); static bool iface_is_internal(const struct bridge *, const char *name); static void iface_set_mac(struct iface *); +static void iface_update_qos(struct iface *, const struct ovsrec_qos *); /* Hooks into ofproto processing. */ static struct ofhooks bridge_ofhooks; /* Public functions. */ -/* Adds the name of each interface used by a bridge, including local and - * internal ports, to 'svec'. */ -void -bridge_get_ifaces(struct svec *svec) -{ - struct bridge *br, *next; - size_t i, j; - - LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) { - for (i = 0; i < br->n_ports; i++) { - struct port *port = br->ports[i]; - - for (j = 0; j < port->n_ifaces; j++) { - struct iface *iface = port->ifaces[j]; - if (iface->dp_ifidx < 0) { - VLOG_ERR("%s interface not in datapath %s, ignoring", - iface->name, dpif_name(br->dpif)); - } else { - if (iface->dp_ifidx != ODPP_LOCAL) { - svec_add(svec, iface->name); - } - } - } - } - } -} - void bridge_init(const struct ovsrec_open_vswitch *cfg) { @@ -369,7 +335,6 @@ static int set_up_iface(const struct ovsrec_interface *iface_cfg, struct iface *iface, bool create) { - struct shash_node *node; struct shash options; int error = 0; size_t i; @@ -393,10 +358,6 @@ set_up_iface(const struct ovsrec_interface *iface_cfg, struct iface *iface, } netdev_options.args = &options; netdev_options.ethertype = NETDEV_ETH_TYPE_NONE; - netdev_options.may_create = true; - if (iface_is_internal(iface->port->bridge, iface_cfg->name)) { - netdev_options.may_open = true; - } error = netdev_open(&netdev_options, &iface->netdev); @@ -421,11 +382,7 @@ set_up_iface(const struct ovsrec_interface *iface_cfg, struct iface *iface, error = EINVAL; } } - - SHASH_FOR_EACH (node, &options) { - free(node->data); - } - shash_destroy(&options); + 
shash_destroy_free_data(&options); return error; } @@ -731,7 +688,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) dpid = bridge_pick_datapath_id(br, ea, hw_addr_iface); ofproto_set_datapath_id(br->ofproto, dpid); - dpid_string = xasprintf("%012"PRIx64, dpid); + dpid_string = xasprintf("%016"PRIx64, dpid); ovsrec_bridge_set_datapath_id(br->cfg, dpid_string); free(dpid_string); @@ -844,9 +801,14 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; + int j; port_update_vlan_compat(port); port_update_bonding(port); + + for (j = 0; j < port->n_ifaces; j++) { + iface_update_qos(port->ifaces[j], port->cfg->qos); + } } } LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { @@ -1203,11 +1165,11 @@ bridge_create(const struct ovsrec_bridge *br_cfg) br->name = xstrdup(br_cfg->name); br->cfg = br_cfg; br->ml = mac_learning_create(); - br->sent_config_request = false; eth_addr_nicira_random(br->default_ea); port_array_init(&br->ifaces); + shash_init(&br->port_by_name); shash_init(&br->iface_by_name); br->flush = false; @@ -1238,6 +1200,7 @@ bridge_destroy(struct bridge *br) ofproto_destroy(br->ofproto); mac_learning_destroy(br->ml); port_array_destroy(&br->ifaces); + shash_destroy(&br->port_by_name); shash_destroy(&br->iface_by_name); free(br->ports); free(br->name); @@ -1258,19 +1221,6 @@ bridge_lookup(const char *name) return NULL; } -bool -bridge_exists(const char *name) -{ - return bridge_lookup(name) ? true : false; -} - -uint64_t -bridge_get_datapathid(const char *name) -{ - struct bridge *br = bridge_lookup(name); - return br ? ofproto_get_datapath_id(br->ofproto) : 0; -} - /* Handle requests for a listing of all flows known by the OpenFlow * stack, including those normally hidden. 
*/ static void @@ -1339,75 +1289,6 @@ bridge_get_controllers(const struct ovsrec_open_vswitch *ovs_cfg, return n_controllers; } -static void -bridge_update_desc(struct bridge *br OVS_UNUSED) -{ -#if 0 - bool changed = false; - const char *desc; - - desc = cfg_get_string(0, "bridge.%s.mfr-desc", br->name); - if (desc != br->mfr_desc) { - free(br->mfr_desc); - if (desc) { - br->mfr_desc = xstrdup(desc); - } else { - br->mfr_desc = xstrdup(DEFAULT_MFR_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.hw-desc", br->name); - if (desc != br->hw_desc) { - free(br->hw_desc); - if (desc) { - br->hw_desc = xstrdup(desc); - } else { - br->hw_desc = xstrdup(DEFAULT_HW_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.sw-desc", br->name); - if (desc != br->sw_desc) { - free(br->sw_desc); - if (desc) { - br->sw_desc = xstrdup(desc); - } else { - br->sw_desc = xstrdup(DEFAULT_SW_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.serial-desc", br->name); - if (desc != br->serial_desc) { - free(br->serial_desc); - if (desc) { - br->serial_desc = xstrdup(desc); - } else { - br->serial_desc = xstrdup(DEFAULT_SERIAL_DESC); - } - changed = true; - } - - desc = cfg_get_string(0, "bridge.%s.dp-desc", br->name); - if (desc != br->dp_desc) { - free(br->dp_desc); - if (desc) { - br->dp_desc = xstrdup(desc); - } else { - br->dp_desc = xstrdup(DEFAULT_DP_DESC); - } - changed = true; - } - - if (changed) { - ofproto_set_desc(br->ofproto, br->mfr_desc, br->hw_desc, - br->sw_desc, br->serial_desc, br->dp_desc); - } -#endif -} - static void bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, struct bridge *br) @@ -1488,50 +1369,7 @@ bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, * versa. (XXX Should we delete all flows if we are switching from one * controller to another?) */ -#if 0 - /* Configure OpenFlow management listeners. 
*/ - svec_init(&listeners); - cfg_get_all_strings(&listeners, "bridge.%s.openflow.listeners", br->name); - if (!listeners.n) { - svec_add_nocopy(&listeners, xasprintf("punix:%s/%s.mgmt", - ovs_rundir, br->name)); - } else if (listeners.n == 1 && !strcmp(listeners.names[0], "none")) { - svec_clear(&listeners); - } - svec_sort_unique(&listeners); - - svec_init(&old_listeners); - ofproto_get_listeners(br->ofproto, &old_listeners); - svec_sort_unique(&old_listeners); - - if (!svec_equal(&listeners, &old_listeners)) { - ofproto_set_listeners(br->ofproto, &listeners); - } - svec_destroy(&listeners); - svec_destroy(&old_listeners); - - /* Configure OpenFlow controller connection snooping. */ - svec_init(&snoops); - cfg_get_all_strings(&snoops, "bridge.%s.openflow.snoops", br->name); - if (!snoops.n) { - svec_add_nocopy(&snoops, xasprintf("punix:%s/%s.snoop", - ovs_rundir, br->name)); - } else if (snoops.n == 1 && !strcmp(snoops.names[0], "none")) { - svec_clear(&snoops); - } - svec_sort_unique(&snoops); - - svec_init(&old_snoops); - ofproto_get_snoops(br->ofproto, &old_snoops); - svec_sort_unique(&old_snoops); - - if (!svec_equal(&snoops, &old_snoops)) { - ofproto_set_snoops(br->ofproto, &snoops); - } - svec_destroy(&snoops); - svec_destroy(&old_snoops); -#else - /* Default listener. */ + /* Configure OpenFlow management listener. */ svec_init(&listeners); svec_add_nocopy(&listeners, xasprintf("punix:%s/%s.mgmt", ovs_rundir, br->name)); @@ -1543,7 +1381,7 @@ bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, svec_destroy(&listeners); svec_destroy(&old_listeners); - /* Default snoop. */ + /* Configure OpenFlow controller connection snooping. 
*/ svec_init(&snoops); svec_add_nocopy(&snoops, xasprintf("punix:%s/%s.snoop", ovs_rundir, br->name)); @@ -1554,11 +1392,8 @@ bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg, } svec_destroy(&snoops); svec_destroy(&old_snoops); -#endif mirror_reconfigure(br); - - bridge_update_desc(br); } static void @@ -1926,10 +1761,19 @@ bond_update_fake_iface_stats(struct port *port) struct netdev_stats slave_stats; if (!netdev_get_stats(port->ifaces[i]->netdev, &slave_stats)) { - bond_stats.rx_packets += slave_stats.rx_packets; - bond_stats.rx_bytes += slave_stats.rx_bytes; - bond_stats.tx_packets += slave_stats.tx_packets; - bond_stats.tx_bytes += slave_stats.tx_bytes; + /* XXX: We swap the stats here because they are swapped back when + * reported by the internal device. The reason for this is + * internal devices normally represent packets going into the system + * but when used as fake bond device they represent packets leaving + * the system. We really should do this in the internal device + * itself because changing it here reverses the counts from the + * perspective of the switch. However, the internal device doesn't + * know what type of device it represents so we have to do it here + * for now. 
*/ + bond_stats.tx_packets += slave_stats.rx_packets; + bond_stats.tx_bytes += slave_stats.rx_bytes; + bond_stats.rx_packets += slave_stats.tx_packets; + bond_stats.rx_bytes += slave_stats.tx_bytes; } } @@ -1982,11 +1826,11 @@ bond_wait(struct bridge *br) for (j = 0; j < port->n_ifaces; j++) { struct iface *iface = port->ifaces[j]; if (iface->delay_expires != LLONG_MAX) { - poll_timer_wait(iface->delay_expires - time_msec()); + poll_timer_wait_until(iface->delay_expires); } } if (port->bond_fake_iface) { - poll_timer_wait(port->bond_next_fake_iface_update - time_msec()); + poll_timer_wait_until(port->bond_next_fake_iface_update); } } } @@ -2070,7 +1914,8 @@ dst_is_duplicate(const struct dst *dsts, size_t n_dsts, static bool port_trunks_vlan(const struct port *port, uint16_t vlan) { - return port->vlan < 0 && bitmap_is_set(port->trunks, vlan); + return (port->vlan < 0 + && (!port->trunks || bitmap_is_set(port->trunks, vlan))); } static bool @@ -2244,12 +2089,34 @@ static int flow_get_vlan(struct bridge *br, const flow_t *flow, return vlan; } +/* A VM broadcasts a gratuitous ARP to indicate that it has resumed after + * migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to + * indicate this; newer upstream kernels use gratuitous ARP requests. */ +static bool +is_gratuitous_arp(const flow_t *flow) +{ + return (flow->dl_type == htons(ETH_TYPE_ARP) + && eth_addr_is_broadcast(flow->dl_dst) + && (flow->nw_proto == ARP_OP_REPLY + || (flow->nw_proto == ARP_OP_REQUEST + && flow->nw_src == flow->nw_dst))); +} + static void update_learning_table(struct bridge *br, const flow_t *flow, int vlan, struct port *in_port) { - tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src, - vlan, in_port->port_idx); + enum grat_arp_lock_type lock_type; + tag_type rev_tag; + + /* We don't want to learn from gratuitous ARP packets that are reflected + * back over bond slaves so we lock the learning table. */ + lock_type = !is_gratuitous_arp(flow) ? 
GRAT_ARP_LOCK_NONE : + (in_port->n_ifaces == 1) ? GRAT_ARP_LOCK_SET : + GRAT_ARP_LOCK_CHECK; + + rev_tag = mac_learning_learn(br->ml, flow->dl_src, vlan, in_port->port_idx, + lock_type); if (rev_tag) { /* The log messages here could actually be useful in debugging, * so keep the rate limit relatively high. */ @@ -2263,14 +2130,6 @@ update_learning_table(struct bridge *br, const flow_t *flow, int vlan, } } -static bool -is_bcast_arp_reply(const flow_t *flow) -{ - return (flow->dl_type == htons(ETH_TYPE_ARP) - && flow->nw_proto == ARP_OP_REPLY - && eth_addr_is_broadcast(flow->dl_dst)); -} - /* Determines whether packets in 'flow' within 'br' should be forwarded or * dropped. Returns true if they may be forwarded, false if they should be * dropped. @@ -2349,6 +2208,7 @@ is_admissible(struct bridge *br, const flow_t *flow, bool have_packet, /* Packets received on bonds need special attention to avoid duplicates. */ if (in_port->n_ifaces > 1) { int src_idx; + bool is_grat_arp_locked; if (eth_addr_is_multicast(flow->dl_dst)) { *tags |= in_port->active_iface_tag; @@ -2360,11 +2220,15 @@ is_admissible(struct bridge *br, const flow_t *flow, bool have_packet, /* Drop all packets for which we have learned a different input * port, because we probably sent the packet on one slave and got - * it back on the other. Broadcast ARP replies are an exception - * to this rule: the host has moved to another switch. */ - src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); + * it back on the other. Gratuitous ARP packets are an exception + * to this rule: the host has moved to another switch. The exception + * to the exception is if we locked the learning table to avoid + * reflections on bond slaves. If this is the case, just drop the + * packet now. 
*/ + src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan, + &is_grat_arp_locked); if (src_idx != -1 && src_idx != in_port->port_idx && - !is_bcast_arp_reply(flow)) { + (!is_gratuitous_arp(flow) || is_grat_arp_locked)) { return false; } } @@ -2397,7 +2261,8 @@ process_flow(struct bridge *br, const flow_t *flow, } /* Determine output port. */ - out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, tags); + out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, tags, + NULL); if (out_port_idx >= 0 && out_port_idx < br->n_ports) { out_port = br->ports[out_port_idx]; } else if (!packet && !eth_addr_is_multicast(flow->dl_dst)) { @@ -3226,6 +3091,7 @@ port_create(struct bridge *br, const char *name) sizeof *br->ports); } br->ports[br->n_ports++] = port; + shash_add_assert(&br->port_by_name, port->name, port); VLOG_INFO("created port %s on bridge %s", port->name, br->name); bridge_flush(br); @@ -3350,7 +3216,7 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg) /* Get trunked VLANs. */ trunks = NULL; - if (vlan < 0) { + if (vlan < 0 && cfg->n_trunks) { size_t n_errors; size_t i; @@ -3369,17 +3235,14 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg) port->name, cfg->n_trunks); } if (n_errors == cfg->n_trunks) { - if (n_errors) { - VLOG_ERR("port %s: no valid trunks, trunking all VLANs", - port->name); - } - bitmap_set_multiple(trunks, 0, 4096, 1); - } - } else { - if (cfg->n_trunks) { - VLOG_ERR("port %s: ignoring trunks in favor of implicit vlan", + VLOG_ERR("port %s: no valid trunks, trunking all VLANs", port->name); + bitmap_free(trunks); + trunks = NULL; } + } else if (vlan >= 0 && cfg->n_trunks) { + VLOG_ERR("port %s: ignoring trunks in favor of implicit vlan", + port->name); } if (trunks == NULL ? 
port->trunks != NULL @@ -3388,8 +3251,6 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg) } bitmap_free(port->trunks); port->trunks = trunks; - - shash_destroy(&new_ifaces); } static void @@ -3414,6 +3275,8 @@ port_destroy(struct port *port) iface_destroy(port->ifaces[port->n_ifaces - 1]); } + shash_find_and_delete_assert(&br->port_by_name, port->name); + del = br->ports[port->port_idx] = br->ports[--br->n_ports]; del->port_idx = port->port_idx; @@ -3435,15 +3298,7 @@ port_from_dp_ifidx(const struct bridge *br, uint16_t dp_ifidx) static struct port * port_lookup(const struct bridge *br, const char *name) { - size_t i; - - for (i = 0; i < br->n_ports; i++) { - struct port *port = br->ports[i]; - if (!strcmp(port->name, name)) { - return port; - } - } - return NULL; + return shash_find_data(&br->port_by_name, name); } static struct iface * @@ -3618,6 +3473,8 @@ iface_create(struct port *port, const struct ovsrec_interface *if_cfg) iface->netdev = NULL; iface->cfg = if_cfg; + shash_add_assert(&br->iface_by_name, iface->name, iface); + /* Attempt to create the network interface in case it doesn't exist yet. 
/* Returns true if 'target' is one of the 'n' queue IDs in 'ids', false
 * otherwise.  An empty array ('n' == 0) never contains 'target'.
 *
 * Performs a binary search over the half-open index range [low, high), so
 * 'ids' must be sorted in ascending order.
 *
 * NOTE(review): the caller visible in this file passes the key array of an
 * OVSDB map column (qos->key_queues); this assumes the IDL hands map keys
 * over in ascending order -- confirm against the IDL implementation. */
static bool
queue_ids_include(const int64_t *ids, size_t n, int64_t target)
{
    size_t low = 0;
    size_t high = n;

    while (low < high) {
        /* Written this way so that 'low + high' cannot overflow. */
        size_t mid = low + (high - low) / 2;

        if (target > ids[mid]) {
            /* 'target' can only be in the upper half: discard [low, mid]. */
            low = mid + 1;
        } else if (target < ids[mid]) {
            /* 'target' can only be in the lower half: discard [mid, high). */
            high = mid;
        } else {
            return true;
        }
    }
    return false;
}
*/ + shash_from_ovs_idl_map(qos->key_other_config, qos->value_other_config, + qos->n_other_config, &details); + netdev_set_qos(iface->netdev, qos->type, &details); + shash_destroy(&details); + + /* Deconfigure queues that were deleted. */ + cbdata.netdev = iface->netdev; + cbdata.queue_ids = qos->key_queues; + cbdata.n_queue_ids = qos->n_queues; + netdev_dump_queues(iface->netdev, iface_delete_queues, &cbdata); + + /* Configure queues for 'iface'. */ + for (i = 0; i < qos->n_queues; i++) { + const struct ovsrec_queue *queue = qos->value_queues[i]; + unsigned int queue_id = qos->key_queues[i]; + + shash_from_ovs_idl_map(queue->key_other_config, + queue->value_other_config, + queue->n_other_config, &details); + netdev_set_queue(iface->netdev, queue_id, &details); + shash_destroy(&details); + } + } +} /* Port mirroring. */