#include <arpa/inet.h>
#include <ctype.h>
#include <inttypes.h>
+#include <sys/socket.h>
#include <net/if.h>
#include <openflow/openflow.h>
#include <signal.h>
struct list node; /* Node in global list of bridges. */
char *name; /* User-specified arbitrary name. */
struct mac_learning *ml; /* MAC learning table. */
- bool sent_config_request; /* Successfully sent config request? */
uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */
/* OpenFlow switch processing. */
struct ofproto *ofproto; /* OpenFlow switch. */
- /* Description strings. */
- char *mfr_desc; /* Manufacturer. */
- char *hw_desc; /* Hardware. */
- char *sw_desc; /* Software version. */
- char *serial_desc; /* Serial number. */
- char *dp_desc; /* Datapath description. */
-
/* Kernel datapath information. */
struct dpif *dpif; /* Datapath. */
struct port_array ifaces; /* Indexed by kernel datapath port number. */
/* Flow tracking. */
bool flush;
- /* Flow statistics gathering. */
- time_t next_stats_request;
-
/* Port mirroring. */
struct mirror *mirrors[MAX_MIRRORS];
}
netdev_options.args = &options;
netdev_options.ethertype = NETDEV_ETH_TYPE_NONE;
- netdev_options.may_create = true;
- if (iface_is_internal(iface->port->bridge, iface_cfg->name)) {
- netdev_options.may_open = true;
- }
error = netdev_open(&netdev_options, &iface->netdev);
br->name = xstrdup(br_cfg->name);
br->cfg = br_cfg;
br->ml = mac_learning_create();
- br->sent_config_request = false;
eth_addr_nicira_random(br->default_ea);
port_array_init(&br->ifaces);
return n_controllers;
}
-static void
-bridge_update_desc(struct bridge *br OVS_UNUSED)
-{
-#if 0
- bool changed = false;
- const char *desc;
-
- desc = cfg_get_string(0, "bridge.%s.mfr-desc", br->name);
- if (desc != br->mfr_desc) {
- free(br->mfr_desc);
- if (desc) {
- br->mfr_desc = xstrdup(desc);
- } else {
- br->mfr_desc = xstrdup(DEFAULT_MFR_DESC);
- }
- changed = true;
- }
-
- desc = cfg_get_string(0, "bridge.%s.hw-desc", br->name);
- if (desc != br->hw_desc) {
- free(br->hw_desc);
- if (desc) {
- br->hw_desc = xstrdup(desc);
- } else {
- br->hw_desc = xstrdup(DEFAULT_HW_DESC);
- }
- changed = true;
- }
-
- desc = cfg_get_string(0, "bridge.%s.sw-desc", br->name);
- if (desc != br->sw_desc) {
- free(br->sw_desc);
- if (desc) {
- br->sw_desc = xstrdup(desc);
- } else {
- br->sw_desc = xstrdup(DEFAULT_SW_DESC);
- }
- changed = true;
- }
-
- desc = cfg_get_string(0, "bridge.%s.serial-desc", br->name);
- if (desc != br->serial_desc) {
- free(br->serial_desc);
- if (desc) {
- br->serial_desc = xstrdup(desc);
- } else {
- br->serial_desc = xstrdup(DEFAULT_SERIAL_DESC);
- }
- changed = true;
- }
-
- desc = cfg_get_string(0, "bridge.%s.dp-desc", br->name);
- if (desc != br->dp_desc) {
- free(br->dp_desc);
- if (desc) {
- br->dp_desc = xstrdup(desc);
- } else {
- br->dp_desc = xstrdup(DEFAULT_DP_DESC);
- }
- changed = true;
- }
-
- if (changed) {
- ofproto_set_desc(br->ofproto, br->mfr_desc, br->hw_desc,
- br->sw_desc, br->serial_desc, br->dp_desc);
- }
-#endif
-}
-
static void
bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg,
struct bridge *br)
* versa. (XXX Should we delete all flows if we are switching from one
* controller to another?) */
-#if 0
- /* Configure OpenFlow management listeners. */
- svec_init(&listeners);
- cfg_get_all_strings(&listeners, "bridge.%s.openflow.listeners", br->name);
- if (!listeners.n) {
- svec_add_nocopy(&listeners, xasprintf("punix:%s/%s.mgmt",
- ovs_rundir, br->name));
- } else if (listeners.n == 1 && !strcmp(listeners.names[0], "none")) {
- svec_clear(&listeners);
- }
- svec_sort_unique(&listeners);
-
- svec_init(&old_listeners);
- ofproto_get_listeners(br->ofproto, &old_listeners);
- svec_sort_unique(&old_listeners);
-
- if (!svec_equal(&listeners, &old_listeners)) {
- ofproto_set_listeners(br->ofproto, &listeners);
- }
- svec_destroy(&listeners);
- svec_destroy(&old_listeners);
-
- /* Configure OpenFlow controller connection snooping. */
- svec_init(&snoops);
- cfg_get_all_strings(&snoops, "bridge.%s.openflow.snoops", br->name);
- if (!snoops.n) {
- svec_add_nocopy(&snoops, xasprintf("punix:%s/%s.snoop",
- ovs_rundir, br->name));
- } else if (snoops.n == 1 && !strcmp(snoops.names[0], "none")) {
- svec_clear(&snoops);
- }
- svec_sort_unique(&snoops);
-
- svec_init(&old_snoops);
- ofproto_get_snoops(br->ofproto, &old_snoops);
- svec_sort_unique(&old_snoops);
-
- if (!svec_equal(&snoops, &old_snoops)) {
- ofproto_set_snoops(br->ofproto, &snoops);
- }
- svec_destroy(&snoops);
- svec_destroy(&old_snoops);
-#else
- /* Default listener. */
+ /* Configure OpenFlow management listener. */
svec_init(&listeners);
svec_add_nocopy(&listeners, xasprintf("punix:%s/%s.mgmt",
ovs_rundir, br->name));
svec_destroy(&listeners);
svec_destroy(&old_listeners);
- /* Default snoop. */
+ /* Configure OpenFlow controller connection snooping. */
svec_init(&snoops);
svec_add_nocopy(&snoops, xasprintf("punix:%s/%s.snoop",
ovs_rundir, br->name));
}
svec_destroy(&snoops);
svec_destroy(&old_snoops);
-#endif
mirror_reconfigure(br);
-
- bridge_update_desc(br);
}
static void
struct netdev_stats slave_stats;
if (!netdev_get_stats(port->ifaces[i]->netdev, &slave_stats)) {
- bond_stats.rx_packets += slave_stats.rx_packets;
- bond_stats.rx_bytes += slave_stats.rx_bytes;
- bond_stats.tx_packets += slave_stats.tx_packets;
- bond_stats.tx_bytes += slave_stats.tx_bytes;
+ /* XXX: We swap the stats here because they are swapped back when
+ * reported by the internal device. The reason for this is
+ * internal devices normally represent packets going into the system
+ * but when used as fake bond device they represent packets leaving
+ * the system. We really should do this in the internal device
+ * itself because changing it here reverses the counts from the
+ * perspective of the switch. However, the internal device doesn't
+ * know what type of device it represents so we have to do it here
+ * for now. */
+ bond_stats.tx_packets += slave_stats.rx_packets;
+ bond_stats.tx_bytes += slave_stats.rx_bytes;
+ bond_stats.rx_packets += slave_stats.tx_packets;
+ bond_stats.rx_bytes += slave_stats.tx_bytes;
}
}
for (j = 0; j < port->n_ifaces; j++) {
struct iface *iface = port->ifaces[j];
if (iface->delay_expires != LLONG_MAX) {
- poll_timer_wait(iface->delay_expires - time_msec());
+ poll_timer_wait_until(iface->delay_expires);
}
}
if (port->bond_fake_iface) {
- poll_timer_wait(port->bond_next_fake_iface_update - time_msec());
+ poll_timer_wait_until(port->bond_next_fake_iface_update);
}
}
}
return vlan;
}
+/* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
+ * migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to
+ * indicate this; newer upstream kernels use gratuitous ARP requests.
+ *
+ * Returns true if all of the following hold for 'flow', false otherwise:
+ *
+ *   - 'dl_type' equals ETH_TYPE_ARP (compared in network byte order).
+ *
+ *   - 'dl_dst' is the Ethernet broadcast address.
+ *
+ *   - 'nw_proto' equals ARP_OP_REPLY, or 'nw_proto' equals ARP_OP_REQUEST
+ *     and 'nw_src' equals 'nw_dst'.
+ *
+ * Any broadcast reply is accepted, whereas a broadcast request must also
+ * have matching 'nw_src' and 'nw_dst' to count as gratuitous.
+ *
+ * NOTE(review): this presumes that, for ARP flows, 'nw_proto' carries the
+ * ARP opcode and 'nw_src'/'nw_dst' carry the ARP sender/target protocol
+ * addresses -- confirm against the code that extracts flows from packets. */
+static bool
+is_gratuitous_arp(const flow_t *flow)
+{
+ return (flow->dl_type == htons(ETH_TYPE_ARP)
+ && eth_addr_is_broadcast(flow->dl_dst)
+ && (flow->nw_proto == ARP_OP_REPLY
+ || (flow->nw_proto == ARP_OP_REQUEST
+ && flow->nw_src == flow->nw_dst)));
+}
+
static void
update_learning_table(struct bridge *br, const flow_t *flow, int vlan,
struct port *in_port)
{
- tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src,
- vlan, in_port->port_idx);
+ enum grat_arp_lock_type lock_type;
+ tag_type rev_tag;
+
+ /* We don't want to learn from gratuitous ARP packets that are reflected
+ * back over bond slaves so we lock the learning table. */
+ lock_type = !is_gratuitous_arp(flow) ? GRAT_ARP_LOCK_NONE :
+ (in_port->n_ifaces == 1) ? GRAT_ARP_LOCK_SET :
+ GRAT_ARP_LOCK_CHECK;
+
+ rev_tag = mac_learning_learn(br->ml, flow->dl_src, vlan, in_port->port_idx,
+ lock_type);
if (rev_tag) {
/* The log messages here could actually be useful in debugging,
* so keep the rate limit relatively high. */
}
}
-static bool
-is_bcast_arp_reply(const flow_t *flow)
-{
- return (flow->dl_type == htons(ETH_TYPE_ARP)
- && flow->nw_proto == ARP_OP_REPLY
- && eth_addr_is_broadcast(flow->dl_dst));
-}
-
/* Determines whether packets in 'flow' within 'br' should be forwarded or
* dropped. Returns true if they may be forwarded, false if they should be
* dropped.
/* Packets received on bonds need special attention to avoid duplicates. */
if (in_port->n_ifaces > 1) {
int src_idx;
+ bool is_grat_arp_locked;
if (eth_addr_is_multicast(flow->dl_dst)) {
*tags |= in_port->active_iface_tag;
/* Drop all packets for which we have learned a different input
* port, because we probably sent the packet on one slave and got
- * it back on the other. Broadcast ARP replies are an exception
- * to this rule: the host has moved to another switch. */
- src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
+ * it back on the other. Gratuitous ARP packets are an exception
+ * to this rule: the host has moved to another switch. The exception
+ * to the exception is if we locked the learning table to avoid
+ * reflections on bond slaves. If this is the case, just drop the
+ * packet now. */
+ src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan,
+ &is_grat_arp_locked);
if (src_idx != -1 && src_idx != in_port->port_idx &&
- !is_bcast_arp_reply(flow)) {
+ (!is_gratuitous_arp(flow) || is_grat_arp_locked)) {
return false;
}
}
}
/* Determine output port. */
- out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, tags);
+ out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, tags,
+ NULL);
if (out_port_idx >= 0 && out_port_idx < br->n_ports) {
out_port = br->ports[out_port_idx];
} else if (!packet && !eth_addr_is_multicast(flow->dl_dst)) {
iface->netdev = NULL;
iface->cfg = if_cfg;
+ shash_add_assert(&br->iface_by_name, iface->name, iface);
+
/* Attempt to create the network interface in case it doesn't exist yet. */
if (!iface_is_internal(br, iface->name)) {
error = set_up_iface(if_cfg, iface, true);
VLOG_WARN("could not create iface %s: %s", iface->name,
strerror(error));
+ shash_find_and_delete_assert(&br->iface_by_name, iface->name);
free(iface->name);
free(iface);
return NULL;
}
}
- shash_add_assert(&br->iface_by_name, iface->name, iface);
-
if (port->n_ifaces >= port->allocated_ifaces) {
port->ifaces = x2nrealloc(port->ifaces, &port->allocated_ifaces,
sizeof *port->ifaces);