Needed by XAPI to accurately report bond statistics.
Ugh.
Bug NIC-63.
#include <linux/workqueue.h>
#include "datapath.h"
+#include "openvswitch/internal_dev.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
struct net_device_stats stats;
struct pcpu_lstats *lstats;
+
+ /* This is warty support for XAPI, which does not support summing bond
+ * device statistics itself. 'extra_stats' can be set by userspace via
+ * the DP_DEV_SET_STATS ioctl and, if they are, then they are added to
+ * the real device stats. */
+ struct pcpu_lstats extra_stats;
};
struct vport_ops internal_vport_ops;
int i;
stats = &internal_dev->stats;
- memset(stats, 0, sizeof(struct net_device_stats));
+ stats->rx_bytes = internal_dev->extra_stats.rx_bytes;
+ stats->rx_packets = internal_dev->extra_stats.rx_packets;
+ stats->tx_bytes = internal_dev->extra_stats.tx_bytes;
+ stats->tx_packets = internal_dev->extra_stats.tx_packets;
for_each_possible_cpu(i) {
const struct pcpu_lstats *lb_stats;
static int internal_dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
+ struct internal_dev *internal_dev = internal_dev_priv(dev);
+
+ if (cmd == INTERNAL_DEV_SET_STATS) {
+ struct internal_dev_stats stats;
+
+ if (copy_from_user(&stats, ifr->ifr_data, sizeof(stats)))
+ return -EFAULT;
+
+ internal_dev->extra_stats.rx_bytes = stats.rx_bytes;
+ internal_dev->extra_stats.rx_packets = stats.rx_packets;
+ internal_dev->extra_stats.tx_bytes = stats.tx_bytes;
+ internal_dev->extra_stats.tx_packets = stats.tx_packets;
+
+ return 0;
+ }
+
if (dp_ioctl_hook)
return dp_ioctl_hook(dev, ifr, cmd);
return -EOPNOTSUPP;
--- /dev/null
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later. The permission statements for each of these licenses is
+ * given below. You may license your modifications to this file under either
+ * of these licenses or both. If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * ----------------------------------------------------------------------
+ */
+
+/* Ioctl for Open vSwitch "internal ports" to support XAPI, which does not
+ * support summing statistics from bond slaves, but still needs to get bond
+ * statistics.
+ *
+ * This is a nasty wart that needs removing. */
+
+#ifndef OPENVSWITCH_INTERNAL_DEV_H
+#define OPENVSWITCH_INTERNAL_DEV_H 1
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <sys/types.h>
+#endif
+
+#include <linux/sockios.h>
+
+struct internal_dev_stats {
+ __u64 rx_packets;
+ __u64 rx_bytes;
+ __u64 tx_packets;
+ __u64 tx_bytes;
+};
+
+#define INTERNAL_DEV_SET_STATS SIOCDEVPRIVATE
+
+#endif /* openvswitch/internal_dev.h */
NULL, /* get_ifindex */
netdev_gre_get_carrier,
netdev_gre_get_stats,
+ NULL, /* set_stats */
NULL, /* get_features */
NULL, /* set_advertisements */
#include "netlink.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
+#include "openvswitch/internal_dev.h"
+#include "openvswitch/gre.h"
#include "packets.h"
#include "poll-loop.h"
#include "rtnetlink.h"
VALID_IN6 = 1 << 3,
VALID_MTU = 1 << 4,
VALID_CARRIER = 1 << 5,
- VALID_IS_INTERNAL = 1 << 6
+ VALID_IS_PSEUDO = 1 << 6 /* Represents is_internal and is_tap. */
};
struct tap_state {
struct shash_node *shash_node;
unsigned int cache_valid;
+ /* The following are figured out "on demand" only. They are only valid
+ * when the corresponding VALID_* bit in 'cache_valid' is set. */
int ifindex;
uint8_t etheraddr[ETH_ADDR_LEN];
struct in_addr address, netmask;
struct in6_addr in6;
int mtu;
int carrier;
- bool is_internal;
+ bool is_internal; /* Is this an openvswitch internal device? */
+ bool is_tap; /* Is this a tuntap device? */
union {
struct tap_state tap;
}
}
+/* Brings the 'is_internal' and 'is_tap' members of 'netdev_dev' up-to-date. */
+static void
+netdev_linux_update_is_pseudo(struct netdev_dev_linux *netdev_dev)
+{
+ if (!(netdev_dev->cache_valid & VALID_IS_PSEUDO)) {
+ const char *name = netdev_dev_get_name(&netdev_dev->netdev_dev);
+ const char *type = netdev_dev_get_type(&netdev_dev->netdev_dev);
+
+ netdev_dev->is_tap = !strcmp(type, "tap");
+ netdev_dev->is_internal = false;
+ if (!netdev_dev->is_tap) {
+ struct ethtool_drvinfo drvinfo;
+ int error;
+
+ memset(&drvinfo, 0, sizeof drvinfo);
+ error = netdev_linux_do_ethtool(name,
+ (struct ethtool_cmd *)&drvinfo,
+ ETHTOOL_GDRVINFO,
+ "ETHTOOL_GDRVINFO");
+
+ if (!error && !strcmp(drvinfo.driver, "openvswitch")) {
+ netdev_dev->is_internal = true;
+ }
+ }
+
+ netdev_dev->cache_valid |= VALID_IS_PSEUDO;
+ }
+}
+
/* Retrieves current device stats for 'netdev'.
*
* XXX All of the members of struct netdev_stats are 64 bits wide, but on
COVERAGE_INC(netdev_get_stats);
- if (!(netdev_dev->cache_valid & VALID_IS_INTERNAL)) {
- netdev_dev->is_internal = !strcmp(netdev_get_type(netdev_), "tap");
- if (!netdev_dev->is_internal) {
- struct ethtool_drvinfo drvinfo;
-
- memset(&drvinfo, 0, sizeof drvinfo);
- error = netdev_linux_do_ethtool(netdev_get_name(netdev_),
- (struct ethtool_cmd *)&drvinfo,
- ETHTOOL_GDRVINFO,
- "ETHTOOL_GDRVINFO");
-
- if (!error) {
- netdev_dev->is_internal = !strcmp(drvinfo.driver,
- "openvswitch");
- }
- }
-
- netdev_dev->cache_valid |= VALID_IS_INTERNAL;
- }
-
+ netdev_linux_update_is_pseudo(netdev_dev);
if (netdev_dev->is_internal) {
collect_stats = &raw_stats;
}
* will appear to be swapped relative to the other ports since we are the
* one sending the data, not a remote computer. For consistency, we swap
* them back here. */
- if (!error && netdev_dev->is_internal) {
+ if (!error && (netdev_dev->is_internal || netdev_dev->is_tap)) {
stats->rx_packets = raw_stats.tx_packets;
stats->tx_packets = raw_stats.rx_packets;
stats->rx_bytes = raw_stats.tx_bytes;
return error;
}
+static int
+netdev_linux_set_stats(struct netdev *netdev,
+ const struct netdev_stats *stats)
+{
+ struct netdev_dev_linux *netdev_dev =
+ netdev_dev_linux_cast(netdev_get_dev(netdev));
+ struct internal_dev_stats dp_dev_stats;
+ struct ifreq ifr;
+
+ /* We must reject this call if 'netdev' is not an Open vSwitch internal
+ * port, because the ioctl that we are about to execute is in the "device
+ * private ioctls" range, which means that executing it on a device that
+ * is not the type we expect could do any random thing.
+ *
+ * (Amusingly, these ioctl numbers are commented "THESE IOCTLS ARE
+ * _DEPRECATED_ AND WILL DISAPPEAR IN 2.5.X" in linux/sockios.h. I guess
+ * DaveM is a little behind on that.) */
+ netdev_linux_update_is_pseudo(netdev_dev);
+ if (!netdev_dev->is_internal) {
+ return EOPNOTSUPP;
+ }
+
+ /* This actually only sets the *offset* that the dp_dev applies, but in our
+ * usage for fake bond devices the dp_dev never has any traffic of it own
+ * so it has the same effect. */
+ dp_dev_stats.rx_packets = stats->rx_packets;
+ dp_dev_stats.rx_bytes = stats->rx_bytes;
+ dp_dev_stats.tx_packets = stats->tx_packets;
+ dp_dev_stats.tx_bytes = stats->tx_bytes;
+ ifr.ifr_data = (void *) &dp_dev_stats;
+ return netdev_linux_do_ioctl(netdev_get_name(netdev), &ifr,
+ INTERNAL_DEV_SET_STATS,
+ "INTERNAL_DEV_SET_STATS");
+}
+
/* Stores the features supported by 'netdev' into each of '*current',
* '*advertised', '*supported', and '*peer' that are non-null. Each value is a
* bitmap of "enum ofp_port_features" bits, in host byte order. Returns 0 if
netdev_linux_get_ifindex,
netdev_linux_get_carrier,
netdev_linux_get_stats,
+ netdev_linux_set_stats,
netdev_linux_get_features,
netdev_linux_set_advertisements,
netdev_linux_get_ifindex,
netdev_linux_get_carrier,
netdev_linux_get_stats,
+ NULL, /* set_stats */
netdev_linux_get_features,
netdev_linux_set_advertisements,
netdev_linux_get_ifindex,
netdev_linux_get_carrier,
netdev_linux_get_stats,
+ NULL, /* set_stats */
netdev_linux_get_features,
netdev_linux_set_advertisements,
* (UINT64_MAX). */
int (*get_stats)(const struct netdev *netdev, struct netdev_stats *);
+ /* Sets the device stats for 'netdev' to 'stats'.
+ *
+ * Most network devices won't support this feature and will set this
+ * function pointer to NULL, which is equivalent to returning EOPNOTSUPP.
+ *
+ * Some network devices might only allow setting their stats to 0. */
+ int (*set_stats)(struct netdev *netdev, const struct netdev_stats *);
+
/* Stores the features supported by 'netdev' into each of '*current',
* '*advertised', '*supported', and '*peer'. Each value is a bitmap of
* "enum ofp_port_features" bits, in host byte order.
return error;
}
+/* Attempts to change the stats for 'netdev' to those provided in 'stats'.
+ * Returns 0 if successful, otherwise a positive errno value.
+ *
+ * This will probably fail for most network devices. Some devices might only
+ * allow setting their stats to 0. */
+int
+netdev_set_stats(struct netdev *netdev, const struct netdev_stats *stats)
+{
+ return (netdev_get_dev(netdev)->netdev_class->set_stats
+ ? netdev_get_dev(netdev)->netdev_class->set_stats(netdev, stats)
+ : EOPNOTSUPP);
+}
+
/* Attempts to set input rate limiting (policing) policy, such that up to
* 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative burst
* size of 'kbits' kb. */
int netdev_turn_flags_off(struct netdev *, enum netdev_flags, bool permanent);
int netdev_get_stats(const struct netdev *, struct netdev_stats *);
+int netdev_set_stats(struct netdev *, const struct netdev_stats *);
int netdev_set_policing(struct netdev *, uint32_t kbits_rate,
uint32_t kbits_burst);
int updelay, downdelay; /* Delay before iface goes up/down, in ms. */
bool bond_compat_is_stale; /* Need to call port_update_bond_compat()? */
bool bond_fake_iface; /* Fake a bond interface for legacy compat? */
+ long bond_next_fake_iface_update; /* Next update to fake bond stats. */
int bond_rebalance_interval; /* Interval between rebalances, in ms. */
long long int bond_next_rebalance; /* Next rebalancing time. */
port->bond_compat_is_stale = true;
}
+/* Attempts to make the sum of the bond slaves' statistics appear on the fake
+ * bond interface. */
+static void
+bond_update_fake_iface_stats(struct port *port)
+{
+ struct netdev_stats bond_stats;
+ struct netdev *bond_dev;
+ size_t i;
+
+ memset(&bond_stats, 0, sizeof bond_stats);
+
+ for (i = 0; i < port->n_ifaces; i++) {
+ struct netdev_stats slave_stats;
+
+ if (!netdev_get_stats(port->ifaces[i]->netdev, &slave_stats)) {
+ bond_stats.rx_packets += slave_stats.rx_packets;
+ bond_stats.rx_bytes += slave_stats.rx_bytes;
+ bond_stats.tx_packets += slave_stats.tx_packets;
+ bond_stats.tx_bytes += slave_stats.tx_bytes;
+ }
+ }
+
+ if (!netdev_open_default(port->name, &bond_dev)) {
+ netdev_set_stats(bond_dev, &bond_stats);
+ netdev_close(bond_dev);
+ }
+}
+
static void
bond_run(struct bridge *br)
{
bond_enable_slave(iface, !iface->enabled);
}
}
+
+ if (port->bond_fake_iface
+ && time_msec() >= port->bond_next_fake_iface_update) {
+ bond_update_fake_iface_stats(port);
+ port->bond_next_fake_iface_update = time_msec() + 1000;
+ }
}
if (port->bond_compat_is_stale) {
poll_timer_wait(iface->delay_expires - time_msec());
}
}
+ if (port->bond_fake_iface) {
+ poll_timer_wait(port->bond_next_fake_iface_update - time_msec());
+ }
}
}
bond_choose_active_iface(port);
port->bond_next_rebalance
= time_msec() + port->bond_rebalance_interval;
+
+ if (port->cfg->bond_fake_iface) {
+ port->bond_next_fake_iface_update = time_msec();
+ }
}
port->bond_compat_is_stale = true;
port->bond_fake_iface = port->cfg->bond_fake_iface;