From: Ben Pfaff Date: Mon, 19 Apr 2010 18:12:27 +0000 (-0700) Subject: Update fake bond devices' statistics with the sum of bond slaves' stats. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8722022c0c0d29d3f998dc26c50944c456e56646;p=openvswitch Update fake bond devices' statistics with the sum of bond slaves' stats. Needed by XAPI to accurately report bond statistics. Ugh. Bug NIC-63. --- diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c index 6d52db0f..39283e05 100644 --- a/datapath/vport-internal_dev.c +++ b/datapath/vport-internal_dev.c @@ -17,6 +17,7 @@ #include #include "datapath.h" +#include "openvswitch/internal_dev.h" #include "vport-internal_dev.h" #include "vport-netdev.h" @@ -32,6 +33,12 @@ struct internal_dev { struct net_device_stats stats; struct pcpu_lstats *lstats; + + /* This is warty support for XAPI, which does not support summing bond + * device statistics itself. 'extra_stats' can be set by userspace via + * the DP_DEV_SET_STATS ioctl and, if they are, then they are added to + * the real device stats. */ + struct pcpu_lstats extra_stats; }; struct vport_ops internal_vport_ops; @@ -48,7 +55,10 @@ static struct net_device_stats *internal_dev_get_stats(struct net_device *netdev int i; stats = &internal_dev->stats; - memset(stats, 0, sizeof(struct net_device_stats)); + stats->rx_bytes = internal_dev->extra_stats.rx_bytes; + stats->rx_packets = internal_dev->extra_stats.rx_packets; + stats->tx_bytes = internal_dev->extra_stats.tx_bytes; + stats->tx_packets = internal_dev->extra_stats.tx_packets; for_each_possible_cpu(i) { const struct pcpu_lstats *lb_stats; @@ -169,6 +179,22 @@ static void internal_dev_free(struct net_device *netdev) static int internal_dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { + struct internal_dev *internal_dev = internal_dev_priv(dev); + + if (cmd == INTERNAL_DEV_SET_STATS) { + struct internal_dev_stats stats; + + if (copy_from_user(&stats, ifr->ifr_data, sizeof(stats))) + return -EFAULT; + + internal_dev->extra_stats.rx_bytes = stats.rx_bytes; + internal_dev->extra_stats.rx_packets = stats.rx_packets; + internal_dev->extra_stats.tx_bytes = stats.tx_bytes; + internal_dev->extra_stats.tx_packets = stats.tx_packets; + + return 0; + } + if (dp_ioctl_hook) return dp_ioctl_hook(dev, ifr, cmd); return -EOPNOTSUPP; diff --git a/include/openvswitch/internal_dev.h b/include/openvswitch/internal_dev.h new file mode 100644 index 00000000..26c73598 --- /dev/null +++ b/include/openvswitch/internal_dev.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2010 Nicira Networks. + * + * This file is offered under your choice of two licenses: Apache 2.0 or GNU + * GPL 2.0 or later. The permission statements for each of these licenses is + * given below. You may license your modifications to this file under either + * of these licenses or both. If you wish to license your modifications under + * only one of these licenses, delete the permission text for the other + * license. + * + * ---------------------------------------------------------------------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ---------------------------------------------------------------------- + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * ---------------------------------------------------------------------- + */ + +/* Ioctl for Open vSwitch "internal ports" to support XAPI, which does not + * support summing statistics from bond slaves, but still needs to get bond + * statistics. + * + * This is a nasty wart that needs removing. */ + +#ifndef OPENVSWITCH_INTERNAL_DEV_H +#define OPENVSWITCH_INTERNAL_DEV_H 1 + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#include + +struct internal_dev_stats { + __u64 rx_packets; + __u64 rx_bytes; + __u64 tx_packets; + __u64 tx_bytes; +}; + +#define INTERNAL_DEV_SET_STATS SIOCDEVPRIVATE + +#endif /* openvswitch/internal_dev.h */ diff --git a/lib/netdev-gre.c b/lib/netdev-gre.c index 31fe5249..4e28fee0 100644 --- a/lib/netdev-gre.c +++ b/lib/netdev-gre.c @@ -456,6 +456,7 @@ const struct netdev_class netdev_gre_class = { NULL, /* get_ifindex */ netdev_gre_get_carrier, netdev_gre_get_stats, + NULL, /* set_stats */ NULL, /* get_features */ NULL, /* set_advertisements */ diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index bcc33268..e86a160c 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -50,6 +50,8 @@ #include "netlink.h" #include "ofpbuf.h" #include "openflow/openflow.h" +#include "openvswitch/internal_dev.h" +#include "openvswitch/gre.h" #include "packets.h" #include "poll-loop.h" #include "rtnetlink.h" @@ -79,7 +81,7 @@ enum { VALID_IN6 = 1 << 3, VALID_MTU = 1 << 4, VALID_CARRIER = 1 << 5, - VALID_IS_INTERNAL = 1 << 6 + VALID_IS_PSEUDO = 1 << 6 /* Represents is_internal and is_tap. */ }; struct tap_state { @@ -96,13 +98,16 @@ struct netdev_dev_linux { struct shash_node *shash_node; unsigned int cache_valid; + /* The following are figured out "on demand" only. They are only valid + * when the corresponding VALID_* bit in 'cache_valid' is set. */ int ifindex; uint8_t etheraddr[ETH_ADDR_LEN]; struct in_addr address, netmask; struct in6_addr in6; int mtu; int carrier; - bool is_internal; + bool is_internal; /* Is this an openvswitch internal device? */ + bool is_tap; /* Is this a tuntap device? */ union { struct tap_state tap; @@ -899,6 +904,35 @@ check_for_working_netlink_stats(void) } } +/* Brings the 'is_internal' and 'is_tap' members of 'netdev_dev' up-to-date. */ +static void +netdev_linux_update_is_pseudo(struct netdev_dev_linux *netdev_dev) +{ + if (!(netdev_dev->cache_valid & VALID_IS_PSEUDO)) { + const char *name = netdev_dev_get_name(&netdev_dev->netdev_dev); + const char *type = netdev_dev_get_type(&netdev_dev->netdev_dev); + + netdev_dev->is_tap = !strcmp(type, "tap"); + netdev_dev->is_internal = false; + if (!netdev_dev->is_tap) { + struct ethtool_drvinfo drvinfo; + int error; + + memset(&drvinfo, 0, sizeof drvinfo); + error = netdev_linux_do_ethtool(name, + (struct ethtool_cmd *)&drvinfo, + ETHTOOL_GDRVINFO, + "ETHTOOL_GDRVINFO"); + + if (!error && !strcmp(drvinfo.driver, "openvswitch")) { + netdev_dev->is_internal = true; + } + } + + netdev_dev->cache_valid |= VALID_IS_PSEUDO; + } +} + /* Retrieves current device stats for 'netdev'. * * XXX All of the members of struct netdev_stats are 64 bits wide, but on @@ -916,26 +950,7 @@ netdev_linux_get_stats(const struct netdev *netdev_, COVERAGE_INC(netdev_get_stats); - if (!(netdev_dev->cache_valid & VALID_IS_INTERNAL)) { - netdev_dev->is_internal = !strcmp(netdev_get_type(netdev_), "tap"); - if (!netdev_dev->is_internal) { - struct ethtool_drvinfo drvinfo; - - memset(&drvinfo, 0, sizeof drvinfo); - error = netdev_linux_do_ethtool(netdev_get_name(netdev_), - (struct ethtool_cmd *)&drvinfo, - ETHTOOL_GDRVINFO, - "ETHTOOL_GDRVINFO"); - - if (!error) { - netdev_dev->is_internal = !strcmp(drvinfo.driver, - "openvswitch"); - } - } - - netdev_dev->cache_valid |= VALID_IS_INTERNAL; - } - + netdev_linux_update_is_pseudo(netdev_dev); if (netdev_dev->is_internal) { collect_stats = &raw_stats; } @@ -958,7 +973,7 @@ netdev_linux_get_stats(const struct netdev *netdev_, * will appear to be swapped relative to the other ports since we are the * one sending the data, not a remote computer. For consistency, we swap * them back here. */ - if (!error && netdev_dev->is_internal) { + if (!error && (netdev_dev->is_internal || netdev_dev->is_tap)) { stats->rx_packets = raw_stats.tx_packets; stats->tx_packets = raw_stats.rx_packets; stats->rx_bytes = raw_stats.tx_bytes; @@ -985,6 +1000,41 @@ netdev_linux_get_stats(const struct netdev *netdev_, return error; } +static int +netdev_linux_set_stats(struct netdev *netdev, + const struct netdev_stats *stats) +{ + struct netdev_dev_linux *netdev_dev = + netdev_dev_linux_cast(netdev_get_dev(netdev)); + struct internal_dev_stats dp_dev_stats; + struct ifreq ifr; + + /* We must reject this call if 'netdev' is not an Open vSwitch internal + * port, because the ioctl that we are about to execute is in the "device + * private ioctls" range, which means that executing it on a device that + * is not the type we expect could do any random thing. + * + * (Amusingly, these ioctl numbers are commented "THESE IOCTLS ARE + * _DEPRECATED_ AND WILL DISAPPEAR IN 2.5.X" in linux/sockios.h. I guess + * DaveM is a little behind on that.) */ + netdev_linux_update_is_pseudo(netdev_dev); + if (!netdev_dev->is_internal) { + return EOPNOTSUPP; + } + + /* This actually only sets the *offset* that the dp_dev applies, but in our + * usage for fake bond devices the dp_dev never has any traffic of it own + * so it has the same effect. */ + dp_dev_stats.rx_packets = stats->rx_packets; + dp_dev_stats.rx_bytes = stats->rx_bytes; + dp_dev_stats.tx_packets = stats->tx_packets; + dp_dev_stats.tx_bytes = stats->tx_bytes; + ifr.ifr_data = (void *) &dp_dev_stats; + return netdev_linux_do_ioctl(netdev_get_name(netdev), &ifr, + INTERNAL_DEV_SET_STATS, + "INTERNAL_DEV_SET_STATS"); +} + /* Stores the features supported by 'netdev' into each of '*current', * '*advertised', '*supported', and '*peer' that are non-null. Each value is a * bitmap of "enum ofp_port_features" bits, in host byte order. Returns 0 if @@ -1668,6 +1718,7 @@ const struct netdev_class netdev_linux_class = { netdev_linux_get_ifindex, netdev_linux_get_carrier, netdev_linux_get_stats, + netdev_linux_set_stats, netdev_linux_get_features, netdev_linux_set_advertisements, @@ -1716,6 +1767,7 @@ const struct netdev_class netdev_tap_class = { netdev_linux_get_ifindex, netdev_linux_get_carrier, netdev_linux_get_stats, + NULL, /* set_stats */ netdev_linux_get_features, netdev_linux_set_advertisements, @@ -1764,6 +1816,7 @@ const struct netdev_class netdev_patch_class = { netdev_linux_get_ifindex, netdev_linux_get_carrier, netdev_linux_get_stats, + NULL, /* set_stats */ netdev_linux_get_features, netdev_linux_set_advertisements, diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 5ec14d73..eb62cc18 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -269,6 +269,14 @@ struct netdev_class { * (UINT64_MAX). */ int (*get_stats)(const struct netdev *netdev, struct netdev_stats *); + /* Sets the device stats for 'netdev' to 'stats'. + * + * Most network devices won't support this feature and will set this + * function pointer to NULL, which is equivalent to returning EOPNOTSUPP. + * + * Some network devices might only allow setting their stats to 0. */ + int (*set_stats)(struct netdev *netdev, const struct netdev_stats *); + /* Stores the features supported by 'netdev' into each of '*current', * '*advertised', '*supported', and '*peer'. Each value is a bitmap of * "enum ofp_port_features" bits, in host byte order. diff --git a/lib/netdev.c b/lib/netdev.c index 4741e24b..09c00af3 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -958,6 +958,19 @@ netdev_get_stats(const struct netdev *netdev, struct netdev_stats *stats) return error; } +/* Attempts to change the stats for 'netdev' to those provided in 'stats'. + * Returns 0 if successful, otherwise a positive errno value. + * + * This will probably fail for most network devices. Some devices might only + * allow setting their stats to 0. */ +int +netdev_set_stats(struct netdev *netdev, const struct netdev_stats *stats) +{ + return (netdev_get_dev(netdev)->netdev_class->set_stats + ? netdev_get_dev(netdev)->netdev_class->set_stats(netdev, stats) + : EOPNOTSUPP); +} + /* Attempts to set input rate limiting (policing) policy, such that up to * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative burst * size of 'kbits' kb. */ diff --git a/lib/netdev.h b/lib/netdev.h index 63414aee..2a096ee2 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -147,6 +147,7 @@ int netdev_turn_flags_on(struct netdev *, enum netdev_flags, bool permanent); int netdev_turn_flags_off(struct netdev *, enum netdev_flags, bool permanent); int netdev_get_stats(const struct netdev *, struct netdev_stats *); +int netdev_set_stats(struct netdev *, const struct netdev_stats *); int netdev_set_policing(struct netdev *, uint32_t kbits_rate, uint32_t kbits_burst); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 04755b7b..0cdf72f7 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -137,6 +137,7 @@ struct port { int updelay, downdelay; /* Delay before iface goes up/down, in ms. */ bool bond_compat_is_stale; /* Need to call port_update_bond_compat()? */ bool bond_fake_iface; /* Fake a bond interface for legacy compat? */ + long bond_next_fake_iface_update; /* Next update to fake bond stats. */ int bond_rebalance_interval; /* Interval between rebalances, in ms. */ long long int bond_next_rebalance; /* Next rebalancing time. */ @@ -1847,6 +1848,34 @@ bond_enable_slave(struct iface *iface, bool enable) port->bond_compat_is_stale = true; } +/* Attempts to make the sum of the bond slaves' statistics appear on the fake + * bond interface. */ +static void +bond_update_fake_iface_stats(struct port *port) +{ + struct netdev_stats bond_stats; + struct netdev *bond_dev; + size_t i; + + memset(&bond_stats, 0, sizeof bond_stats); + + for (i = 0; i < port->n_ifaces; i++) { + struct netdev_stats slave_stats; + + if (!netdev_get_stats(port->ifaces[i]->netdev, &slave_stats)) { + bond_stats.rx_packets += slave_stats.rx_packets; + bond_stats.rx_bytes += slave_stats.rx_bytes; + bond_stats.tx_packets += slave_stats.tx_packets; + bond_stats.tx_bytes += slave_stats.tx_bytes; + } + } + + if (!netdev_open_default(port->name, &bond_dev)) { + netdev_set_stats(bond_dev, &bond_stats); + netdev_close(bond_dev); + } +} + static void bond_run(struct bridge *br) { @@ -1862,6 +1891,12 @@ bond_run(struct bridge *br) bond_enable_slave(iface, !iface->enabled); } } + + if (port->bond_fake_iface + && time_msec() >= port->bond_next_fake_iface_update) { + bond_update_fake_iface_stats(port); + port->bond_next_fake_iface_update = time_msec() + 1000; + } } if (port->bond_compat_is_stale) { @@ -1887,6 +1922,9 @@ bond_wait(struct bridge *br) poll_timer_wait(iface->delay_expires - time_msec()); } } + if (port->bond_fake_iface) { + poll_timer_wait(port->bond_next_fake_iface_update - time_msec()); + } } } @@ -3364,6 +3402,10 @@ port_update_bonding(struct port *port) bond_choose_active_iface(port); port->bond_next_rebalance = time_msec() + port->bond_rebalance_interval; + + if (port->cfg->bond_fake_iface) { + port->bond_next_fake_iface_update = time_msec(); + } } port->bond_compat_is_stale = true; port->bond_fake_iface = port->cfg->bond_fake_iface;