From 431488e6638d3fbb53d215fa10d2ff2d8f1c2824 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 16 Mar 2011 14:39:17 -0700 Subject: [PATCH] Support vlan_group workaround implemented in XenServer kernels. Some Linux network drivers support a feature called "VLAN acceleration", associated with a data structure called a "vlan_group". A vlan_group is, abstractly, a dictionary that maps from a VLAN ID (in the range 0...4095) to a VLAN device, that is, a Linux network device associated with a particular VLAN, e.g. "eth0.9" for VLAN 9 on eth0. Some drivers that support VLAN acceleration have bugs that fall roughly into the following categories: * Some NICs strip VLAN tags on receive if no vlan_group is registered, so that the tag is completely lost. * Some drivers size their receive buffers based on whether a vlan_group is enabled, meaning that a maximum size packet with a VLAN tag will not fit if a vlan_group is not configured. * On transmit some drivers expect that VLAN acceleration will be used if it is available (which can only be done if a vlan_group is configured). In these cases, the driver may fail to parse the packet and correctly setup checksum offloading and/or TSO. The correct long term solution is to fix these driver bugs. To cope until then, we have prepared a patch to the Linux kernel network stack that works around these problems. This commit adds support for the workaround implemented by that patch. 
Signed-off-by: Ben Pfaff Acked-by: Jesse Gross --- INSTALL.Linux | 7 +- acinclude.m4 | 3 + datapath/vport-netdev.c | 22 ++- utilities/automake.mk | 7 + utilities/ovs-vlan-bug-workaround.8.in | 90 +++++++++++ utilities/ovs-vlan-bug-workaround.c | 144 ++++++++++++++++++ utilities/ovs-vlan-bugs.man | 17 +++ utilities/ovs-vlan-test.8.in | 18 +-- xenserver/openvswitch-xen.spec | 2 + ..._xensource_libexec_InterfaceReconfigure.py | 1 + ...rce_libexec_InterfaceReconfigureVswitch.py | 59 ++++++- 11 files changed, 345 insertions(+), 25 deletions(-) create mode 100644 utilities/ovs-vlan-bug-workaround.8.in create mode 100644 utilities/ovs-vlan-bug-workaround.c create mode 100644 utilities/ovs-vlan-bugs.man diff --git a/INSTALL.Linux b/INSTALL.Linux index c46ab555..0d56da65 100644 --- a/INSTALL.Linux +++ b/INSTALL.Linux @@ -234,9 +234,10 @@ Prerequisites section, follow the procedure below to build. whether this is the case. If you know that your particular driver can handle it (for example by testing sending large TCP packets over VLANs) then passing in a value of 1 may improve performance. Modules built for - Linux kernels 2.6.37 and later do not need this and do not have this - parameter. If you do not understand what this means or do not know if - your driver will work, do not set this. + Linux kernels 2.6.37 and later, as well as specially patched versions + of earlier kernels, do not need this and do not have this parameter. If + you do not understand what this means or do not know if your driver + will work, do not set this. 7. 
Initialize the configuration database using ovsdb-tool, e.g.: diff --git a/acinclude.m4 b/acinclude.m4 index 5843bfa5..fa6f534d 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -211,6 +211,9 @@ AC_DEFUN([OVS_CHECK_LINUX26_COMPAT], [ OVS_GREP_IFELSE([$KSRC26/include/linux/if_link.h], [rtnl_link_stats64]) + OVS_GREP_IFELSE([$KSRC26/include/linux/if_vlan.h], [ADD_ALL_VLANS_CMD], + [OVS_DEFINE([HAVE_VLAN_BUG_WORKAROUND])]) + OVS_CHECK_LOG2_H if cmp -s datapath/linux-2.6/kcompat.h.new \ diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c index 11faf8ad..2583566b 100644 --- a/datapath/vport-netdev.c +++ b/datapath/vport-netdev.c @@ -22,12 +22,15 @@ #include "vport-internal_dev.h" #include "vport-netdev.h" -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) && \ + !defined(HAVE_VLAN_BUG_WORKAROUND) #include static int vlan_tso __read_mostly = 0; module_param(vlan_tso, int, 0644); MODULE_PARM_DESC(vlan_tso, "Enable TSO for VLAN packets"); +#else +#define vlan_tso true #endif /* If the native device stats aren't 64 bit use the vport stats tracking instead. */ @@ -266,6 +269,19 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) vport_receive(vport, skb); } +static bool dev_supports_vlan_tx(struct net_device *dev) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37) + /* Software fallback means every device supports vlan_tci on TX. */ + return true; +#elif defined(HAVE_VLAN_BUG_WORKAROUND) + return dev->features & NETIF_F_HW_VLAN_TX; +#else + /* Assume that the driver is buggy. 
*/ + return false; +#endif +} + static int netdev_send(struct vport *vport, struct sk_buff *skb) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); @@ -274,8 +290,7 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) skb->dev = netdev_vport->dev; forward_ip_summed(skb); -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) - if (vlan_tx_tag_present(skb)) { + if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) { int err; int features = 0; @@ -339,7 +354,6 @@ tag: return 0; vlan_set_tci(skb, 0); } -#endif len = skb->len; dev_queue_xmit(skb); diff --git a/utilities/automake.mk b/utilities/automake.mk index 984b47a9..b267c02f 100644 --- a/utilities/automake.mk +++ b/utilities/automake.mk @@ -32,7 +32,9 @@ EXTRA_DIST += \ utilities/ovs-save \ utilities/ovs-tcpundump.1.in \ utilities/ovs-tcpundump.in \ + utilities/ovs-vlan-bugs.man \ utilities/ovs-vlan-test.in \ + utilities/ovs-vlan-bug-workaround.8.in \ utilities/ovs-vlan-test.8.in \ utilities/ovs-vsctl.8.in DISTCLEANFILES += \ @@ -65,6 +67,7 @@ man_MANS += \ utilities/ovs-pcap.1 \ utilities/ovs-pki.8 \ utilities/ovs-tcpundump.1 \ + utilities/ovs-vlan-bug-workaround.8 \ utilities/ovs-vlan-test.8 \ utilities/ovs-vsctl.8 @@ -94,6 +97,10 @@ utilities_ovs_vsctl_SOURCES = utilities/ovs-vsctl.c vswitchd/vswitch-idl.c utilities_ovs_vsctl_LDADD = lib/libopenvswitch.a $(SSL_LIBS) if HAVE_NETLINK +sbin_PROGRAMS += utilities/ovs-vlan-bug-workaround +utilities_ovs_vlan_bug_workaround_SOURCES = utilities/ovs-vlan-bug-workaround.c +utilities_ovs_vlan_bug_workaround_LDADD = lib/libopenvswitch.a + noinst_PROGRAMS += utilities/nlmon utilities_nlmon_SOURCES = utilities/nlmon.c utilities_nlmon_LDADD = lib/libopenvswitch.a diff --git a/utilities/ovs-vlan-bug-workaround.8.in b/utilities/ovs-vlan-bug-workaround.8.in new file mode 100644 index 00000000..d05fe938 --- /dev/null +++ b/utilities/ovs-vlan-bug-workaround.8.in @@ -0,0 +1,90 @@ +.\" -*- nroff -*- +.de IQ +. br +. ns +. IP "\\$1" +.. 
+.TH ovs\-vlan\-bug\-workaround 8 "February 2011" "Open vSwitch" "Open vSwitch Manual" +.ds PN ovs\-vlan\-bug\-workaround +. +.SH NAME +ovs\-vlan\-bug\-workaround \- utility for configuring Linux VLAN driver bug workaround +. +.SH SYNOPSIS +\fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBon\fR +.br +\fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBoff\fR +.br +\fBovs\-vlan\-bug\-workaround \-\-help +.br +\fBovs\-vlan\-bug\-workaround \-\-version +.SH DESCRIPTION +. +.PP +Some Linux network drivers support a feature called ``VLAN +acceleration''. VLAN acceleration is associated with a data structure +called a \fBvlan_group\fR that is, abstractly, a dictionary that maps +from a VLAN ID (in the range 0 to 4095) to a VLAN device, that is, a +Linux network device associated with a particular VLAN, +e.g. \fBeth0.9\fR for VLAN 9 on \fBeth0\fR. +.PP +Some drivers that support VLAN acceleration have bugs that fall +roughly into the categories listed below. \fBovs\-vlan\-test\fR(8) +can test for these driver bugs. +.so utilities/ovs-vlan-bugs.man +.PP +.PP +The correct long term solution is to fix these driver bugs. +.PP +For now, \fBovs\-vlan\-bug\-workaround\fR can enable a special-purpose +workaround for devices with buggy VLAN acceleration. A kernel patch +must be applied for this workaround to work. +.PP +Use the command \fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBon\fR to +enable the VLAN driver bug workaround for network device \fInetdev\fR. +Use the command \fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBoff\fR to +disable the VLAN driver bug workaround for network device \fInetdev\fR. +.SH "DRIVER DETAILS" +.PP +The following drivers in Linux version +2.6.32.12-0.7.1.xs1.0.0.311.170586 implement VLAN acceleration and are +relevant to Open vSwitch on XenServer. We have not tested any version +of most of these drivers, so we do not know whether they have a VLAN +problem that needs to be fixed. 
The drivers are listed by the name +that they report in, e.g., \fBethtool \-i\fR output; in a few cases +this differs slightly from the name of the module's \fB.ko\fR file: +. +.nf +.ta T 1i +\fB8139cp acenic amd8111e atl1c ATL1E atl1 +atl2 be2net bna bnx2 bnx2x cnic +cxgb cxgb3 e1000 e1000e enic forcedeth +igb igbvf ixgb ixgbe jme ml4x_core +ns83820 qlge r8169 S2IO sky2 starfire +tehuti tg3 typhoon via-velocity vxge +.fi +.PP +The following drivers use \fBvlan_group\fR but are irrelevant to Open +vSwitch on XenServer: +.IP "\fBbonding\fR" +Not used with Open vSwitch on XenServer. +.IP "\fBgianfar\fR" +Not shipped with XenServer. A FreeScale CPU-integrated device. +.IP "\fBehea\fR" +Cannot be built on x86. IBM Power architecture only. +.IP "\fBstmmac\fR" +Cannot be built on x86. SH4 architecture only. +.IP "\fBvmxnet3\fR" +Not shipped with XenServer. For use inside VMware VMs only. +. +.SH OPTIONS +. +.so lib/common.man +. +.SH BUGS +. +Obviously. +. +.SH "SEE ALSO" +. +.BR ovs\-vlan\-test (8). diff --git a/utilities/ovs-vlan-bug-workaround.c b/utilities/ovs-vlan-bug-workaround.c new file mode 100644 index 00000000..54316ddc --- /dev/null +++ b/utilities/ovs-vlan-bug-workaround.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2011 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include <getopt.h>
+#include <linux/if_vlan.h>
+#include <linux/sockios.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "command-line.h"
+#include "util.h"
+
+#define ADD_ALL_VLANS_CMD 10
+#define DEL_ALL_VLANS_CMD 11
+
+static void usage(void);
+static void parse_options(int argc, char *argv[]);
+
+int
+main(int argc, char *argv[])
+{
+    struct vlan_ioctl_args vlan_args;
+    const char *netdev, *setting;
+    int fd;
+
+    set_program_name(argv[0]);
+
+    parse_options(argc, argv);
+    if (argc - optind != 2) {
+        ovs_fatal(0, "exactly two non-option arguments are required "
+                  "(use --help for help)");
+    }
+
+    memset(&vlan_args, 0, sizeof vlan_args);
+
+    /* Get command. */
+    setting = argv[optind + 1];
+    if (!strcmp(setting, "on")) {
+        vlan_args.cmd = ADD_ALL_VLANS_CMD;
+    } else if (!strcmp(setting, "off")) {
+        vlan_args.cmd = DEL_ALL_VLANS_CMD;
+    } else {
+        ovs_fatal(0, "second command line argument must be \"on\" or \"off\" "
+                  "(not \"%s\")", setting);
+    }
+
+    /* Get network device name. */
+    netdev = argv[optind];
+    if (strlen(netdev) >= IFNAMSIZ) {
+        ovs_fatal(0, "%s: network device name too long", netdev);
+    }
+    strcpy(vlan_args.device1, netdev);
+
+    /* Execute operation. */
+    fd = socket(AF_INET, SOCK_STREAM, 0);
+    if (fd < 0) {
+        ovs_fatal(errno, "socket creation failed");
+    }
+    if (ioctl(fd, SIOCSIFVLAN, &vlan_args) < 0) {
+        if (errno == ENOPKG) {
+            ovs_fatal(0, "operation failed (8021q module not loaded)");
+        } else if (errno == EOPNOTSUPP) {
+            ovs_fatal(0, "operation failed (kernel does not support the "
+                      "VLAN bug workaround)");
+        } else {
+            ovs_fatal(errno, "operation failed");
+        }
+    }
+    close(fd);
+
+    return 0;
+}
+
+static void
+usage(void)
+{
+    printf("\
+%s, for enabling or disabling the kernel VLAN bug workaround\n\
+usage: %s NETDEV SETTING\n\
+where NETDEV is a network device (e.g. \"eth0\")\n\
+  and SETTING is \"on\" to enable the workaround or \"off\" to disable it.\n\
+\n\
+Options:\n\
+  -h, --help         Print this helpful information\n\
+  -V, --version      Display version information\n",
+           program_name, program_name);
+    exit(EXIT_SUCCESS);
+}
+
+static void
+parse_options(int argc, char *argv[])
+{
+    static const struct option long_options[] = {
+        {"help", no_argument, NULL, 'h'},
+        {"version", no_argument, NULL, 'V'},
+        {0, 0, 0, 0},
+    };
+    char *short_options = long_options_to_short_options(long_options);
+
+    for (;;) {
+        int option;
+
+        option = getopt_long(argc, argv, short_options, long_options, NULL);
+        if (option == -1) {
+            break;
+        }
+        switch (option) {
+        case 'h':
+            usage();
+            break;
+
+        case 'V':
+            OVS_PRINT_VERSION(0, 0);
+            exit(EXIT_SUCCESS);
+
+        case '?':
+            exit(EXIT_FAILURE);
+
+        default:
+            NOT_REACHED();
+        }
+    }
+    free(short_options);
+}
diff --git a/utilities/ovs-vlan-bugs.man b/utilities/ovs-vlan-bugs.man
new file mode 100644
index 00000000..bdca8fcc
--- /dev/null
+++ b/utilities/ovs-vlan-bugs.man
@@ -0,0 +1,17 @@
+.IP \(bu
+When NICs use VLAN stripping on receive they must pass a pointer to a
+\fBvlan_group\fR when reporting the stripped tag to the networking
+core. If no \fBvlan_group\fR is in use then some drivers just drop
+the extracted tag. Drivers are supposed to only enable stripping if a
+\fBvlan_group\fR is registered but not all of them do that.
+.
+.IP \(bu
+Some drivers size their receive buffers based on whether a
+\fBvlan_group\fR is enabled, meaning that a maximum size packet with a
+VLAN tag will not fit if no \fBvlan_group\fR is configured.
+.
+.IP \(bu
+On transmit, some drivers expect that VLAN acceleration will be used
+if it is available, which can only be done if a \fBvlan_group\fR is
+configured. In these cases, the driver may fail to parse the packet
+and correctly setup checksum offloading or TSO.
diff --git a/utilities/ovs-vlan-test.8.in b/utilities/ovs-vlan-test.8.in index fbf1552f..602d785d 100644 --- a/utilities/ovs-vlan-test.8.in +++ b/utilities/ovs-vlan-test.8.in @@ -18,23 +18,7 @@ client mode connecting to an \fBovs\-vlan\-test\fR server. \fBovs\-vlan\-test\fR will display "OK" if it did not detect problems. .PP Some examples of the types of problems that may be encountered are: -.IP \(bu -When NICs use vlan stripping on receive they must pass a pointer to -a vlan group when reporting the stripped tag to the networking core. -If there is no vlan group in use then some drivers just drop the -extracted tag. Drivers are supposed to only enable stripping if a -vlan group is registered but not all of them do that. -. -.IP \(bu -Some drivers size their receive buffers based on whether a vlan -group is enabled, meaning that a maximum size packet with a vlan tag -will not fit if a vlan group is not configured. -. -.IP \(bu -On transmit some drivers expect that vlan acceleration will be used -if it is available (which can only be done if a vlan group is -configured). In these cases, the driver may fail to parse the packet -and correctly setup checksum offloading and/or TSO. +.so utilities/ovs-vlan-bugs.man . 
.SS "Client Mode" An \fBovs\-vlan\-test\fR client may be run on a host to check for VLAN diff --git a/xenserver/openvswitch-xen.spec b/xenserver/openvswitch-xen.spec index ab06a06b..7f2cabd1 100644 --- a/xenserver/openvswitch-xen.spec +++ b/xenserver/openvswitch-xen.spec @@ -349,6 +349,7 @@ fi /usr/share/openvswitch/scripts/xen-bugtool-tc-class-show /usr/share/openvswitch/scripts/ovs-save /usr/share/openvswitch/vswitch.ovsschema +/usr/sbin/ovs-vlan-bug-workaround /usr/sbin/ovs-vswitchd /usr/sbin/ovsdb-server /usr/bin/ovs-appctl @@ -371,6 +372,7 @@ fi /usr/share/man/man8/ovs-parse-leaks.8.gz /usr/share/man/man1/ovs-pcap.1.gz /usr/share/man/man1/ovs-tcpundump.1.gz +/usr/share/man/man8/ovs-vlan-bug-workaround.8.gz /usr/share/man/man8/ovs-vlan-test.8.gz /usr/share/man/man8/ovs-vsctl.8.gz /usr/share/man/man8/ovs-vswitchd.8.gz diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigure.py b/xenserver/opt_xensource_libexec_InterfaceReconfigure.py index 0fd79e69..7a2fe4cb 100644 --- a/xenserver/opt_xensource_libexec_InterfaceReconfigure.py +++ b/xenserver/opt_xensource_libexec_InterfaceReconfigure.py @@ -284,6 +284,7 @@ _ETHTOOL_OTHERCONFIG_ATTRS = ['ethtool-%s' % x for x in 'autoneg', 'speed', 'dup _PIF_OTHERCONFIG_ATTRS = [ 'domain', 'peerdns', 'defaultroute', 'mtu', 'static-routes' ] + \ [ 'bond-%s' % x for x in 'mode', 'miimon', 'downdelay', 'updelay', 'use_carrier', 'hashing-algorithm' ] + \ + [ 'vlan-bug-workaround' ] + \ _ETHTOOL_OTHERCONFIG_ATTRS _PIF_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml), diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py index 6c9e3fa8..697df5f1 100644 --- a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py +++ b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py @@ -1,4 +1,4 @@ -# Copyright (c) 2008,2009 Citrix Systems, Inc. +# Copyright (c) 2008,2009,2011 Citrix Systems, Inc. 
# Copyright (c) 2009,2010,2011 Nicira Networks. # # This program is free software; you can redistribute it and/or modify @@ -38,6 +38,49 @@ def netdev_up(netdev, mtu=None): run_command(["/sbin/ifconfig", netdev, 'up'] + mtu) +# This is a list of drivers that do support VLAN tx or rx acceleration, but +# to which the VLAN bug workaround should not be applied. This could be +# because these are known-good drivers (that is, they do not have any of +# the bugs that the workaround avoids) or because the VLAN bug workaround +# will not work for them and may cause other problems. +# +# This is a very short list because few drivers have been tested. +NO_VLAN_WORKAROUND_DRIVERS = ( + "bonding", +) +def netdev_get_driver_name(netdev): + """Returns the name of the driver for network device 'netdev'""" + symlink = '%s/sys/class/net/%s/device/driver' % (root_prefix(), netdev) + try: + target = os.readlink(symlink) + except OSError, e: + log("%s: could not read netdev's driver name (%s)" % (netdev, e)) + return None + + slash = target.rfind('/') + if slash < 0: + log("target %s of symbolic link %s does not contain slash" + % (target, symlink)) + return None + + return target[slash + 1:] + +def netdev_get_features(netdev): + """Returns the features bitmap for the driver for 'netdev'. 
+ The features bitmap is a set of NETIF_F_ flags supported by its driver.""" + try: + features = open("%s/sys/class/net/%s/features" % (root_prefix(), netdev)).read().strip() + return int(features, 0) + except: + return 0 # interface prolly doesn't exist + +def netdev_has_vlan_accel(netdev): + """Returns True if 'netdev' supports VLAN acceleration, False otherwise.""" + NETIF_F_HW_VLAN_TX = 128 + NETIF_F_HW_VLAN_RX = 256 + NETIF_F_VLAN = NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX + return (netdev_get_features(netdev) & NETIF_F_VLAN) != 0 + # # PIF miscellanea # @@ -545,6 +588,20 @@ class DatapathVswitch(Datapath): if len(offload): run_command(['/sbin/ethtool', '-K', dev] + offload) + driver = netdev_get_driver_name(dev) + if 'vlan-bug-workaround' in oc: + vlan_bug_workaround = oc['vlan-bug-workaround'] == 'true' + elif driver in NO_VLAN_WORKAROUND_DRIVERS: + vlan_bug_workaround = False + else: + vlan_bug_workaround = netdev_has_vlan_accel(dev) + + if vlan_bug_workaround: + setting = 'on' + else: + setting = 'off' + run_command(['/usr/sbin/ovs-vlan-bug-workaround', dev, setting]) + datapath_modify_config(self._vsctl_argv) def post(self): -- 2.30.2