datapath: Use vlan acceleration for vlan operations.
author Jesse Gross <jesse@nicira.com>
Thu, 30 Dec 2010 06:13:15 +0000 (22:13 -0800)
committer Jesse Gross <jesse@nicira.com>
Mon, 7 Feb 2011 21:49:01 +0000 (13:49 -0800)
Using the kernel vlan acceleration has a number of benefits:
it enables hardware tagging, allows usage of TSO and checksum
offloading, and is generally easier to manipulate.  This switches
the vlan actions to use the skb->vlan_tci field for any necessary
changes.  In places that do not support vlan acceleration in a way
that we can use (in particular, kernels before 2.6.37), we perform
any necessary conversions, such as tagging and GSO, before the
packet leaves Open vSwitch.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
datapath/actions.c
datapath/datapath.c
datapath/flow.c
datapath/tunnel.c
datapath/vlan.h
datapath/vport-internal_dev.c
datapath/vport-netdev.c

index 3223c65bd66b75680caf790ac470ff9f498e6d58..8939377104d6e54e0f9820cad63d9146ce1217c8 100644 (file)
@@ -24,6 +24,7 @@
 #include "checksum.h"
 #include "datapath.h"
 #include "openvswitch/datapath-protocol.h"
+#include "vlan.h"
 #include "vport.h"
 
 static int do_execute_actions(struct datapath *, struct sk_buff *,
@@ -52,20 +53,28 @@ static struct sk_buff *make_writable(struct sk_buff *skb, unsigned min_headroom)
        return NULL;
 }
 
-static struct sk_buff *vlan_pull_tag(struct sk_buff *skb)
+static struct sk_buff *strip_vlan(struct sk_buff *skb)
 {
-       struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
        struct ethhdr *eh;
 
-       /* Verify we were given a vlan packet */
-       if (vh->h_vlan_proto != htons(ETH_P_8021Q) || skb->len < VLAN_ETH_HLEN)
+       if (vlan_tx_tag_present(skb)) {
+               vlan_set_tci(skb, 0);
                return skb;
+       }
+
+       if (unlikely(vlan_eth_hdr(skb)->h_vlan_proto != htons(ETH_P_8021Q) ||
+           skb->len < VLAN_ETH_HLEN))
+               return skb;
+
+       skb = make_writable(skb, 0);
+       if (unlikely(!skb))
+               return NULL;
 
        if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
                skb->csum = csum_sub(skb->csum, csum_partial(skb->data
                                        + ETH_HLEN, VLAN_HLEN, 0));
 
-       memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN);
+       memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
 
        eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
 
@@ -80,133 +89,32 @@ static struct sk_buff *modify_vlan_tci(struct datapath *dp, struct sk_buff *skb,
                                       const struct nlattr *a, u32 actions_len)
 {
        __be16 tci = nla_get_be16(a);
+       struct vlan_ethhdr *vh;
+       __be16 old_tci;
 
-       skb = make_writable(skb, VLAN_HLEN);
-       if (!skb)
-               return ERR_PTR(-ENOMEM);
+       if (vlan_tx_tag_present(skb) || skb->protocol != htons(ETH_P_8021Q))
+               return __vlan_hwaccel_put_tag(skb, ntohs(tci));
 
-       if (skb->protocol == htons(ETH_P_8021Q)) {
-               /* Modify vlan id, but maintain other TCI values */
-               struct vlan_ethhdr *vh;
-               __be16 old_tci;
-
-               if (skb->len < VLAN_ETH_HLEN)
-                       return skb;
-
-               vh = vlan_eth_hdr(skb);
-               old_tci = vh->h_vlan_TCI;
-
-               vh->h_vlan_TCI = tci;
-
-               if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
-                       __be16 diff[] = { ~old_tci, vh->h_vlan_TCI };
-
-                       skb->csum = ~csum_partial((char *)diff, sizeof(diff),
-                                               ~skb->csum);
-               }
-       } else {
-               int err;
-
-               /* Add vlan header */
+       skb = make_writable(skb, 0);
+       if (unlikely(!skb))
+               return NULL;
 
-               /* Set up checksumming pointers for checksum-deferred packets
-                * on Xen.  Otherwise, dev_queue_xmit() will try to do this
-                * when we send the packet out on the wire, and it will fail at
-                * that point because skb_checksum_setup() will not look inside
-                * an 802.1Q header. */
-               err = vswitch_skb_checksum_setup(skb);
-               if (unlikely(err)) {
-                       kfree_skb(skb);
-                       return ERR_PTR(err);
-               }
+       if (unlikely(skb->len < VLAN_ETH_HLEN))
+               return skb;
 
-               /* GSO is not implemented for packets with an 802.1Q header, so
-                * we have to do segmentation before we add that header.
-                *
-                * GSO does work with hardware-accelerated VLAN tagging, but we
-                * can't use hardware-accelerated VLAN tagging since it
-                * requires the device to have a VLAN group configured (with
-                * e.g. vconfig(8)) and we don't do that.
-                *
-                * Having to do this here may be a performance loss, since we
-                * can't take advantage of TSO hardware support, although it
-                * does not make a measurable network performance difference
-                * for 1G Ethernet.  Fixing that would require patching the
-                * kernel (either to add GSO support to the VLAN protocol or to
-                * support hardware-accelerated VLAN tagging without VLAN
-                * groups configured). */
-               if (skb_is_gso(skb)) {
-                       const struct nlattr *actions_left;
-                       int actions_len_left;
-                       struct sk_buff *segs;
-
-                       segs = skb_gso_segment(skb, 0);
-                       kfree_skb(skb);
-                       if (IS_ERR(segs))
-                               return ERR_CAST(segs);
-
-                       actions_len_left = actions_len;
-                       actions_left = nla_next(a, &actions_len_left);
-
-                       do {
-                               struct sk_buff *nskb = segs->next;
-
-                               segs->next = NULL;
-
-                               /* GSO can change the checksum type so update.*/
-                               compute_ip_summed(segs, true);
-
-                               segs = __vlan_put_tag(segs, ntohs(tci));
-                               err = -ENOMEM;
-                               if (segs) {
-                                       err = do_execute_actions(
-                                               dp, segs, key, actions_left,
-                                               actions_len_left);
-                               }
-
-                               if (unlikely(err)) {
-                                       while ((segs = nskb)) {
-                                               nskb = segs->next;
-                                               segs->next = NULL;
-                                               kfree_skb(segs);
-                                       }
-                                       return ERR_PTR(err);
-                               }
-
-                               segs = nskb;
-                       } while (segs->next);
-
-                       skb = segs;
-                       compute_ip_summed(skb, true);
-               }
+       vh = vlan_eth_hdr(skb);
 
-               /* The hardware-accelerated version of vlan_put_tag() works
-                * only for a device that has a VLAN group configured (with
-                * e.g. vconfig(8)), so call the software-only version
-                * __vlan_put_tag() directly instead.
-                */
-               skb = __vlan_put_tag(skb, ntohs(tci));
-               if (!skb)
-                       return ERR_PTR(-ENOMEM);
+       old_tci = vh->h_vlan_TCI;
+       vh->h_vlan_TCI = tci;
 
-               /* GSO doesn't fix up the hardware computed checksum so this
-                * will only be hit in the non-GSO case. */
-               if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
-                       skb->csum = csum_add(skb->csum, csum_partial(skb->data
-                                               + ETH_HLEN, VLAN_HLEN, 0));
+       if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
+               __be16 diff[] = { ~old_tci, vh->h_vlan_TCI };
+               skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
        }
 
        return skb;
 }
 
-static struct sk_buff *strip_vlan(struct sk_buff *skb)
-{
-       skb = make_writable(skb, 0);
-       if (skb)
-               vlan_pull_tag(skb);
-       return skb;
-}
-
 static bool is_ip(struct sk_buff *skb, const struct sw_flow_key *key)
 {
        return (key->dl_type == htons(ETH_P_IP) &&
@@ -417,8 +325,6 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 
                case ODP_ACTION_ATTR_SET_DL_TCI:
                        skb = modify_vlan_tci(dp, skb, key, a, rem);
-                       if (IS_ERR(skb))
-                               return PTR_ERR(skb);
                        break;
 
                case ODP_ACTION_ATTR_STRIP_VLAN:
index ba32e37fae9dbb91797e7de4af6266b0724f5d12..c48dc9dfb5f42dcbe84346f99f89a8a08510199a 100644 (file)
@@ -483,6 +483,14 @@ static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
                nskb = skb->next;
                skb->next = NULL;
 
+               if (vlan_tx_tag_present(skb)) {
+                       skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+                       if (unlikely(!skb)) {
+                               err = -ENOMEM;
+                               goto err_kfree_skbs;
+                       }
+               }
+
                len = sizeof(struct odp_header);
                len += nla_total_size(skb->len);
                len += nla_total_size(FLOW_BUFSIZE);
index 735e147959edfa8f47933befaa72d897761a9f36..4b0e6cc310c4bdac563a616737f5d8bfa1853920 100644 (file)
@@ -34,6 +34,8 @@
 #include <net/ipv6.h>
 #include <net/ndisc.h>
 
+#include "vlan.h"
+
 static struct kmem_cache *flow_cache;
 static unsigned int hash_seed __read_mostly;
 
@@ -449,8 +451,12 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 
        /* dl_type, dl_vlan, dl_vlan_pcp. */
        __skb_pull(skb, 2 * ETH_ALEN);
-       if (eth->h_proto == htons(ETH_P_8021Q))
+
+       if (vlan_tx_tag_present(skb))
+               key->dl_tci = htons(vlan_get_tci(skb));
+       else if (eth->h_proto == htons(ETH_P_8021Q))
                parse_vlan(skb, key);
+
        key->dl_type = parse_ethertype(skb);
        skb_reset_network_header(skb);
        __skb_push(skb, skb->data - (unsigned char *)eth);
index 95492f76f07d7e51f527afe3338d6d51234a97c0..4cabd93c595741ddc04823d922dc74bd082afbf3 100644 (file)
@@ -33,6 +33,7 @@
 #include "datapath.h"
 #include "table.h"
 #include "tunnel.h"
+#include "vlan.h"
 #include "vport.h"
 #include "vport-generic.h"
 #include "vport-internal_dev.h"
@@ -439,6 +440,7 @@ void tnl_rcv(struct vport *vport, struct sk_buff *skb)
 
        ecn_decapsulate(skb);
        compute_ip_summed(skb, false);
+       vlan_set_tci(skb, 0);
 
        vport_receive(vport, skb);
 }
@@ -682,7 +684,8 @@ bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutab
 
                vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
                vh->h_vlan_encapsulated_proto = skb->protocol;
-       }
+       } else
+               vlan_set_tci(nskb, vlan_get_tci(skb));
        skb_reset_mac_header(nskb);
 
        /* Protocol */
@@ -720,17 +723,27 @@ static bool check_mtu(struct sk_buff *skb,
        int mtu = 0;
        unsigned int packet_length = skb->len - ETH_HLEN;
 
-       if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+       /* Allow for one level of tagging in the packet length. */
+       if (!vlan_tx_tag_present(skb) &&
+           eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
                packet_length -= VLAN_HLEN;
 
        if (pmtud) {
+               int vlan_header = 0;
+
                frag_off = htons(IP_DF);
 
+               /* The tag needs to go in packet regardless of where it
+                * currently is, so subtract it from the MTU.
+                */
+               if (vlan_tx_tag_present(skb) ||
+                   eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+                       vlan_header = VLAN_HLEN;
+
                mtu = dst_mtu(&rt_dst(rt))
                        - ETH_HLEN
                        - mutable->tunnel_hlen
-                       - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ?
-                               VLAN_HLEN : 0);
+                       - vlan_header;
        }
 
        if (skb->protocol == htons(ETH_P_IP)) {
@@ -1041,28 +1054,18 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb,
                goto error_free;
 
        min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
-                       + mutable->tunnel_hlen;
+                       + mutable->tunnel_hlen
+                       + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+       skb = check_headroom(skb, min_headroom);
+       if (IS_ERR(skb)) {
+               err = PTR_ERR(skb);
+               goto error;
+       }
 
        if (skb_is_gso(skb)) {
                struct sk_buff *nskb;
 
-               /*
-                * If we are doing GSO on a pskb it is better to make sure that
-                * the headroom is correct now.  We will only have to copy the
-                * portion in the linear data area and GSO will preserve
-                * headroom when it creates the segments.  This is particularly
-                * beneficial on Xen where we get a lot of GSO pskbs.
-                * Conversely, we avoid copying if it is just to get our own
-                * writable clone because GSO will do the copy for us.
-                */
-               if (skb_headroom(skb) < min_headroom) {
-                       skb = check_headroom(skb, min_headroom);
-                       if (IS_ERR(skb)) {
-                               err = PTR_ERR(skb);
-                               goto error;
-                       }
-               }
-
                nskb = skb_gso_segment(skb, 0);
                kfree_skb(skb);
                if (IS_ERR(nskb)) {
@@ -1071,32 +1074,23 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb,
                }
 
                skb = nskb;
-       } else {
-               skb = check_headroom(skb, min_headroom);
-               if (IS_ERR(skb)) {
-                       err = PTR_ERR(skb);
-                       goto error;
-               }
-
-               if (skb->ip_summed == CHECKSUM_PARTIAL) {
-                       /*
-                        * Pages aren't locked and could change at any time.
-                        * If this happens after we compute the checksum, the
-                        * checksum will be wrong.  We linearize now to avoid
-                        * this problem.
-                        */
-                       if (unlikely(need_linearize(skb))) {
-                               err = __skb_linearize(skb);
-                               if (unlikely(err))
-                                       goto error_free;
-                       }
-
-                       err = skb_checksum_help(skb);
+       } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               /* Pages aren't locked and could change at any time.
+                * If this happens after we compute the checksum, the
+                * checksum will be wrong.  We linearize now to avoid
+                * this problem.
+                */
+               if (unlikely(need_linearize(skb))) {
+                       err = __skb_linearize(skb);
                        if (unlikely(err))
                                goto error_free;
-               } else if (skb->ip_summed == CHECKSUM_COMPLETE)
-                       skb->ip_summed = CHECKSUM_NONE;
-       }
+               }
+
+               err = skb_checksum_help(skb);
+               if (unlikely(err))
+                       goto error_free;
+       } else if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->ip_summed = CHECKSUM_NONE;
 
        return skb;
 
@@ -1159,7 +1153,8 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
        u8 tos;
 
        /* Validate the protocol headers before we try to use them. */
-       if (skb->protocol == htons(ETH_P_8021Q)) {
+       if (skb->protocol == htons(ETH_P_8021Q) &&
+           !vlan_tx_tag_present(skb)) {
                if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
                        goto error_free;
 
@@ -1250,6 +1245,9 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
                struct sk_buff *next_skb = skb->next;
                skb->next = NULL;
 
+               if (unlikely(vlan_deaccel_tag(skb)))
+                       goto next;
+
                if (likely(cache)) {
                        skb_push(skb, cache->len);
                        memcpy(skb->data, get_cached_header(cache), cache->len);
index dc900183d4babe13e576986eac8b9e19c4a46d4c..02a62909f5b3a08726664dd2a53d13a6b3f3536f 100644 (file)
@@ -43,4 +43,18 @@ u16 vlan_tx_tag_get(struct sk_buff *skb);
 #define __vlan_hwaccel_put_tag rpl__vlan_hwaccel_put_tag
 struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, u16 vlan_tci);
 #endif /* NEED_VLAN_FIELD */
+
+static inline int vlan_deaccel_tag(struct sk_buff *skb)
+{
+       if (!vlan_tx_tag_present(skb))
+               return 0;
+
+       skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+       if (unlikely(!skb))
+               return -ENOMEM;
+
+       vlan_set_tci(skb, 0);
+       return 0;
+}
+
 #endif /* vlan.h */
index be2907454ebe5fe4870522036d1884f76326829f..0b91b345f0fe89b393f825f7e81949659e0cced6 100644 (file)
@@ -6,6 +6,7 @@
  * kernel, by Linus Torvalds and others.
  */
 
+#include <linux/if_vlan.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -230,8 +231,13 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
        struct net_device *netdev = netdev_vport_priv(vport)->dev;
        int len;
 
-       skb->dev = netdev;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+       if (unlikely(vlan_deaccel_tag(skb)))
+               return 0;
+#endif
+
        len = skb->len;
+       skb->dev = netdev;
        skb->pkt_type = PACKET_HOST;
        skb->protocol = eth_type_trans(skb, netdev);
 
index 85e0eb9a26a259b8ad662fa0e6717bd033230abb..3693004c670c52eb3835ab041ff604794892d7da 100644 (file)
@@ -261,10 +261,56 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 static int netdev_send(struct vport *vport, struct sk_buff *skb)
 {
        struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
-       int len = skb->len;
+       int len;
 
        skb->dev = netdev_vport->dev;
        forward_ip_summed(skb);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+       if (vlan_tx_tag_present(skb)) {
+               int err;
+
+               err = vswitch_skb_checksum_setup(skb);
+               if (unlikely(err)) {
+                       kfree_skb(skb);
+                       return 0;
+               }
+
+               if (skb_is_gso(skb)) {
+                       struct sk_buff *nskb;
+
+                       nskb = skb_gso_segment(skb, 0);
+                       kfree_skb(skb);
+                       skb = nskb;
+                       if (IS_ERR(skb))
+                               return 0;
+
+                       len = 0;
+                       do {
+                               nskb = skb->next;
+                               skb->next = NULL;
+
+                               skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+                               if (likely(skb)) {
+                                       len += skb->len;
+                                       vlan_set_tci(skb, 0);
+                                       dev_queue_xmit(skb);
+                               }
+
+                               skb = nskb;
+                       } while (skb);
+
+                       return len;
+               } else {
+                       skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+                       if (unlikely(!skb))
+                               return 0;
+                       vlan_set_tci(skb, 0);
+               }
+       }
+#endif
+
+       len = skb->len;
        dev_queue_xmit(skb);
 
        return len;