From 6ce39213456b27257acbaf146398dce26d1466b9 Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Wed, 29 Dec 2010 22:13:15 -0800
Subject: [PATCH] datapath: Use vlan acceleration for vlan operations.

Using the kernel vlan acceleration has a number of benefits: it
enables hardware tagging, allows use of TSO and checksum offloading,
and is generally easier to manipulate.  This switches the vlan actions
to use the skb->vlan_tci field for any necessary changes.  In places
that do not support vlan acceleration in a way that we can use (in
particular, kernels before 2.6.37) we perform any necessary
conversions, such as tagging and GSO, before the packet leaves Open
vSwitch.

Signed-off-by: Jesse Gross
Acked-by: Ben Pfaff
---
 datapath/actions.c            | 152 +++++++-----------------------
 datapath/datapath.c           |   8 ++
 datapath/flow.c               |   8 +-
 datapath/tunnel.c             |  92 ++++++++++----------
 datapath/vlan.h               |  14 ++++
 datapath/vport-internal_dev.c |   8 +-
 datapath/vport-netdev.c       |  48 ++++++++++-
 7 files changed, 157 insertions(+), 173 deletions(-)
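[Editor's note: not part of the commit message. The mechanism this patch
relies on throughout is keeping the VLAN tag "accelerated", i.e.
out-of-band in skb->vlan_tci, and materializing it as a real 802.1Q
header only when an skb is about to reach code that cannot see the
accelerated form (pre-2.6.37 transmit paths, userspace upcalls). The
sketch below mirrors the vlan_deaccel_tag() helper the patch adds to
datapath/vlan.h; vlan_tx_tag_present(), vlan_tx_tag_get(), and
__vlan_put_tag() are the stock kernel helpers of that era, while
vlan_set_tci() is the Open vSwitch compat accessor. The function name
is hypothetical.]

    #include <linux/if_vlan.h>
    #include <linux/skbuff.h>

    /* Push an out-of-band tag back into the packet data ("de-accelerate").
     * Returns 0 on success, -ENOMEM if the header could not be inserted. */
    static inline int deaccel_tag_sketch(struct sk_buff *skb)
    {
            if (!vlan_tx_tag_present(skb))
                    return 0;               /* no accelerated tag to push */

            /* Inserts the 802.1Q header after the Ethernet header; frees
             * the skb and returns NULL on allocation failure. */
            skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
            if (unlikely(!skb))
                    return -ENOMEM;

            vlan_set_tci(skb, 0);           /* tag is in-band now; don't tag twice */
            return 0;
    }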
diff --git a/datapath/actions.c b/datapath/actions.c
index 3223c65b..89393771 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -24,6 +24,7 @@
 #include "checksum.h"
 #include "datapath.h"
 #include "openvswitch/datapath-protocol.h"
+#include "vlan.h"
 #include "vport.h"
 
 static int do_execute_actions(struct datapath *, struct sk_buff *,
@@ -52,20 +53,28 @@ static struct sk_buff *make_writable(struct sk_buff *skb, unsigned min_headroom)
 	return NULL;
 }
 
-static struct sk_buff *vlan_pull_tag(struct sk_buff *skb)
+static struct sk_buff *strip_vlan(struct sk_buff *skb)
 {
-	struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
 	struct ethhdr *eh;
 
-	/* Verify we were given a vlan packet */
-	if (vh->h_vlan_proto != htons(ETH_P_8021Q) || skb->len < VLAN_ETH_HLEN)
+	if (vlan_tx_tag_present(skb)) {
+		vlan_set_tci(skb, 0);
 		return skb;
+	}
+
+	if (unlikely(vlan_eth_hdr(skb)->h_vlan_proto != htons(ETH_P_8021Q) ||
+		     skb->len < VLAN_ETH_HLEN))
+		return skb;
+
+	skb = make_writable(skb, 0);
+	if (unlikely(!skb))
+		return NULL;
 
 	if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
 		skb->csum = csum_sub(skb->csum, csum_partial(skb->data
 					+ ETH_HLEN, VLAN_HLEN, 0));
 
-	memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN);
+	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
 
 	eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
 
@@ -80,133 +89,32 @@ static struct sk_buff *modify_vlan_tci(struct datapath *dp, struct sk_buff *skb,
 				       const struct nlattr *a, u32 actions_len)
 {
 	__be16 tci = nla_get_be16(a);
+	struct vlan_ethhdr *vh;
+	__be16 old_tci;
 
-	skb = make_writable(skb, VLAN_HLEN);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
+	if (vlan_tx_tag_present(skb) || skb->protocol != htons(ETH_P_8021Q))
+		return __vlan_hwaccel_put_tag(skb, ntohs(tci));
 
-	if (skb->protocol == htons(ETH_P_8021Q)) {
-		/* Modify vlan id, but maintain other TCI values */
-		struct vlan_ethhdr *vh;
-		__be16 old_tci;
-
-		if (skb->len < VLAN_ETH_HLEN)
-			return skb;
-
-		vh = vlan_eth_hdr(skb);
-		old_tci = vh->h_vlan_TCI;
-
-		vh->h_vlan_TCI = tci;
-
-		if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
-			__be16 diff[] = { ~old_tci, vh->h_vlan_TCI };
-
-			skb->csum = ~csum_partial((char *)diff, sizeof(diff),
-						  ~skb->csum);
-		}
-	} else {
-		int err;
-
-		/* Add vlan header */
+	skb = make_writable(skb, 0);
+	if (unlikely(!skb))
+		return NULL;
 
-		/* Set up checksumming pointers for checksum-deferred packets
-		 * on Xen.  Otherwise, dev_queue_xmit() will try to do this
-		 * when we send the packet out on the wire, and it will fail at
-		 * that point because skb_checksum_setup() will not look inside
-		 * an 802.1Q header. */
-		err = vswitch_skb_checksum_setup(skb);
-		if (unlikely(err)) {
-			kfree_skb(skb);
-			return ERR_PTR(err);
-		}
+	if (unlikely(skb->len < VLAN_ETH_HLEN))
+		return skb;
 
-		/* GSO is not implemented for packets with an 802.1Q header, so
-		 * we have to do segmentation before we add that header.
-		 *
-		 * GSO does work with hardware-accelerated VLAN tagging, but we
-		 * can't use hardware-accelerated VLAN tagging since it
-		 * requires the device to have a VLAN group configured (with
-		 * e.g. vconfig(8)) and we don't do that.
-		 *
-		 * Having to do this here may be a performance loss, since we
-		 * can't take advantage of TSO hardware support, although it
-		 * does not make a measurable network performance difference
-		 * for 1G Ethernet.  Fixing that would require patching the
-		 * kernel (either to add GSO support to the VLAN protocol or to
-		 * support hardware-accelerated VLAN tagging without VLAN
-		 * groups configured). */
-		if (skb_is_gso(skb)) {
-			const struct nlattr *actions_left;
-			int actions_len_left;
-			struct sk_buff *segs;
-
-			segs = skb_gso_segment(skb, 0);
-			kfree_skb(skb);
-			if (IS_ERR(segs))
-				return ERR_CAST(segs);
-
-			actions_len_left = actions_len;
-			actions_left = nla_next(a, &actions_len_left);
-
-			do {
-				struct sk_buff *nskb = segs->next;
-
-				segs->next = NULL;
-
-				/* GSO can change the checksum type so update.*/
-				compute_ip_summed(segs, true);
-
-				segs = __vlan_put_tag(segs, ntohs(tci));
-				err = -ENOMEM;
-				if (segs) {
-					err = do_execute_actions(
-						dp, segs, key, actions_left,
-						actions_len_left);
-				}
-
-				if (unlikely(err)) {
-					while ((segs = nskb)) {
-						nskb = segs->next;
-						segs->next = NULL;
-						kfree_skb(segs);
-					}
-					return ERR_PTR(err);
-				}
-
-				segs = nskb;
-			} while (segs->next);
-
-			skb = segs;
-			compute_ip_summed(skb, true);
-		}
+	vh = vlan_eth_hdr(skb);
 
-		/* The hardware-accelerated version of vlan_put_tag() works
-		 * only for a device that has a VLAN group configured (with
-		 * e.g. vconfig(8)), so call the software-only version
-		 * __vlan_put_tag() directly instead.
-		 */
-		skb = __vlan_put_tag(skb, ntohs(tci));
-		if (!skb)
-			return ERR_PTR(-ENOMEM);
+	old_tci = vh->h_vlan_TCI;
+	vh->h_vlan_TCI = tci;
 
-		/* GSO doesn't fix up the hardware computed checksum so this
-		 * will only be hit in the non-GSO case. */
-		if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
-			skb->csum = csum_add(skb->csum, csum_partial(skb->data
-					+ ETH_HLEN, VLAN_HLEN, 0));
+	if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
+		__be16 diff[] = { ~old_tci, vh->h_vlan_TCI };
+		skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
 	}
 
 	return skb;
 }
 
-static struct sk_buff *strip_vlan(struct sk_buff *skb)
-{
-	skb = make_writable(skb, 0);
-	if (skb)
-		vlan_pull_tag(skb);
-	return skb;
-}
-
 static bool is_ip(struct sk_buff *skb, const struct sw_flow_key *key)
 {
 	return (key->dl_type == htons(ETH_P_IP) &&
@@ -417,8 +325,6 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 
 		case ODP_ACTION_ATTR_SET_DL_TCI:
 			skb = modify_vlan_tci(dp, skb, key, a, rem);
-			if (IS_ERR(skb))
-				return PTR_ERR(skb);
 			break;
 
 		case ODP_ACTION_ATTR_STRIP_VLAN:
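[Editor's note: not part of the patch. The modify_vlan_tci() hunk above
updates a CHECKSUM_COMPLETE value incrementally instead of recomputing
it. skb->csum is the ones'-complement sum of the packet bytes, so
swapping one 16-bit field only requires folding out the old value and
folding in the new one (the RFC 1624 update HC' = ~(~HC + ~m + m')).
A standalone sketch under the same assumptions as the patch; the
function name is hypothetical:]

    #include <linux/skbuff.h>
    #include <net/checksum.h>

    /* Fix up skb->csum after a 16-bit field covered by the checksum
     * changed from old_tci to new_tci.  csum_partial() folds both
     * halves of { ~old, new } into the running sum in one pass. */
    static inline void tci_csum_fixup_sketch(struct sk_buff *skb,
                                             __be16 old_tci, __be16 new_tci)
    {
            __be16 diff[] = { ~old_tci, new_tci };

            skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
    }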
diff --git a/datapath/datapath.c b/datapath/datapath.c
index ba32e37f..c48dc9df 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -483,6 +483,14 @@ static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
 		nskb = skb->next;
 		skb->next = NULL;
 
+		if (vlan_tx_tag_present(skb)) {
+			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+			if (unlikely(!skb)) {
+				err = -ENOMEM;
+				goto err_kfree_skbs;
+			}
+		}
+
 		len = sizeof(struct odp_header);
 		len += nla_total_size(skb->len);
 		len += nla_total_size(FLOW_BUFSIZE);
diff --git a/datapath/flow.c b/datapath/flow.c
index 735e1479..4b0e6cc3 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -34,6 +34,8 @@
 #include
 #include
 
+#include "vlan.h"
+
 static struct kmem_cache *flow_cache;
 static unsigned int hash_seed __read_mostly;
 
@@ -449,8 +451,12 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 	/* dl_type, dl_vlan, dl_vlan_pcp. */
 	__skb_pull(skb, 2 * ETH_ALEN);
-	if (eth->h_proto == htons(ETH_P_8021Q))
+
+	if (vlan_tx_tag_present(skb))
+		key->dl_tci = htons(vlan_get_tci(skb));
+	else if (eth->h_proto == htons(ETH_P_8021Q))
 		parse_vlan(skb, key);
+
 	key->dl_type = parse_ethertype(skb);
 	skb_reset_network_header(skb);
 	__skb_push(skb, skb->data - (unsigned char *)eth);
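[Editor's note: not part of the patch. After this change flow_extract()
must look for the 802.1Q TCI in two places: the accelerated field or
the packet data. A condensed sketch of that decision; vlan_get_tci()
is the Open vSwitch compat accessor from datapath/vlan.h (it reads
skb->vlan_tci on recent kernels), vlan_eth_hdr() assumes the mac header
has been set, and the function name is hypothetical:]

    #include <linux/if_vlan.h>
    #include <linux/skbuff.h>
    #include "vlan.h"   /* OVS compat: vlan_get_tci() */

    /* Return the TCI that belongs in the flow key, in network byte order. */
    static __be16 flow_tci_sketch(struct sk_buff *skb, const struct ethhdr *eth)
    {
            if (vlan_tx_tag_present(skb))                   /* out-of-band tag */
                    return htons(vlan_get_tci(skb));
            if (eth->h_proto == htons(ETH_P_8021Q))         /* in-band tag */
                    return vlan_eth_hdr(skb)->h_vlan_TCI;
            return 0;                                       /* untagged */
    }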
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 95492f76..4cabd93c 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -33,6 +33,7 @@
 #include "datapath.h"
 #include "table.h"
 #include "tunnel.h"
+#include "vlan.h"
 #include "vport.h"
 #include "vport-generic.h"
 #include "vport-internal_dev.h"
@@ -439,6 +440,7 @@ void tnl_rcv(struct vport *vport, struct sk_buff *skb)
 	ecn_decapsulate(skb);
 
 	compute_ip_summed(skb, false);
+	vlan_set_tci(skb, 0);
 
 	vport_receive(vport, skb);
 }
@@ -682,7 +684,8 @@ bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable,
 
 		vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
 		vh->h_vlan_encapsulated_proto = skb->protocol;
-	}
+	} else
+		vlan_set_tci(nskb, vlan_get_tci(skb));
 	skb_reset_mac_header(nskb);
 
 	/* Protocol */
@@ -720,17 +723,27 @@ static bool check_mtu(struct sk_buff *skb,
 	int mtu = 0;
 	unsigned int packet_length = skb->len - ETH_HLEN;
 
-	if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+	/* Allow for one level of tagging in the packet length. */
+	if (!vlan_tx_tag_present(skb) &&
+	    eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
 		packet_length -= VLAN_HLEN;
 
 	if (pmtud) {
+		int vlan_header = 0;
+
 		frag_off = htons(IP_DF);
 
+		/* The tag needs to go in the packet regardless of where it
+		 * currently is, so subtract it from the MTU.
+		 */
+		if (vlan_tx_tag_present(skb) ||
+		    eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+			vlan_header = VLAN_HLEN;
+
 		mtu = dst_mtu(&rt_dst(rt))
 			- ETH_HLEN
 			- mutable->tunnel_hlen
-			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ?
-			   VLAN_HLEN : 0);
+			- vlan_header;
 	}
 
 	if (skb->protocol == htons(ETH_P_IP)) {
@@ -1041,28 +1054,18 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb,
 		goto error_free;
 
 	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
-			+ mutable->tunnel_hlen;
+			+ mutable->tunnel_hlen
+			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+	skb = check_headroom(skb, min_headroom);
+	if (IS_ERR(skb)) {
+		err = PTR_ERR(skb);
+		goto error;
+	}
 
 	if (skb_is_gso(skb)) {
 		struct sk_buff *nskb;
 
-		/*
-		 * If we are doing GSO on a pskb it is better to make sure that
-		 * the headroom is correct now.  We will only have to copy the
-		 * portion in the linear data area and GSO will preserve
-		 * headroom when it creates the segments.  This is particularly
-		 * beneficial on Xen where we get a lot of GSO pskbs.
-		 * Conversely, we avoid copying if it is just to get our own
-		 * writable clone because GSO will do the copy for us.
-		 */
-		if (skb_headroom(skb) < min_headroom) {
-			skb = check_headroom(skb, min_headroom);
-			if (IS_ERR(skb)) {
-				err = PTR_ERR(skb);
-				goto error;
-			}
-		}
-
 		nskb = skb_gso_segment(skb, 0);
 		kfree_skb(skb);
 		if (IS_ERR(nskb)) {
@@ -1071,32 +1074,23 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb,
 		}
 
 		skb = nskb;
-	} else {
-		skb = check_headroom(skb, min_headroom);
-		if (IS_ERR(skb)) {
-			err = PTR_ERR(skb);
-			goto error;
-		}
-
-		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			/*
-			 * Pages aren't locked and could change at any time.
-			 * If this happens after we compute the checksum, the
-			 * checksum will be wrong.  We linearize now to avoid
-			 * this problem.
-			 */
-			if (unlikely(need_linearize(skb))) {
-				err = __skb_linearize(skb);
-				if (unlikely(err))
-					goto error_free;
-			}
-
-			err = skb_checksum_help(skb);
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		/* Pages aren't locked and could change at any time.
+		 * If this happens after we compute the checksum, the
+		 * checksum will be wrong.  We linearize now to avoid
+		 * this problem.
+		 */
+		if (unlikely(need_linearize(skb))) {
+			err = __skb_linearize(skb);
 			if (unlikely(err))
 				goto error_free;
-		} else if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = CHECKSUM_NONE;
-	}
+		}
+
+		err = skb_checksum_help(skb);
+		if (unlikely(err))
+			goto error_free;
+	} else if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
 
 	return skb;
 
@@ -1159,7 +1153,8 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
 	u8 tos;
 
 	/* Validate the protocol headers before we try to use them. */
-	if (skb->protocol == htons(ETH_P_8021Q)) {
+	if (skb->protocol == htons(ETH_P_8021Q) &&
+	    !vlan_tx_tag_present(skb)) {
 		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
 			goto error_free;
 
@@ -1250,6 +1245,9 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
 		struct sk_buff *next_skb = skb->next;
 		skb->next = NULL;
 
+		if (unlikely(vlan_deaccel_tag(skb)))
+			goto next;
+
 		if (likely(cache)) {
 			skb_push(skb, cache->len);
 			memcpy(skb->data, get_cached_header(cache), cache->len);
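[Editor's note: not part of the patch. The check_mtu() change above
encodes one rule: a tag headed for the wire costs VLAN_HLEN (4) bytes
whether it currently sits in skb->vlan_tci or in the packet data, so
the path-MTU math must charge for it in both cases. Reduced to
arithmetic, with a hypothetical function name:]

    #include <linux/if_ether.h>
    #include <linux/if_vlan.h>

    /* Largest inner payload that fits once the outer Ethernet header,
     * the tunnel header, and (if present in either form) the 802.1Q
     * tag are accounted for. */
    static inline int tunnel_mtu_sketch(int path_mtu, int tunnel_hlen, int tagged)
    {
            return path_mtu - ETH_HLEN - tunnel_hlen - (tagged ? VLAN_HLEN : 0);
    }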
diff --git a/datapath/vlan.h b/datapath/vlan.h
index dc900183..02a62909 100644
--- a/datapath/vlan.h
+++ b/datapath/vlan.h
@@ -43,4 +43,18 @@ u16 vlan_tx_tag_get(struct sk_buff *skb);
 #define __vlan_hwaccel_put_tag rpl__vlan_hwaccel_put_tag
 struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, u16 vlan_tci);
 #endif /* NEED_VLAN_FIELD */
+
+static inline int vlan_deaccel_tag(struct sk_buff *skb)
+{
+	if (!vlan_tx_tag_present(skb))
+		return 0;
+
+	skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	vlan_set_tci(skb, 0);
+	return 0;
+}
+
 #endif /* vlan.h */
diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
index be290745..0b91b345 100644
--- a/datapath/vport-internal_dev.c
+++ b/datapath/vport-internal_dev.c
@@ -6,6 +6,7 @@
  * kernel, by Linus Torvalds and others.
  */
 
+#include
 #include
 #include
 #include
@@ -230,8 +231,13 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
 	struct net_device *netdev = netdev_vport_priv(vport)->dev;
 	int len;
 
-	skb->dev = netdev;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+	if (unlikely(vlan_deaccel_tag(skb)))
+		return 0;
+#endif
+
 	len = skb->len;
+	skb->dev = netdev;
 	skb->pkt_type = PACKET_HOST;
 	skb->protocol = eth_type_trans(skb, netdev);
diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
index 85e0eb9a..3693004c 100644
--- a/datapath/vport-netdev.c
+++ b/datapath/vport-netdev.c
@@ -261,10 +261,56 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 static int netdev_send(struct vport *vport, struct sk_buff *skb)
 {
 	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
-	int len = skb->len;
+	int len;
 
 	skb->dev = netdev_vport->dev;
 	forward_ip_summed(skb);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+	if (vlan_tx_tag_present(skb)) {
+		int err;
+
+		err = vswitch_skb_checksum_setup(skb);
+		if (unlikely(err)) {
+			kfree_skb(skb);
+			return 0;
+		}
+
+		if (skb_is_gso(skb)) {
+			struct sk_buff *nskb;
+
+			nskb = skb_gso_segment(skb, 0);
+			kfree_skb(skb);
+			skb = nskb;
+			if (IS_ERR(skb))
+				return 0;
+
+			len = 0;
+			do {
+				nskb = skb->next;
+				skb->next = NULL;
+
+				skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+				if (likely(skb)) {
+					len += skb->len;
+					vlan_set_tci(skb, 0);
+					dev_queue_xmit(skb);
+				}
+
+				skb = nskb;
+			} while (skb);
+
+			return len;
+		} else {
+			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+			if (unlikely(!skb))
+				return 0;
+			vlan_set_tci(skb, 0);
+		}
+	}
+#endif
+
+	len = skb->len;
 	dev_queue_xmit(skb);
 
 	return len;
-- 
2.30.2
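[Editor's postscript: not part of the patch. On kernels before 2.6.37
the netdev_send() fallback above has to satisfy two constraints at
once: GSO of that era could not segment a frame that already carried
an in-band 802.1Q header, and the device cannot be handed an
out-of-band tag. Hence the order of operations: segment first, then
software-tag each resulting frame. A condensed sketch of that loop,
with a hypothetical function name; the real code also clears the
out-of-band copy (vlan_set_tci(skb, 0)) so the tag is not applied
twice:]

    #include <linux/err.h>
    #include <linux/if_vlan.h>
    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    /* Segment a GSO skb, insert the 802.1Q header into each segment,
     * and transmit.  Returns the number of bytes handed to the device;
     * the original skb is always consumed. */
    static int gso_deaccel_xmit_sketch(struct sk_buff *skb, u16 tci)
    {
            struct sk_buff *segs, *next;
            int len = 0;

            segs = skb_gso_segment(skb, 0);     /* produce MTU-sized frames */
            kfree_skb(skb);                     /* segments hold their own data */
            if (IS_ERR(segs))
                    return 0;

            do {
                    next = segs->next;
                    segs->next = NULL;

                    /* Software tagger; frees the segment on failure. */
                    segs = __vlan_put_tag(segs, tci);
                    if (likely(segs)) {
                            len += segs->len;
                            dev_queue_xmit(segs);
                    }

                    segs = next;
            } while (segs);

            return len;
    }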