datapath: Fix Xen performance when adding a VLAN tag in presence of GSO.
authorBen Pfaff <blp@nicira.com>
Fri, 17 Apr 2009 22:00:55 +0000 (15:00 -0700)
committerBen Pfaff <blp@nicira.com>
Fri, 17 Apr 2009 22:00:55 +0000 (15:00 -0700)
When the datapath needed to add a VLAN tag to a GSO packet, it would stick
it on the front of the packet and pass it along.  Then GSO would later fail
because it didn't know how to segment an 802.1Q packet.  So we need to
segment GSO packets by ourselves before we put the 802.1Q header on them.

We could avoid this problem if we could patch the main kernel, either to
add GSO support to the VLAN protocol or to support hardware-accelerated
VLAN tagging without VLAN groups configured).

Bug #1231.

datapath/actions.c
datapath/actions.h
datapath/datapath.c
datapath/datapath.h

index 86892e8dc4da253bd036e6620acd66e27e03fe04..bd511e929c1edec685bc9ffac9d8894b2a3deaf6 100644 (file)
@@ -64,15 +64,30 @@ vlan_pull_tag(struct sk_buff *skb)
 
 
 static struct sk_buff *
-modify_vlan_tci(struct sk_buff *skb, struct odp_flow_key *key, 
-               u16 tci, u16 mask)
+modify_vlan_tci(struct datapath *dp, struct sk_buff *skb,
+               struct odp_flow_key *key, const union odp_action *a,
+               int n_actions, gfp_t gfp)
 {
-       struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
+       u16 tci, mask;
+
+       if (a->type == ODPAT_SET_VLAN_VID) {
+               tci = ntohs(a->vlan_vid.vlan_vid);
+               mask = VLAN_VID_MASK;
+               key->dl_vlan = htons(tci & mask);
+       } else {
+               tci = a->vlan_pcp.vlan_pcp << 13;
+               mask = VLAN_PCP_MASK;
+       }
+
+       skb = make_writable(skb, gfp);
+       if (!skb)
+               return ERR_PTR(-ENOMEM);
 
-       if (key->dl_vlan != htons(ODP_VLAN_NONE)) {
+       if (skb->protocol == htons(ETH_P_8021Q)) {
                /* Modify vlan id, but maintain other TCI values */
-               vh->h_vlan_TCI = (vh->h_vlan_TCI & ~(htons(mask))) | htons(tci);
-       } else  {
+               struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
+               vh->h_vlan_TCI = htons((ntohs(vh->h_vlan_TCI) & ~mask) | tci);
+       } else {
                /* Add vlan header */
 
                /* Set up checksumming pointers for checksum-deferred packets
@@ -82,46 +97,70 @@ modify_vlan_tci(struct sk_buff *skb, struct odp_flow_key *key,
                 * an 802.1Q header. */
                skb_checksum_setup(skb);
 
+               /* GSO is not implemented for packets with an 802.1Q header, so
+                * we have to do segmentation before we add that header.
+                *
+                * GSO does work with hardware-accelerated VLAN tagging, but we
+                * can't use hardware-accelerated VLAN tagging since it
+                * requires the device to have a VLAN group configured (with
+                * e.g. vconfig(8)) and we don't do that.
+                *
+                * Having to do this here may be a performance loss, since we
+                * can't take advantage of TSO hardware support, although it
+                * does not make a measurable network performance difference
+                * for 1G Ethernet.  Fixing that would require patching the
+                * kernel (either to add GSO support to the VLAN protocol or to
+                * support hardware-accelerated VLAN tagging without VLAN
+                * groups configured). */
+               if (skb_is_gso(skb)) {
+                       struct sk_buff *segs;
+
+                       segs = skb_gso_segment(skb, 0);
+                       kfree_skb(skb);
+                       if (unlikely(IS_ERR(segs)))
+                               return ERR_CAST(segs);
+
+                       do {
+                               struct sk_buff *nskb = segs->next;
+                               int err;
+
+                               segs->next = NULL;
+
+                               segs = __vlan_put_tag(segs, tci);
+                               err = -ENOMEM;
+                               if (segs) {
+                                       struct odp_flow_key segkey = *key;
+                                       err = execute_actions(dp, segs,
+                                                             &segkey, a + 1,
+                                                             n_actions - 1,
+                                                             gfp);
+                               }
+
+                               if (unlikely(err)) {
+                                       while ((segs = nskb)) {
+                                               nskb = segs->next;
+                                               segs->next = NULL;
+                                               kfree_skb(segs);
+                                       }
+                                       return ERR_PTR(err);
+                               }
+
+                               segs = nskb;
+                       } while (segs->next);
+
+                       skb = segs;
+               }
+
                /* The hardware-accelerated version of vlan_put_tag() works
                 * only for a device that has a VLAN group configured (with
                 * e.g. vconfig(8)), so call the software-only version
                 * __vlan_put_tag() directly instead.
                 */
-               skb = __vlan_put_tag(skb, tci & mask);
+               skb = __vlan_put_tag(skb, tci);
                if (!skb)
-                       return NULL;
-               vh = vlan_eth_hdr(skb);
+                       return ERR_PTR(-ENOMEM);
        }
-       key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
-
-       return skb;
-}
-
-static struct sk_buff *set_vlan_vid(struct sk_buff *skb,
-                                   struct odp_flow_key *key,
-                                   const struct odp_action_vlan_vid *a,
-                                   gfp_t gfp)
-{
-       u16 tci = ntohs(a->vlan_vid);
-       skb = make_writable(skb, gfp);
-       if (skb)
-               skb = modify_vlan_tci(skb, key, tci, VLAN_VID_MASK);
-       return skb;
-}
-
-/* Mask for the priority bits in a vlan header.  The kernel doesn't
- * define this like it does for VID. */
-#define VLAN_PCP_MASK 0xe000
 
-static struct sk_buff *set_vlan_pcp(struct sk_buff *skb,
-                                   struct odp_flow_key *key,
-                                   const struct odp_action_vlan_pcp *a,
-                                   gfp_t gfp)
-{
-       u16 tci = a->vlan_pcp << 13;
-       skb = make_writable(skb, gfp);
-       if (skb)
-               skb = modify_vlan_tci(skb, key, tci, VLAN_PCP_MASK);
        return skb;
 }
 
@@ -370,7 +409,7 @@ output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp)
 /* Execute a list of actions against 'skb'. */
 int execute_actions(struct datapath *dp, struct sk_buff *skb,
                    struct odp_flow_key *key,
-                   const struct sw_flow_actions *actions,
+                   const union odp_action *a, int n_actions,
                    gfp_t gfp)
 {
        /* Every output action needs a separate clone of 'skb', but the common
@@ -378,10 +417,8 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb,
         * then freeing the original skbuff is wasteful.  So the following code
         * is slightly obscure just to avoid that. */
        int prev_port = -1;
-       unsigned int i;
        int err = 0;
-       for (i = 0; i < actions->n_actions; i++) {
-               const union odp_action *a = &actions->actions[i];
+       for (; n_actions > 0; a++, n_actions--) {
                WARN_ON_ONCE(skb_shared(skb));
                if (prev_port != -1) {
                        do_output(dp, skb_clone(skb, gfp), prev_port);
@@ -407,11 +444,10 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb,
                        break;
 
                case ODPAT_SET_VLAN_VID:
-                       skb = set_vlan_vid(skb, key, &a->vlan_vid, gfp);
-                       break;
-
                case ODPAT_SET_VLAN_PCP:
-                       skb = set_vlan_pcp(skb, key, &a->vlan_pcp, gfp);
+                       skb = modify_vlan_tci(dp, skb, key, a, n_actions, gfp);
+                       if (IS_ERR(skb))
+                               return PTR_ERR(skb);
                        break;
 
                case ODPAT_STRIP_VLAN:
index 981e0eacd3e408f50998454d585bcf68bca1262e..410e3ba79da92e4ece97595bb8e2a1765aad2d40 100644 (file)
@@ -6,12 +6,13 @@
 struct datapath;
 struct sk_buff;
 struct odp_flow_key;
-struct sw_flow_actions;
+union odp_action;
 
 struct sk_buff *make_writable(struct sk_buff *, gfp_t gfp);
 int dp_xmit_skb(struct sk_buff *);
 int execute_actions(struct datapath *dp, struct sk_buff *skb,
                    struct odp_flow_key *key,
-                   const struct sw_flow_actions *, gfp_t gfp);
+                   const union odp_action *, int n_actions,
+                   gfp_t gfp);
 
 #endif /* actions.h */
index ce0042bb6708728c0cf7edadaee90d145f8230e5..6c8630b9e3ea62f183a9ec7f59e5e1ce98e39659 100644 (file)
@@ -575,8 +575,9 @@ void dp_process_received_packet(struct sk_buff *skb, struct net_bridge_port *p)
 
        flow = dp_table_lookup(rcu_dereference(dp->table), &key);
        if (flow) {
+               struct sw_flow_actions *acts = rcu_dereference(flow->sf_acts);
                flow_used(flow, skb);
-               execute_actions(dp, skb, &key, rcu_dereference(flow->sf_acts),
+               execute_actions(dp, skb, &key, acts->actions, acts->n_actions,
                                GFP_ATOMIC);
                stats->n_hit++;
        } else {
@@ -774,6 +775,16 @@ static int validate_actions(const struct sw_flow_actions *actions)
                                return -EINVAL;
                        break;
 
+               case ODPAT_SET_VLAN_VID:
+                       if (a->vlan_vid.vlan_vid & htons(~VLAN_VID_MASK))
+                               return -EINVAL;
+                       break;
+
+               case ODPAT_SET_VLAN_PCP:
+                       if (a->vlan_pcp.vlan_pcp & ~VLAN_PCP_MASK)
+                               return -EINVAL;
+                       break;
+
                default:
                        if (a->type >= ODPAT_N_ACTIONS)
                                return -EOPNOTSUPP;
@@ -1127,7 +1138,8 @@ static int do_execute(struct datapath *dp, const struct odp_execute *executep)
                goto error_free_skb;
 
        flow_extract(skb, execute.in_port, &key);
-       err = execute_actions(dp, skb, &key, actions, GFP_KERNEL);
+       err = execute_actions(dp, skb, &key, actions->actions,
+                             actions->n_actions, GFP_KERNEL);
        kfree(actions);
        return err;
 
index 608e74373956a10441f1d2de185d1cb982071f2b..dbb02c7d7f7dff9abbe27e35309e5d57a336ef82 100644 (file)
 
 struct sk_buff;
 
+/* Mask for the priority bits in a vlan header.  If we ever merge upstream
+ * then this should go into include/linux/if_vlan.h. */
+#define VLAN_PCP_MASK 0xe000
+
 #define DP_MAX_PORTS 256
 #define DP_MAX_GROUPS 16