datapath: Fix up checksum on Xen before forwarding to controller.
authorBen Pfaff <blp@nicira.com>
Mon, 2 Feb 2009 17:55:32 +0000 (09:55 -0800)
committerBen Pfaff <blp@nicira.com>
Mon, 2 Feb 2009 17:55:32 +0000 (09:55 -0800)
On Xen, the datapath can receive a packet that lacks a correct checksum
from a VM, because the VMs expect to use the host's hardware TX
checksumming.  Until now, we haven't fixed up the checksum before we sent
the packet to the controller.  The controller doesn't normally verify
the checksum (nor can it in general, since it doesn't necessarily get the
entire packet), so that part isn't a problem.

The problem here is in the buffered packet.  fwd_save_skb() makes a copy
(not a clone) of the packet, but skb_copy() doesn't make a copy of the
skbuff's proto_csum_blank, which is what dev_queue_xmit() uses (via
skb_checksum_setup()) to decide whether checksumming needs to be forced.
Thus, the buffered packet is transmitted with a bad checksum.

A partial solution would be to copy proto_csum_blank from the original
skb into the buffered copy, or to make the buffers use clones instead of
copies (they really should do this anyhow).  But this would still send
a bad checksum to the controller.  So instead we do the full checksum
calculation before we send the packet to the controller.

This change affects only Xen.  This situation cannot occur without Xen,
because any packets that arrive on physical interfaces must already have
correct checksums.

datapath/datapath.c

index 7cdea6a7d3238fe72b26b306b88d4efefadbdcd3..2e1ba6abad87e144b629e2bf71fc9fd1270b0ab2 100644 (file)
@@ -800,6 +800,55 @@ bad_port:
        return -ENOENT;
 }
 
+#ifdef CONFIG_XEN
+/* This code is copied verbatim from net/dev/core.c in Xen's
+ * linux-2.6.18-92.1.10.el5.xs5.0.0.394.644.  We can't call those functions
+ * directly because they aren't exported. */
+static int skb_pull_up_to(struct sk_buff *skb, void *ptr)
+{
+       if (ptr < (void *)skb->tail)
+               return 1;
+       if (__pskb_pull_tail(skb,
+                            ptr - (void *)skb->data - skb_headlen(skb))) {
+               return 1;
+       } else {
+               return 0;
+       }
+}
+
+inline int skb_checksum_setup(struct sk_buff *skb)
+{
+       if (skb->proto_csum_blank) {
+               if (skb->protocol != htons(ETH_P_IP))
+                       goto out;
+               if (!skb_pull_up_to(skb, skb->nh.iph + 1))
+                       goto out;
+               skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
+               switch (skb->nh.iph->protocol) {
+               case IPPROTO_TCP:
+                       skb->csum = offsetof(struct tcphdr, check);
+                       break;
+               case IPPROTO_UDP:
+                       skb->csum = offsetof(struct udphdr, check);
+                       break;
+               default:
+                       if (net_ratelimit())
+                               printk(KERN_ERR "Attempting to checksum a non-"
+                                      "TCP/UDP packet, dropping a protocol"
+                                      " %d packet", skb->nh.iph->protocol);
+                       goto out;
+               }
+               if (!skb_pull_up_to(skb, skb->h.raw + skb->csum + 2))
+                       goto out;
+               skb->ip_summed = CHECKSUM_HW;
+               skb->proto_csum_blank = 0;
+       }
+       return 0;
+out:
+       return -EPROTO;
+}
+#endif
+
 /* Takes ownership of 'skb' and transmits it to 'dp''s control path.  'reason'
  * indicates why 'skb' is being sent. 'max_len' sets the maximum number of
  * bytes that the caller wants to be sent; a value of 0 indicates the entire
@@ -816,6 +865,22 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb,
        uint32_t buffer_id;
        int err;
 
+       WARN_ON_ONCE(skb_shared(skb));
+
+#ifdef CONFIG_XEN
+       /* If a checksum-deferred packet is forwarded to the controller,
+        * correct the pointers and checksum.
+        */
+       err = skb_checksum_setup(skb);
+       if (err)
+               goto out;
+       if (skb->ip_summed == CHECKSUM_HW) {
+               err = skb_checksum_help(skb, 0);
+               if (err)
+                       goto out;
+       }
+#endif
+
        buffer_id = fwd_save_skb(skb);
 
        fwd_len = skb->len;