From 62d3df93418f7b2bcfeef72e2159e0ac4076bdaf Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 30 Apr 2009 13:46:44 -0700 Subject: [PATCH] datapath: Break up GSO packets before sending to userspace. On a Xen host, over-MTU GSO packets from virtual machines can end up sent down by the virtual switch to userspace. This happens, for example, if a TCP flow has a long enough "pause" that the datapath flow times out. When this happens, the packet is not marked as GSO when secchan sends it back up via dpif_execute(), and the packet is then discarded in dp_xmit_skb() as too large. This commit solves the problem by breaking GSO packets into MTU-size pieces before passing them along to userspace. Tested by running "netperf" between two VMs on different boxes and running "dpctl dp-del-flows" on the appropriate datapath a few times in the middle and seeing that the total bandwidth didn't change much. Verified that packets were actually being broken up by adding a printk call inside the "if (skb_is_gso())" block. Thanks to Justin and Keith for review. Bug #1133. --- datapath/datapath.c | 70 +++++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 6c8630b9..2be82df0 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -682,7 +682,7 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no, queue = &dp->queues[queue_no]; err = -ENOBUFS; if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN) - goto err; + goto err_kfree_skb; /* If a checksum-deferred packet is forwarded to the controller, * correct the pointers and checksum. This happens on a regular basis @@ -693,7 +693,7 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no, */ err = skb_checksum_setup(skb); if (err) - goto err; + goto err_kfree_skb; #ifndef CHECKSUM_HW if (skb->ip_summed == CHECKSUM_PARTIAL) { WARN_ON_ONCE(1); @@ -708,38 +708,72 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no, #endif err = skb_checksum_help(skb); if (err) - goto err; + goto err_kfree_skb; } #else if (skb->ip_summed == CHECKSUM_HW) { err = skb_checksum_help(skb, 0); if (err) - goto err; + goto err_kfree_skb; } #endif - err = skb_cow(skb, sizeof *header); - if (err) - goto err; - - header = (struct odp_msg*)__skb_push(skb, sizeof *header); - header->type = queue_no; - header->length = skb->len; - header->port = (skb->dev && skb->dev->br_port - ? skb->dev->br_port->port_no - : ODPP_LOCAL); - header->reserved = 0; - header->arg = arg; - skb_queue_tail(queue, skb); + /* Break apart GSO packets into their component pieces. Otherwise + * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */ + if (skb_is_gso(skb)) { + struct sk_buff *nskb = skb_gso_segment(skb, 0); + if (nskb) { + kfree_skb(skb); + skb = nskb; + if (unlikely(IS_ERR(skb))) { + err = PTR_ERR(skb); + goto err; + } + } else { + /* XXX This case might not be possible. It's hard to + * tell from the skb_gso_segment() code and comment. */ + } + } + + /* Append each packet to queue. There will be only one packet unless + * we broke up a GSO packet above. */ + do { + struct sk_buff *nskb = skb->next; + skb->next = NULL; + + err = skb_cow(skb, sizeof *header); + if (err) { + while (nskb) { + kfree_skb(skb); + skb = nskb; + nskb = skb->next; + } + goto err_kfree_skb; + } + + header = (struct odp_msg*)__skb_push(skb, sizeof *header); + header->type = queue_no; + header->length = skb->len; + header->port = (skb->dev && skb->dev->br_port + ? skb->dev->br_port->port_no + : ODPP_LOCAL); + header->reserved = 0; + header->arg = arg; + skb_queue_tail(queue, skb); + + skb = nskb; + } while (skb); + wake_up_interruptible(&dp->waitqueue); return 0; +err_kfree_skb: + kfree_skb(skb); err: stats = percpu_ptr(dp->stats_percpu, get_cpu()); stats->n_lost++; put_cpu(); - kfree_skb(skb); return err; } -- 2.30.2