/*
- * Copyright (c) 2010 Nicira Networks.
+ * Copyright (c) 2010, 2011 Nicira Networks.
* Distributed under the terms of the GNU GPL version 2.
*
* Significant portions of this file may be copied from parts of the Linux
#include "checksum.h"
#include "datapath.h"
- /* Types of checksums that we can receive (these all refer to L4 checksums):
+#ifdef NEED_CSUM_NORMALIZE
+
+#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
+/* This code is based on skb_checksum_setup() from Xen's net/core/dev.c. We
+ * can't call this function directly because it isn't exported in all
+ * versions. */
+static int vswitch_skb_checksum_setup(struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ unsigned char *th;
+ int err = -EPROTO;
+ __u16 csum_start, csum_offset;
+
+ if (!skb->proto_csum_blank)
+ return 0;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ goto out;
+
+ if (!pskb_may_pull(skb, skb_network_header(skb) + sizeof(struct iphdr) - skb->data))
+ goto out;
+
+ iph = ip_hdr(skb);
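+ /* iph->ihl is the IP header length in 32-bit words, so this lands on
+  * the start of the L4 (TCP/UDP) header. */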
+ th = skb_network_header(skb) + 4 * iph->ihl;
+
+ csum_start = th - skb->head;
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ csum_offset = offsetof(struct tcphdr, check);
+ break;
+ case IPPROTO_UDP:
+ csum_offset = offsetof(struct udphdr, check);
+ break;
+ default:
+ if (net_ratelimit())
+ pr_err("Attempting to checksum a non-TCP/UDP packet, "
+ "dropping a protocol %d packet",
+ iph->protocol);
+ goto out;
+ }
+
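+ /* Make sure the 16-bit checksum field itself is in the linear data area. */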
+ if (!pskb_may_pull(skb, th + csum_offset + 2 - skb->data))
+ goto out;
+
+ skb->proto_csum_blank = 0;
+ set_ip_summed(skb, OVS_CSUM_PARTIAL);
+ set_skb_csum_pointers(skb, csum_start, csum_offset);
+
+ err = 0;
+
+out:
+ return err;
+}
+#else
+static int vswitch_skb_checksum_setup(struct sk_buff *skb)
+{
+ return 0;
+}
+#endif /* not Xen old-style checksums */
+
+/*
+ * compute_ip_summed - map external checksum state onto OVS representation
+ *
+ * @skb: Packet to manipulate.
+ * @xmit: Whether we were on the transmit path of the network stack. For
+ * example, this is true for the internal dev vport because it receives
+ * skbs that passed through dev_queue_xmit(), but false for the netdev
+ * vport because its packets come from netif_receive_skb().
+ *
+ * Older kernels (and various versions of Xen) are not explicit enough about
+ * checksum offload parameters and rely on a combination of context and
+ * non-standard fields. This deals with all those variations so that we
+ * can manipulate checksum offloads internally without worrying about the
+ * kernel version.
+ *
+ * Types of checksums that we can receive (these all refer to L4 checksums):
* 1. CHECKSUM_NONE: Device that did not compute checksum, contains full
* (though not verified) checksum in packet but not in skb->csum. Packets
* from the bridge local port will also have this type.
* CHECKSUM_PARTIAL, it will be sent with the wrong checksum. However, there
* shouldn't be any devices that do this with bridging.
*/
-#ifdef NEED_CSUM_NORMALIZE
-void compute_ip_summed(struct sk_buff *skb, bool xmit)
+int compute_ip_summed(struct sk_buff *skb, bool xmit)
{
/* For our convenience these defines change repeatedly between kernel
* versions, so we can't just copy them over...
*/
switch (skb->ip_summed) {
case CHECKSUM_NONE:
- OVS_CB(skb)->ip_summed = OVS_CSUM_NONE;
+ set_ip_summed(skb, OVS_CSUM_NONE);
break;
case CHECKSUM_UNNECESSARY:
- OVS_CB(skb)->ip_summed = OVS_CSUM_UNNECESSARY;
+ set_ip_summed(skb, OVS_CSUM_UNNECESSARY);
break;
#ifdef CHECKSUM_HW
/* In theory this could be either CHECKSUM_PARTIAL or CHECKSUM_COMPLETE.
* However, on the receive side we should only get CHECKSUM_PARTIAL
* packets from Xen, which uses some special fields to represent this
- * (see below). Since we can only make one type work, pick the one
- * that actually happens in practice.
+ * (see vswitch_skb_checksum_setup()). Since we can only make one type work,
+ * pick the one that actually happens in practice.
*
* On the transmit side (basically after skb_checksum_setup()
* has been run or on internal dev transmit), packets with
*/
case CHECKSUM_HW:
if (!xmit)
- OVS_CB(skb)->ip_summed = OVS_CSUM_COMPLETE;
+ set_ip_summed(skb, OVS_CSUM_COMPLETE);
else
- OVS_CB(skb)->ip_summed = OVS_CSUM_PARTIAL;
-
+ set_ip_summed(skb, OVS_CSUM_PARTIAL);
break;
#else
case CHECKSUM_COMPLETE:
- OVS_CB(skb)->ip_summed = OVS_CSUM_COMPLETE;
+ set_ip_summed(skb, OVS_CSUM_COMPLETE);
break;
case CHECKSUM_PARTIAL:
- OVS_CB(skb)->ip_summed = OVS_CSUM_PARTIAL;
+ set_ip_summed(skb, OVS_CSUM_PARTIAL);
break;
#endif
}
-#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
- /* Xen has a special way of representing CHECKSUM_PARTIAL on older
- * kernels. It should not be set on the transmit path though.
- */
- if (skb->proto_csum_blank)
- OVS_CB(skb)->ip_summed = OVS_CSUM_PARTIAL;
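+ /* Stash the checksum start (headroom + transport offset) in our private
+  * cb so later helpers can restore or adjust it. */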
+ OVS_CB(skb)->csum_start = skb_headroom(skb) + skb_transport_offset(skb);
- WARN_ON_ONCE(skb->proto_csum_blank && xmit);
-#endif
+ return vswitch_skb_checksum_setup(skb);
}
-u8 get_ip_summed(struct sk_buff *skb)
+/*
+ * forward_ip_summed - map internal checksum state back onto native kernel fields
+ *
+ * @skb: Packet to manipulate.
+ * @xmit: Whether we are about to send on the transmit path of the network
+ * stack. This follows the same logic as the @xmit argument to
+ * compute_ip_summed(). Generally, a given vport will pass opposite values
+ * of @xmit to these two functions.
+ *
+ * When a packet is about to egress from OVS, take our internal fields
+ * (including any modifications we have made) and recreate the correct
+ * representation for this kernel. This may do things like change the
+ * transport header offset.
+ */
+void forward_ip_summed(struct sk_buff *skb, bool xmit)
{
- return OVS_CB(skb)->ip_summed;
-}
-#endif /* NEED_CSUM_NORMALIZE */
-
+ switch (get_ip_summed(skb)) {
+ case OVS_CSUM_NONE:
+ skb->ip_summed = CHECKSUM_NONE;
+ break;
+ case OVS_CSUM_UNNECESSARY:
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
-/* This code is based on skb_checksum_setup() from Xen's net/dev/core.c. We
- * can't call this function directly because it isn't exported in all
- * versions. */
-int vswitch_skb_checksum_setup(struct sk_buff *skb)
-{
- struct iphdr *iph;
- unsigned char *th;
- int err = -EPROTO;
- __u16 csum_start, csum_offset;
-
- if (!skb->proto_csum_blank)
- return 0;
-
- if (skb->protocol != htons(ETH_P_IP))
- goto out;
-
- if (!pskb_may_pull(skb, skb_network_header(skb) + sizeof(struct iphdr) - skb->data))
- goto out;
-
- iph = ip_hdr(skb);
- th = skb_network_header(skb) + 4 * iph->ihl;
-
- csum_start = th - skb->head;
- switch (iph->protocol) {
- case IPPROTO_TCP:
- csum_offset = offsetof(struct tcphdr, check);
+ skb->proto_data_valid = 1;
+#endif
break;
- case IPPROTO_UDP:
- csum_offset = offsetof(struct udphdr, check);
+#ifdef CHECKSUM_HW
+ case OVS_CSUM_COMPLETE:
+ if (!xmit)
+ skb->ip_summed = CHECKSUM_HW;
+ else
+ skb->ip_summed = CHECKSUM_NONE;
break;
- default:
- if (net_ratelimit())
- pr_err("Attempting to checksum a non-TCP/UDP packet, "
- "dropping a protocol %d packet",
- iph->protocol);
- goto out;
+ case OVS_CSUM_PARTIAL:
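+ /* On receive, old Xen kernels mark a still-missing checksum with
+  * proto_csum_blank rather than a distinct ip_summed value. */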
+ if (!xmit) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
+ skb->proto_csum_blank = 1;
+#endif
+ } else {
+ skb->ip_summed = CHECKSUM_HW;
+ }
+ break;
+#else
+ case OVS_CSUM_COMPLETE:
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ break;
+ case OVS_CSUM_PARTIAL:
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ break;
+#endif
}
- if (!pskb_may_pull(skb, th + csum_offset + 2 - skb->data))
- goto out;
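+ /* Old kernels derive the checksum start from the transport header,
+  * so rebuild that offset from our stashed value. */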
+ if (get_ip_summed(skb) == OVS_CSUM_PARTIAL)
+ skb_set_transport_header(skb, OVS_CB(skb)->csum_start - skb_headroom(skb));
+}
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->proto_csum_blank = 0;
- set_skb_csum_pointers(skb, csum_start, csum_offset);
+u8 get_ip_summed(struct sk_buff *skb)
+{
+ return OVS_CB(skb)->ip_summed;
+}
- err = 0;
+void set_ip_summed(struct sk_buff *skb, u8 ip_summed)
+{
+ OVS_CB(skb)->ip_summed = ip_summed;
+}
-out:
- return err;
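+
+/* Kernels that need normalization lack skb->csum_start/csum_offset, so we
+ * keep csum_start in our private cb and reuse skb->csum for the offset. */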
+void get_skb_csum_pointers(const struct sk_buff *skb, u16 *csum_start,
+ u16 *csum_offset)
+{
+ *csum_start = OVS_CB(skb)->csum_start;
+ *csum_offset = skb->csum;
}
-#endif /* CONFIG_XEN && HAVE_PROTO_DATA_VALID */
+
+void set_skb_csum_pointers(struct sk_buff *skb, u16 csum_start, u16 csum_offset)
+{
+ OVS_CB(skb)->csum_start = csum_start;
+ skb->csum = csum_offset;
+}
+#endif /* NEED_CSUM_NORMALIZE */
/*
- * Copyright (c) 2010 Nicira Networks.
+ * Copyright (c) 2010, 2011 Nicira Networks.
* Distributed under the terms of the GNU GPL version 2.
*
* Significant portions of this file may be copied from parts of the Linux
};
#ifdef NEED_CSUM_NORMALIZE
-void compute_ip_summed(struct sk_buff *skb, bool xmit);
+int compute_ip_summed(struct sk_buff *skb, bool xmit);
+void forward_ip_summed(struct sk_buff *skb, bool xmit);
u8 get_ip_summed(struct sk_buff *skb);
+void set_ip_summed(struct sk_buff *skb, u8 ip_summed);
+void get_skb_csum_pointers(const struct sk_buff *skb, u16 *csum_start,
+ u16 *csum_offset);
+void set_skb_csum_pointers(struct sk_buff *skb, u16 csum_start, u16 csum_offset);
#else
-static inline void compute_ip_summed(struct sk_buff *skb, bool xmit) { }
-static inline u8 get_ip_summed(struct sk_buff *skb)
+static inline int compute_ip_summed(struct sk_buff *skb, bool xmit)
{
- return skb->ip_summed;
+ return 0;
}
-#endif
-/* This function closely resembles skb_forward_csum() used by the bridge. It
- * is slightly different because we are only concerned with bridging and not
- * other types of forwarding and can get away with slightly more optimal
- * behavior.
- */
-static inline void forward_ip_summed(struct sk_buff *skb)
+static inline void forward_ip_summed(struct sk_buff *skb, bool xmit) { }
+
+static inline u8 get_ip_summed(struct sk_buff *skb)
{
-#ifdef CHECKSUM_HW
- if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_NONE;
-#endif
+ return skb->ip_summed;
}
-#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
-int vswitch_skb_checksum_setup(struct sk_buff *skb);
-#else
-static inline int vswitch_skb_checksum_setup(struct sk_buff *skb)
+static inline void set_ip_summed(struct sk_buff *skb, u8 ip_summed)
{
- return 0;
+ skb->ip_summed = ip_summed;
}
-#endif
static inline void get_skb_csum_pointers(const struct sk_buff *skb,
u16 *csum_start, u16 *csum_offset)
{
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
*csum_start = skb->csum_start;
*csum_offset = skb->csum_offset;
-#else
- *csum_start = skb_headroom(skb) + skb_transport_offset(skb);
- *csum_offset = skb->csum;
-#endif
}
static inline void set_skb_csum_pointers(struct sk_buff *skb, u16 csum_start,
u16 csum_offset)
{
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
skb->csum_start = csum_start;
skb->csum_offset = csum_offset;
-#else
- skb_set_transport_header(skb, csum_start - skb_headroom(skb));
- skb->csum = csum_offset;
-#endif
}
+#endif
-#if defined(NEED_CSUM_NORMALIZE) || LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
/* This is really compatibility code that belongs in the compat directory.
* However, it needs access to our normalized checksum values, so put it here.
*/
+#if defined(NEED_CSUM_NORMALIZE) || LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
#define inet_proto_csum_replace4 rpl_inet_proto_csum_replace4
static inline void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
__be32 from, __be32 to,
}
#endif
+#ifdef NEED_CSUM_NORMALIZE
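+/* Operations that reallocate an skb's header block change its headroom, and
+ * therefore where the stashed checksum start sits relative to skb->head, so
+ * it must be adjusted by the same delta. */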
+static inline void update_csum_start(struct sk_buff *skb, int delta)
+{
+ if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) {
+ u16 csum_start, csum_offset;
+
+ get_skb_csum_pointers(skb, &csum_start, &csum_offset);
+ set_skb_csum_pointers(skb, csum_start + delta, csum_offset);
+ }
+}
+
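+/* pskb_expand_head() changes the headroom, so keep our csum_start in sync. */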
+static inline int rpl_pskb_expand_head(struct sk_buff *skb, int nhead,
+ int ntail, gfp_t gfp_mask)
+{
+ int err;
+ int old_headroom = skb_headroom(skb);
+
+ err = pskb_expand_head(skb, nhead, ntail, gfp_mask);
+ if (unlikely(err))
+ return err;
+
+ update_csum_start(skb, skb_headroom(skb) - old_headroom);
+
+ return 0;
+}
+#define pskb_expand_head rpl_pskb_expand_head
+
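+/* __pskb_pull_tail() may reallocate the header as well, so likewise fix up
+ * the stashed csum_start. */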
+static inline unsigned char *rpl__pskb_pull_tail(struct sk_buff *skb,
+ int delta)
+{
+ unsigned char *ret;
+ int old_headroom = skb_headroom(skb);
+
+ ret = __pskb_pull_tail(skb, delta);
+ if (unlikely(!ret))
+ return ret;
+
+ update_csum_start(skb, skb_headroom(skb) - old_headroom);
+
+ return ret;
+}
+#define __pskb_pull_tail rpl__pskb_pull_tail
+#endif
+
#endif /* checksum.h */
secpath_reset(skb);
ecn_decapsulate(skb, tos);
- compute_ip_summed(skb, false);
vlan_set_tci(skb, 0);
+ if (unlikely(compute_ip_summed(skb, false))) {
+ kfree_skb(skb);
+ return;
+ }
+
vport_receive(vport, skb);
}
(TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION))
OVS_CB(nskb)->tun_id = flow_key;
- compute_ip_summed(nskb, false);
+ if (unlikely(compute_ip_summed(nskb, false))) {
+ kfree_skb(nskb);
+ return false;
+ }
+
vport_receive(vport, nskb);
return true;
int min_headroom;
int err;
- forward_ip_summed(skb);
-
- err = vswitch_skb_checksum_setup(skb);
- if (unlikely(err))
- goto error_free;
-
min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+ mutable->tunnel_hlen
+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
goto error_free;
}
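+ /* Recreate the kernel's checksum fields before the GSO and
+  * checksum-help paths below look at them. */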
+ forward_ip_summed(skb, true);
+
if (skb_is_gso(skb)) {
struct sk_buff *nskb;
}
skb = nskb;
- } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ } else if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) {
/* Pages aren't locked and could change at any time.
* If this happens after we compute the checksum, the
* checksum will be wrong. We linearize now to avoid
err = skb_checksum_help(skb);
if (unlikely(err))
goto error_free;
- } else if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_NONE;
+ }
+
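+ /* The full checksum now lives in the packet data itself. */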
+ set_ip_summed(skb, OVS_CSUM_NONE);
return skb;
ip_send_check(iph);
if (cache_vport) {
+ if (unlikely(compute_ip_summed(skb, true))) {
+ kfree_skb(skb);
+ goto next;
+ }
+
OVS_CB(skb)->flow = cache->flow;
- compute_ip_summed(skb, true);
vport_receive(cache_vport, skb);
sent_len += orig_len;
} else {