2 * Copyright (c) 2010, 2011 Nicira Networks.
3 * Distributed under the terms of the GNU GPL version 2.
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 #include <linux/tcp.h>
14 #include <linux/udp.h>
19 #ifdef NEED_CSUM_NORMALIZE
21 #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
22 /* This code is based on skb_checksum_setup() from Xen's net/core/dev.c. We
23 * can't call this function directly because it isn't exported in all
/* Xen old-style checksum setup: for an IPv4 TCP or UDP skb whose
 * proto_csum_blank flag is set, work out where the L4 checksum field lives,
 * clear the flag and record the packet as OVS_CSUM_PARTIAL.
 *
 * NOTE(review): in this view the braces, case labels and return/goto
 * statements are not visible, so the exact return values and error paths
 * cannot be confirmed from here. */
25 static int vswitch_skb_checksum_setup(struct sk_buff *skb)
30 __u16 csum_start, csum_offset;
/* Only packets flagged by Xen as having a blank checksum are of interest
 * (presumably everything else exits early - confirm). */
32 if (!skb->proto_csum_blank)
/* Only IPv4 is handled. */
35 if (skb->protocol != htons(ETH_P_IP))
/* The fixed part of the IP header must be in the linear data area before
 * iph->ihl is read below. */
38 if (!pskb_may_pull(skb, skb_network_header(skb) + sizeof(struct iphdr) - skb->data))
/* th: start of the L4 header; ihl counts 32-bit words, hence the factor 4. */
42 th = skb_network_header(skb) + 4 * iph->ihl;
/* csum_start is expressed relative to skb->head, not skb->data. */
44 csum_start = th - skb->head;
45 switch (iph->protocol) {
/* csum_offset: position of the checksum field inside the TCP or UDP header. */
47 csum_offset = offsetof(struct tcphdr, check);
50 csum_offset = offsetof(struct udphdr, check);
/* NOTE(review): the format string below has no trailing "\n" - confirm
 * whether that is intended. */
54 pr_err("Attempting to checksum a non-TCP/UDP packet, "
55 "dropping a protocol %d packet",
/* The 2-byte checksum field itself must also be reachable in linear data. */
60 if (!pskb_may_pull(skb, th + csum_offset + 2 - skb->data))
/* Replace the Xen-specific flag with the OVS internal representation. */
63 skb->proto_csum_blank = 0;
64 set_ip_summed(skb, OVS_CSUM_PARTIAL);
65 set_skb_csum_pointers(skb, csum_start, csum_offset);
/* Second definition of vswitch_skb_checksum_setup(); presumably the
 * alternative used when the CONFIG_XEN && HAVE_PROTO_DATA_VALID test above
 * is false.
 * NOTE(review): the body is not visible in this view - confirm what it
 * returns. */
73 static int vswitch_skb_checksum_setup(struct sk_buff *skb)
77 #endif /* not Xen old style checksums */
80 * compute_ip_summed - map external checksum state onto OVS representation
82 * @skb: Packet to manipulate.
83 * @xmit: Whether we were on transmit path of network stack. For example,
84 * this is true for the internal dev vport because it receives skbs
85 * that passed through dev_queue_xmit() but false for the netdev vport
86 * because its packets come from netif_receive_skb().
88 * Older kernels (and various versions of Xen) were not explicit enough about
89 * checksum offload parameters and rely on a combination of context and
90 * non standard fields. This deals with all those variations so that we
91 * can internally manipulate checksum offloads without worrying about kernel
94 * Types of checksums that we can receive (these all refer to L4 checksums):
95 * 1. CHECKSUM_NONE: Device that did not compute checksum, contains full
96 * (though not verified) checksum in packet but not in skb->csum. Packets
97 * from the bridge local port will also have this type.
98 * 2. CHECKSUM_COMPLETE (CHECKSUM_HW): Good device that computes checksums,
99 * also the GRE module. This is the same as CHECKSUM_NONE, except it has
100 * a valid skb->csum. Importantly, both contain a full checksum (not
101 * verified) in the packet itself. The only difference is that if the
102 * packet gets to L4 processing on this machine (not in DomU) we won't
103 * have to recompute the checksum to verify. Most hardware devices do not
104 * produce packets with this type, even if they support receive checksum
105 * offloading (they produce type #5).
106 * 3. CHECKSUM_PARTIAL (CHECKSUM_HW): Packet without full checksum and needs to
107 * be computed if it is sent off box. Unfortunately on earlier kernels,
108 * this case is impossible to distinguish from #2, despite having opposite
109 * meanings. Xen adds an extra field on earlier kernels (see #4) in order
110 * to distinguish the different states.
111 * 4. CHECKSUM_UNNECESSARY (with proto_csum_blank true): This packet was
112 * generated locally by a Xen DomU and has a partial checksum. If it is
113 * handled on this machine (Dom0 or DomU), then the checksum will not be
114 * computed. If it goes off box, the checksum in the packet needs to be
115 * completed. Calling skb_checksum_setup converts this to CHECKSUM_HW
116 * (CHECKSUM_PARTIAL) so that the checksum can be completed. In later
117 * kernels, this combination is replaced with CHECKSUM_PARTIAL.
118 * 5. CHECKSUM_UNNECESSARY (with proto_csum_blank false): Packet with a correct
119 * full checksum or using a protocol without a checksum. skb->csum is
120 * undefined. This is common from devices with receive checksum
121 * offloading. This is somewhat similar to CHECKSUM_NONE, except that
122 * nobody will try to verify the checksum with CHECKSUM_UNNECESSARY.
124 * Note that on earlier kernels, CHECKSUM_COMPLETE and CHECKSUM_PARTIAL are
125 * both defined as CHECKSUM_HW. Normally the meaning of CHECKSUM_HW is clear
126 * based on whether it is on the transmit or receive path. After the datapath
127 * it will be interpreted as CHECKSUM_PARTIAL. If the packet already has a
128 * checksum, we will panic. Since we can receive packets with checksums, we
129 * assume that all CHECKSUM_HW packets have checksums and map them to
130 * CHECKSUM_NONE, which has a similar meaning (it is only different if the
131 * packet is processed by the local IP stack, in which case it will need to
132 * be reverified). If we receive a packet with CHECKSUM_HW that really means
133 * CHECKSUM_PARTIAL, it will be sent with the wrong checksum. However, there
134 * shouldn't be any devices that do this with bridging.
/* Summary of the visible code: switches on skb->ip_summed and stores the
 * matching OVS_CSUM_* value through set_ip_summed(), saves
 * skb_headroom() + skb_transport_offset() in OVS_CB(skb)->csum_start, and
 * returns whatever vswitch_skb_checksum_setup() returns.
 *
 * NOTE(review): several case labels and every test on @xmit are not visible
 * in this view; in particular the conditions selecting between the adjacent
 * OVS_CSUM_COMPLETE and OVS_CSUM_PARTIAL assignments need to be confirmed
 * against the full source. */
136 int compute_ip_summed(struct sk_buff *skb, bool xmit)
138 /* For our convenience these defines change repeatedly between kernel
139 * versions, so we can't just copy them over...
141 switch (skb->ip_summed) {
143 set_ip_summed(skb, OVS_CSUM_NONE);
145 case CHECKSUM_UNNECESSARY:
146 set_ip_summed(skb, OVS_CSUM_UNNECESSARY);
149 /* In theory this could be either CHECKSUM_PARTIAL or CHECKSUM_COMPLETE.
150 * However, on the receive side we should only get CHECKSUM_PARTIAL
151 * packets from Xen, which uses some special fields to represent this
152 * (see vswitch_skb_checksum_setup()). Since we can only make one type work,
153 * pick the one that actually happens in practice.
155 * On the transmit side (basically after skb_checksum_setup()
156 * has been run or on internal dev transmit), packets with
157 * CHECKSUM_COMPLETE aren't generated, so assume CHECKSUM_PARTIAL.
161 set_ip_summed(skb, OVS_CSUM_COMPLETE);
163 set_ip_summed(skb, OVS_CSUM_PARTIAL);
166 case CHECKSUM_COMPLETE:
167 set_ip_summed(skb, OVS_CSUM_COMPLETE);
169 case CHECKSUM_PARTIAL:
170 set_ip_summed(skb, OVS_CSUM_PARTIAL);
175 OVS_CB(skb)->csum_start = skb_headroom(skb) + skb_transport_offset(skb);
177 return vswitch_skb_checksum_setup(skb);
181 * forward_ip_summed - map internal checksum state back onto native kernel fields
183 * @skb: Packet to manipulate.
184 * @xmit: Whether we are about to send on the transmit path of the network stack. This
185 * follows the same logic as the @xmit field in compute_ip_summed().
186 * Generally, a given vport will have opposite values for @xmit passed to these
189 * When a packet is about to egress from OVS take our internal fields (including
190 * any modifications we have made) and recreate the correct representation for
191 * this kernel. This may do things like change the transport header offset.
/* Summary of the visible code: switches on the OVS checksum state returned
 * by get_ip_summed() and writes the corresponding native value into
 * skb->ip_summed (plus the Xen proto_data_valid / proto_csum_blank flags
 * where those fields exist). For OVS_CSUM_PARTIAL it finally re-derives the
 * transport header from the saved csum_start.
 *
 * NOTE(review): the tests on @xmit and some case labels/preprocessor lines
 * are not visible in this view. */
193 void forward_ip_summed(struct sk_buff *skb, bool xmit)
195 switch(get_ip_summed(skb)) {
197 skb->ip_summed = CHECKSUM_NONE;
199 case OVS_CSUM_UNNECESSARY:
200 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* The Xen-only field is touched only when the kernel provides it. */
201 #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
202 skb->proto_data_valid = 1;
/* NOTE(review): OVS_CSUM_COMPLETE and OVS_CSUM_PARTIAL each appear twice.
 * The first pair writes CHECKSUM_HW, the second pair the distinct
 * CHECKSUM_COMPLETE / CHECKSUM_PARTIAL values; presumably the two pairs are
 * selected by a preprocessor test on CHECKSUM_HW that is not visible here -
 * confirm. The choice between the two assignments under each of the first
 * pair of labels presumably depends on @xmit - confirm. */
206 case OVS_CSUM_COMPLETE:
208 skb->ip_summed = CHECKSUM_HW;
210 skb->ip_summed = CHECKSUM_NONE;
212 case OVS_CSUM_PARTIAL:
214 skb->ip_summed = CHECKSUM_UNNECESSARY;
215 #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
216 skb->proto_csum_blank = 1;
219 skb->ip_summed = CHECKSUM_HW;
223 case OVS_CSUM_COMPLETE:
224 skb->ip_summed = CHECKSUM_COMPLETE;
226 case OVS_CSUM_PARTIAL:
227 skb->ip_summed = CHECKSUM_PARTIAL;
/* csum_start is kept relative to skb->head; subtracting the headroom turns
 * it into the skb->data-relative offset passed to
 * skb_set_transport_header(). */
232 if (get_ip_summed(skb) == OVS_CSUM_PARTIAL)
233 skb_set_transport_header(skb, OVS_CB(skb)->csum_start - skb_headroom(skb));
/* Return the checksum state stored in OVS_CB(skb)->ip_summed (the value
 * last written by set_ip_summed(), i.e. one of the OVS_CSUM_* constants
 * used elsewhere in this file). */
236 u8 get_ip_summed(struct sk_buff *skb)
238 return OVS_CB(skb)->ip_summed;
/* Store @ip_summed in OVS_CB(skb)->ip_summed. This only updates the OVS
 * control-block copy; skb->ip_summed itself is not written here. */
241 void set_ip_summed(struct sk_buff *skb, u8 ip_summed)
243 OVS_CB(skb)->ip_summed = ip_summed;
/* Read back the checksum location for @skb: *csum_start comes from
 * OVS_CB(skb)->csum_start and *csum_offset from skb->csum, which is where
 * set_skb_csum_pointers() stores the offset.
 * NOTE(review): the rest of the parameter list is not visible in this view;
 * the body implies a second output parameter named csum_offset. */
246 void get_skb_csum_pointers(const struct sk_buff *skb, u16 *csum_start,
249 *csum_start = OVS_CB(skb)->csum_start;
250 *csum_offset = skb->csum;
/* Record the checksum location for @skb: @csum_start goes into
 * OVS_CB(skb)->csum_start and @csum_offset is stored in skb->csum, so the
 * previous contents of skb->csum are overwritten. */
253 void set_skb_csum_pointers(struct sk_buff *skb, u16 csum_start, u16 csum_offset)
255 OVS_CB(skb)->csum_start = csum_start;
256 skb->csum = csum_offset;
258 #endif /* NEED_CSUM_NORMALIZE */