#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#include <net/ipv6.h>
+#endif
#include <net/protocol.h>
#include <net/route.h>
#include <net/xfrm.h>
#include "openvswitch/gre.h"
#include "table.h"
#include "vport.h"
+#include "vport-generic.h"
/* The absolute minimum fragment size. Note that there are many other
* definitions of the minimum MTU. */
struct mutable_config *mutable;
};
-struct vport_ops gre_vport_ops;
-
/* Protected by RCU. */
static struct tbl *port_table;
icmph->checksum = csum_fold(nskb->csum);
}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static bool
ipv6_should_icmp(struct sk_buff *skb)
{
+ payload_length,
ipv6h->nexthdr, nskb->csum);
}
+#endif /* IPv6 */
static bool
send_frag_needed(struct vport *vport, const struct mutable_config *mutable,
- struct sk_buff *skb, unsigned int mtu)
+ struct sk_buff *skb, unsigned int mtu, __be32 flow_key)
{
unsigned int eth_hdr_len = ETH_HLEN;
- unsigned int total_length, header_length, payload_length;
+ unsigned int total_length = 0, header_length = 0, payload_length;
struct ethhdr *eh, *old_eh = eth_hdr(skb);
struct sk_buff *nskb;
if (!ipv4_should_icmp(skb))
return true;
- } else {
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
if (mtu < IPV6_MIN_MTU)
return false;
if (!ipv6_should_icmp(skb))
return true;
}
+#endif
+ else
+ return false;
/* Allocate */
if (old_eh->h_proto == htons(ETH_P_8021Q))
header_length = sizeof(struct iphdr) + sizeof(struct icmphdr);
total_length = min_t(unsigned int, header_length +
payload_length, 576);
- } else {
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else {
header_length = sizeof(struct ipv6hdr) +
sizeof(struct icmp6hdr);
total_length = min_t(unsigned int, header_length +
payload_length, IPV6_MIN_MTU);
}
+#endif
+
total_length = min(total_length, mutable->mtu);
payload_length = total_length - header_length;
/* Protocol */
if (skb->protocol == htons(ETH_P_IP))
ipv4_build_icmp(skb, nskb, mtu, payload_length);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else
ipv6_build_icmp(skb, nskb, mtu, payload_length);
+#endif
/* Assume that flow based keys are symmetric with respect to input
* and output and use the key that we were going to put on the
* outgoing packet for the fake received packet. If the keys are
* not symmetric then PMTUD needs to be disabled since we won't have
* any way of synthesizing packets. */
- if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH) {
- if (mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
- OVS_CB(nskb)->tun_id = OVS_CB(skb)->tun_id;
- else
- OVS_CB(nskb)->tun_id = mutable->port_config.out_key;
- }
+ if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH &&
+ mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
+ OVS_CB(nskb)->tun_id = flow_key;
+ compute_ip_summed(nskb, false);
vport_receive(vport, nskb);
return true;
static struct sk_buff *
check_headroom(struct sk_buff *skb, int headroom)
{
- if (skb_headroom(skb) < headroom ||
- (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
- struct sk_buff *nskb = skb_realloc_headroom(skb, headroom);
+ if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
+ struct sk_buff *nskb = skb_realloc_headroom(skb, max(headroom, 64));
if (!nskb) {
kfree_skb(skb);
return ERR_PTR(-ENOMEM);
static int
parse_gre_header(struct iphdr *iph, __be16 *flags, __be32 *key)
{
- __be16 *flagsp = (__be16 *)(iph + 1);
+ /* IP and ICMP protocol handlers check that the IHL is valid. */
+ __be16 *flagsp = (__be16 *)((u8 *)iph + (iph->ihl << 2));
__be16 *protocol = flagsp + 1;
__be32 *options = (__be32 *)(protocol + 1);
int hdr_len;
if (skb->protocol == htons(ETH_P_IP))
inner = ((struct iphdr *)skb_network_header(skb))->tos;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6))
inner = ipv6_get_dsfield((struct ipv6hdr *)skb_network_header(skb));
+#endif
else
inner = 0;
return;
IP_ECN_set_ce((struct iphdr *)(nw_header + skb->data));
- } else if (protocol == htons(ETH_P_IPV6)) {
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (protocol == htons(ETH_P_IPV6)) {
if (unlikely(!pskb_may_pull(skb, nw_header
+ sizeof(struct ipv6hdr))))
return;
IP6_ECN_set_ce((struct ipv6hdr *)(nw_header
+ skb->data));
}
+#endif
}
}
{
if (skb->ip_summed == CHECKSUM_PARTIAL)
return skb_checksum_help(skb);
- else
+ else {
+ skb->ip_summed = CHECKSUM_NONE;
return 0;
+ }
}
-/* Called with rcu_read_lock and bottom-halves disabled. */
+/* Called with rcu_read_lock. */
static void
gre_err(struct sk_buff *skb, u32 info)
{
if (!vport)
return;
- if ((mutable->port_config.flags & GRE_F_IN_CSUM) && !(flags & GRE_CSUM))
+ /* Packets received by this function were previously sent by us, so
+ * any comparisons should be to the output values, not the input.
+ * However, it's not really worth it to have a hash table based on
+ * output keys (especially since ICMP error handling of tunneled packets
+ * isn't that reliable anyway). Therefore, we do a lookup based on the
+ * out key as if it were the in key and then check to see if the input
+ * and output keys are the same. */
+ if (mutable->port_config.in_key != mutable->port_config.out_key)
+ return;
+
+ if (!!(mutable->port_config.flags & GRE_F_IN_KEY_MATCH) !=
+ !!(mutable->port_config.flags & GRE_F_OUT_KEY_ACTION))
+ return;
+
+ if ((mutable->port_config.flags & GRE_F_OUT_CSUM) && !(flags & GRE_CSUM))
return;
- tot_hdr_len = sizeof(struct iphdr) + tunnel_hdr_len;
+ tunnel_hdr_len += iph->ihl << 2;
orig_mac_header = skb_mac_header(skb) - skb->data;
orig_nw_header = skb_network_header(skb) - skb->data;
- skb_set_mac_header(skb, tot_hdr_len);
+ skb_set_mac_header(skb, tunnel_hdr_len);
- tot_hdr_len += ETH_HLEN;
+ tot_hdr_len = tunnel_hdr_len + ETH_HLEN;
skb->protocol = eth_hdr(skb)->h_proto;
if (skb->protocol == htons(ETH_P_8021Q)) {
skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
}
+ skb_set_network_header(skb, tot_hdr_len);
+ mtu -= tot_hdr_len;
+
if (skb->protocol == htons(ETH_P_IP))
tot_hdr_len += sizeof(struct iphdr);
- else if (skb->protocol == htons(ETH_P_IP))
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (skb->protocol == htons(ETH_P_IPV6))
tot_hdr_len += sizeof(struct ipv6hdr);
+#endif
else
goto out;
if (!pskb_may_pull(skb, tot_hdr_len))
goto out;
- skb_set_network_header(skb, tot_hdr_len);
- mtu -= tot_hdr_len;
-
if (skb->protocol == htons(ETH_P_IP)) {
if (mtu < IP_MIN_MTU) {
if (ntohs(ip_hdr(skb)->tot_len) >= IP_MIN_MTU)
goto out;
}
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
if (mtu < IPV6_MIN_MTU) {
unsigned int packet_length = sizeof(struct ipv6hdr) +
ntohs(ipv6_hdr(skb)->payload_len);
goto out;
}
}
+#endif
__pskb_pull(skb, tunnel_hdr_len);
- send_frag_needed(vport, mutable, skb, mtu);
+ send_frag_needed(vport, mutable, skb, mtu, key);
skb_push(skb, tunnel_hdr_len);
out:
skb->protocol = htons(ETH_P_IP);
}
-/* Called with rcu_read_lock and bottom-halves disabled. */
+/* Called with rcu_read_lock. */
static int
gre_rcv(struct sk_buff *skb)
{
OVS_CB(skb)->tun_id = 0;
skb_push(skb, ETH_HLEN);
+ compute_ip_summed(skb, false);
+
vport_receive(vport, skb);
return 0;
if ((old_iph->frag_off & htons(IP_DF)) &&
mtu < ntohs(old_iph->tot_len)) {
- if (send_frag_needed(vport, mutable, skb, mtu))
+ if (send_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
goto error_free;
}
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
unsigned int packet_length = skb->len - ETH_HLEN
- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
frag_off = htons(IP_DF);
if (mtu < packet_length) {
- if (send_frag_needed(vport, mutable, skb, mtu))
+ if (send_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
goto error_free;
}
}
+#endif
skb_reset_transport_header(skb);
new_iph = (struct iphdr *)skb_push(skb, mutable->tunnel_hlen);
create_gre_header(skb, mutable);
+ /* Allow our local IP stack to fragment the outer packet even if the
+ * DF bit is set as a last resort. */
+ skb->local_df = 1;
+
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags = 0;
const struct mutable_config *mutable = rcu_dereference(gre_vport->mutable);
struct iphdr *old_iph;
- struct ipv6hdr *old_ipv6h;
int orig_len;
struct iphdr iph;
struct rtable *rt;
if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
+ sizeof(struct iphdr) - skb->data)))
skb->protocol = 0;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
+ sizeof(struct ipv6hdr) - skb->data)))
skb->protocol = 0;
}
-
+#endif
old_iph = ip_hdr(skb);
- old_ipv6h = ipv6_hdr(skb);
iph.tos = mutable->port_config.tos;
if (mutable->port_config.flags & GRE_F_TOS_INHERIT) {
if (skb->protocol == htons(ETH_P_IP))
iph.tos = old_iph->tos;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6))
iph.tos = ipv6_get_dsfield(ipv6_hdr(skb));
+#endif
}
iph.tos = ecn_encapsulate(iph.tos, skb);
if (mutable->port_config.flags & GRE_F_TTL_INHERIT) {
if (skb->protocol == htons(ETH_P_IP))
iph.ttl = old_iph->ttl;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6))
- iph.ttl = old_ipv6h->hop_limit;
+ iph.ttl = ipv6_hdr(skb)->hop_limit;
+#endif
}
if (!iph.ttl)
iph.ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
if (skb->protocol == htons(ETH_P_IP)) {
iph.frag_off |= old_iph->frag_off & htons(IP_DF);
mtu = max(mtu, IP_MIN_MTU);
-
- } else if (skb->protocol == htons(ETH_P_IPV6))
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (skb->protocol == htons(ETH_P_IPV6))
mtu = max(mtu, IPV6_MIN_MTU);
+#endif
iph.version = 4;
iph.ihl = sizeof(struct iphdr) >> 2;
iph.daddr = rt->rt_dst;
iph.saddr = rt->rt_src;
- /* Allow our local IP stack to fragment the outer packet even if the
- * DF bit is set as a last resort. */
- skb->local_df = 1;
-
nf_reset(skb);
secpath_reset(skb);
skb_dst_drop(skb);
skb_dst_set(skb, &rt->u.dst);
- skb->ip_summed = CHECKSUM_NONE;
/* If we are doing GSO on a pskb it is better to make sure that the
* headroom is correct now. We will only have to copy the portion in
* the segments. This is particularly beneficial on Xen where we get
* lots of GSO pskbs. Conversely, we delay copying if it is just to
* get our own writable clone because GSO may do the copy for us. */
- max_headroom = LL_RESERVED_SPACE(rt->u.dst.dev) + mutable->tunnel_hlen;
+ max_headroom = LL_RESERVED_SPACE(rt->u.dst.dev) + rt->u.dst.header_len
+ + mutable->tunnel_hlen;
+
if (skb_headroom(skb) < max_headroom) {
skb = check_headroom(skb, max_headroom);
if (unlikely(IS_ERR(skb))) {
}
}
+ forward_ip_summed(skb);
vswitch_skb_checksum_setup(skb);
+
skb = handle_gso(skb);
if (unlikely(IS_ERR(skb))) {
vport_record_error(vport, VPORT_E_TX_DROPPED);
goto error;
}
- /* Process GSO segments. Try to do any work on the entire packet that
+ /* Process GSO segments. Try to do any work for the entire packet that
* doesn't involve actually writing to it before this point. */
orig_len = 0;
do {
if (old_vport && old_vport != cur_vport)
return -EEXIST;
+ if (mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
+ mutable->port_config.out_key = 0;
+
mutable->tunnel_hlen = sizeof(struct iphdr) + GRE_HEADER_SECTION;
if (mutable->port_config.flags & GRE_F_OUT_CSUM)
goto error_free_vport;
}
- vport_gen_ether_addr(gre_vport->mutable->eth_addr);
+ vport_gen_rand_ether_addr(gre_vport->mutable->eth_addr);
gre_vport->mutable->mtu = ETH_DATA_LEN;
err = set_config(NULL, gre_vport->mutable, config);
{
struct gre_vport *gre_vport = gre_vport_priv(vport);
struct mutable_config *mutable;
- struct dp_port *dp_port;
mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL);
if (!mutable)
mutable->mtu = mtu;
assign_config_rcu(vport, mutable);
- dp_port = vport_get_dp_port(vport);
- if (dp_port)
- set_internal_devs_mtu(dp_port->dp);
-
return 0;
}
return rcu_dereference(gre_vport->mutable)->eth_addr;
}
-static unsigned
-gre_get_dev_flags(const struct vport *vport)
-{
- return IFF_UP | IFF_RUNNING | IFF_LOWER_UP;
-}
-
-static int
-gre_is_running(const struct vport *vport)
-{
- return 1;
-}
-
-static unsigned char
-gre_get_operstate(const struct vport *vport)
-{
- return IF_OPER_UP;
-}
-
static int
gre_get_mtu(const struct vport *vport)
{
.set_addr = gre_set_addr,
.get_name = gre_get_name,
.get_addr = gre_get_addr,
- .get_dev_flags = gre_get_dev_flags,
- .is_running = gre_is_running,
- .get_operstate = gre_get_operstate,
+ .get_dev_flags = vport_gen_get_dev_flags,
+ .is_running = vport_gen_is_running,
+ .get_operstate = vport_gen_get_operstate,
.get_mtu = gre_get_mtu,
.send = gre_send,
};