#include "datapath.h"
#include "table.h"
#include "tunnel.h"
+#include "vlan.h"
#include "vport.h"
#include "vport-generic.h"
#include "vport-internal_dev.h"
return tnl_vport_to_vport(tnl_vport_table_cast(tbl_node));
}
-static inline void ecn_decapsulate(struct sk_buff *skb)
+static void ecn_decapsulate(struct sk_buff *skb, u8 tos)
{
- /* This is accessing the outer IP header of the tunnel, which we've
- * already validated to be OK. skb->data is currently set to the start
- * of the inner Ethernet header, and we've validated ETH_HLEN.
- */
- if (unlikely(INET_ECN_is_ce(ip_hdr(skb)->tos))) {
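+ /* @tos is the ToS byte of the outer IP header, saved by the caller
+ * before the outer headers were stripped.
+ */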
+ if (unlikely(INET_ECN_is_ce(tos))) {
__be16 protocol = skb->protocol;
skb_set_network_header(skb, ETH_HLEN);
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ if (protocol == htons(ETH_P_8021Q)) {
if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
return;
}
}
-/* Called with rcu_read_lock. */
-void tnl_rcv(struct vport *vport, struct sk_buff *skb)
+/**
+ * tnl_rcv - ingress point for generic tunnel code
+ *
+ * @vport: port this packet was received on
+ * @skb: received packet
+ * @tos: ToS from encapsulating IP packet, used to copy ECN bits
+ *
+ * Must be called with rcu_read_lock.
+ *
+ * Packets received by this function are in the following state:
+ * - skb->data points to the inner Ethernet header.
+ * - The inner Ethernet header is in the linear data area.
+ * - skb->csum does not include the inner Ethernet header.
+ * - The layer pointers are undefined.
+ */
+void tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
{
- /* Packets received by this function are in the following state:
- * - skb->data points to the inner Ethernet header.
- * - The inner Ethernet header is in the linear data area.
- * - skb->csum does not include the inner Ethernet header.
- * - The layer pointers point at the outer headers.
- */
- struct ethhdr *eh = (struct ethhdr *)skb->data;
+ struct ethhdr *eh;
+
+ skb_reset_mac_header(skb);
+ eh = eth_hdr(skb);
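+ /* h_proto values of 1536 (0x0600) and up are EtherTypes; anything
+ * smaller is an 802.3 length field, not a protocol.
+ */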
if (likely(ntohs(eh->h_proto) >= 1536))
skb->protocol = eh->h_proto;
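+ /* Scrub state that was attached to the outer, encapsulating packet. */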
skb_dst_drop(skb);
nf_reset(skb);
+ skb_clear_rxhash(skb);
secpath_reset(skb);
- ecn_decapsulate(skb);
+ ecn_decapsulate(skb, tos);
compute_ip_summed(skb, false);
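+ /* Any hardware-accelerated VLAN tag on this skb described the outer
+ * frame, so clear it rather than let it apply to the inner packet.
+ */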
+ vlan_set_tci(skb, 0);
vport_receive(vport, skb);
}
}
#endif
- total_length = min(total_length, mutable->mtu);
payload_length = total_length - header_length;
nskb = dev_alloc_skb(NET_IP_ALIGN + eth_hdr_len + header_length +
vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
vh->h_vlan_encapsulated_proto = skb->protocol;
- }
+ } else {
+ vlan_set_tci(nskb, vlan_get_tci(skb));
+ }
skb_reset_mac_header(nskb);
/* Protocol */
{
bool pmtud = mutable->flags & TNL_F_PMTUD;
__be16 frag_off = 0;
- int mtu;
+ int mtu = 0;
+ unsigned int packet_length = skb->len - ETH_HLEN;
+
+ /* Allow for one level of tagging in the packet length. */
+ if (!vlan_tx_tag_present(skb) &&
+ eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+ packet_length -= VLAN_HLEN;
if (pmtud) {
+ int vlan_header = 0;
+
frag_off = htons(IP_DF);
+ /* The tag needs to go into the packet itself regardless of
+ * where it currently is, so subtract it from the MTU.
+ */
+ if (vlan_tx_tag_present(skb) ||
+ eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+ vlan_header = VLAN_HLEN;
+
mtu = dst_mtu(&rt_dst(rt))
- ETH_HLEN
- mutable->tunnel_hlen
- - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ?
- VLAN_HLEN : 0);
+ - vlan_header;
}
if (skb->protocol == htons(ETH_P_IP)) {
if (pmtud && iph->frag_off & htons(IP_DF)) {
mtu = max(mtu, IP_MIN_MTU);
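+ /* Compare against the L2 payload length computed above rather
+ * than tot_len, so one level of tagging is accounted for the
+ * same way as in the MTU calculation.
+ */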
- if (ntohs(iph->tot_len) > mtu &&
+ if (packet_length > mtu &&
tnl_frag_needed(vport, mutable, skb, mtu,
OVS_CB(skb)->tun_id))
return false;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6)) {
- unsigned int packet_length = skb->len - ETH_HLEN
- - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ?
- VLAN_HLEN : 0);
-
/* IPv6 requires PMTUD if the packet is above the minimum MTU. */
if (packet_length > IPV6_MIN_MTU)
frag_off = htons(IP_DF);
iph->saddr = rt->rt_src;
iph->ttl = mutable->ttl;
if (!iph->ttl)
- iph->ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);
+ iph->ttl = ip4_dst_hoplimit(&rt_dst(rt));
tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1);
}
* change them from underneath us and we can skip the linearization.
*/
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- if (unlikely(page_count(skb_shinfo(skb)->frags[0].page) > 1))
+ if (unlikely(page_count(skb_shinfo(skb)->frags[i].page) > 1))
return true;
return false;
goto error_free;
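+ /* An out-of-band VLAN tag is pushed back into the packet data
+ * before transmission, so reserve headroom for it as well.
+ */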
min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
- + mutable->tunnel_hlen;
+ + mutable->tunnel_hlen
+ + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
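+ /* Fix up the headroom once, up front: GSO preserves headroom when
+ * it creates the segments, so the copy happens at most once.
+ */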
+ skb = check_headroom(skb, min_headroom);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ goto error;
+ }
if (skb_is_gso(skb)) {
struct sk_buff *nskb;
- /*
- * If we are doing GSO on a pskb it is better to make sure that
- * the headroom is correct now. We will only have to copy the
- * portion in the linear data area and GSO will preserve
- * headroom when it creates the segments. This is particularly
- * beneficial on Xen where we get a lot of GSO pskbs.
- * Conversely, we avoid copying if it is just to get our own
- * writable clone because GSO will do the copy for us.
- */
- if (skb_headroom(skb) < min_headroom) {
- skb = check_headroom(skb, min_headroom);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto error;
- }
- }
-
nskb = skb_gso_segment(skb, 0);
kfree_skb(skb);
if (IS_ERR(nskb)) {
}
skb = nskb;
- } else {
- skb = check_headroom(skb, min_headroom);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto error;
- }
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- /*
- * Pages aren't locked and could change at any time.
- * If this happens after we compute the checksum, the
- * checksum will be wrong. We linearize now to avoid
- * this problem.
- */
- if (unlikely(need_linearize(skb))) {
- err = __skb_linearize(skb);
- if (unlikely(err))
- goto error_free;
- }
-
- err = skb_checksum_help(skb);
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ /* Pages aren't locked and could change at any time.
+ * If this happens after we compute the checksum, the
+ * checksum will be wrong. We linearize now to avoid
+ * this problem.
+ */
+ if (unlikely(need_linearize(skb))) {
+ err = __skb_linearize(skb);
if (unlikely(err))
goto error_free;
- } else if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_NONE;
- }
+ }
+
+ err = skb_checksum_help(skb);
+ if (unlikely(err))
+ goto error_free;
+ } else if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
return skb;
const struct tnl_mutable_config *mutable)
{
int sent_len;
- int err;
sent_len = 0;
while (skb) {
struct sk_buff *next = skb->next;
int frag_len = skb->len - mutable->tunnel_hlen;
+ int err;
skb->next = NULL;
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
err = ip_local_out(skb);
- if (likely(net_xmit_eval(err) == 0))
- sent_len += frag_len;
- else {
- skb = next;
- goto free_frags;
- }
-
skb = next;
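+ /* net_xmit_eval() maps NET_XMIT_CN (congestion) to success. */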
+ if (unlikely(net_xmit_eval(err)))
+ goto free_frags;
+ sent_len += frag_len;
}
return sent_len;
u8 tos;
/* Validate the protocol headers before we try to use them. */
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ if (skb->protocol == htons(ETH_P_8021Q) &&
+ !vlan_tx_tag_present(skb)) {
if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
goto error_free;
nf_reset(skb);
secpath_reset(skb);
skb_dst_drop(skb);
+ skb_clear_rxhash(skb);
/* Offloading */
skb = handle_offloads(skb, mutable, rt);
/* TTL */
ttl = mutable->ttl;
if (!ttl)
- ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);
+ ttl = ip4_dst_hoplimit(&rt_dst(rt));
if (mutable->flags & TNL_F_TTL_INHERIT) {
if (skb->protocol == htons(ETH_P_IP))
struct sk_buff *next_skb = skb->next;
skb->next = NULL;
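+ /* Merge any out-of-band VLAN tag into the packet data before
+ * prepending headers in front of it.
+ */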
+ if (unlikely(vlan_deaccel_tag(skb)))
+ goto next;
+
if (likely(cache)) {
skb_push(skb, cache->len);
memcpy(skb->data, get_cached_header(cache), cache->len);
error_free:
tnl_free_linked_skbs(skb);
error:
- dst_release(unattached_dst);
vport_record_error(vport, err);
out:
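+ /* The unattached route is now released on every exit path,
+ * success as well as error.
+ */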
+ dst_release(unattached_dst);
return sent_len;
}
if (a[ODP_TUNNEL_ATTR_TTL])
mutable->ttl = nla_get_u8(a[ODP_TUNNEL_ATTR_TTL]);
- mutable->tunnel_hlen = tnl_ops->hdr_len(mutable);
- if (mutable->tunnel_hlen < 0)
- return mutable->tunnel_hlen;
-
- mutable->tunnel_hlen += sizeof(struct iphdr);
-
mutable->tunnel_type = tnl_ops->tunnel_type;
if (!a[ODP_TUNNEL_ATTR_IN_KEY]) {
mutable->tunnel_type |= TNL_T_KEY_MATCH;
else
mutable->out_key = nla_get_be64(a[ODP_TUNNEL_ATTR_OUT_KEY]);
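+ /* hdr_len() can depend on the key configuration parsed above (a
+ * GRE key, for example, lengthens the header), so only compute the
+ * header length once the keys are known.
+ */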
+ mutable->tunnel_hlen = tnl_ops->hdr_len(mutable);
+ if (mutable->tunnel_hlen < 0)
+ return mutable->tunnel_hlen;
+
+ mutable->tunnel_hlen += sizeof(struct iphdr);
+
old_vport = tnl_find_port(mutable->saddr, mutable->daddr,
mutable->in_key, mutable->tunnel_type,
&old_mutable);
}
vport_gen_rand_ether_addr(mutable->eth_addr);
- mutable->mtu = ETH_DATA_LEN;
get_random_bytes(&initial_frag_id, sizeof(int));
atomic_set(&tnl_vport->frag_id, initial_frag_id);
old_mutable = rtnl_dereference(tnl_vport->mutable);
mutable->seq = old_mutable->seq + 1;
memcpy(mutable->eth_addr, old_mutable->eth_addr, ETH_ALEN);
- mutable->mtu = old_mutable->mtu;
/* Parse the others configured by userspace. */
err = tnl_set_config(options, tnl_vport->tnl_ops, vport, mutable);
return 0;
}
-int tnl_set_mtu(struct vport *vport, int mtu)
-{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- struct tnl_mutable_config *mutable;
-
- mutable = kmemdup(rtnl_dereference(tnl_vport->mutable),
- sizeof(struct tnl_mutable_config), GFP_KERNEL);
- if (!mutable)
- return -ENOMEM;
-
- mutable->mtu = mtu;
- assign_config_rcu(vport, mutable);
-
- return 0;
-}
-
int tnl_set_addr(struct vport *vport, const unsigned char *addr)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
return rcu_dereference_rtnl(tnl_vport->mutable)->eth_addr;
}
-int tnl_get_mtu(const struct vport *vport)
-{
- const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- return rcu_dereference_rtnl(tnl_vport->mutable)->mtu;
-}
-
void tnl_free_linked_skbs(struct sk_buff *skb)
{
- if (unlikely(!skb))
- return;
-
while (skb) {
struct sk_buff *next = skb->next;
kfree_skb(skb);