/*
- * Copyright (c) 2010, 2011 Nicira Networks.
- * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007-2012 Nicira, Inc.
*
- * Significant portions of this file may be copied from parts of the Linux
- * kernel, by Linus Torvalds and others.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <net/route.h>
#include <net/xfrm.h>
-#include "actions.h"
#include "checksum.h"
#include "datapath.h"
#include "tunnel.h"
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
static struct hh_cache *rt_hh(struct rtable *rt)
{
- struct neighbour *neigh = dst_get_neighbour(&rt->dst);
+ struct neighbour *neigh = dst_get_neighbour_noref(&rt->dst);
if (!neigh || !(neigh->nud_state & NUD_CONNECTED) ||
!neigh->hh.hh_len)
return NULL;
if (!cache)
return;
- flow_put(cache->flow);
+ ovs_flow_put(cache->flow);
ip_rt_put(cache->rt);
kfree(cache);
}
ASSERT_RTNL();
if (ipv4_is_multicast(mutable->key.daddr) && mutable->mlink) {
struct in_device *in_dev;
- in_dev = inetdev_by_index(&init_net, mutable->mlink);
+ in_dev = inetdev_by_index(port_key_get_net(&mutable->key), mutable->mlink);
if (in_dev)
ip_mc_dec_group(in_dev, mutable->key.daddr);
}
return NULL;
}
-struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
- int tunnel_type,
- const struct tnl_mutable_config **mutable)
+struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
+ __be64 key, int tunnel_type,
+ const struct tnl_mutable_config **mutable)
{
struct port_lookup_key lookup;
struct vport *vport;
bool is_multicast = ipv4_is_multicast(saddr);
+ port_key_set_net(&lookup, net);
lookup.saddr = saddr;
lookup.daddr = daddr;
}
/**
- * tnl_rcv - ingress point for generic tunnel code
+ * ovs_tnl_rcv - ingress point for generic tunnel code
*
* @vport: port this packet was received on
* @skb: received packet
* - skb->csum does not include the inner Ethernet header.
* - The layer pointers are undefined.
*/
-void tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
+void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
{
struct ethhdr *eh;
return;
}
- vport_receive(vport, skb);
+ ovs_vport_receive(vport, skb);
}
static bool check_ipv4_address(__be32 addr)
int addr_type;
int payload_off = (u8 *)(old_ipv6h + 1) - skb->data;
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ __be16 frag_off;
/* Check source address is valid. */
addr_type = ipv6_addr_type(&old_ipv6h->saddr);
return false;
/* Don't respond to ICMP error messages. */
- payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr);
+ payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr, &frag_off);
if (payload_off < 0)
return false;
+ payload_length);
ipv6h->nexthdr = NEXTHDR_ICMP;
ipv6h->hop_limit = IPV6_DEFAULT_HOPLIMIT;
- ipv6_addr_copy(&ipv6h->daddr, &old_ipv6h->saddr);
- ipv6_addr_copy(&ipv6h->saddr, &old_ipv6h->daddr);
+ ipv6h->daddr = old_ipv6h->saddr;
+ ipv6h->saddr = old_ipv6h->daddr;
/* ICMPv6 */
icmp6h->icmp6_type = ICMPV6_PKT_TOOBIG;
}
#endif /* IPv6 */
-bool tnl_frag_needed(struct vport *vport,
- const struct tnl_mutable_config *mutable,
- struct sk_buff *skb, unsigned int mtu, __be64 flow_key)
+bool ovs_tnl_frag_needed(struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ struct sk_buff *skb, unsigned int mtu, __be64 flow_key)
{
unsigned int eth_hdr_len = ETH_HLEN;
unsigned int total_length = 0, header_length = 0, payload_length;
return false;
}
- vport_receive(vport, nskb);
+ ovs_vport_receive(vport, nskb);
return true;
}
mtu = max(mtu, IP_MIN_MTU);
if (packet_length > mtu &&
- tnl_frag_needed(vport, mutable, skb, mtu,
- OVS_CB(skb)->tun_id))
+ ovs_tnl_frag_needed(vport, mutable, skb, mtu,
+ OVS_CB(skb)->tun_id))
return false;
}
}
mtu = max(mtu, IPV6_MIN_MTU);
if (packet_length > mtu &&
- tnl_frag_needed(vport, mutable, skb, mtu,
- OVS_CB(skb)->tun_id))
+ ovs_tnl_frag_needed(vport, mutable, skb, mtu,
+ OVS_CB(skb)->tun_id))
return false;
}
}
return (void *)cache + ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN);
}
+#ifdef HAVE_RT_GENID
+static inline int rt_genid(struct net *net)
+{
+ return atomic_read(&net->ipv4.rt_genid); /* atomic read of the ipv4.rt_genid counter of 'net' */
+}
+#endif /* HAVE_RT_GENID */
+
static bool check_cache_valid(const struct tnl_cache *cache,
const struct tnl_mutable_config *mutable)
{
time_before(jiffies, cache->expiration) &&
#endif
#ifdef HAVE_RT_GENID
- atomic_read(&init_net.ipv4.rt_genid) == cache->rt->rt_genid &&
+ rt_genid(dev_net(rt_dst(cache->rt).dev)) == cache->rt->rt_genid &&
#endif
#ifdef HAVE_HH_SEQ
hh->hh_lock.sequence == cache->hh_seq &&
#endif
mutable->seq == cache->mutable_seq &&
- (!is_internal_dev(rt_dst(cache->rt).dev) ||
+ (!ovs_is_internal_dev(rt_dst(cache->rt).dev) ||
(cache->flow && !cache->flow->dead));
}
for (i = 0; i < PORT_TABLE_SIZE; i++) {
struct hlist_node *n;
struct hlist_head *bucket;
- struct tnl_vport *tnl_vport;
+ struct tnl_vport *tnl_vport;
bucket = &port_table[i];
hlist_for_each_entry_rcu(tnl_vport, n, bucket, hash_node)
cache->expiration = jiffies + tnl_vport->cache_exp_interval;
#endif
- if (is_internal_dev(rt_dst(rt).dev)) {
+ if (ovs_is_internal_dev(rt_dst(rt).dev)) {
struct sw_flow_key flow_key;
struct vport *dst_vport;
struct sk_buff *skb;
int flow_key_len;
struct sw_flow *flow;
- dst_vport = internal_dev_get_vport(rt_dst(rt).dev);
+ dst_vport = ovs_internal_dev_get_vport(rt_dst(rt).dev);
if (!dst_vport)
goto done;
__skb_put(skb, cache->len);
memcpy(skb->data, get_cached_header(cache), cache->len);
- err = flow_extract(skb, dst_vport->port_no, &flow_key,
- &flow_key_len);
+ err = ovs_flow_extract(skb, dst_vport->port_no, &flow_key,
+ &flow_key_len);
consume_skb(skb);
if (err)
goto done;
- flow = flow_tbl_lookup(rcu_dereference(dst_vport->dp->table),
- &flow_key, flow_key_len);
+ flow = ovs_flow_tbl_lookup(rcu_dereference(dst_vport->dp->table),
+ &flow_key, flow_key_len);
if (flow) {
cache->flow = flow;
- flow_hold(flow);
+ ovs_flow_hold(flow);
}
}
static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
u8 ipproto, u8 tos)
{
+ /* The tunnel configuration keeps the DSCP part of the TOS bits, but the
+ * Linux routing code expects only the RT_TOS bits, so 'tos' is masked
+ * with RT_TOS() before it is stored in the route lookup key.  The lookup
+ * itself uses the network namespace obtained from the tunnel's port key
+ * (port_key_get_net(&mutable->key)) in place of init_net. */
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
struct flowi fl = { .nl_u = { .ip4_u = {
.daddr = mutable->key.daddr,
.saddr = mutable->key.saddr,
- .tos = tos } },
- .proto = ipproto };
+ .tos = RT_TOS(tos) } },
+ .proto = ipproto };
struct rtable *rt;
- if (unlikely(ip_route_output_key(&init_net, &rt, &fl)))
+ if (unlikely(ip_route_output_key(port_key_get_net(&mutable->key), &rt, &fl)))
return ERR_PTR(-EADDRNOTAVAIL);
return rt;
#else
struct flowi4 fl = { .daddr = mutable->key.daddr,
.saddr = mutable->key.saddr,
- .flowi4_tos = tos,
+ .flowi4_tos = RT_TOS(tos),
.flowi4_proto = ipproto };
- return ip_route_output_key(&init_net, &fl);
+ return ip_route_output_key(port_key_get_net(&mutable->key), &fl);
#endif
}
*cache = NULL;
tos = RT_TOS(tos);
- if (likely(tos == mutable->tos &&
+ if (likely(tos == RT_TOS(mutable->tos) &&
check_cache_valid(cur_cache, mutable))) {
*cache = cur_cache;
return cur_cache->rt;
if (IS_ERR(rt))
return NULL;
- if (likely(tos == mutable->tos))
+ if (likely(tos == RT_TOS(mutable->tos)))
*cache = build_cache(vport, mutable, rt);
return rt;
* dropped so just free the rest. This may help improve the congestion
* that caused the first packet to be dropped.
*/
- tnl_free_linked_skbs(skb);
+ ovs_tnl_free_linked_skbs(skb);
return sent_len;
}
-int tnl_send(struct vport *vport, struct sk_buff *skb)
+int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
else
tos = mutable->tos;
- tos = INET_ECN_encapsulate(tos, inner_tos);
-
/* Route lookup */
rt = find_route(vport, mutable, tos, &cache);
if (unlikely(!rt))
if (unlikely(!cache))
unattached_dst = &rt_dst(rt);
+ tos = INET_ECN_encapsulate(tos, inner_tos);
+
/* Reset SKB */
nf_reset(skb);
secpath_reset(skb);
int orig_len = skb->len - cache->len;
struct vport *cache_vport;
- cache_vport = internal_dev_get_vport(rt_dst(rt).dev);
+ cache_vport = ovs_internal_dev_get_vport(rt_dst(rt).dev);
skb->protocol = htons(ETH_P_IP);
iph = ip_hdr(skb);
iph->tot_len = htons(skb->len - skb_network_offset(skb));
}
OVS_CB(skb)->flow = cache->flow;
- vport_receive(cache_vport, skb);
+ ovs_vport_receive(cache_vport, skb);
sent_len += orig_len;
} else {
int xmit_err;
}
if (unlikely(sent_len == 0))
- vport_record_error(vport, VPORT_E_TX_DROPPED);
+ ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
goto out;
error_free:
- tnl_free_linked_skbs(skb);
+ ovs_tnl_free_linked_skbs(skb);
error:
- vport_record_error(vport, err);
+ ovs_vport_record_error(vport, err);
out:
dst_release(unattached_dst);
return sent_len;
/* Sets OVS_TUNNEL_ATTR_* fields in 'mutable', which must initially be
* zeroed. */
-static int tnl_set_config(struct nlattr *options, const struct tnl_ops *tnl_ops,
+static int tnl_set_config(struct net *net, struct nlattr *options,
+ const struct tnl_ops *tnl_ops,
const struct vport *cur_vport,
struct tnl_mutable_config *mutable)
{
mutable->flags = nla_get_u32(a[OVS_TUNNEL_ATTR_FLAGS]) & TNL_F_PUBLIC;
+ port_key_set_net(&mutable->key, net);
mutable->key.daddr = nla_get_be32(a[OVS_TUNNEL_ATTR_DST_IPV4]);
if (a[OVS_TUNNEL_ATTR_SRC_IPV4]) {
if (ipv4_is_multicast(mutable->key.daddr))
if (a[OVS_TUNNEL_ATTR_TOS]) {
mutable->tos = nla_get_u8(a[OVS_TUNNEL_ATTR_TOS]);
- if (mutable->tos != RT_TOS(mutable->tos))
+ /* Reject ToS config with ECN bits set. */
+ if (mutable->tos & INET_ECN_MASK)
return -EINVAL;
}
return 0;
}
-struct vport *tnl_create(const struct vport_parms *parms,
- const struct vport_ops *vport_ops,
- const struct tnl_ops *tnl_ops)
+struct vport *ovs_tnl_create(const struct vport_parms *parms,
+ const struct vport_ops *vport_ops,
+ const struct tnl_ops *tnl_ops)
{
struct vport *vport;
struct tnl_vport *tnl_vport;
int initial_frag_id;
int err;
- vport = vport_alloc(sizeof(struct tnl_vport), vport_ops, parms);
+ vport = ovs_vport_alloc(sizeof(struct tnl_vport), vport_ops, parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
goto error;
get_random_bytes(&initial_frag_id, sizeof(int));
atomic_set(&tnl_vport->frag_id, initial_frag_id);
- err = tnl_set_config(parms->options, tnl_ops, NULL, mutable);
+ err = tnl_set_config(ovs_dp_get_net(parms->dp), parms->options, tnl_ops,
+ NULL, mutable);
if (err)
goto error_free_mutable;
free_mutable_rtnl(mutable);
kfree(mutable);
error_free_vport:
- vport_free(vport);
+ ovs_vport_free(vport);
error:
return ERR_PTR(err);
}
-int tnl_set_options(struct vport *vport, struct nlattr *options)
+int ovs_tnl_set_options(struct vport *vport, struct nlattr *options)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
const struct tnl_mutable_config *old_mutable;
memcpy(mutable->eth_addr, old_mutable->eth_addr, ETH_ALEN);
/* Parse the others configured by userspace. */
- err = tnl_set_config(options, tnl_vport->tnl_ops, vport, mutable);
+ err = tnl_set_config(ovs_dp_get_net(vport->dp), options, tnl_vport->tnl_ops,
+ vport, mutable);
if (err)
goto error_free;
return err;
}
-int tnl_get_options(const struct vport *vport, struct sk_buff *skb)
+int ovs_tnl_get_options(const struct vport *vport, struct sk_buff *skb)
{
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
const struct tnl_mutable_config *mutable = rcu_dereference_rtnl(tnl_vport->mutable);
- NLA_PUT_U32(skb, OVS_TUNNEL_ATTR_FLAGS, mutable->flags & TNL_F_PUBLIC);
- NLA_PUT_BE32(skb, OVS_TUNNEL_ATTR_DST_IPV4, mutable->key.daddr);
-
- if (!(mutable->flags & TNL_F_IN_KEY_MATCH))
- NLA_PUT_BE64(skb, OVS_TUNNEL_ATTR_IN_KEY, mutable->key.in_key);
- if (!(mutable->flags & TNL_F_OUT_KEY_ACTION))
- NLA_PUT_BE64(skb, OVS_TUNNEL_ATTR_OUT_KEY, mutable->out_key);
- if (mutable->key.saddr)
- NLA_PUT_BE32(skb, OVS_TUNNEL_ATTR_SRC_IPV4, mutable->key.saddr);
- if (mutable->tos)
- NLA_PUT_U8(skb, OVS_TUNNEL_ATTR_TOS, mutable->tos);
- if (mutable->ttl)
- NLA_PUT_U8(skb, OVS_TUNNEL_ATTR_TTL, mutable->ttl);
+ if (nla_put_u32(skb, OVS_TUNNEL_ATTR_FLAGS,
+ mutable->flags & TNL_F_PUBLIC) ||
+ nla_put_be32(skb, OVS_TUNNEL_ATTR_DST_IPV4, mutable->key.daddr))
+ goto nla_put_failure;
+
+ if (!(mutable->flags & TNL_F_IN_KEY_MATCH) &&
+ nla_put_be64(skb, OVS_TUNNEL_ATTR_IN_KEY, mutable->key.in_key))
+ goto nla_put_failure;
+ if (!(mutable->flags & TNL_F_OUT_KEY_ACTION) &&
+ nla_put_be64(skb, OVS_TUNNEL_ATTR_OUT_KEY, mutable->out_key))
+ goto nla_put_failure;
+ if (mutable->key.saddr &&
+ nla_put_be32(skb, OVS_TUNNEL_ATTR_SRC_IPV4, mutable->key.saddr))
+ goto nla_put_failure;
+ if (mutable->tos && nla_put_u8(skb, OVS_TUNNEL_ATTR_TOS, mutable->tos))
+ goto nla_put_failure;
+ if (mutable->ttl && nla_put_u8(skb, OVS_TUNNEL_ATTR_TTL, mutable->ttl))
+ goto nla_put_failure;
return 0;
free_cache((struct tnl_cache __force *)tnl_vport->cache);
kfree((struct tnl_mutable __force *)tnl_vport->mutable);
- vport_free(tnl_vport_to_vport(tnl_vport));
+ ovs_vport_free(tnl_vport_to_vport(tnl_vport));
}
-void tnl_destroy(struct vport *vport)
+void ovs_tnl_destroy(struct vport *vport)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct tnl_mutable_config *mutable;
call_rcu(&tnl_vport->rcu, free_port_rcu);
}
-int tnl_set_addr(struct vport *vport, const unsigned char *addr)
+int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct tnl_mutable_config *old_mutable, *mutable;
return 0;
}
-const char *tnl_get_name(const struct vport *vport)
+const char *ovs_tnl_get_name(const struct vport *vport)
{
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
return tnl_vport->name;
}
-const unsigned char *tnl_get_addr(const struct vport *vport)
+const unsigned char *ovs_tnl_get_addr(const struct vport *vport)
{
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
return rcu_dereference_rtnl(tnl_vport->mutable)->eth_addr;
}
-void tnl_free_linked_skbs(struct sk_buff *skb)
+void ovs_tnl_free_linked_skbs(struct sk_buff *skb)
{
while (skb) {
struct sk_buff *next = skb->next;
}
}
-int tnl_init(void)
+int ovs_tnl_init(void)
{
int i;
port_table = kmalloc(PORT_TABLE_SIZE * sizeof(struct hlist_head *),
- GFP_KERNEL);
+ GFP_KERNEL);
if (!port_table)
return -ENOMEM;
return 0;
}
-void tnl_exit(void)
+void ovs_tnl_exit(void)
{
- int i;
-
- for (i = 0; i < PORT_TABLE_SIZE; i++) {
- struct tnl_vport *tnl_vport;
- struct hlist_head *hash_head;
- struct hlist_node *n;
-
- hash_head = &port_table[i];
- hlist_for_each_entry(tnl_vport, n, hash_head, hash_node) {
- BUG();
- goto out;
- }
- }
-out:
kfree(port_table);
}