/*
- * Copyright (c) 2010, 2011 Nicira Networks.
+ * Copyright (c) 2007-2012 Nicira, Inc.
* Distributed under the terms of the GNU GPL version 2.
*
* Significant portions of this file may be copied from parts of the Linux
#include <linux/ip.h>
#include <linux/list.h>
#include <linux/net.h>
+#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_frag.h>
#include <net/protocol.h>
#include <net/udp.h>
+#include "datapath.h"
#include "tunnel.h"
#include "vport.h"
#include "vport-generic.h"
#define CAPWAP_FRAG_TIMEOUT (30 * HZ)
#define CAPWAP_FRAG_MAX_MEM (256 * 1024)
-#define CAPWAP_FRAG_PRUNE_MEM (192 *1024)
+#define CAPWAP_FRAG_PRUNE_MEM (192 * 1024)
#define CAPWAP_FRAG_SECRET_INTERVAL (10 * 60 * HZ)
/*
* statically create them and we can do very fast parsing by checking all 12
* fields in one go.
*/
-#define CAPWAP_BEGIN_HLEN __cpu_to_be32(0x00100000)
-#define CAPWAP_BEGIN_WBID __cpu_to_be32(0x00000200)
-#define CAPWAP_BEGIN_FRAG __cpu_to_be32(0x00000080)
-#define CAPWAP_BEGIN_LAST __cpu_to_be32(0x00000040)
-
-#define NO_FRAG_HDR (CAPWAP_BEGIN_HLEN | CAPWAP_BEGIN_WBID)
-#define FRAG_HDR (NO_FRAG_HDR | CAPWAP_BEGIN_FRAG)
-#define FRAG_LAST_HDR (FRAG_HDR | CAPWAP_BEGIN_LAST)
+#define CAPWAP_PREAMBLE_MASK __cpu_to_be32(0xFF000000) /* top 8 bits of 'begin'; must be zero (see CAPWAP_ZERO_MASK) */
+#define CAPWAP_HLEN_SHIFT 17 /* HLEN starts at bit 19 and counts 4-byte words, so shifting by 17 converts the field to/from bytes */
+#define CAPWAP_HLEN_MASK __cpu_to_be32(0x00F80000) /* 5-bit header length field */
+#define CAPWAP_RID_MASK __cpu_to_be32(0x0007C000) /* 5-bit RID field */
+#define CAPWAP_WBID_MASK __cpu_to_be32(0x00003E00) /* 5-bit WBID field */
+#define CAPWAP_F_MASK __cpu_to_be32(0x000001FF) /* the nine low flag bits */
+
+#define CAPWAP_F_FRAG __cpu_to_be32(0x00000080) /* packet is a fragment and goes through defrag() on receive */
+#define CAPWAP_F_LASTFRAG __cpu_to_be32(0x00000040) /* passed to defrag() as frag_last */
+#define CAPWAP_F_WSI __cpu_to_be32(0x00000020) /* header carries a WSI field (struct capwaphdr_wsi) */
+#define CAPWAP_F_RMAC __cpu_to_be32(0x00000010) /* CAPWAP_RMAC_LEN extra bytes sit between the fixed header and the WSI field */
+
+#define CAPWAP_RMAC_LEN 4 /* bytes skipped before the WSI header when CAPWAP_F_RMAC is set */
+
+/* Standard CAPWAP looks for a WBID value of 2.
+ * When we insert the WSI field, we use a WBID value of 30, which has been
+ * proposed for all "experimental" usage, i.e. for users with no reserved
+ * WBID value of their own.
+ */
+#define CAPWAP_WBID_30 __cpu_to_be32(0x00003C00)
+#define CAPWAP_WBID_2 __cpu_to_be32(0x00000200)
+
+#define FRAG_HDR (CAPWAP_F_FRAG)
+#define FRAG_LAST_HDR (FRAG_HDR | CAPWAP_F_LASTFRAG)
+
+/* Keyed packet, WBID 30, and length long enough to include WSI key */
+#define CAPWAP_KEYED (CAPWAP_WBID_30 | CAPWAP_F_WSI | htonl(20 << CAPWAP_HLEN_SHIFT))
+/* A backward-compatible packet, WBID 2 and length of 2 words (no WSI fields) */
+#define CAPWAP_NO_WSI (CAPWAP_WBID_2 | htonl(8 << CAPWAP_HLEN_SHIFT))
+
+/* Mask for all parts of header that must be 0. */
+#define CAPWAP_ZERO_MASK (CAPWAP_PREAMBLE_MASK | \
+ (CAPWAP_F_MASK ^ (CAPWAP_F_WSI | CAPWAP_F_FRAG | CAPWAP_F_LASTFRAG | CAPWAP_F_RMAC)))
struct capwaphdr {
__be32 begin;
__be16 frag_id;
+ /* low 3 bits of frag_off are reserved */
__be16 frag_off;
};
-static inline struct capwaphdr *capwap_hdr(const struct sk_buff *skb)
+/*
+ * We use the WSI field to hold additional tunnel data.
+ * The first eight bits store the size of the wsi data in bytes.
+ */
+struct capwaphdr_wsi {
+ u8 wsi_len; /* bytes of WSI data following this byte; the length byte itself is not counted */
+ u8 flags; /* CAPWAP_WSI_F_* bits describing what the WSI data holds */
+ __be16 reserved_padding; /* written as 0 on transmit; not examined on receive */
+};
+
+struct capwaphdr_wsi_key {
+ __be64 key; /* 64-bit tunnel key, big-endian; sits directly after struct capwaphdr_wsi when CAPWAP_WSI_F_KEY64 is set */
+};
+
+/* Flag indicating a 64bit key is stored in WSI data field */
+#define CAPWAP_WSI_F_KEY64 0x80
+
+static struct capwaphdr *capwap_hdr(const struct sk_buff *skb)
{
return (struct capwaphdr *)(udp_hdr(skb) + 1);
}
*/
#define FRAG_OFF_MASK (~0x7U)
-#define CAPWAP_HLEN (sizeof(struct udphdr) + sizeof(struct capwaphdr))
+/*
+ * The minimum header length. The header may be longer if the optional
+ * WSI field is used.
+ */
+#define CAPWAP_MIN_HLEN (sizeof(struct udphdr) + sizeof(struct capwaphdr))
struct frag_match {
__be32 saddr;
#define FRAG_CB(skb) ((struct frag_skb_cb *)(skb)->cb)
static struct sk_buff *fragment(struct sk_buff *, const struct vport *,
- struct dst_entry *);
-static void defrag_init(void);
-static void defrag_exit(void);
+ struct dst_entry *dst, unsigned int hlen);
static struct sk_buff *defrag(struct sk_buff *, bool frag_last);
static void capwap_frag_init(struct inet_frag_queue *, void *match);
.frag_expire = capwap_frag_expire,
.secret_interval = CAPWAP_FRAG_SECRET_INTERVAL,
};
-static struct netns_frags frag_netns_state = {
- .timeout = CAPWAP_FRAG_TIMEOUT,
- .high_thresh = CAPWAP_FRAG_MAX_MEM,
- .low_thresh = CAPWAP_FRAG_PRUNE_MEM,
-};
-
-static struct socket *capwap_rcv_socket;
-static int capwap_hdr_len(const struct tnl_mutable_config *mutable)
+static int capwap_hdr_len(const struct tnl_mutable_config *mutable,
+ const struct ovs_key_ipv4_tunnel *tun_key) /* returns the UDP + CAPWAP header size in bytes for this packet, or -EINVAL */
{
+ int size = CAPWAP_MIN_HLEN; /* UDP header plus the fixed CAPWAP header */
+ u32 flags;
+ __be64 out_key;
+
+ tnl_get_param(mutable, tun_key, &flags, &out_key); /* fills flags and out_key; presumably merges port config with the per-packet tunnel key - confirm in tunnel.h */
+
/* CAPWAP has no checksums. */
- if (mutable->flags & TNL_F_CSUM)
+ if (flags & TNL_F_CSUM)
return -EINVAL;
- /* CAPWAP has no keys, so check that the configuration for keys is the
- * default if no key-specific attributes are used.
- */
- if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) !=
- (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION))
- return -EINVAL;
+ /* if keys are specified, then add WSI field */
+ if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) { /* same condition capwap_build_header() uses to emit the WSI field */
+ size += sizeof(struct capwaphdr_wsi) +
+ sizeof(struct capwaphdr_wsi_key);
+ }
- return CAPWAP_HLEN;
+ return size;
}
-static void capwap_build_header(const struct vport *vport,
- const struct tnl_mutable_config *mutable,
- void *header)
+/*
+ * Fill in the UDP and CAPWAP headers of an outgoing packet, in place, at the
+ * skb's transport header.
+ *
+ * When a key is to be sent (non-zero out_key, or TNL_F_OUT_KEY_ACTION set)
+ * the header is CAPWAP_KEYED and carries a WSI field holding the 64-bit key.
+ * Otherwise the backward-compatible CAPWAP_NO_WSI header is written.
+ *
+ * If the finished packet is larger than the route MTU it is handed to
+ * fragment().  Returns the skb to transmit, i.e. either 'skb' itself or
+ * whatever fragment() returned.
+ *
+ * 'vport' is only forwarded to fragment(); 'tunnel_hlen' is not used here.
+ * NOTE(review): assumes the caller already reserved capwap_hdr_len() bytes
+ * of header space in 'skb' - confirm against the tunnel send path.
+ */
+static struct sk_buff *capwap_build_header(const struct vport *vport,
+					   const struct tnl_mutable_config *mutable,
+					   struct dst_entry *dst,
+					   struct sk_buff *skb,
+					   int tunnel_hlen)
{
- struct udphdr *udph = header;
+	struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
+	struct udphdr *udph = udp_hdr(skb);
struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
+	u32 flags;
+	__be64 out_key;
+
+	tnl_get_param(mutable, tun_key, &flags, &out_key);
udph->source = htons(CAPWAP_SRC_PORT);
udph->dest = htons(CAPWAP_DST_PORT);
udph->check = 0;
- cwh->begin = NO_FRAG_HDR;
cwh->frag_id = 0;
cwh->frag_off = 0;
-}
-static struct sk_buff *capwap_update_header(const struct vport *vport,
- const struct tnl_mutable_config *mutable,
- struct dst_entry *dst,
- struct sk_buff *skb)
-{
- struct udphdr *udph = udp_hdr(skb);
+	if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) {
+		/* first field in WSI is key */
+		struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);
+		struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);
+
+		cwh->begin = CAPWAP_KEYED;
+
+		/* -1 for wsi_len byte, not included in length as per spec */
+		wsi->wsi_len = sizeof(struct capwaphdr_wsi) - 1
+			+ sizeof(struct capwaphdr_wsi_key);
+		wsi->flags = CAPWAP_WSI_F_KEY64;
+		wsi->reserved_padding = 0;
+
+		/*
+		 * Always store the key, even when it is 0.  The key slot is
+		 * part of the header whenever this branch is taken, and
+		 * CAPWAP_WSI_F_KEY64 tells the receiver to read it, so it
+		 * must never be left holding stale buffer contents.
+		 */
+		opt->key = out_key;
+	} else {
+		/* make packet readable by old capwap code */
+		cwh->begin = CAPWAP_NO_WSI;
+	}
udph->len = htons(skb->len - skb_transport_offset(skb));
- if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst)))
- skb = fragment(skb, vport, dst);
+	if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) {
+		/* offset of the payload: everything up to and including the CAPWAP header */
+		unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable, tun_key);
+		skb = fragment(skb, vport, dst, hlen);
+	}
return skb;
}
-static inline struct sk_buff *process_capwap_proto(struct sk_buff *skb)
+static int process_capwap_wsi(struct sk_buff *skb, __be64 *key, bool *key_present) /* parses the WSI field; returns 0 on success or -EINVAL when the header lengths are inconsistent */
{
struct capwaphdr *cwh = capwap_hdr(skb);
+ struct capwaphdr_wsi *wsi;
+ int hdr_len;
+ int rmac_len = 0;
+ int wsi_len;
- if (likely(cwh->begin == NO_FRAG_HDR))
- return skb;
- else if (cwh->begin == FRAG_HDR)
- return defrag(skb, false);
- else if (cwh->begin == FRAG_LAST_HDR)
- return defrag(skb, true);
- else {
- if (net_ratelimit())
- pr_warn("unparsable packet receive on capwap socket\n");
+ if (((cwh->begin & CAPWAP_WBID_MASK) != CAPWAP_WBID_30)) /* only WBID 30 headers use our WSI layout */
+ return 0; /* accepted as-is; *key and *key_present are left untouched */
- kfree_skb(skb);
- return NULL;
+ if (cwh->begin & CAPWAP_F_RMAC)
+ rmac_len = CAPWAP_RMAC_LEN;
+
+ hdr_len = ntohl(cwh->begin & CAPWAP_HLEN_MASK) >> CAPWAP_HLEN_SHIFT; /* CAPWAP header length in bytes */
+
+ if (unlikely(sizeof(struct capwaphdr) + rmac_len + sizeof(struct capwaphdr_wsi) > hdr_len)) /* header too short to even hold the WSI header */
+ return -EINVAL;
+
+ /* read wsi header to find out how big it really is */
+ wsi = (struct capwaphdr_wsi *)((u8 *)(cwh + 1) + rmac_len);
+ /* +1 for length byte not included in wsi_len */
+ wsi_len = 1 + wsi->wsi_len;
+
+ if (unlikely(sizeof(struct capwaphdr) + rmac_len + wsi_len != hdr_len)) /* the WSI field must account for exactly the rest of the header */
+ return -EINVAL;
+
+ wsi_len -= sizeof(struct capwaphdr_wsi); /* now the number of bytes after the WSI header */
+
+ if (wsi->flags & CAPWAP_WSI_F_KEY64) {
+ struct capwaphdr_wsi_key *opt;
+
+ if (unlikely(wsi_len < sizeof(struct capwaphdr_wsi_key))) /* flag claims a key but there is no room for one */
+ return -EINVAL;
+
+ opt = (struct capwaphdr_wsi_key *)(wsi + 1);
+ *key = opt->key; /* copied without byte-swapping; stays __be64 */
+ *key_present = true;
+ } else {
+ *key_present = false;
+ }
+
+ return 0;
+}
+
+static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key, bool *key_present) /* validates and strips the UDP + CAPWAP header; returns the skb to deliver, or NULL */
+{
+ struct capwaphdr *cwh = capwap_hdr(skb);
+ int hdr_len = sizeof(struct udphdr);
+
+ if (unlikely((cwh->begin & CAPWAP_ZERO_MASK) != 0)) /* a bit that must be zero is set */
+ goto error;
+
+ hdr_len += ntohl(cwh->begin & CAPWAP_HLEN_MASK) >> CAPWAP_HLEN_SHIFT; /* HLEN covers the CAPWAP header only, so add it to the UDP header size */
+ if (unlikely(hdr_len < CAPWAP_MIN_HLEN))
+ goto error;
+
+ if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN))) /* need the full tunnel header plus an inner Ethernet header */
+ goto error;
+
+ cwh = capwap_hdr(skb); /* re-read the header pointer after pskb_may_pull() */
+ __skb_pull(skb, hdr_len);
+ skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);
+
+ if (cwh->begin & CAPWAP_F_FRAG) {
+ skb = defrag(skb, (__force bool)(cwh->begin & CAPWAP_F_LASTFRAG));
+ if (!skb)
+ return NULL; /* nothing to deliver; the skb is not freed here - presumably defrag() kept or freed it, confirm */
+ cwh = capwap_hdr(skb); /* defrag() may have returned a different skb */
}
+
+ if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key, key_present)) /* a non-zero return means a malformed WSI field */
+ goto error;
+
+ return skb;
+error:
+ kfree_skb(skb);
+ return NULL;
}
/* Called with rcu_read_lock and BH disabled. */
struct vport *vport;
const struct tnl_mutable_config *mutable;
struct iphdr *iph;
+ struct ovs_key_ipv4_tunnel tun_key;
+ __be64 key = 0;
+ bool key_present = false;
- if (unlikely(!pskb_may_pull(skb, CAPWAP_HLEN + ETH_HLEN)))
+ if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN)))
goto error;
- __skb_pull(skb, CAPWAP_HLEN);
- skb_postpull_rcsum(skb, skb_transport_header(skb), CAPWAP_HLEN + ETH_HLEN);
-
- skb = process_capwap_proto(skb);
+ skb = process_capwap_proto(skb, &key, &key_present);
if (unlikely(!skb))
goto out;
iph = ip_hdr(skb);
- vport = tnl_find_port(iph->daddr, iph->saddr, 0,
- TNL_T_PROTO_CAPWAP | TNL_T_KEY_EXACT, &mutable);
+ vport = ovs_tnl_find_port(sock_net(sk), iph->daddr, iph->saddr, key,
+ TNL_T_PROTO_CAPWAP, &mutable);
if (unlikely(!vport)) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
goto error;
}
- tnl_rcv(vport, skb, iph->tos);
+ if (key_present && mutable->key.daddr &&
+ !(mutable->flags & TNL_F_IN_KEY_MATCH)) {
+ key_present = false;
+ key = 0;
+ }
+
+ tnl_tun_key_init(&tun_key, iph, key, key_present ? OVS_TNL_F_KEY : 0);
+ OVS_CB(skb)->tun_key = &tun_key;
+
+ ovs_tnl_rcv(vport, skb);
goto out;
error:
.ipproto = IPPROTO_UDP,
.hdr_len = capwap_hdr_len,
.build_header = capwap_build_header,
- .update_header = capwap_update_header,
};
-static struct vport *capwap_create(const struct vport_parms *parms)
+static inline struct capwap_net *ovs_get_capwap_net(struct net *net) /* returns the CAPWAP state kept for network namespace 'net' */
{
- return tnl_create(parms, &capwap_vport_ops, &capwap_tnl_ops);
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id); /* per-namespace OVS data, looked up by ovs_net_id */
+ return &ovs_net->vport_net.capwap; /* CAPWAP state is embedded in it */
}
-/* Random value. Irrelevant as long as it's not 0 since we set the handler. */
+/* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */
#define UDP_ENCAP_CAPWAP 10
-static int capwap_init(void)
+static int init_socket(struct net *net) /* takes one reference on the namespace's CAPWAP receive socket, creating it on first use; returns 0 or a negative errno */
{
int err;
+ struct capwap_net *capwap_net = ovs_get_capwap_net(net);
struct sockaddr_in sin;
- err = sock_create(AF_INET, SOCK_DGRAM, 0, &capwap_rcv_socket);
+ if (capwap_net->n_tunnels) { /* socket already exists for this namespace: just count the new user */
+ capwap_net->n_tunnels++;
+ return 0;
+ }
+
+ err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
+ &capwap_net->capwap_rcv_socket);
if (err)
goto error;
+ /* release net ref. */
+ sk_change_net(capwap_net->capwap_rcv_socket->sk, net);
+
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl(INADDR_ANY);
sin.sin_port = htons(CAPWAP_DST_PORT);
- err = kernel_bind(capwap_rcv_socket, (struct sockaddr *)&sin,
+ err = kernel_bind(capwap_net->capwap_rcv_socket,
+ (struct sockaddr *)&sin,
sizeof(struct sockaddr_in));
if (err)
goto error_sock;
- udp_sk(capwap_rcv_socket->sk)->encap_type = UDP_ENCAP_CAPWAP;
- udp_sk(capwap_rcv_socket->sk)->encap_rcv = capwap_rcv;
+ udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_type = UDP_ENCAP_CAPWAP;
+ udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_rcv = capwap_rcv; /* packets arriving on this socket are handed to capwap_rcv() */
- defrag_init();
+ capwap_net->frag_state.timeout = CAPWAP_FRAG_TIMEOUT; /* reassembly limits are now kept per namespace */
+ capwap_net->frag_state.high_thresh = CAPWAP_FRAG_MAX_MEM;
+ capwap_net->frag_state.low_thresh = CAPWAP_FRAG_PRUNE_MEM;
+ inet_frags_init_net(&capwap_net->frag_state);
+ udp_encap_enable();
+ capwap_net->n_tunnels++; /* first user of the freshly created socket */
return 0;
error_sock:
- sock_release(capwap_rcv_socket);
+ sk_release_kernel(capwap_net->capwap_rcv_socket->sk); /* same release call release_socket() uses */
error:
- pr_warn("cannot register capwap protocol handler\n");
+ pr_warn("cannot register capwap protocol handler : %d\n", err);
return err;
}
+/*
+ * Drop one tunnel's reference on the CAPWAP receive socket of namespace
+ * 'net'.  When the last reference goes away, the per-namespace fragment
+ * state is torn down and the socket is released.
+ */
+static void release_socket(struct net *net)
+{
+	struct capwap_net *cn = ovs_get_capwap_net(net);
+
+	if (--cn->n_tunnels == 0) {
+		inet_frags_exit_net(&cn->frag_state, &frag_state);
+		sk_release_kernel(cn->capwap_rcv_socket->sk);
+	}
+}
+
+/*
+ * Create a CAPWAP tunnel vport.  A reference on the namespace's receive
+ * socket is taken first and given back again if the tunnel itself cannot
+ * be created.  Returns the new vport or an ERR_PTR() value.
+ */
+static struct vport *capwap_create(const struct vport_parms *parms)
+{
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct vport *vport;
+	int err = init_socket(net);
+
+	if (err)
+		return ERR_PTR(err);
+
+	vport = ovs_tnl_create(parms, &ovs_capwap_vport_ops, &capwap_tnl_ops);
+	if (IS_ERR(vport))
+		release_socket(net);
+
+	return vport;
+}
+
+static void capwap_destroy(struct vport *vport) /* destroys the tunnel vport and drops its socket reference */
+{
+ ovs_tnl_destroy(vport);
+ release_socket(ovs_dp_get_net(vport->dp)); /* gives back the reference taken in capwap_create(); NOTE(review): vport->dp is read after ovs_tnl_destroy() - confirm the vport is still valid here */
+}
+
+static int capwap_init(void)
+{
+ inet_frags_init(&frag_state); /* only the shared fragment-queue state is set up here; sockets and per-namespace state are created in init_socket() */
+ return 0;
+}
+
static void capwap_exit(void)
{
- defrag_exit();
- sock_release(capwap_rcv_socket);
+ inet_frags_fini(&frag_state); /* undoes capwap_init(); sockets are released per namespace in release_socket() */
}
static void copy_skb_metadata(struct sk_buff *from, struct sk_buff *to)
}
static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
- struct dst_entry *dst)
+ struct dst_entry *dst, unsigned int hlen)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- unsigned int hlen = skb_transport_offset(skb) + CAPWAP_HLEN;
unsigned int headroom;
unsigned int max_frame_len = dst_mtu(dst) + skb_network_offset(skb);
struct sk_buff *result = NULL, *list_cur = NULL;
cwh = capwap_hdr(skb2);
if (remaining > frag_size)
- cwh->begin = FRAG_HDR;
+ cwh->begin |= FRAG_HDR;
else
- cwh->begin = FRAG_LAST_HDR;
+ cwh->begin |= FRAG_LAST_HDR;
cwh->frag_id = frag_id;
cwh->frag_off = htons(offset);
remaining -= frag_size;
}
- goto out;
+ consume_skb(skb);
+ return result;
error:
- tnl_free_linked_skbs(result);
-out:
+ ovs_tnl_free_linked_skbs(result);
kfree_skb(skb);
- return result;
+ return NULL;
}
/* All of the following functions relate to fragmentation reassembly. */
-static inline struct frag_queue *ifq_cast(struct inet_frag_queue *ifq)
+static struct frag_queue *ifq_cast(struct inet_frag_queue *ifq)
{
return container_of(ifq, struct frag_queue, ifq);
}
frag_state.rnd) & (INETFRAGS_HASHSZ - 1);
}
-static struct frag_queue *queue_find(struct frag_match *match)
+static struct frag_queue *queue_find(struct netns_frags *ns_frag_state,
+ struct frag_match *match)
{
struct inet_frag_queue *ifq;
read_lock(&frag_state.lock);
- ifq = inet_frag_find(&frag_netns_state, &frag_state, match, frag_hash(match));
+ ifq = inet_frag_find(ns_frag_state, &frag_state, match, frag_hash(match));
if (!ifq)
return NULL;
{
struct iphdr *iph = ip_hdr(skb);
struct capwaphdr *cwh = capwap_hdr(skb);
+ struct capwap_net *capwap_net = ovs_get_capwap_net(dev_net(skb->dev));
+ struct netns_frags *ns_frag_state = &capwap_net->frag_state;
struct frag_match match;
u16 frag_off;
struct frag_queue *fq;
- if (atomic_read(&frag_netns_state.mem) > frag_netns_state.high_thresh)
- inet_frag_evictor(&frag_netns_state, &frag_state);
+ if (atomic_read(&ns_frag_state->mem) > ns_frag_state->high_thresh)
+ inet_frag_evictor(ns_frag_state, &frag_state);
match.daddr = iph->daddr;
match.saddr = iph->saddr;
match.id = cwh->frag_id;
frag_off = ntohs(cwh->frag_off) & FRAG_OFF_MASK;
- fq = queue_find(&match);
+ fq = queue_find(ns_frag_state, &match);
if (fq) {
spin_lock(&fq->ifq.lock);
skb = frag_queue(fq, skb, frag_off, frag_last);
return NULL;
}
-static void defrag_init(void)
-{
- inet_frags_init(&frag_state);
- inet_frags_init_net(&frag_netns_state);
-}
-
-static void defrag_exit(void)
-{
- inet_frags_exit_net(&frag_netns_state, &frag_state);
- inet_frags_fini(&frag_state);
-}
-
static void capwap_frag_init(struct inet_frag_queue *ifq, void *match_)
{
struct frag_match *match = match_;
inet_frag_put(&fq->ifq, &frag_state);
}
-const struct vport_ops capwap_vport_ops = {
+const struct vport_ops ovs_capwap_vport_ops = { /* operations table for CAPWAP tunnel vports */
.type = OVS_VPORT_TYPE_CAPWAP,
- .flags = VPORT_F_GEN_STATS,
+ .flags = VPORT_F_TUN_ID,
.init = capwap_init,
.exit = capwap_exit,
.create = capwap_create,
- .destroy = tnl_destroy,
- .set_addr = tnl_set_addr,
- .get_name = tnl_get_name,
- .get_addr = tnl_get_addr,
- .get_options = tnl_get_options,
- .set_options = tnl_set_options,
- .get_dev_flags = vport_gen_get_dev_flags,
- .is_running = vport_gen_is_running,
- .get_operstate = vport_gen_get_operstate,
- .send = tnl_send,
+ .destroy = capwap_destroy, /* wrapper: destroys the tunnel, then drops the namespace socket reference */
+ .set_addr = ovs_tnl_set_addr,
+ .get_name = ovs_tnl_get_name,
+ .get_addr = ovs_tnl_get_addr,
+ .get_options = ovs_tnl_get_options,
+ .set_options = ovs_tnl_set_options,
+ .get_dev_flags = ovs_vport_gen_get_dev_flags,
+ .is_running = ovs_vport_gen_is_running,
+ .get_operstate = ovs_vport_gen_get_operstate,
+ .send = ovs_tnl_send,
};
#else
#warning CAPWAP tunneling will not be available on kernels before 2.6.26