#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
+#include <linux/icmpv6.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+#include "vlan.h"
static struct kmem_cache *flow_cache;
static unsigned int hash_seed __read_mostly;
return skb->len >= skb_transport_offset(skb) + sizeof(struct icmphdr);
}
+u64 flow_used_time(unsigned long flow_jiffies)
+{
+ struct timespec cur_ts;
+ u64 cur_ms, idle_ms;
+
+ ktime_get_ts(&cur_ts);
+ idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
+ cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+ cur_ts.tv_nsec / NSEC_PER_MSEC;
+
+ return cur_ms - idle_ms;
+}
+
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ unsigned int nh_ofs = skb_network_offset(skb);
+ unsigned int nh_len;
+ int payload_ofs;
+ struct ipv6hdr *nh;
+ uint8_t nexthdr;
+
+ if (unlikely(skb->len < nh_ofs + sizeof(*nh)))
+ return -EINVAL;
+
+ nh = ipv6_hdr(skb);
+ nexthdr = nh->nexthdr;
+ payload_ofs = (u8 *)(nh + 1) - skb->data;
+
+ ipv6_addr_copy(&key->ipv6_src, &nh->saddr);
+ ipv6_addr_copy(&key->ipv6_dst, &nh->daddr);
+ key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
+ key->nw_proto = NEXTHDR_NONE;
+
+ payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr);
+ if (unlikely(payload_ofs < 0))
+ return -EINVAL;
+
+ nh_len = payload_ofs - nh_ofs;
+
+ /* Pull enough header bytes to account for the IP header plus the
+ * longest transport header that we parse, currently 20 bytes for TCP.
+ * To dig deeper than the transport header, transport parsers may need
+ * to pull more header bytes.
+ */
+ if (unlikely(!pskb_may_pull(skb, min(nh_ofs + nh_len + 20, skb->len))))
+ return -ENOMEM;
+
+ skb_set_transport_header(skb, nh_ofs + nh_len);
+ key->nw_proto = nexthdr;
+ return nh_len;
+}
+
+static bool icmp6hdr_ok(struct sk_buff *skb)
+{
+ return skb->len >= skb_transport_offset(skb) + sizeof(struct icmp6hdr);
+}
+
#define TCP_FLAGS_OFFSET 13
#define TCP_FLAG_MASK 0x3f
spin_unlock_bh(&flow->lock);
}
-struct sw_flow_actions *flow_actions_alloc(u32 actions_len)
+struct sw_flow_actions *flow_actions_alloc(const struct nlattr *actions)
{
+ int actions_len = nla_len(actions);
struct sw_flow_actions *sfa;
- if (actions_len % NLA_ALIGNTO)
- return ERR_PTR(-EINVAL);
-
/* At least DP_MAX_PORTS actions are required to be able to flood a
* packet to every port. Factor of 2 allows for setting VLAN tags,
* etc. */
return ERR_PTR(-ENOMEM);
sfa->actions_len = actions_len;
+ memcpy(sfa->actions, nla_data(actions), actions_len);
return sfa;
}
return llc->ethertype;
}
+static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
+ int nh_len)
+{
+ struct icmp6hdr *icmp = icmp6_hdr(skb);
+
+ /* The ICMPv6 type and code fields use the 16-bit transport port
+ * fields, so we need to store them in 16-bit network byte order.
+ */
+ key->tp_src = htons(icmp->icmp6_type);
+ key->tp_dst = htons(icmp->icmp6_code);
+
+ if (icmp->icmp6_code == 0 &&
+ (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+ icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ int icmp_len = skb->len - skb_transport_offset(skb);
+ struct nd_msg *nd;
+ int offset;
+
+ /* In order to process neighbor discovery options, we need the
+ * entire packet.
+ */
+ if (unlikely(icmp_len < sizeof(*nd)))
+ return 0;
+ if (unlikely(skb_linearize(skb)))
+ return -ENOMEM;
+
+ nd = (struct nd_msg *)skb_transport_header(skb);
+ ipv6_addr_copy(&key->nd_target, &nd->target);
+
+ icmp_len -= sizeof(*nd);
+ offset = 0;
+ while (icmp_len >= 8) {
+ struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd->opt + offset);
+ int opt_len = nd_opt->nd_opt_len * 8;
+
+ if (unlikely(!opt_len || opt_len > icmp_len))
+ goto invalid;
+
+ /* Store the link layer address if the appropriate
+ * option is provided. It is considered an error if
+ * the same link layer option is specified twice.
+ */
+ if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
+ && opt_len == 8) {
+ if (unlikely(!is_zero_ether_addr(key->arp_sha)))
+ goto invalid;
+ memcpy(key->arp_sha,
+ &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+ } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
+ && opt_len == 8) {
+ if (unlikely(!is_zero_ether_addr(key->arp_tha)))
+ goto invalid;
+ memcpy(key->arp_tha,
+ &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+ }
+
+ icmp_len -= opt_len;
+ offset += opt_len;
+ }
+ }
+
+ return 0;
+
+invalid:
+ memset(&key->nd_target, 0, sizeof(key->nd_target));
+ memset(key->arp_sha, 0, sizeof(key->arp_sha));
+ memset(key->arp_tha, 0, sizeof(key->arp_tha));
+
+ return 0;
+}
+
/**
* flow_extract - extracts a flow key from an Ethernet frame.
* @skb: sk_buff that contains the frame, with skb->data pointing to the
* - skb->network_header: just past the Ethernet header, or just past the
* VLAN header, to the first byte of the Ethernet payload.
*
- * - skb->transport_header: If key->dl_type is ETH_P_IP on output, then just
- * past the IPv4 header, if one is present and of a correct length,
- * otherwise the same as skb->network_header. For other key->dl_type
- * values it is left untouched.
+ * - skb->transport_header: If key->dl_type is ETH_P_IP or ETH_P_IPV6
+ * on output, then just past the IP header, if one is present and
+ * of a correct length, otherwise the same as skb->network_header.
+ * For other key->dl_type values it is left untouched.
*/
int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
bool *is_frag)
/*
* We would really like to pull as many bytes as we could possibly
- * want to parse into the linear data area. Currently that is:
+ * want to parse into the linear data area. Currently, for IPv4,
+ * that is:
*
* 14 Ethernet header
* 4 VLAN header
/* dl_type, dl_vlan, dl_vlan_pcp. */
__skb_pull(skb, 2 * ETH_ALEN);
- if (eth->h_proto == htons(ETH_P_8021Q))
+
+ if (vlan_tx_tag_present(skb))
+ key->dl_tci = htons(vlan_get_tci(skb));
+ else if (eth->h_proto == htons(ETH_P_8021Q))
parse_vlan(skb, key);
+
key->dl_type = parse_ethertype(skb);
skb_reset_network_header(skb);
__skb_push(skb, skb->data - (unsigned char *)eth);
}
nh = ip_hdr(skb);
- key->nw_src = nh->saddr;
- key->nw_dst = nh->daddr;
+ key->ipv4_src = nh->saddr;
+ key->ipv4_dst = nh->daddr;
key->nw_tos = nh->tos & ~INET_ECN_MASK;
key->nw_proto = nh->protocol;
if (key->nw_proto == ARPOP_REQUEST
|| key->nw_proto == ARPOP_REPLY) {
- memcpy(&key->nw_src, arp->ar_sip, sizeof(key->nw_src));
- memcpy(&key->nw_dst, arp->ar_tip, sizeof(key->nw_dst));
+ memcpy(&key->ipv4_src, arp->ar_sip, sizeof(key->ipv4_src));
+ memcpy(&key->ipv4_dst, arp->ar_tip, sizeof(key->ipv4_dst));
+ memcpy(key->arp_sha, arp->ar_sha, ETH_ALEN);
+ memcpy(key->arp_tha, arp->ar_tha, ETH_ALEN);
+ }
+ }
+ } else if (key->dl_type == htons(ETH_P_IPV6)) {
+ int nh_len; /* IPv6 Header + Extensions */
+
+ nh_len = parse_ipv6hdr(skb, key);
+ if (unlikely(nh_len < 0)) {
+ if (nh_len == -EINVAL) {
+ skb->transport_header = skb->network_header;
+ return 0;
+ }
+ return nh_len;
+ }
+
+ /* Transport layer. */
+ if (key->nw_proto == NEXTHDR_TCP) {
+ if (tcphdr_ok(skb)) {
+ struct tcphdr *tcp = tcp_hdr(skb);
+ key->tp_src = tcp->source;
+ key->tp_dst = tcp->dest;
+ }
+ } else if (key->nw_proto == NEXTHDR_UDP) {
+ if (udphdr_ok(skb)) {
+ struct udphdr *udp = udp_hdr(skb);
+ key->tp_src = udp->source;
+ key->tp_dst = udp->dest;
+ }
+ } else if (key->nw_proto == NEXTHDR_ICMP) {
+ if (icmp6hdr_ok(skb)) {
+ int error = parse_icmpv6(skb, key, nh_len);
+ if (error < 0)
+ return error;
}
}
}
* This state machine accepts the following forms, with [] for optional
* elements and | for alternatives:
*
- * [tun_id] in_port ethernet [8021q] [ethertype [IP [TCP|UDP|ICMP] | ARP]
+ * [tun_id] in_port ethernet [8021q] [ethertype \
+ * [IPv4 [TCP|UDP|ICMP] | IPv6 [TCP|UDP|ICMPv6 [ND]] | ARP]]
*/
int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *attr)
{
[ODP_KEY_ATTR_8021Q] = sizeof(struct odp_key_8021q),
[ODP_KEY_ATTR_ETHERTYPE] = 2,
[ODP_KEY_ATTR_IPV4] = sizeof(struct odp_key_ipv4),
+ [ODP_KEY_ATTR_IPV6] = sizeof(struct odp_key_ipv6),
[ODP_KEY_ATTR_TCP] = sizeof(struct odp_key_tcp),
[ODP_KEY_ATTR_UDP] = sizeof(struct odp_key_udp),
[ODP_KEY_ATTR_ICMP] = sizeof(struct odp_key_icmp),
+ [ODP_KEY_ATTR_ICMPV6] = sizeof(struct odp_key_icmpv6),
[ODP_KEY_ATTR_ARP] = sizeof(struct odp_key_arp),
+ [ODP_KEY_ATTR_ND] = sizeof(struct odp_key_nd),
};
const struct odp_key_ethernet *eth_key;
const struct odp_key_8021q *q_key;
const struct odp_key_ipv4 *ipv4_key;
+ const struct odp_key_ipv6 *ipv6_key;
const struct odp_key_tcp *tcp_key;
const struct odp_key_udp *udp_key;
const struct odp_key_icmp *icmp_key;
+ const struct odp_key_icmpv6 *icmpv6_key;
const struct odp_key_arp *arp_key;
+ const struct odp_key_nd *nd_key;
int type = nla_type(nla);
if (swkey->dl_type != htons(ETH_P_IP))
return -EINVAL;
ipv4_key = nla_data(nla);
- swkey->nw_src = ipv4_key->ipv4_src;
- swkey->nw_dst = ipv4_key->ipv4_dst;
+ swkey->ipv4_src = ipv4_key->ipv4_src;
+ swkey->ipv4_dst = ipv4_key->ipv4_dst;
swkey->nw_proto = ipv4_key->ipv4_proto;
swkey->nw_tos = ipv4_key->ipv4_tos;
if (swkey->nw_tos & INET_ECN_MASK)
return -EINVAL;
break;
+ case TRANSITION(ODP_KEY_ATTR_ETHERTYPE, ODP_KEY_ATTR_IPV6):
+ if (swkey->dl_type != htons(ETH_P_IPV6))
+ return -EINVAL;
+ ipv6_key = nla_data(nla);
+ memcpy(&swkey->ipv6_src, ipv6_key->ipv6_src,
+ sizeof(swkey->ipv6_src));
+ memcpy(&swkey->ipv6_dst, ipv6_key->ipv6_dst,
+ sizeof(swkey->ipv6_dst));
+ swkey->nw_proto = ipv6_key->ipv6_proto;
+ swkey->nw_tos = ipv6_key->ipv6_tos;
+ if (swkey->nw_tos & INET_ECN_MASK)
+ return -EINVAL;
+ break;
+
case TRANSITION(ODP_KEY_ATTR_IPV4, ODP_KEY_ATTR_TCP):
+ case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_TCP):
if (swkey->nw_proto != IPPROTO_TCP)
return -EINVAL;
tcp_key = nla_data(nla);
break;
case TRANSITION(ODP_KEY_ATTR_IPV4, ODP_KEY_ATTR_UDP):
+ case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_UDP):
if (swkey->nw_proto != IPPROTO_UDP)
return -EINVAL;
udp_key = nla_data(nla);
swkey->tp_dst = htons(icmp_key->icmp_code);
break;
+ case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_ICMPV6):
+ if (swkey->nw_proto != IPPROTO_ICMPV6)
+ return -EINVAL;
+ icmpv6_key = nla_data(nla);
+ swkey->tp_src = htons(icmpv6_key->icmpv6_type);
+ swkey->tp_dst = htons(icmpv6_key->icmpv6_code);
+ break;
+
case TRANSITION(ODP_KEY_ATTR_ETHERTYPE, ODP_KEY_ATTR_ARP):
if (swkey->dl_type != htons(ETH_P_ARP))
return -EINVAL;
arp_key = nla_data(nla);
- swkey->nw_src = arp_key->arp_sip;
- swkey->nw_dst = arp_key->arp_tip;
+ swkey->ipv4_src = arp_key->arp_sip;
+ swkey->ipv4_dst = arp_key->arp_tip;
if (arp_key->arp_op & htons(0xff00))
return -EINVAL;
swkey->nw_proto = ntohs(arp_key->arp_op);
+ memcpy(swkey->arp_sha, arp_key->arp_sha, ETH_ALEN);
+ memcpy(swkey->arp_tha, arp_key->arp_tha, ETH_ALEN);
+ break;
+
+ case TRANSITION(ODP_KEY_ATTR_ICMPV6, ODP_KEY_ATTR_ND):
+ if (swkey->tp_src != htons(NDISC_NEIGHBOUR_SOLICITATION)
+ && swkey->tp_src != htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
+ return -EINVAL;
+ nd_key = nla_data(nla);
+ memcpy(&swkey->nd_target, nd_key->nd_target,
+ sizeof(swkey->nd_target));
+ memcpy(swkey->arp_sha, nd_key->nd_sll, ETH_ALEN);
+ memcpy(swkey->arp_tha, nd_key->nd_tll, ETH_ALEN);
break;
default:
return -EINVAL;
return 0;
+ case ODP_KEY_ATTR_IPV6:
+ if (swkey->nw_proto == IPPROTO_TCP ||
+ swkey->nw_proto == IPPROTO_UDP ||
+ swkey->nw_proto == IPPROTO_ICMPV6)
+ return -EINVAL;
+ return 0;
+
+ case ODP_KEY_ATTR_ICMPV6:
+ if (swkey->tp_src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ swkey->tp_src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
+ return -EINVAL;
+ return 0;
+
case ODP_KEY_ATTR_TCP:
case ODP_KEY_ATTR_UDP:
case ODP_KEY_ATTR_ICMP:
case ODP_KEY_ATTR_ARP:
+ case ODP_KEY_ATTR_ND:
return 0;
}
struct odp_key_ethernet *eth_key;
struct nlattr *nla;
+ /* This is an imperfect sanity-check that FLOW_BUFSIZE doesn't need
+ * to be updated, but will at least raise awareness when new ODP key
+ * types are added. */
+ BUILD_BUG_ON(__ODP_KEY_ATTR_MAX != 14);
+
if (swkey->tun_id != cpu_to_be64(0))
NLA_PUT_BE64(skb, ODP_KEY_ATTR_TUN_ID, swkey->tun_id);
if (!nla)
goto nla_put_failure;
ipv4_key = nla_data(nla);
- ipv4_key->ipv4_src = swkey->nw_src;
- ipv4_key->ipv4_dst = swkey->nw_dst;
+ memset(ipv4_key, 0, sizeof(struct odp_key_ipv4));
+ ipv4_key->ipv4_src = swkey->ipv4_src;
+ ipv4_key->ipv4_dst = swkey->ipv4_dst;
ipv4_key->ipv4_proto = swkey->nw_proto;
ipv4_key->ipv4_tos = swkey->nw_tos;
+ } else if (swkey->dl_type == htons(ETH_P_IPV6)) {
+ struct odp_key_ipv6 *ipv6_key;
+
+ nla = nla_reserve(skb, ODP_KEY_ATTR_IPV6, sizeof(*ipv6_key));
+ if (!nla)
+ goto nla_put_failure;
+ ipv6_key = nla_data(nla);
+ memset(ipv6_key, 0, sizeof(struct odp_key_ipv6));
+ memcpy(ipv6_key->ipv6_src, &swkey->ipv6_src,
+ sizeof(ipv6_key->ipv6_src));
+ memcpy(ipv6_key->ipv6_dst, &swkey->ipv6_dst,
+ sizeof(ipv6_key->ipv6_dst));
+ ipv6_key->ipv6_proto = swkey->nw_proto;
+ ipv6_key->ipv6_tos = swkey->nw_tos;
+ } else if (swkey->dl_type == htons(ETH_P_ARP)) {
+ struct odp_key_arp *arp_key;
+
+ nla = nla_reserve(skb, ODP_KEY_ATTR_ARP, sizeof(*arp_key));
+ if (!nla)
+ goto nla_put_failure;
+ arp_key = nla_data(nla);
+ memset(arp_key, 0, sizeof(struct odp_key_arp));
+ arp_key->arp_sip = swkey->ipv4_src;
+ arp_key->arp_tip = swkey->ipv4_dst;
+ arp_key->arp_op = htons(swkey->nw_proto);
+ memcpy(arp_key->arp_sha, swkey->arp_sha, ETH_ALEN);
+ memcpy(arp_key->arp_tha, swkey->arp_tha, ETH_ALEN);
+ }
+
+ if (swkey->dl_type == htons(ETH_P_IP) ||
+ swkey->dl_type == htons(ETH_P_IPV6)) {
if (swkey->nw_proto == IPPROTO_TCP) {
struct odp_key_tcp *tcp_key;
udp_key = nla_data(nla);
udp_key->udp_src = swkey->tp_src;
udp_key->udp_dst = swkey->tp_dst;
- } else if (swkey->nw_proto == IPPROTO_ICMP) {
+ } else if (swkey->dl_type == htons(ETH_P_IP) &&
+ swkey->nw_proto == IPPROTO_ICMP) {
struct odp_key_icmp *icmp_key;
nla = nla_reserve(skb, ODP_KEY_ATTR_ICMP, sizeof(*icmp_key));
icmp_key = nla_data(nla);
icmp_key->icmp_type = ntohs(swkey->tp_src);
icmp_key->icmp_code = ntohs(swkey->tp_dst);
- }
- } else if (swkey->dl_type == htons(ETH_P_ARP)) {
- struct odp_key_arp *arp_key;
+ } else if (swkey->dl_type == htons(ETH_P_IPV6) &&
+ swkey->nw_proto == IPPROTO_ICMPV6) {
+ struct odp_key_icmpv6 *icmpv6_key;
- nla = nla_reserve(skb, ODP_KEY_ATTR_ARP, sizeof(*arp_key));
- if (!nla)
- goto nla_put_failure;
- arp_key = nla_data(nla);
- arp_key->arp_sip = swkey->nw_src;
- arp_key->arp_tip = swkey->nw_dst;
- arp_key->arp_op = htons(swkey->nw_proto);
+ nla = nla_reserve(skb, ODP_KEY_ATTR_ICMPV6,
+ sizeof(*icmpv6_key));
+ if (!nla)
+ goto nla_put_failure;
+ icmpv6_key = nla_data(nla);
+ icmpv6_key->icmpv6_type = ntohs(swkey->tp_src);
+ icmpv6_key->icmpv6_code = ntohs(swkey->tp_dst);
+
+ if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+ icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+ struct odp_key_nd *nd_key;
+
+ nla = nla_reserve(skb, ODP_KEY_ATTR_ND, sizeof(*nd_key));
+ if (!nla)
+ goto nla_put_failure;
+ nd_key = nla_data(nla);
+ memcpy(nd_key->nd_target, &swkey->nd_target,
+ sizeof(nd_key->nd_target));
+ memcpy(nd_key->nd_sll, swkey->arp_sha, ETH_ALEN);
+ memcpy(nd_key->nd_tll, swkey->arp_tha, ETH_ALEN);
+ }
+ }
}
return 0;