#include "flow.h"
#include "datapath.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
return 0;
}
/* Reports whether @skb can supply a complete Ethernet ARP header.
 *
 * Returns the result of pskb_may_pull() for a length equal to the network
 * header offset of @skb plus sizeof(struct arp_eth_header).
 *
 * NOTE(review): pskb_may_pull() is documented as possibly reallocating the
 * skb header area; presumably callers must not keep pointers into the packet
 * data across this call -- confirm against the callers. */
-static inline bool arphdr_ok(struct sk_buff *skb)
+static bool arphdr_ok(struct sk_buff *skb)
{
return pskb_may_pull(skb, skb_network_offset(skb) +
sizeof(struct arp_eth_header));
}
-static inline int check_iphdr(struct sk_buff *skb)
+static int check_iphdr(struct sk_buff *skb)
{
unsigned int nh_ofs = skb_network_offset(skb);
unsigned int ip_len;
return 0;
}
-static inline bool tcphdr_ok(struct sk_buff *skb)
+static bool tcphdr_ok(struct sk_buff *skb)
{
int th_ofs = skb_transport_offset(skb);
int tcp_len;
return true;
}
/* Reports whether @skb can supply a complete UDP header.
 *
 * Returns the result of pskb_may_pull() for a length equal to the transport
 * header offset of @skb plus sizeof(struct udphdr). */
-static inline bool udphdr_ok(struct sk_buff *skb)
+static bool udphdr_ok(struct sk_buff *skb)
{
return pskb_may_pull(skb, skb_transport_offset(skb) +
sizeof(struct udphdr));
}
/* Reports whether @skb can supply a complete ICMP header.
 *
 * Returns the result of pskb_may_pull() for a length equal to the transport
 * header offset of @skb plus sizeof(struct icmphdr). */
-static inline bool icmphdr_ok(struct sk_buff *skb)
+static bool icmphdr_ok(struct sk_buff *skb)
{
return pskb_may_pull(skb, skb_transport_offset(skb) +
sizeof(struct icmphdr));
}
/* Evaluates to the offset of the first byte after @field within
 * struct sw_flow_key, i.e. offsetof(@field) plus the size of @field.
 * This file assigns the result to flow key lengths (key_len / *key_lenp),
 * so the value is the number of leading key bytes up to and including
 * @field. */
#define SW_FLOW_KEY_OFFSET(field) \
- offsetof(struct sw_flow_key, field) + \
- FIELD_SIZEOF(struct sw_flow_key, field)
+ (offsetof(struct sw_flow_key, field) + \
+ FIELD_SIZEOF(struct sw_flow_key, field))
/**
* skip_exthdr - skip any IPv6 extension headers
* @nexthdrp: Initially, points to the type of the extension header at @start.
* This function updates it to point to the extension header at the final
* offset.
- * @tos_frag: Points to the @tos_frag member in a &struct sw_flow_key. This
+ * @frag: Points to the @frag member in a &struct sw_flow_key. This
* function sets an appropriate %OVS_FRAG_TYPE_* value.
*
- * This is based on ipv6_skip_exthdr() but adds the updates to *@tos_frag.
+ * This is based on ipv6_skip_exthdr() but adds the updates to *@frag.
*
* When there is more than one fragment header, this version reports whether
* the final fragment header that it examines is a first fragment.
* Returns the final payload offset, or -1 on error.
*/
static int skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
- u8 *tos_frag)
+ u8 *frag)
{
u8 nexthdr = *nexthdrp;
if (fp == NULL)
return -1;
- *tos_frag &= ~OVS_FRAG_TYPE_MASK;
if (ntohs(*fp) & ~0x7) {
- *tos_frag |= OVS_FRAG_TYPE_LATER;
+ *frag = OVS_FRAG_TYPE_LATER;
break;
}
- *tos_frag |= OVS_FRAG_TYPE_FIRST;
+ *frag = OVS_FRAG_TYPE_FIRST;
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH)
hdrlen = (hp->hdrlen+2)<<2;
uint8_t nexthdr;
int err;
- *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.addr);
+ *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
err = check_header(skb, nh_ofs + sizeof(*nh));
if (unlikely(err))
payload_ofs = (u8 *)(nh + 1) - skb->data;
key->ip.proto = NEXTHDR_NONE;
- key->ip.tos_frag = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
+ key->ip.tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
+ key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
ipv6_addr_copy(&key->ipv6.addr.src, &nh->saddr);
ipv6_addr_copy(&key->ipv6.addr.dst, &nh->daddr);
- payload_ofs = skip_exthdr(skb, payload_ofs, &nexthdr, &key->ip.tos_frag);
+ payload_ofs = skip_exthdr(skb, payload_ofs, &nexthdr, &key->ip.frag);
if (unlikely(payload_ofs < 0))
return -EINVAL;
static struct flex_array __rcu *alloc_buckets(unsigned int n_buckets)
{
- struct flex_array __rcu * buckets;
+ struct flex_array __rcu *buckets;
int i, err;
buckets = flex_array_alloc(sizeof(struct hlist_head *),
return buckets;
}
/* Releases the bucket array @buckets by passing it to flex_array_free().
 * Nothing else is done here; any per-bucket contents are not touched by
 * this function. */
-static void free_buckets(struct flex_array * buckets)
+static void free_buckets(struct flex_array *buckets)
{
flex_array_free(buckets);
}
/* Queues @table for destruction through RCU.
 *
 * A NULL @table is accepted and ignored.  Otherwise the table's embedded
 * rcu member is handed to call_rcu() with flow_tbl_destroy_rcu_cb as the
 * callback, so the actual teardown happens in that callback rather than
 * in this function. */
void flow_tbl_deferred_destroy(struct flow_table *table)
{
- if (!table)
- return;
+ if (!table)
+ return;
- call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+ call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
}
struct sw_flow *flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last)
icmp_len -= sizeof(*nd);
offset = 0;
while (icmp_len >= 8) {
- struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd->opt + offset);
+ struct nd_opt_hdr *nd_opt =
+ (struct nd_opt_hdr *)(nd->opt + offset);
int opt_len = nd_opt->nd_opt_len * 8;
if (unlikely(!opt_len || opt_len > icmp_len))
struct ethhdr *eth;
memset(key, 0, sizeof(*key));
- key->eth.tun_id = OVS_CB(skb)->tun_id;
- key->eth.in_port = in_port;
+
+ key->phy.priority = skb->priority;
+ key->phy.tun_id = OVS_CB(skb)->tun_id;
+ key->phy.in_port = in_port;
skb_reset_mac_header(skb);
key->ipv4.addr.dst = nh->daddr;
key->ip.proto = nh->protocol;
- key->ip.tos_frag = nh->tos & ~INET_ECN_MASK;
+ key->ip.tos = nh->tos & ~INET_ECN_MASK;
offset = nh->frag_off & htons(IP_OFFSET);
if (offset) {
- key->ip.tos_frag |= OVS_FRAG_TYPE_LATER;
+ key->ip.frag = OVS_FRAG_TYPE_LATER;
goto out;
}
if (nh->frag_off & htons(IP_MF) ||
skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
- key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST;
+ key->ip.frag = OVS_FRAG_TYPE_FIRST;
/* Transport layer. */
if (key->ip.proto == IPPROTO_TCP) {
if (icmphdr_ok(skb)) {
struct icmphdr *icmp = icmp_hdr(skb);
/* The ICMP type and code fields use the 16-bit
- * transport port fields, so we need to store them
- * in 16-bit network byte order. */
+ * transport port fields, so we need to store
+ * them in 16-bit network byte order. */
key->ipv4.tp.src = htons(icmp->type);
key->ipv4.tp.dst = htons(icmp->code);
}
goto out;
}
- if ((key->ip.tos_frag & OVS_FRAG_TYPE_MASK) == OVS_FRAG_TYPE_LATER)
+ if (key->ip.frag == OVS_FRAG_TYPE_LATER)
goto out;
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
- key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST;
+ key->ip.frag = OVS_FRAG_TYPE_FIRST;
/* Transport layer. */
if (key->ip.proto == NEXTHDR_TCP) {
/* Computes the hash of the leading @key_len bytes of @key.
 *
 * The key is reinterpreted as an array of u32 words and fed to jhash2()
 * with hash_seed as the initial value.  @key_len is rounded up to a whole
 * number of words, so when it is not a multiple of sizeof(u32) the bytes up
 * to the next word boundary are hashed as well. */
u32 flow_hash(const struct sw_flow_key *key, int key_len)
{
- return jhash2((u32*)key, DIV_ROUND_UP(key_len, sizeof(u32)), hash_seed);
+ return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), hash_seed);
}
-struct sw_flow * flow_tbl_lookup(struct flow_table *table,
+struct sw_flow *flow_tbl_lookup(struct flow_table *table,
struct sw_flow_key *key, int key_len)
{
struct sw_flow *flow;
}
}
-static int parse_tos_frag(struct sw_flow_key *swkey, u8 tos, u8 frag)
-{
- if (tos & INET_ECN_MASK || frag > OVS_FRAG_TYPE_MAX)
- return -EINVAL;
-
- swkey->ip.tos_frag = tos | frag;
- return 0;
-}
-
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
const u32 ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+ [OVS_KEY_ATTR_PRIORITY] = 4,
[OVS_KEY_ATTR_TUN_ID] = 8,
[OVS_KEY_ATTR_IN_PORT] = 4,
[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
* This state machine accepts the following forms, with [] for optional
* elements and | for alternatives:
*
- * [tun_id] [in_port] ethernet [8021q] [ethertype \
+ * [priority] [tun_id] [in_port] ethernet [8021q] [ethertype \
* [IPv4 [TCP|UDP|ICMP] | IPv6 [TCP|UDP|ICMPv6 [ND]] | ARP]]
*
* except that IPv4 or IPv6 terminates the sequence if its @ipv4_frag or
const struct nlattr *attr)
{
int error = 0;
- enum ovs_frag_type frag_type;
const struct nlattr *nla;
u16 prev_type;
int rem;
int key_len;
memset(swkey, 0, sizeof(*swkey));
- swkey->eth.in_port = USHRT_MAX;
+ swkey->phy.in_port = USHRT_MAX;
swkey->eth.type = htons(ETH_P_802_2);
key_len = SW_FLOW_KEY_OFFSET(eth);
const struct ovs_key_arp *arp_key;
const struct ovs_key_nd *nd_key;
- int type = nla_type(nla);
+ int type = nla_type(nla);
- if (type > OVS_KEY_ATTR_MAX || nla_len(nla) != ovs_key_lens[type])
+ if (type > OVS_KEY_ATTR_MAX ||
+ nla_len(nla) != ovs_key_lens[type])
goto invalid;
#define TRANSITION(PREV_TYPE, TYPE) (((PREV_TYPE) << 16) | (TYPE))
switch (TRANSITION(prev_type, type)) {
+ case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_PRIORITY):
+ swkey->phy.priority = nla_get_u32(nla);
+ break;
+
case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_TUN_ID):
- swkey->eth.tun_id = nla_get_be64(nla);
+ case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_TUN_ID):
+ swkey->phy.tun_id = nla_get_be64(nla);
break;
case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_IN_PORT):
+ case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_IN_PORT):
case TRANSITION(OVS_KEY_ATTR_TUN_ID, OVS_KEY_ATTR_IN_PORT):
if (nla_get_u32(nla) >= DP_MAX_PORTS)
goto invalid;
- swkey->eth.in_port = nla_get_u32(nla);
+ swkey->phy.in_port = nla_get_u32(nla);
break;
case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_ETHERNET):
+ case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_ETHERNET):
case TRANSITION(OVS_KEY_ATTR_TUN_ID, OVS_KEY_ATTR_ETHERNET):
case TRANSITION(OVS_KEY_ATTR_IN_PORT, OVS_KEY_ATTR_ETHERNET):
eth_key = nla_data(nla);
if (swkey->eth.type != htons(ETH_P_IP))
goto invalid;
ipv4_key = nla_data(nla);
- swkey->ip.proto = ipv4_key->ipv4_proto;
- if (parse_tos_frag(swkey, ipv4_key->ipv4_tos,
- ipv4_key->ipv4_frag))
+ if (ipv4_key->ipv4_tos & INET_ECN_MASK)
goto invalid;
+ if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX)
+ goto invalid;
+ swkey->ip.proto = ipv4_key->ipv4_proto;
+ swkey->ip.tos = ipv4_key->ipv4_tos;
+ swkey->ip.frag = ipv4_key->ipv4_frag;
swkey->ipv4.addr.src = ipv4_key->ipv4_src;
swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
break;
case TRANSITION(OVS_KEY_ATTR_ETHERTYPE, OVS_KEY_ATTR_IPV6):
- key_len = SW_FLOW_KEY_OFFSET(ipv6.addr);
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
if (swkey->eth.type != htons(ETH_P_IPV6))
goto invalid;
ipv6_key = nla_data(nla);
- swkey->ip.proto = ipv6_key->ipv6_proto;
- if (parse_tos_frag(swkey, ipv6_key->ipv6_tos,
- ipv6_key->ipv6_frag))
+ if (ipv6_key->ipv6_tos & INET_ECN_MASK)
+ goto invalid;
+ if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX)
goto invalid;
+ swkey->ipv6.label = ipv6_key->ipv6_label;
+ swkey->ip.proto = ipv6_key->ipv6_proto;
+ swkey->ip.tos = ipv6_key->ipv6_tos;
+ swkey->ip.frag = ipv6_key->ipv6_frag;
memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
sizeof(swkey->ipv6.addr.src));
memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
if (rem)
goto invalid;
- frag_type = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK;
switch (prev_type) {
case OVS_KEY_ATTR_UNSPEC:
goto invalid;
+ case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_TUN_ID:
case OVS_KEY_ATTR_IN_PORT:
goto invalid;
case OVS_KEY_ATTR_ETHERTYPE:
if (swkey->eth.type == htons(ETH_P_IP) ||
+ swkey->eth.type == htons(ETH_P_IPV6) ||
swkey->eth.type == htons(ETH_P_ARP))
goto invalid;
goto ok;
case OVS_KEY_ATTR_IPV4:
- if (frag_type == OVS_FRAG_TYPE_LATER)
+ if (swkey->ip.frag == OVS_FRAG_TYPE_LATER)
goto ok;
if (swkey->ip.proto == IPPROTO_TCP ||
swkey->ip.proto == IPPROTO_UDP ||
goto ok;
case OVS_KEY_ATTR_IPV6:
- if (frag_type == OVS_FRAG_TYPE_LATER)
+ if (swkey->ip.frag == OVS_FRAG_TYPE_LATER)
goto ok;
if (swkey->ip.proto == IPPROTO_TCP ||
swkey->ip.proto == IPPROTO_UDP ||
case OVS_KEY_ATTR_ICMPV6:
if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT) ||
- frag_type == OVS_FRAG_TYPE_LATER)
+ swkey->ip.frag == OVS_FRAG_TYPE_LATER)
goto invalid;
goto ok;
case OVS_KEY_ATTR_UDP:
case OVS_KEY_ATTR_ICMP:
case OVS_KEY_ATTR_ND:
- if (frag_type == OVS_FRAG_TYPE_LATER)
+ if (swkey->ip.frag == OVS_FRAG_TYPE_LATER)
goto invalid;
goto ok;
error = -EINVAL;
ok:
- WARN_ON_ONCE(!key_len && !error);
*key_lenp = key_len;
return error;
}
* get the metadata, that is, the parts of the flow key that cannot be
* extracted from the packet itself.
*/
-int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id,
+int flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
const struct nlattr *attr)
{
const struct nlattr *nla;
*in_port = USHRT_MAX;
*tun_id = 0;
+ *priority = 0;
prev_type = OVS_KEY_ATTR_UNSPEC;
nla_for_each_nested(nla, attr, rem) {
- int type = nla_type(nla);
+ int type = nla_type(nla);
if (type > OVS_KEY_ATTR_MAX || nla_len(nla) != ovs_key_lens[type])
return -EINVAL;
switch (TRANSITION(prev_type, type)) {
+ case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_PRIORITY):
+ *priority = nla_get_u32(nla);
+ break;
+
case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_TUN_ID):
+ case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_TUN_ID):
*tun_id = nla_get_be64(nla);
break;
case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_IN_PORT):
+ case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_IN_PORT):
case TRANSITION(OVS_KEY_ATTR_TUN_ID, OVS_KEY_ATTR_IN_PORT):
if (nla_get_u32(nla) >= DP_MAX_PORTS)
return -EINVAL;
struct ovs_key_ethernet *eth_key;
struct nlattr *nla;
- /* This is an imperfect sanity-check that FLOW_BUFSIZE doesn't need
- * to be updated, but will at least raise awareness when new
- * datapath key types are added. */
- BUILD_BUG_ON(__OVS_KEY_ATTR_MAX != 14);
+ if (swkey->phy.priority)
+ NLA_PUT_U32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority);
- if (swkey->eth.tun_id != cpu_to_be64(0))
- NLA_PUT_BE64(skb, OVS_KEY_ATTR_TUN_ID, swkey->eth.tun_id);
+ if (swkey->phy.tun_id != cpu_to_be64(0))
+ NLA_PUT_BE64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id);
- if (swkey->eth.in_port != USHRT_MAX)
- NLA_PUT_U32(skb, OVS_KEY_ATTR_IN_PORT, swkey->eth.in_port);
+ if (swkey->phy.in_port != USHRT_MAX)
+ NLA_PUT_U32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port);
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
ipv4_key->ipv4_src = swkey->ipv4.addr.src;
ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
ipv4_key->ipv4_proto = swkey->ip.proto;
- ipv4_key->ipv4_tos = swkey->ip.tos_frag & ~INET_ECN_MASK;
- ipv4_key->ipv4_frag = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK;
+ ipv4_key->ipv4_tos = swkey->ip.tos & ~INET_ECN_MASK;
+ ipv4_key->ipv4_frag = swkey->ip.frag;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
struct ovs_key_ipv6 *ipv6_key;
sizeof(ipv6_key->ipv6_src));
memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
sizeof(ipv6_key->ipv6_dst));
+ ipv6_key->ipv6_label = swkey->ipv6.label;
ipv6_key->ipv6_proto = swkey->ip.proto;
- ipv6_key->ipv6_tos = swkey->ip.tos_frag & ~INET_ECN_MASK;
- ipv6_key->ipv6_frag = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK;
+ ipv6_key->ipv6_tos = swkey->ip.tos & ~INET_ECN_MASK;
+ ipv6_key->ipv6_frag = swkey->ip.frag;
} else if (swkey->eth.type == htons(ETH_P_ARP)) {
struct ovs_key_arp *arp_key;
if ((swkey->eth.type == htons(ETH_P_IP) ||
swkey->eth.type == htons(ETH_P_IPV6)) &&
- (swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK) != OVS_FRAG_TYPE_LATER) {
+ swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
if (swkey->ip.proto == IPPROTO_TCP) {
struct ovs_key_tcp *tcp_key;