util: Suppress build assertions when building with sparse.
[openvswitch] / datapath / flow.c
index 4b0e6cc310c4bdac563a616737f5d8bfa1853920..d678979b3715a8bd3255398bd95859844a357112 100644 (file)
 static struct kmem_cache *flow_cache;
 static unsigned int hash_seed __read_mostly;
 
+/* Checks that 'skb' holds at least 'len' bytes and that pskb_may_pull()
+ * accepts that length.  Returns 0 on success, -EINVAL if the packet is
+ * shorter than 'len', or -ENOMEM if the pull itself fails.
+ */
+static int check_header(struct sk_buff *skb, int len)
+{
+       /* Length is tested first so that a short packet is reported as
+        * -EINVAL rather than as -ENOMEM.
+        */
+       if (unlikely(skb->len < len))
+               return -EINVAL;
+
+       return unlikely(!pskb_may_pull(skb, len)) ? -ENOMEM : 0;
+}
+
+/* Returns true if pskb_may_pull() succeeds for everything up to the end of
+ * the Ethernet ARP header that starts at the network offset.
+ */
 static inline bool arphdr_ok(struct sk_buff *skb)
 {
-       return skb->len >= skb_network_offset(skb) + sizeof(struct arp_eth_header);
+       unsigned int arp_end = skb_network_offset(skb) +
+                              sizeof(struct arp_eth_header);
+
+       return pskb_may_pull(skb, arp_end);
 }
 
+/* Validates the IPv4 header at the network offset and records where the
+ * transport header starts.  Returns 0 on success, the error from
+ * check_header() if the fixed-size header is unavailable, or -EINVAL if the
+ * header length is below sizeof(struct iphdr) or runs past the packet.
+ */
 static inline int check_iphdr(struct sk_buff *skb)
 {
        unsigned int nh_ofs = skb_network_offset(skb);
        unsigned int ip_len;
+       int rc;
 
-       if (skb->len < nh_ofs + sizeof(struct iphdr))
-               return -EINVAL;
+       rc = check_header(skb, nh_ofs + sizeof(struct iphdr));
+       if (unlikely(rc))
+               return rc;
 
        ip_len = ip_hdrlen(skb);
-       if (ip_len < sizeof(struct iphdr) || skb->len < nh_ofs + ip_len)
+       if (unlikely(ip_len < sizeof(struct iphdr)))
+               return -EINVAL;
+       if (unlikely(skb->len < nh_ofs + ip_len))
                return -EINVAL;
 
-       /*
-        * Pull enough header bytes to account for the IP header plus the
-        * longest transport header that we parse, currently 20 bytes for TCP.
-        */
-       if (!pskb_may_pull(skb, min(nh_ofs + ip_len + 20, skb->len)))
-               return -ENOMEM;
-
+       /* No transport header bytes are pulled here. */
        skb_set_transport_header(skb, nh_ofs + ip_len);
        return 0;
 }
@@ -70,22 +76,40 @@ static inline int check_iphdr(struct sk_buff *skb)
+/* Returns true if the fixed TCP header can be pulled and the header length
+ * it advertises is at least sizeof(struct tcphdr) and fits in the packet.
+ */
 static inline bool tcphdr_ok(struct sk_buff *skb)
 {
        int th_ofs = skb_transport_offset(skb);
-       if (skb->len >= th_ofs + sizeof(struct tcphdr)) {
-               int tcp_len = tcp_hdrlen(skb);
-               return (tcp_len >= sizeof(struct tcphdr)
-                       && skb->len >= th_ofs + tcp_len);
-       }
-       return false;
+       int tcp_len;
+
+       /* The fixed header is pulled before tcp_hdrlen() is consulted. */
+       if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
+               return false;
+
+       tcp_len = tcp_hdrlen(skb);
+       if (unlikely(tcp_len < sizeof(struct tcphdr)))
+               return false;
+       if (unlikely(skb->len < th_ofs + tcp_len))
+               return false;
+
+       return true;
 }
 
+/* Returns true if everything up to the end of the UDP header can be pulled. */
 static inline bool udphdr_ok(struct sk_buff *skb)
 {
-       return skb->len >= skb_transport_offset(skb) + sizeof(struct udphdr);
+       unsigned int udp_end = skb_transport_offset(skb) +
+                              sizeof(struct udphdr);
+
+       return pskb_may_pull(skb, udp_end);
 }
 
+/* Returns true if everything up to the end of the ICMP header can be pulled. */
 static inline bool icmphdr_ok(struct sk_buff *skb)
 {
-       return skb->len >= skb_transport_offset(skb) + sizeof(struct icmphdr);
+       unsigned int icmp_end = skb_transport_offset(skb) +
+                               sizeof(struct icmphdr);
+
+       return pskb_may_pull(skb, icmp_end);
 }
 
 u64 flow_used_time(unsigned long flow_jiffies)
@@ -106,49 +119,28 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
        unsigned int nh_ofs = skb_network_offset(skb);
        unsigned int nh_len;
        int payload_ofs;
-       int payload_len;
        struct ipv6hdr *nh;
        uint8_t nexthdr;
+       int err;
 
-       if (unlikely(skb->len < nh_ofs + sizeof(*nh)))
-               return -EINVAL;
+       err = check_header(skb, nh_ofs + sizeof(*nh));
+       if (unlikely(err))
+               return err;
 
        nh = ipv6_hdr(skb);
        nexthdr = nh->nexthdr;
        payload_ofs = (u8 *)(nh + 1) - skb->data;
-       payload_len = ntohs(nh->payload_len);
 
-       memcpy(key->ipv6_src, nh->saddr.in6_u.u6_addr8, sizeof(key->ipv6_src));
-       memcpy(key->ipv6_dst, nh->daddr.in6_u.u6_addr8, sizeof(key->ipv6_dst));
+       ipv6_addr_copy(&key->ipv6_src, &nh->saddr);
+       ipv6_addr_copy(&key->ipv6_dst, &nh->daddr);
        key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
        key->nw_proto = NEXTHDR_NONE;
 
-       /* We don't process jumbograms. */
-       if (!payload_len)
-               return -EINVAL;
-
-       if (unlikely(skb->len < nh_ofs + sizeof(*nh) + payload_len))
-               return -EINVAL;
-
        payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr);
-       if (payload_ofs < 0) {
-               return -EINVAL;
-       }
-       nh_len = payload_ofs - nh_ofs;
-
-       /* Ensure that the payload length claimed is at least large enough
-        * for the headers we've already processed. */
-       if (payload_len < nh_len - sizeof(*nh))
+       if (unlikely(payload_ofs < 0))
                return -EINVAL;
 
-       /* Pull enough header bytes to account for the IP header plus the
-        * longest transport header that we parse, currently 20 bytes for TCP.
-        * To dig deeper than the transport header, transport parsers may need
-        * to pull more header bytes.
-        */
-       if (unlikely(!pskb_may_pull(skb, min(nh_ofs + nh_len + 20, skb->len))))
-               return -ENOMEM;
-
+       nh_len = payload_ofs - nh_ofs;
        skb_set_transport_header(skb, nh_ofs + nh_len);
        key->nw_proto = nexthdr;
        return nh_len;
@@ -156,7 +148,11 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 
+/* Returns true if everything up to the end of the ICMPv6 header can be pulled. */
 static bool icmp6hdr_ok(struct sk_buff *skb)
 {
-       return skb->len >= skb_transport_offset(skb) + sizeof(struct icmp6hdr);
+       unsigned int icmp6_end = skb_transport_offset(skb) +
+                                sizeof(struct icmp6hdr);
+
+       return pskb_may_pull(skb, icmp6_end);
 }
 
 #define TCP_FLAGS_OFFSET 13
@@ -210,6 +203,7 @@ struct sw_flow *flow_alloc(void)
 
        spin_lock_init(&flow->lock);
        atomic_set(&flow->refcnt, 1);
+       flow->sf_acts = NULL;
        flow->dead = false;
 
        return flow;
@@ -270,7 +264,7 @@ void flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
        call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
 }
 
-static void parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 {
        struct qtag_prefix {
                __be16 eth_type; /* ETH_P_8021Q */
@@ -278,12 +272,23 @@ static void parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
        };
        struct qtag_prefix *qp;
 
-       if (skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))
-               return;
+       /* A frame too short to hold the tag and the encapsulated ethertype
+        * is not an error: return without touching the key or pulling, as
+        * the code did before it gained a return value.  -ENOMEM is kept
+        * for a failed pull of bytes the packet actually has.
+        */
+       if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
+               return 0;
+
+       if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
+                                        sizeof(__be16))))
+               return -ENOMEM;
 
        qp = (struct qtag_prefix *) skb->data;
        key->dl_tci = qp->tci | htons(VLAN_TAG_PRESENT);
        __skb_pull(skb, sizeof(struct qtag_prefix));
+
+       return 0;
 }
 
 static __be16 parse_ethertype(struct sk_buff *skb)
@@ -304,9 +301,12 @@ static __be16 parse_ethertype(struct sk_buff *skb)
        if (ntohs(proto) >= 1536)
                return proto;
 
-       if (unlikely(skb->len < sizeof(struct llc_snap_hdr)))
+       if (skb->len < sizeof(struct llc_snap_hdr))
                return htons(ETH_P_802_2);
 
+       if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
+               return htons(0);
+
        llc = (struct llc_snap_hdr *) skb->data;
        if (llc->dsap != LLC_SAP_SNAP ||
            llc->ssap != LLC_SAP_SNAP ||
@@ -318,32 +318,33 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 }
 
 static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
-               int nh_len)
+                       int nh_len)
 {
-       struct ipv6hdr *nh = ipv6_hdr(skb);
-       int icmp_len = ntohs(nh->payload_len) + sizeof(*nh) - nh_len;
        struct icmp6hdr *icmp = icmp6_hdr(skb);
 
        /* The ICMPv6 type and code fields use the 16-bit transport port
-        * fields, so we need to store them in 16-bit network byte order. */
+        * fields, so we need to store them in 16-bit network byte order.
+        */
        key->tp_src = htons(icmp->icmp6_type);
        key->tp_dst = htons(icmp->icmp6_code);
 
-       if (!icmp->icmp6_code
-                       && ((icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
-                         || (icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT))) {
+       if (icmp->icmp6_code == 0 &&
+           (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+            icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+               int icmp_len = skb->len - skb_transport_offset(skb);
                struct nd_msg *nd;
                int offset;
 
                /* In order to process neighbor discovery options, we need the
-                * entire packet. */
-               if (icmp_len < sizeof(*nd))
-                       goto invalid;
-               if (!pskb_may_pull(skb, skb_transport_offset(skb) + icmp_len))
+                * entire packet.
+                */
+               if (unlikely(icmp_len < sizeof(*nd)))
+                       return 0;
+               if (unlikely(skb_linearize(skb)))
                        return -ENOMEM;
 
                nd = (struct nd_msg *)skb_transport_header(skb);
-               memcpy(key->nd_target, &nd->target, sizeof(key->nd_target));
+               ipv6_addr_copy(&key->nd_target, &nd->target);
 
                icmp_len -= sizeof(*nd);
                offset = 0;
@@ -351,24 +352,25 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
                        struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd->opt + offset);
                        int opt_len = nd_opt->nd_opt_len * 8;
 
-                       if (!opt_len || (opt_len > icmp_len))
+                       if (unlikely(!opt_len || opt_len > icmp_len))
                                goto invalid;
 
-                       /* Store the link layer address if the appropriate option is
-                        * provided.  It is considered an error if the same link
-                        * layer option is specified twice. */
+                       /* Store the link layer address if the appropriate
+                        * option is provided.  It is considered an error if
+                        * the same link layer option is specified twice.
+                        */
                        if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
-                                       && opt_len == 8) {
-                               if (!is_zero_ether_addr(key->arp_sha))
+                           && opt_len == 8) {
+                               if (unlikely(!is_zero_ether_addr(key->arp_sha)))
                                        goto invalid;
                                memcpy(key->arp_sha,
-                                               &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+                                   &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
                        } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
-                                       && opt_len == 8) {
-                               if (!is_zero_ether_addr(key->arp_tha))
+                                  && opt_len == 8) {
+                               if (unlikely(!is_zero_ether_addr(key->arp_tha)))
                                        goto invalid;
                                memcpy(key->arp_tha,
-                                               &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+                                   &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
                        }
 
                        icmp_len -= opt_len;
@@ -379,7 +381,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
        return 0;
 
 invalid:
-       memset(key->nd_target, 0, sizeof(key->nd_target));
+       memset(&key->nd_target, 0, sizeof(key->nd_target));
        memset(key->arp_sha, 0, sizeof(key->arp_sha));
        memset(key->arp_tha, 0, sizeof(key->arp_tha));
 
@@ -421,45 +423,28 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
        key->in_port = in_port;
        *is_frag = false;
 
-       /*
-        * We would really like to pull as many bytes as we could possibly
-        * want to parse into the linear data area.  Currently, for IPv4,
-        * that is:
-        *
-        *    14     Ethernet header
-        *     4     VLAN header
-        *    60     max IP header with options
-        *    20     max TCP/UDP/ICMP header (don't care about options)
-        *    --
-        *    98
-        *
-        * But Xen only allocates 64 or 72 bytes for the linear data area in
-        * netback, which means that we would reallocate and copy the skb's
-        * linear data on every packet if we did that.  So instead just pull 64
-        * bytes, which is always sufficient without IP options, and then check
-        * whether we need to pull more later when we look at the IP header.
-        */
-       if (!pskb_may_pull(skb, min(skb->len, 64u)))
-               return -ENOMEM;
-
        skb_reset_mac_header(skb);
 
-       /* Link layer. */
+       /* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
+        * header in the linear data area.
+        */
        eth = eth_hdr(skb);
        memcpy(key->dl_src, eth->h_source, ETH_ALEN);
        memcpy(key->dl_dst, eth->h_dest, ETH_ALEN);
-
-       /* dl_type, dl_vlan, dl_vlan_pcp. */
        __skb_pull(skb, 2 * ETH_ALEN);
 
        if (vlan_tx_tag_present(skb))
                key->dl_tci = htons(vlan_get_tci(skb));
        else if (eth->h_proto == htons(ETH_P_8021Q))
-               parse_vlan(skb, key);
+               if (unlikely(parse_vlan(skb, key)))
+                       return -ENOMEM;
 
        key->dl_type = parse_ethertype(skb);
+       if (unlikely(key->dl_type == htons(0)))
+               return -ENOMEM;
+
        skb_reset_network_header(skb);
-       __skb_push(skb, skb->data - (unsigned char *)eth);
+       __skb_push(skb, skb->data - skb_mac_header(skb));
 
        /* Network layer. */
        if (key->dl_type == htons(ETH_P_IP)) {
@@ -687,9 +672,9 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *attr)
                        if (swkey->dl_type != htons(ETH_P_IPV6))
                                return -EINVAL;
                        ipv6_key = nla_data(nla);
-                       memcpy(swkey->ipv6_src, ipv6_key->ipv6_src,
+                       memcpy(&swkey->ipv6_src, ipv6_key->ipv6_src,
                                        sizeof(swkey->ipv6_src));
-                       memcpy(swkey->ipv6_dst, ipv6_key->ipv6_dst,
+                       memcpy(&swkey->ipv6_dst, ipv6_key->ipv6_dst,
                                        sizeof(swkey->ipv6_dst));
                        swkey->nw_proto = ipv6_key->ipv6_proto;
                        swkey->nw_tos = ipv6_key->ipv6_tos;
@@ -746,10 +731,10 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *attr)
 
                case TRANSITION(ODP_KEY_ATTR_ICMPV6, ODP_KEY_ATTR_ND):
                        if (swkey->tp_src != htons(NDISC_NEIGHBOUR_SOLICITATION)
-                                       && swkey->tp_src != htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
+                           && swkey->tp_src != htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
                                return -EINVAL;
                        nd_key = nla_data(nla);
-                       memcpy(swkey->nd_target, nd_key->nd_target,
+                       memcpy(&swkey->nd_target, nd_key->nd_target,
                                        sizeof(swkey->nd_target));
                        memcpy(swkey->arp_sha, nd_key->nd_sll, ETH_ALEN);
                        memcpy(swkey->arp_tha, nd_key->nd_tll, ETH_ALEN);
@@ -819,6 +804,11 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
        struct odp_key_ethernet *eth_key;
        struct nlattr *nla;
 
+       /* This is an imperfect sanity-check that FLOW_BUFSIZE doesn't need
+        * to be updated, but will at least raise awareness when new ODP key
+        * types are added. */
+       BUILD_BUG_ON(__ODP_KEY_ATTR_MAX != 14);
+
        if (swkey->tun_id != cpu_to_be64(0))
                NLA_PUT_BE64(skb, ODP_KEY_ATTR_TUN_ID, swkey->tun_id);
 
@@ -864,9 +854,9 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
                        goto nla_put_failure;
                ipv6_key = nla_data(nla);
                memset(ipv6_key, 0, sizeof(struct odp_key_ipv6));
-               memcpy(ipv6_key->ipv6_src, swkey->ipv6_src,
+               memcpy(ipv6_key->ipv6_src, &swkey->ipv6_src,
                                sizeof(ipv6_key->ipv6_src));
-               memcpy(ipv6_key->ipv6_dst, swkey->ipv6_dst,
+               memcpy(ipv6_key->ipv6_dst, &swkey->ipv6_dst,
                                sizeof(ipv6_key->ipv6_dst));
                ipv6_key->ipv6_proto = swkey->nw_proto;
                ipv6_key->ipv6_tos = swkey->nw_tos;
@@ -885,8 +875,8 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
                memcpy(arp_key->arp_tha, swkey->arp_tha, ETH_ALEN);
        }
 
-       if (swkey->dl_type == htons(ETH_P_IP)
-                       || swkey->dl_type == htons(ETH_P_IPV6)) {
+       if (swkey->dl_type == htons(ETH_P_IP) ||
+           swkey->dl_type == htons(ETH_P_IPV6)) {
 
                if (swkey->nw_proto == IPPROTO_TCP) {
                        struct odp_key_tcp *tcp_key;
@@ -906,8 +896,8 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
                        udp_key = nla_data(nla);
                        udp_key->udp_src = swkey->tp_src;
                        udp_key->udp_dst = swkey->tp_dst;
-               } else if (swkey->dl_type == htons(ETH_P_IP)
-                               && swkey->nw_proto == IPPROTO_ICMP) {
+               } else if (swkey->dl_type == htons(ETH_P_IP) &&
+                          swkey->nw_proto == IPPROTO_ICMP) {
                        struct odp_key_icmp *icmp_key;
 
                        nla = nla_reserve(skb, ODP_KEY_ATTR_ICMP, sizeof(*icmp_key));
@@ -916,26 +906,27 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
                        icmp_key = nla_data(nla);
                        icmp_key->icmp_type = ntohs(swkey->tp_src);
                        icmp_key->icmp_code = ntohs(swkey->tp_dst);
-               } else if (swkey->dl_type == htons(ETH_P_IPV6)
-                               && swkey->nw_proto == IPPROTO_ICMPV6) {
+               } else if (swkey->dl_type == htons(ETH_P_IPV6) &&
+                          swkey->nw_proto == IPPROTO_ICMPV6) {
                        struct odp_key_icmpv6 *icmpv6_key;
 
-                       nla = nla_reserve(skb, ODP_KEY_ATTR_ICMPV6, sizeof(*icmpv6_key));
+                       nla = nla_reserve(skb, ODP_KEY_ATTR_ICMPV6,
+                                               sizeof(*icmpv6_key));
                        if (!nla)
                                goto nla_put_failure;
                        icmpv6_key = nla_data(nla);
                        icmpv6_key->icmpv6_type = ntohs(swkey->tp_src);
                        icmpv6_key->icmpv6_code = ntohs(swkey->tp_dst);
 
-                       if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION
-                                       || icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+                       if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+                           icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
                                struct odp_key_nd *nd_key;
 
                                nla = nla_reserve(skb, ODP_KEY_ATTR_ND, sizeof(*nd_key));
                                if (!nla)
                                        goto nla_put_failure;
                                nd_key = nla_data(nla);
-                               memcpy(nd_key->nd_target, swkey->nd_target,
+                               memcpy(nd_key->nd_target, &swkey->nd_target,
                                                        sizeof(nd_key->nd_target));
                                memcpy(nd_key->nd_sll, swkey->arp_sha, ETH_ALEN);
                                memcpy(nd_key->nd_tll, swkey->arp_tha, ETH_ALEN);