datapath: Fix tunnel reconfiguration that does not change key data.
[openvswitch] / datapath / datapath.c
index 63a393221f892d0f5024e2cb1d0c7d33a160eca8..cd29482c508eb50c042f1484d33651ef2ce55fa4 100644 (file)
 #include <linux/netfilter_ipv4.h>
 #include <linux/inetdevice.h>
 #include <linux/list.h>
+#include <linux/openvswitch.h>
 #include <linux/rculist.h>
 #include <linux/dmi.h>
 #include <net/inet_ecn.h>
 #include <net/genetlink.h>
 
-#include "openvswitch/datapath-protocol.h"
 #include "checksum.h"
 #include "datapath.h"
 #include "actions.h"
@@ -84,7 +84,7 @@ EXPORT_SYMBOL(dp_ioctl_hook);
 static LIST_HEAD(dps);
 
 static struct vport *new_vport(const struct vport_parms *);
-static int queue_userspace_packets(struct datapath *, u32 pid, struct sk_buff *,
+static int queue_userspace_packets(struct datapath *, struct sk_buff *,
                                 const struct dp_upcall_info *);
 
 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
@@ -280,9 +280,10 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct dp_stats_percpu *stats;
-       int stats_counter_off;
+       u64 *stats_counter;
        int error;
 
+       stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
        OVS_CB(skb)->vport = p;
 
        if (!OVS_CB(skb)->flow) {
@@ -299,7 +300,7 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 
                if (is_frag && dp->drop_frags) {
                        consume_skb(skb);
-                       stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
+                       stats_counter = &stats->n_frags;
                        goto out;
                }
 
@@ -310,28 +311,27 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 
                        upcall.cmd = OVS_PACKET_CMD_MISS;
                        upcall.key = &key;
+                       upcall.userdata = NULL;
+                       upcall.pid = p->upcall_pid;
                        dp_upcall(dp, skb, &upcall);
-                       stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
+                       kfree_skb(skb);
+                       stats_counter = &stats->n_missed;
                        goto out;
                }
 
                OVS_CB(skb)->flow = flow;
        }
 
-       stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
+       stats_counter = &stats->n_hit;
        flow_used(OVS_CB(skb)->flow, skb);
        execute_actions(dp, skb);
 
 out:
        /* Update datapath statistics. */
-       local_bh_disable();
-       stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
 
        write_seqcount_begin(&stats->seqlock);
-       (*(u64 *)((u8 *)stats + stats_counter_off))++;
+       (*stats_counter)++;
        write_seqcount_end(&stats->seqlock);
-
-       local_bh_enable();
 }
 
 static void copy_and_csum_skb(struct sk_buff *skb, void *to)
@@ -357,20 +357,15 @@ static struct genl_family dp_packet_genl_family = {
        .maxattr = OVS_PACKET_ATTR_MAX
 };
 
-int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
+int dp_upcall(struct datapath *dp, struct sk_buff *skb,
+             const struct dp_upcall_info *upcall_info)
 {
+       struct sk_buff *segs = NULL;
        struct dp_stats_percpu *stats;
-       u32 pid;
        int err;
 
-       if (OVS_CB(skb)->flow)
-               pid = OVS_CB(skb)->flow->upcall_pid;
-       else
-               pid = OVS_CB(skb)->vport->upcall_pid;
-
-       if (pid == 0) {
+       if (upcall_info->pid == 0) {
                err = -ENOTCONN;
-               kfree_skb(skb);
                goto err;
        }
 
@@ -379,33 +374,37 @@ int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_i
        /* Break apart GSO packets into their component pieces.  Otherwise
         * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
        if (skb_is_gso(skb)) {
-               struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+               segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
                
-               if (IS_ERR(nskb)) {
-                       kfree_skb(skb);
-                       err = PTR_ERR(nskb);
+               if (IS_ERR(segs)) {
+                       err = PTR_ERR(segs);
                        goto err;
                }
-               consume_skb(skb);
-               skb = nskb;
+               skb = segs;
+       }
+
+       err = queue_userspace_packets(dp, skb, upcall_info);
+       if (segs) {
+               struct sk_buff *next;
+               /* Free only the GSO segments; caller owns the original skb. */
+               do {
+                       next = segs->next;
+                       kfree_skb(segs);
+               } while ((segs = next) != NULL);
        }
 
-       err = queue_userspace_packets(dp, pid, skb, upcall_info);
        if (err)
                goto err;
 
        return 0;
 
 err:
-       local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
 
        write_seqcount_begin(&stats->seqlock);
        stats->n_lost++;
        write_seqcount_end(&stats->seqlock);
 
-       local_bh_enable();
-
        return err;
 }
 
@@ -413,38 +412,28 @@ err:
  * 'upcall_info'.  There will be only one packet unless we broke up a GSO
  * packet.
  */
-static int queue_userspace_packets(struct datapath *dp, u32 pid,
-                                  struct sk_buff *skb,
+static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
                                   const struct dp_upcall_info *upcall_info)
 {
        int dp_ifindex;
-       struct sk_buff *nskb;
-       int err;
 
        dp_ifindex = get_dpifindex(dp);
-       if (!dp_ifindex) {
-               err = -ENODEV;
-               nskb = skb->next;
-               goto err_kfree_skbs;
-       }
+       if (!dp_ifindex)
+               return -ENODEV;
 
        do {
                struct ovs_header *upcall;
                struct sk_buff *user_skb; /* to be queued to userspace */
                struct nlattr *nla;
                unsigned int len;
-
-               nskb = skb->next;
-               skb->next = NULL;
+               int err;
 
                err = vlan_deaccel_tag(skb);
                if (unlikely(err))
-                       goto err_kfree_skbs;
+                       return err;
 
-               if (nla_attr_size(skb->len) > USHRT_MAX) {
-                       err = -EFBIG;
-                       goto err_kfree_skbs;
-               }
+               if (nla_attr_size(skb->len) > USHRT_MAX)
+                       return -EFBIG;
 
                len = sizeof(struct ovs_header);
                len += nla_total_size(skb->len);
@@ -453,21 +442,20 @@ static int queue_userspace_packets(struct datapath *dp, u32 pid,
                        len += nla_total_size(8);
 
                user_skb = genlmsg_new(len, GFP_ATOMIC);
-               if (!user_skb) {
-                       err = -ENOMEM;
-                       goto err_kfree_skbs;
-               }
+               if (!user_skb)
+                       return -ENOMEM;
 
-               upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
+               upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
+                                        0, upcall_info->cmd);
                upcall->dp_ifindex = dp_ifindex;
 
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
                flow_to_nlattrs(upcall_info->key, user_skb);
                nla_nest_end(user_skb, nla);
 
-               if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
+               if (upcall_info->userdata)
                        nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
-                                               upcall_info->userdata);
+                                   nla_get_u64(upcall_info->userdata));
 
                nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
                if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -475,22 +463,13 @@ static int queue_userspace_packets(struct datapath *dp, u32 pid,
                else
                        skb_copy_bits(skb, 0, nla_data(nla), skb->len);
 
-               err = genlmsg_unicast(&init_net, user_skb, pid);
+               err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
                if (err)
-                       goto err_kfree_skbs;
+                       return err;
 
-               consume_skb(skb);
-               skb = nskb;
-       } while (skb);
-       return 0;
+       } while ((skb = skb->next));
 
-err_kfree_skbs:
-       kfree_skb(skb);
-       while ((skb = nskb) != NULL) {
-               nskb = skb->next;
-               kfree_skb(skb);
-       }
-       return err;
+       return 0;
 }
 
 /* Called with genl_mutex. */
@@ -539,6 +518,26 @@ static int validate_sample(const struct nlattr *attr, int depth)
        return validate_actions(a[OVS_SAMPLE_ATTR_ACTIONS], (depth + 1));
 }
 
+static int validate_userspace(const struct nlattr *attr)
+{
+       static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] =
+       {
+               [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
+               [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
+       };
+       struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
+       int error;
+
+       error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr, userspace_policy);
+       if (error)
+               return error;
+
+       if (!a[OVS_USERSPACE_ATTR_PID] || !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
+               return -EINVAL;
+
+       return 0;
+}
+
 static int validate_actions(const struct nlattr *attr, int depth)
 {
        const struct nlattr *a;
@@ -548,9 +547,10 @@ static int validate_actions(const struct nlattr *attr, int depth)
                return -EOVERFLOW;
 
        nla_for_each_nested(a, attr, rem) {
+               /* Expected argument lengths, (u32)-1 for variable length. */
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
                        [OVS_ACTION_ATTR_OUTPUT] = 4,
-                       [OVS_ACTION_ATTR_USERSPACE] = 8,
+                       [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
                        [OVS_ACTION_ATTR_PUSH_VLAN] = 2,
                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
                        [OVS_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
@@ -563,22 +563,19 @@ static int validate_actions(const struct nlattr *attr, int depth)
                        [OVS_ACTION_ATTR_SET_TUNNEL] = 8,
                        [OVS_ACTION_ATTR_SET_PRIORITY] = 4,
                        [OVS_ACTION_ATTR_POP_PRIORITY] = 0,
+                       [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
                };
                int type = nla_type(a);
 
-               /* Match expected attr len for given attr len except for
-                * OVS_ACTION_ATTR_SAMPLE, as it has nested actions list which
-                * is variable size. */
                if (type > OVS_ACTION_ATTR_MAX ||
-                  (nla_len(a) != action_lens[type] &&
-                         type != OVS_ACTION_ATTR_SAMPLE))
+                   (action_lens[type] != nla_len(a) &&
+                    action_lens[type] != (u32)-1))
                        return -EINVAL;
 
                switch (type) {
                case OVS_ACTION_ATTR_UNSPEC:
                        return -EINVAL;
 
-               case OVS_ACTION_ATTR_USERSPACE:
                case OVS_ACTION_ATTR_POP_VLAN:
                case OVS_ACTION_ATTR_SET_DL_SRC:
                case OVS_ACTION_ATTR_SET_DL_DST:
@@ -592,6 +589,12 @@ static int validate_actions(const struct nlattr *attr, int depth)
                        /* No validation needed. */
                        break;
 
+               case OVS_ACTION_ATTR_USERSPACE:
+                       err = validate_userspace(a);
+                       if (err)
+                               return err;
+                       break;
+
                case OVS_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
@@ -693,11 +696,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
        flow->hash = flow_hash(&flow->key, key_len);
 
-       if (a[OVS_PACKET_ATTR_UPCALL_PID])
-               flow->upcall_pid = nla_get_u32(a[OVS_PACKET_ATTR_UPCALL_PID]);
-       else
-               flow->upcall_pid = NETLINK_CB(skb).pid;
-
        acts = flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
@@ -716,7 +714,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
                OVS_CB(packet)->vport = get_vport_protected(dp,
                                                        flow->key.eth.in_port);
 
+       local_bh_disable();
        err = execute_actions(dp, packet);
+       local_bh_enable();
        rcu_read_unlock();
 
        flow_put(flow);
@@ -736,7 +736,6 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
-       [OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 },
 };
 
 static struct genl_ops dp_packet_genl_ops[] = {
@@ -776,7 +775,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
 
 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
-       [OVS_FLOW_ATTR_UPCALL_PID] = { .type = NLA_U32 },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
 };
@@ -823,8 +821,6 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                goto error;
        nla_nest_end(skb, nla);
 
-       NLA_PUT_U32(skb, OVS_FLOW_ATTR_UPCALL_PID, flow->upcall_pid);
-
        spin_lock_bh(&flow->lock);
        used = flow->used;
        stats.n_packets = flow->packet_count;
@@ -962,11 +958,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                flow->key = key;
                clear_stats(flow);
 
-               if (a[OVS_FLOW_ATTR_UPCALL_PID])
-                       flow->upcall_pid = nla_get_u32(a[OVS_FLOW_ATTR_UPCALL_PID]);
-               else
-                       flow->upcall_pid = NETLINK_CB(skb).pid;
-
                /* Obtain actions. */
                acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
                error = PTR_ERR(acts);
@@ -1016,9 +1007,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq, OVS_FLOW_CMD_NEW);
 
-               if (a[OVS_FLOW_ATTR_UPCALL_PID])
-                       flow->upcall_pid = nla_get_u32(a[OVS_FLOW_ATTR_UPCALL_PID]);
-
                /* Clear stats. */
                if (a[OVS_FLOW_ATTR_CLEAR]) {
                        spin_lock_bh(&flow->lock);
@@ -1294,7 +1282,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        int err;
 
        err = -EINVAL;
-       if (!a[OVS_DP_ATTR_NAME])
+       if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
                goto err;
 
        err = ovs_dp_cmd_validate(a);
@@ -1338,10 +1326,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = OVSP_LOCAL;
-       if (a[OVS_DP_ATTR_UPCALL_PID])
-               parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
-       else
-               parms.upcall_pid = NETLINK_CB(skb).pid;
+       parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
 
        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
@@ -1561,7 +1546,6 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 {
        struct ovs_header *ovs_header;
        struct nlattr *nla;
-       int ifindex;
        int err;
 
        ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
@@ -1588,10 +1572,6 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
        if (err == -EMSGSIZE)
                goto error;
 
-       ifindex = vport_get_ifindex(vport);
-       if (ifindex > 0)
-               NLA_PUT_U32(skb, OVS_VPORT_ATTR_IFINDEX, ifindex);
-
        return genlmsg_end(skb, ovs_header);
 
 nla_put_failure:
@@ -1681,7 +1661,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
        int err;
 
        err = -EINVAL;
-       if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE])
+       if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
+           !a[OVS_VPORT_ATTR_UPCALL_PID])
                goto exit;
 
        err = ovs_vport_cmd_validate(a);
@@ -1722,10 +1703,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
        parms.dp = dp;
        parms.port_no = port_no;
-       if (a[OVS_VPORT_ATTR_UPCALL_PID])
-               parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
-       else
-               parms.upcall_pid = NETLINK_CB(skb).pid;
+       parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
 
        vport = new_vport(&parms);
        err = PTR_ERR(vport);
@@ -1773,7 +1751,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
                goto exit_unlock;
 
        err = 0;
-       if (a[OVS_VPORT_ATTR_OPTIONS])
+       if (a[OVS_VPORT_ATTR_TYPE] && nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport_get_type(vport))
+               err = -EINVAL;
+       if (!err && a[OVS_VPORT_ATTR_OPTIONS])
                err = vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
        if (!err)
                err = change_vport(vport, a);