From: Ben Pfaff Date: Wed, 12 Oct 2011 23:24:54 +0000 (-0700) Subject: datapath: Move Netlink PID for userspace actions from flows to actions. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=98403001ece61cbf783297c467a06032b200b8d0;p=openvswitch datapath: Move Netlink PID for userspace actions from flows to actions. Commit b063d9f06 "datapath: Use unicast Netlink sockets for upcalls" that switched from multicast to unicast Netlink for sending upcalls added a Netlink PID to each kernel flow, used by OVS_ACTION_ATTR_USERSPACE actions within the flow as target. This commit drops this per-flow PID in favor of a per-action PID, because that is more flexible. It does not yet make use of this additional flexibility, so behavior should not change. Signed-off-by: Ben Pfaff Acked-by: Jesse Gross Bug #7559. --- diff --git a/datapath/actions.c b/datapath/actions.c index 945c461c..470e617b 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -245,13 +245,31 @@ static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) return 0; } -static int output_userspace(struct datapath *dp, struct sk_buff *skb, u64 arg) +static int output_userspace(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) { struct dp_upcall_info upcall; + const struct nlattr *a; + int rem; upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.key = &OVS_CB(skb)->flow->key; - upcall.userdata = arg; + upcall.userdata = NULL; + upcall.pid = 0; + + for (a = nla_data(attr), rem = nla_len(attr); rem > 0; + a = nla_next(a, &rem)) { + switch (nla_type(a)) { + case OVS_USERSPACE_ATTR_USERDATA: + upcall.userdata = a; + break; + + case OVS_USERSPACE_ATTR_PID: + upcall.pid = nla_get_u32(a); + break; + } + } + return dp_upcall(dp, skb, &upcall); } @@ -308,7 +326,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_USERSPACE: - output_userspace(dp, skb, nla_get_u64(a)); + output_userspace(dp, skb, a); break; case OVS_ACTION_ATTR_SET_TUNNEL: diff --git a/datapath/datapath.c b/datapath/datapath.c index 07188ad5..551b384a 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -84,7 +84,7 @@ EXPORT_SYMBOL(dp_ioctl_hook); static LIST_HEAD(dps); static struct vport *new_vport(const struct vport_parms *); -static int queue_userspace_packets(struct datapath *, u32 pid, struct sk_buff *, +static int queue_userspace_packets(struct datapath *, struct sk_buff *, const struct dp_upcall_info *); /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ @@ -311,6 +311,8 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; + upcall.userdata = NULL; + upcall.pid = p->upcall_pid; dp_upcall(dp, skb, &upcall); kfree_skb(skb); stats_counter = &stats->n_missed; @@ -360,15 +362,9 @@ int dp_upcall(struct datapath *dp, struct sk_buff *skb, { struct sk_buff *segs = NULL; struct dp_stats_percpu *stats; - u32 pid; int err; - if (OVS_CB(skb)->flow) - pid = OVS_CB(skb)->flow->upcall_pid; - else - pid = OVS_CB(skb)->vport->upcall_pid; - - if (pid == 0) { + if (upcall_info->pid == 0) { err = -ENOTCONN; goto err; } @@ -387,7 +383,7 @@ int dp_upcall(struct datapath *dp, struct sk_buff *skb, skb = segs; } - err = queue_userspace_packets(dp, pid, skb, upcall_info); + err = queue_userspace_packets(dp, skb, upcall_info); if (segs) { struct sk_buff *next; /* Free GSO-segments */ @@ -416,8 +412,7 @@ err: * 'upcall_info'. There will be only one packet unless we broke up a GSO * packet. */ -static int queue_userspace_packets(struct datapath *dp, u32 pid, - struct sk_buff *skb, +static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { int dp_ifindex; @@ -458,9 +453,9 @@ static int queue_userspace_packets(struct datapath *dp, u32 pid, flow_to_nlattrs(upcall_info->key, user_skb); nla_nest_end(user_skb, nla); - if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) + if (upcall_info->userdata) nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, - upcall_info->userdata); + nla_get_u64(upcall_info->userdata)); nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -468,7 +463,7 @@ static int queue_userspace_packets(struct datapath *dp, u32 pid, else skb_copy_bits(skb, 0, nla_data(nla), skb->len); - err = genlmsg_unicast(&init_net, user_skb, pid); + err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid); if (err) return err; @@ -523,6 +518,26 @@ static int validate_sample(const struct nlattr *attr, int depth) return validate_actions(a[OVS_SAMPLE_ATTR_ACTIONS], (depth + 1)); } +static int validate_userspace(const struct nlattr *attr) +{ + static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = + { + [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, + }; + struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; + int error; + + error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr, userspace_policy); + if (error) + return error; + + if (!a[OVS_USERSPACE_ATTR_PID] || !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) + return -EINVAL; + + return 0; +} + static int validate_actions(const struct nlattr *attr, int depth) { const struct nlattr *a; @@ -532,9 +547,10 @@ static int validate_actions(const struct nlattr *attr, int depth) return -EOVERFLOW; nla_for_each_nested(a, attr, rem) { + /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { [OVS_ACTION_ATTR_OUTPUT] = 4, - [OVS_ACTION_ATTR_USERSPACE] = 8, + [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, [OVS_ACTION_ATTR_PUSH_VLAN] = 2, [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN, @@ -547,22 +563,19 @@ static int validate_actions(const struct nlattr *attr, int depth) [OVS_ACTION_ATTR_SET_TUNNEL] = 8, [OVS_ACTION_ATTR_SET_PRIORITY] = 4, [OVS_ACTION_ATTR_POP_PRIORITY] = 0, + [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 }; int type = nla_type(a); - /* Match expected attr len for given attr len except for - * OVS_ACTION_ATTR_SAMPLE, as it has nested actions list which - * is variable size. */ if (type > OVS_ACTION_ATTR_MAX || - (nla_len(a) != action_lens[type] && - type != OVS_ACTION_ATTR_SAMPLE)) + (action_lens[type] != nla_len(a) && + action_lens[type] != (u32)-1)) return -EINVAL; switch (type) { case OVS_ACTION_ATTR_UNSPEC: return -EINVAL; - case OVS_ACTION_ATTR_USERSPACE: case OVS_ACTION_ATTR_POP_VLAN: case OVS_ACTION_ATTR_SET_DL_SRC: case OVS_ACTION_ATTR_SET_DL_DST: @@ -576,6 +589,12 @@ static int validate_actions(const struct nlattr *attr, int depth) /* No validation needed. */ break; + case OVS_ACTION_ATTR_USERSPACE: + err = validate_userspace(a); + if (err) + return err; + break; + case OVS_ACTION_ATTR_OUTPUT: if (nla_get_u32(a) >= DP_MAX_PORTS) return -EINVAL; @@ -677,11 +696,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) flow->hash = flow_hash(&flow->key, key_len); - if (a[OVS_PACKET_ATTR_UPCALL_PID]) - flow->upcall_pid = nla_get_u32(a[OVS_PACKET_ATTR_UPCALL_PID]); - else - flow->upcall_pid = NETLINK_CB(skb).pid; - acts = flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]); err = PTR_ERR(acts); if (IS_ERR(acts)) @@ -722,7 +736,6 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, - [OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 }, }; static struct genl_ops dp_packet_genl_ops[] = { @@ -762,7 +775,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_UPCALL_PID] = { .type = NLA_U32 }, [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, }; @@ -809,8 +821,6 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, goto error; nla_nest_end(skb, nla); - NLA_PUT_U32(skb, OVS_FLOW_ATTR_UPCALL_PID, flow->upcall_pid); - spin_lock_bh(&flow->lock); used = flow->used; stats.n_packets = flow->packet_count; @@ -948,11 +958,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->key = key; clear_stats(flow); - if (a[OVS_FLOW_ATTR_UPCALL_PID]) - flow->upcall_pid = nla_get_u32(a[OVS_FLOW_ATTR_UPCALL_PID]); - else - flow->upcall_pid = NETLINK_CB(skb).pid; - /* Obtain actions. */ acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); error = PTR_ERR(acts); @@ -1002,9 +1007,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, OVS_FLOW_CMD_NEW); - if (a[OVS_FLOW_ATTR_UPCALL_PID]) - flow->upcall_pid = nla_get_u32(a[OVS_FLOW_ATTR_UPCALL_PID]); - /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) { spin_lock_bh(&flow->lock); diff --git a/datapath/datapath.h b/datapath/datapath.h index 8a713913..b93665ce 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -117,13 +117,17 @@ struct ovs_skb_cb { * struct dp_upcall - metadata to include with a packet to send to userspace * @cmd: One of %OVS_PACKET_CMD_*. * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull. - * @userdata: Is passed to user-space as %OVS_PACKET_ATTR_USERDATA if @cmd is - * %OVS_PACKET_CMD_ACTION. + * @userdata: If nonnull, its u64 value is extracted and passed to userspace as + * %OVS_PACKET_ATTR_USERDATA. + * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no + * packet is sent and the packet is accounted in the datapath's @n_lost + * counter. */ struct dp_upcall_info { u8 cmd; const struct sw_flow_key *key; - u64 userdata; + const struct nlattr *userdata; + u32 pid; }; extern struct notifier_block dp_device_notifier; diff --git a/datapath/flow.h b/datapath/flow.h index ae12fe4d..3590a7df 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -81,7 +81,6 @@ struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node; u32 hash; - u32 upcall_pid; struct sw_flow_key key; struct sw_flow_actions __rcu *sf_acts; diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h index 6c894111..976ab684 100644 --- a/include/openvswitch/datapath-protocol.h +++ b/include/openvswitch/datapath-protocol.h @@ -170,13 +170,9 @@ enum ovs_packet_cmd { * flow key against the kernel's. * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. - * @OVS_PACKET_ATTR_UPCALL_PID: Optionally present for OVS_PACKET_CMD_EXECUTE. - * The Netlink socket in userspace that OVS_PACKET_CMD_USERSPACE and - * OVS_PACKET_CMD_SAMPLE upcalls will be directed to for actions triggered by - * this packet. A value of zero indicates that upcalls should not be sent. * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION - * notification if the %OVS_ACTION_ATTR_USERSPACE, action's argument was - * nonzero. + * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an + * %OVS_USERSPACE_ATTR_USERDATA attribute. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -186,7 +182,6 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_PACKET, /* Packet data. */ OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - OVS_PACKET_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls. */ OVS_PACKET_ATTR_USERDATA, /* u64 OVS_ACTION_ATTR_USERSPACE arg. */ __OVS_PACKET_ATTR_MAX }; @@ -372,12 +367,6 @@ struct ovs_key_nd { * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying * the actions to take for packets that match the key. Always present in * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional - * @OVS_FLOW_ATTR_UPCALL_PID: The Netlink socket in userspace that - * OVS_PACKET_CMD_USERSPACE and OVS_PACKET_CMD_SAMPLE upcalls will be - * directed to for packets received on this port. A value of zero indicates - * that upcalls should not be sent. - * on %OVS_FLOW_CMD_SET request to change the existing actions, ignored for - * other requests. * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this * flow. Present in notifications if the stats would be nonzero. Ignored in * requests. @@ -399,7 +388,6 @@ enum ovs_flow_attr { OVS_FLOW_ATTR_UNSPEC, OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */ OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - OVS_FLOW_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls. */ OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */ OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ @@ -410,13 +398,16 @@ enum ovs_flow_attr { #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) /** - * enum ovs_sample_attr - Attributes for OVS_ACTION_ATTR_SAMPLE + * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of * %UINT32_MAX samples all packets and intermediate values sample intermediate * fractions of packets. * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. * Actions are passed as nested attributes. + * + * Executes the specified actions with the given probability on a per-packet + * basis. */ enum ovs_sample_attr { OVS_SAMPLE_ATTR_UNSPEC, @@ -427,11 +418,27 @@ enum ovs_sample_attr { #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) +/** + * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. + * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION + * message should be sent. Required. + * @OVS_USERSPACE_ATTR_USERDATA: If present, its u64 argument is copied to the + * %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA, + */ +enum ovs_userspace_attr { + OVS_USERSPACE_ATTR_UNSPEC, + OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ + OVS_USERSPACE_ATTR_USERDATA, /* u64 optional user-specified cookie. */ + __OVS_USERSPACE_ATTR_MAX +}; + +#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) + /* Action types. */ enum ovs_action_type { OVS_ACTION_ATTR_UNSPEC, OVS_ACTION_ATTR_OUTPUT, /* Output to switch port. */ - OVS_ACTION_ATTR_USERSPACE, /* Send copy to userspace. */ + OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */ OVS_ACTION_ATTR_PUSH_VLAN, /* Set the 802.1q TCI value. */ OVS_ACTION_ATTR_POP_VLAN, /* Strip the 802.1q header. */ OVS_ACTION_ATTR_SET_DL_SRC, /* Ethernet source address. */ @@ -444,8 +451,7 @@ enum ovs_action_type { OVS_ACTION_ATTR_SET_TUNNEL, /* Set the encapsulating tunnel ID. */ OVS_ACTION_ATTR_SET_PRIORITY, /* Set skb->priority. */ OVS_ACTION_ATTR_POP_PRIORITY, /* Restore original skb->priority. */ - OVS_ACTION_ATTR_SAMPLE, /* Execute list of actions at given - probability. */ + OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ __OVS_ACTION_ATTR_MAX }; diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 98c4682a..1e1afe50 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -112,7 +112,6 @@ struct dpif_linux_flow { size_t key_len; const struct nlattr *actions; /* OVS_FLOW_ATTR_ACTIONS. */ size_t actions_len; - const uint32_t *upcall_pid; /* OVS_FLOW_ATTR_UPCALL_PID. */ const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */ const uint8_t *tcp_flags; /* OVS_FLOW_ATTR_TCP_FLAGS. */ const ovs_32aligned_u64 *used; /* OVS_FLOW_ATTR_USED. */ @@ -168,9 +167,9 @@ static int dpif_linux_init(void); static void open_dpif(const struct dpif_linux_dp *, struct dpif **); static bool dpif_linux_nln_parse(struct ofpbuf *, void *); static void dpif_linux_port_changed(const void *vport, void *dpif); -static uint32_t get_upcall_pid_port(struct dpif_linux *, uint32_t port); -static uint32_t get_upcall_pid_flow(struct dpif_linux *, - const struct nlattr *key, size_t key_len); +static uint32_t dpif_linux_port_get_pid__(const struct dpif *, + uint16_t port_no, + enum dpif_upcall_type); static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *, struct ofpbuf *); @@ -418,7 +417,8 @@ dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev, uint32_t upcall_pid; request.port_no = dpif_linux_pop_port(dpif); - upcall_pid = get_upcall_pid_port(dpif, request.port_no); + upcall_pid = dpif_linux_port_get_pid__(dpif_, request.port_no, + DPIF_UC_MISS); request.upcall_pid = &upcall_pid; error = dpif_linux_vport_transact(&request, &reply, &buf); @@ -500,6 +500,26 @@ dpif_linux_get_max_ports(const struct dpif *dpif OVS_UNUSED) return 1024; } +static uint32_t +dpif_linux_port_get_pid__(const struct dpif *dpif_, uint16_t port_no, + enum dpif_upcall_type upcall_type) +{ + struct dpif_linux *dpif = dpif_linux_cast(dpif_); + + if (!(dpif->listen_mask & (1u << upcall_type))) { + return 0; + } else { + int idx = port_no & (N_UPCALL_SOCKS - 1); + return nl_sock_pid(dpif->upcall_socks[idx]); + } +} + +static uint32_t +dpif_linux_port_get_pid(const struct dpif *dpif, uint16_t port_no) +{ + return dpif_linux_port_get_pid__(dpif, port_no, DPIF_UC_ACTION); +} + static int dpif_linux_flow_flush(struct dpif *dpif_) { @@ -669,12 +689,9 @@ dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags, struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_flow request, reply; struct nlattr dummy_action; - uint32_t upcall_pid; struct ofpbuf *buf; int error; - upcall_pid = get_upcall_pid_flow(dpif, key, key_len); - dpif_linux_flow_init(&request); request.cmd = flags & DPIF_FP_CREATE ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET; request.dp_ifindex = dpif->dp_ifindex; @@ -683,7 +700,6 @@ dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags, /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */ request.actions = actions ? actions : &dummy_action; request.actions_len = actions_len; - request.upcall_pid = &upcall_pid; if (flags & DPIF_FP_ZERO_STATS) { request.clear = true; } @@ -815,8 +831,7 @@ dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) } static int -dpif_linux_execute__(int dp_ifindex, uint32_t upcall_pid, - const struct nlattr *key, size_t key_len, +dpif_linux_execute__(int dp_ifindex, const struct nlattr *key, size_t key_len, const struct nlattr *actions, size_t actions_len, const struct ofpbuf *packet) { @@ -835,7 +850,6 @@ dpif_linux_execute__(int dp_ifindex, uint32_t upcall_pid, nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET, packet->data, packet->size); nl_msg_put_unspec(buf, OVS_PACKET_ATTR_KEY, key, key_len); nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS, actions, actions_len); - nl_msg_put_u32(buf, OVS_PACKET_ATTR_UPCALL_PID, upcall_pid); error = nl_sock_transact(genl_sock, buf, NULL); ofpbuf_delete(buf); @@ -849,9 +863,8 @@ dpif_linux_execute(struct dpif *dpif_, const struct ofpbuf *packet) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); - uint32_t upcall_pid = get_upcall_pid_flow(dpif, key, key_len); - return dpif_linux_execute__(dpif->dp_ifindex, upcall_pid, key, key_len, + return dpif_linux_execute__(dpif->dp_ifindex, key, key_len, actions, actions_len, packet); } @@ -863,56 +876,17 @@ dpif_linux_recv_get_mask(const struct dpif *dpif_, int *listen_mask) return 0; } -static uint32_t -get_upcall_pid_port__(struct dpif_linux *dpif, uint32_t port) -{ - int idx = port & (N_UPCALL_SOCKS - 1); - return nl_sock_pid(dpif->upcall_socks[idx]); -} - -static uint32_t -get_upcall_pid_port(struct dpif_linux *dpif, uint32_t port) -{ - if (!(dpif->listen_mask & (1u << DPIF_UC_MISS))) { - return 0; - } - - return get_upcall_pid_port__(dpif, port); -} - -static uint32_t -get_upcall_pid_flow(struct dpif_linux *dpif, - const struct nlattr *key, size_t key_len) -{ - const struct nlattr *nla; - uint32_t port; - - if (!(dpif->listen_mask & (1u << DPIF_UC_ACTION))) { - return 0; - } - - nla = nl_attr_find__(key, key_len, OVS_KEY_ATTR_IN_PORT); - if (nla) { - port = nl_attr_get_u32(nla); - } else { - port = random_uint32(); - } - - return get_upcall_pid_port__(dpif, port); -} - static void -set_upcall_pids(struct dpif_linux *dpif) +set_upcall_pids(struct dpif *dpif_) { - struct dpif_port port; + struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_port_dump port_dump; - struct dpif_flow_dump flow_dump; - const struct nlattr *key; - size_t key_len; + struct dpif_port port; int error; DPIF_PORT_FOR_EACH (&port, &port_dump, &dpif->dpif) { - uint32_t upcall_pid = get_upcall_pid_port(dpif, port.port_no); + uint32_t upcall_pid = dpif_linux_port_get_pid__(dpif_, port.port_no, + DPIF_UC_MISS); struct dpif_linux_vport vport_request; dpif_linux_vport_init(&vport_request); @@ -930,26 +904,6 @@ set_upcall_pids(struct dpif_linux *dpif) dpif_name(&dpif->dpif), strerror(error)); } } - - dpif_flow_dump_start(&flow_dump, &dpif->dpif); - while (dpif_flow_dump_next(&flow_dump, &key, &key_len, - NULL, NULL, NULL)) { - uint32_t upcall_pid = get_upcall_pid_flow(dpif, key, key_len); - struct dpif_linux_flow flow_request; - - dpif_linux_flow_init(&flow_request); - flow_request.cmd = OVS_FLOW_CMD_SET; - flow_request.dp_ifindex = dpif->dp_ifindex; - flow_request.key = key; - flow_request.key_len = key_len; - flow_request.upcall_pid = &upcall_pid; - error = dpif_linux_flow_transact(&flow_request, NULL, NULL); - if (error) { - VLOG_WARN_RL(&error_rl, "%s: failed to set upcall pid on flow: %s", - dpif_name(&dpif->dpif), strerror(error)); - } - } - dpif_flow_dump_done(&flow_dump); } static int @@ -977,7 +931,7 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask) } dpif->listen_mask = listen_mask; - set_upcall_pids(dpif); + set_upcall_pids(dpif_); return 0; } @@ -1148,6 +1102,7 @@ const struct dpif_class dpif_linux_class = { dpif_linux_port_query_by_number, dpif_linux_port_query_by_name, dpif_linux_get_max_ports, + dpif_linux_port_get_pid, dpif_linux_port_dump_start, dpif_linux_port_dump_next, dpif_linux_port_dump_done, @@ -1248,7 +1203,7 @@ dpif_linux_vport_send(int dp_ifindex, uint32_t port_no, ofpbuf_use_stack(&actions, &action, sizeof action); nl_msg_put_u32(&actions, OVS_ACTION_ATTR_OUTPUT, port_no); - return dpif_linux_execute__(dp_ifindex, 0, key.data, key.size, + return dpif_linux_execute__(dp_ifindex, key.data, key.size, actions.data, actions.size, &packet); } @@ -1623,7 +1578,6 @@ dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow, static const struct nl_policy ovs_flow_policy[] = { [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED }, [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true }, - [OVS_FLOW_ATTR_UPCALL_PID] = { .type = NL_A_U32 }, [OVS_FLOW_ATTR_STATS] = { .type = NL_A_UNSPEC, .min_len = sizeof(struct ovs_flow_stats), .max_len = sizeof(struct ovs_flow_stats), @@ -1660,9 +1614,6 @@ dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow, flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]); flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]); } - if (a[OVS_FLOW_ATTR_UPCALL_PID]) { - flow->upcall_pid = nl_attr_get(a[OVS_FLOW_ATTR_UPCALL_PID]); - } if (a[OVS_FLOW_ATTR_STATS]) { flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]); } @@ -1699,10 +1650,6 @@ dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow, flow->actions, flow->actions_len); } - if (flow->upcall_pid) { - nl_msg_put_u32(buf, OVS_FLOW_ATTR_UPCALL_PID, *flow->upcall_pid); - } - /* We never need to send these to the kernel. */ assert(!flow->stats); assert(!flow->tcp_flags); diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index c1c339e9..1bb306ab 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1244,6 +1244,19 @@ dp_netdev_sample(struct dp_netdev *dp, nl_attr_get_size(subactions)); } +static void +dp_netdev_action_userspace(struct dp_netdev *dp, + struct ofpbuf *packet, struct flow *key, + const struct nlattr *a) +{ + const struct nlattr *userdata_attr; + uint64_t userdata; + + userdata_attr = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); + userdata = userdata_attr ? nl_attr_get_u64(userdata_attr) : 0; + dp_netdev_output_userspace(dp, packet, DPIF_UC_ACTION, key, userdata); +} + static int dp_netdev_execute_actions(struct dp_netdev *dp, struct ofpbuf *packet, struct flow *key, @@ -1262,8 +1275,7 @@ dp_netdev_execute_actions(struct dp_netdev *dp, break; case OVS_ACTION_ATTR_USERSPACE: - dp_netdev_output_userspace(dp, packet, DPIF_UC_ACTION, - key, nl_attr_get_u64(a)); + dp_netdev_action_userspace(dp, packet, key, a); break; case OVS_ACTION_ATTR_PUSH_VLAN: @@ -1330,6 +1342,7 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_port_query_by_number, dpif_netdev_port_query_by_name, dpif_netdev_get_max_ports, + NULL, /* port_get_pid */ dpif_netdev_port_dump_start, dpif_netdev_port_dump_next, dpif_netdev_port_dump_done, diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 1975cc5c..8cdedcd1 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -142,6 +142,18 @@ struct dpif_class { * actions. */ int (*get_max_ports)(const struct dpif *dpif); + /* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE + * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in + * flows whose packets arrived on port 'port_no'. + * + * The return value only needs to be meaningful when DPIF_UC_ACTION has + * been enabled in the 'dpif''s listen mask, and it is allowed to change + * when DPIF_UC_ACTION is disabled and then re-enabled. + * + * A dpif provider that doesn't have meaningful Netlink PIDs can use NULL + * for this function. This is equivalent to always returning 0. */ + uint32_t (*port_get_pid)(const struct dpif *dpif, uint16_t port_no); + /* Attempts to begin dumping the ports in a dpif. On success, returns 0 * and initializes '*statep' with any data needed for iteration. On * failure, returns a positive errno value. */ @@ -300,7 +312,11 @@ struct dpif_class { /* Sets 'dpif''s "listen mask" to 'listen_mask'. A 1-bit of value 2**X set * in '*listen_mask' requests that 'dpif' will receive messages of the type * (from "enum dpif_upcall_type") with value X when its 'recv' function is - * called. */ + * called. + * + * Turning DPIF_UC_ACTION off and then back on is allowed to change Netlink + * PID assignments (see ->port_get_pid()). The client is responsible for + * updating flows as necessary if it does this. */ int (*recv_set_mask)(struct dpif *dpif, int listen_mask); /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a diff --git a/lib/dpif.c b/lib/dpif.c index c6940b14..aae0ffea 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -542,6 +542,23 @@ dpif_get_max_ports(const struct dpif *dpif) return dpif->dpif_class->get_max_ports(dpif); } +/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE actions + * as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose + * packets arrived on port 'port_no'. + * + * The return value is only meaningful when DPIF_UC_ACTION has been enabled in + * the 'dpif''s listen mask. It is allowed to change when DPIF_UC_ACTION is + * disabled and then re-enabled, so a client that does that must be prepared to + * update all of the flows that it installed that contain + * OVS_ACTION_ATTR_USERSPACE actions. */ +uint32_t +dpif_port_get_pid(const struct dpif *dpif, uint16_t port_no) +{ + return (dpif->dpif_class->port_get_pid + ? (dpif->dpif_class->port_get_pid)(dpif, port_no) + : 0); +} + /* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and copies * the port's name into the 'name_size' bytes in 'name', ensuring that the * result is null-terminated. On failure, returns a positive errno value and @@ -1008,7 +1025,12 @@ dpif_recv_get_mask(const struct dpif *dpif, int *listen_mask) /* Sets 'dpif''s "listen mask" to 'listen_mask'. A 1-bit of value 2**X set in * '*listen_mask' requests that dpif_recv() will receive messages of the type * (from "enum dpif_upcall_type") with value X. Returns 0 if successful, - * otherwise a positive errno value. */ + * otherwise a positive errno value. + * + * Turning DPIF_UC_ACTION off and then back on may change the Netlink PID + * assignments returned by dpif_port_get_pid(). If the client does this, it + * must update all of the flows that have OVS_ACTION_ATTR_USERSPACE actions + * using the new PID assignment. */ int dpif_recv_set_mask(struct dpif *dpif, int listen_mask) { diff --git a/lib/dpif.h b/lib/dpif.h index 04c0a519..f7ffbcec 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -92,6 +92,7 @@ int dpif_port_query_by_name(const struct dpif *, const char *devname, int dpif_port_get_name(struct dpif *, uint16_t port_no, char *name, size_t name_size); int dpif_get_max_ports(const struct dpif *); +uint32_t dpif_port_get_pid(const struct dpif *, uint16_t port_no); struct dpif_port_dump { const struct dpif *dpif; diff --git a/lib/odp-util.c b/lib/odp-util.c index c67e14a9..08378a68 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -39,6 +39,15 @@ * interactions with the datapath. */ +/* Returns one the following for the action with the given OVS_ACTION_ATTR_* + * 'type': + * + * - For an action whose argument has a fixed length, returned that + * nonnegative length in bytes. + * + * - For an action with a variable-length argument, returns -2. + * + * - For an invalid 'type', returns -1. */ int odp_action_len(uint16_t type) { @@ -48,7 +57,7 @@ odp_action_len(uint16_t type) switch ((enum ovs_action_type) type) { case OVS_ACTION_ATTR_OUTPUT: return 4; - case OVS_ACTION_ATTR_USERSPACE: return 8; + case OVS_ACTION_ATTR_USERSPACE: return -2; case OVS_ACTION_ATTR_PUSH_VLAN: return 2; case OVS_ACTION_ATTR_POP_VLAN: return 0; case OVS_ACTION_ATTR_SET_DL_SRC: return ETH_ADDR_LEN; @@ -61,8 +70,8 @@ odp_action_len(uint16_t type) case OVS_ACTION_ATTR_SET_TUNNEL: return 8; case OVS_ACTION_ATTR_SET_PRIORITY: return 4; case OVS_ACTION_ATTR_POP_PRIORITY: return 0; + case OVS_ACTION_ATTR_SAMPLE: return -2; - case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: return -1; @@ -121,18 +130,57 @@ format_odp_sample_action(struct ds *ds, const struct nlattr *attr) ds_put_format(ds, "))"); } +static void +format_odp_userspace_action(struct ds *ds, const struct nlattr *attr) +{ + static const struct nl_policy ovs_userspace_policy[] = { + [OVS_USERSPACE_ATTR_PID] = { .type = NL_A_U32 }, + [OVS_USERSPACE_ATTR_USERDATA] = { .type = NL_A_U64, .optional = true }, + }; + struct nlattr *a[ARRAY_SIZE(ovs_userspace_policy)]; + + if (!nl_parse_nested(attr, ovs_userspace_policy, a, ARRAY_SIZE(a))) { + ds_put_cstr(ds, "userspace(error)"); + return; + } + + ds_put_format(ds, "userspace(pid=%"PRIu32, + nl_attr_get_u32(a[OVS_USERSPACE_ATTR_PID])); + + if (a[OVS_USERSPACE_ATTR_USERDATA]) { + uint64_t userdata = nl_attr_get_u64(a[OVS_USERSPACE_ATTR_USERDATA]); + struct user_action_cookie cookie; + + memcpy(&cookie, &userdata, sizeof cookie); + + if (cookie.type == USER_ACTION_COOKIE_CONTROLLER) { + ds_put_format(ds, ",controller,length=%"PRIu32, + cookie.data); + } else if (cookie.type == USER_ACTION_COOKIE_SFLOW) { + ds_put_format(ds, ",sFlow,n_output=%"PRIu8"," + "vid=%"PRIu16",pcp=%"PRIu8",ifindex=%"PRIu32, + cookie.n_output, vlan_tci_to_vid(cookie.vlan_tci), + vlan_tci_to_pcp(cookie.vlan_tci), cookie.data); + } else { + ds_put_format(ds, ",userdata=0x%"PRIx64, userdata); + } + } + + ds_put_char(ds, ')'); +} + + void format_odp_action(struct ds *ds, const struct nlattr *a) { const uint8_t *eth; + int expected_len; ovs_be32 ip; - struct user_action_cookie cookie; - uint64_t data; - if (nl_attr_get_size(a) != odp_action_len(nl_attr_type(a)) && - nl_attr_type(a) != OVS_ACTION_ATTR_SAMPLE) { + expected_len = odp_action_len(nl_attr_type(a)); + if (expected_len != -2 && nl_attr_get_size(a) != expected_len) { ds_put_format(ds, "bad length %zu, expected %d for: ", - nl_attr_get_size(a), odp_action_len(nl_attr_type(a))); + nl_attr_get_size(a), expected_len); format_generic_odp_action(ds, a); return; } @@ -142,21 +190,7 @@ format_odp_action(struct ds *ds, const struct nlattr *a) ds_put_format(ds, "%"PRIu16, nl_attr_get_u32(a)); break; case OVS_ACTION_ATTR_USERSPACE: - data = nl_attr_get_u64(a); - memcpy(&cookie, &data, sizeof(cookie)); - - if (cookie.type == USER_ACTION_COOKIE_CONTROLLER) { - ds_put_format(ds, "userspace(controller,length=%"PRIu32")", - cookie.data); - } else if (cookie.type == USER_ACTION_COOKIE_SFLOW) { - ds_put_format(ds, "userspace(sFlow,n_output=%"PRIu8"," - "vid=%"PRIu16",pcp=%"PRIu8",ifindex=%"PRIu32")", - cookie.n_output, vlan_tci_to_vid(cookie.vlan_tci), - vlan_tci_to_pcp(cookie.vlan_tci), cookie.data); - } else { - ds_put_format(ds, "userspace(unknown,data=0x%"PRIx64")", - nl_attr_get_u64(a)); - } + format_odp_userspace_action(ds, a); break; case OVS_ACTION_ATTR_SET_TUNNEL: ds_put_format(ds, "set_tunnel(%#"PRIx64")", diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 1ffe36a9..36635fc6 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -2207,13 +2207,17 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, struct ofpbuf key; int error; - if (actions_len == NLA_ALIGN(NLA_HDRLEN + sizeof(uint64_t)) - && odp_actions->nla_type == OVS_ACTION_ATTR_USERSPACE) { - const struct user_action_cookie *cookie; + if (odp_actions->nla_type == OVS_ACTION_ATTR_USERSPACE + && NLA_ALIGN(odp_actions->nla_len) == actions_len) { + struct user_action_cookie cookie; struct dpif_upcall upcall; + uint64_t cookie_u64; - cookie = nl_attr_get_unspec(odp_actions, sizeof(*cookie)); - if (cookie->type == USER_ACTION_COOKIE_CONTROLLER) { + cookie_u64 = nl_attr_get_u64(nl_attr_find_nested( + odp_actions, + OVS_USERSPACE_ATTR_USERDATA)); + memcpy(&cookie, &cookie_u64, sizeof cookie); + if (cookie.type == USER_ACTION_COOKIE_CONTROLLER) { /* As an optimization, avoid a round-trip from userspace to kernel * to userspace. This also avoids possibly filling up kernel packet * buffers along the way. @@ -2965,6 +2969,27 @@ static void do_xlate_actions(const union ofp_action *in, size_t n_in, struct action_xlate_ctx *ctx); static void xlate_normal(struct action_xlate_ctx *); +static size_t +put_userspace_action(const struct ofproto_dpif *ofproto, + struct ofpbuf *odp_actions, + const struct flow *flow, + const struct user_action_cookie *cookie) +{ + size_t offset; + uint32_t pid; + + pid = dpif_port_get_pid(ofproto->dpif, + ofp_port_to_odp_port(flow->in_port)); + + offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_USERSPACE); + nl_msg_put_u32(odp_actions, OVS_USERSPACE_ATTR_PID, pid); + nl_msg_put_unspec(odp_actions, OVS_USERSPACE_ATTR_USERDATA, + cookie, sizeof *cookie); + nl_msg_end_nested(odp_actions, offset); + + return odp_actions->size - NLA_ALIGN(sizeof *cookie); +} + /* Compose SAMPLE action for sFlow. */ static size_t compose_sflow_action(const struct ofproto_dpif *ofproto, @@ -2974,9 +2999,9 @@ compose_sflow_action(const struct ofproto_dpif *ofproto, { uint32_t port_ifindex; uint32_t probability; - struct user_action_cookie *cookie; + struct user_action_cookie cookie; size_t sample_offset, actions_offset; - int user_cookie_offset, n_output; + int cookie_offset, n_output; if (!ofproto->sflow || flow->in_port == OFPP_NONE) { return 0; @@ -2998,17 +3023,15 @@ compose_sflow_action(const struct ofproto_dpif *ofproto, actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS); - cookie = nl_msg_put_unspec_uninit(odp_actions, OVS_ACTION_ATTR_USERSPACE, - sizeof(*cookie)); - cookie->type = USER_ACTION_COOKIE_SFLOW; - cookie->data = port_ifindex; - cookie->n_output = n_output; - cookie->vlan_tci = 0; - user_cookie_offset = (char *) cookie - (char *) odp_actions->data; + cookie.type = USER_ACTION_COOKIE_SFLOW; + cookie.data = port_ifindex; + cookie.n_output = n_output; + cookie.vlan_tci = 0; + cookie_offset = put_userspace_action(ofproto, odp_actions, flow, &cookie); nl_msg_end_nested(odp_actions, actions_offset); nl_msg_end_nested(odp_actions, sample_offset); - return user_cookie_offset; + return cookie_offset; } /* SAMPLE action must be first action in any given list of actions. @@ -3253,7 +3276,7 @@ flood_packets(struct action_xlate_ctx *ctx, ovs_be32 mask) } static void -compose_controller_action(struct ofpbuf *odp_actions, int len) +compose_controller_action(struct action_xlate_ctx *ctx, int len) { struct user_action_cookie cookie; @@ -3261,9 +3284,7 @@ compose_controller_action(struct ofpbuf *odp_actions, int len) cookie.data = len; cookie.n_output = 0; cookie.vlan_tci = 0; - - nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_USERSPACE, - &cookie, sizeof(cookie)); + put_userspace_action(ctx->ofproto, ctx->odp_actions, &ctx->flow, &cookie); } static void @@ -3292,7 +3313,7 @@ xlate_output_action__(struct action_xlate_ctx *ctx, break; case OFPP_CONTROLLER: commit_odp_actions(ctx); - compose_controller_action(ctx->odp_actions, max_len); + compose_controller_action(ctx, max_len); break; case OFPP_LOCAL: add_output_action(ctx, OFPP_LOCAL);