From: Ben Pfaff Date: Wed, 1 Jun 2011 20:39:51 +0000 (-0700) Subject: datapath: Get packet metadata from userspace in odp_packet_cmd_execute(). X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=80e5eed9;p=openvswitch datapath: Get packet metadata from userspace in odp_packet_cmd_execute(). Until now, the tun_id and in_port have been lost when a packet is sent from the kernel to userspace and then back to the kernel. I didn't think that this was a problem, but recent behavior made me look closer and see that it makes a difference if sFlow is turned on or if an ODP_ATTR_ACTION_CONTROLLER action is present. We could possibly kluge around those, but for future-proofing it seems better to pass the packet metadata from userspace to the kernel. That is what this commit does. This commit introduces a user-kernel protocol break. We could avoid that, if it is desirable, by making ODP_PACKET_ATTR_KEY optional for ODP_PACKET_CMD_EXECUTE commands. Signed-off-by: Ben Pfaff Acked-by: Jesse Gross --- diff --git a/datapath/datapath.c b/datapath/datapath.c index 72801224..d607315e 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -655,7 +655,8 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) int key_len; err = -EINVAL; - if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] || + if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_KEY] || + !a[ODP_PACKET_ATTR_ACTIONS] || nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN) goto err; @@ -694,6 +695,12 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_flow_put; flow->tbl_node.hash = flow_hash(&flow->key, key_len); + err = flow_metadata_from_nlattrs(&flow->key.eth.in_port, + &flow->key.eth.tun_id, + a[ODP_PACKET_ATTR_KEY]); + if (err) + goto err_flow_put; + acts = flow_actions_alloc(a[ODP_PACKET_ATTR_ACTIONS]); err = PTR_ERR(acts); if (IS_ERR(acts)) @@ -725,6 +732,7 @@ err: static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = { [ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, + [ODP_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, }; diff --git a/datapath/flow.c b/datapath/flow.c index 2b80c6d8..d181cdef 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -597,6 +597,23 @@ int flow_cmp(const struct tbl_node *node, void *key2_, int len) return !memcmp(key1, key2, len); } +/* The size of the argument for each %ODP_KEY_ATTR_* Netlink attribute. */ +static const u32 key_lens[ODP_KEY_ATTR_MAX + 1] = { + [ODP_KEY_ATTR_TUN_ID] = 8, + [ODP_KEY_ATTR_IN_PORT] = 4, + [ODP_KEY_ATTR_ETHERNET] = sizeof(struct odp_key_ethernet), + [ODP_KEY_ATTR_8021Q] = sizeof(struct odp_key_8021q), + [ODP_KEY_ATTR_ETHERTYPE] = 2, + [ODP_KEY_ATTR_IPV4] = sizeof(struct odp_key_ipv4), + [ODP_KEY_ATTR_IPV6] = sizeof(struct odp_key_ipv6), + [ODP_KEY_ATTR_TCP] = sizeof(struct odp_key_tcp), + [ODP_KEY_ATTR_UDP] = sizeof(struct odp_key_udp), + [ODP_KEY_ATTR_ICMP] = sizeof(struct odp_key_icmp), + [ODP_KEY_ATTR_ICMPV6] = sizeof(struct odp_key_icmpv6), + [ODP_KEY_ATTR_ARP] = sizeof(struct odp_key_arp), + [ODP_KEY_ATTR_ND] = sizeof(struct odp_key_nd), +}; + /** * flow_from_nlattrs - parses Netlink attributes into a flow key. * @swkey: receives the extracted flow key. @@ -625,22 +642,6 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, prev_type = ODP_KEY_ATTR_UNSPEC; nla_for_each_nested(nla, attr, rem) { - static const u32 key_lens[ODP_KEY_ATTR_MAX + 1] = { - [ODP_KEY_ATTR_TUN_ID] = 8, - [ODP_KEY_ATTR_IN_PORT] = 4, - [ODP_KEY_ATTR_ETHERNET] = sizeof(struct odp_key_ethernet), - [ODP_KEY_ATTR_8021Q] = sizeof(struct odp_key_8021q), - [ODP_KEY_ATTR_ETHERTYPE] = 2, - [ODP_KEY_ATTR_IPV4] = sizeof(struct odp_key_ipv4), - [ODP_KEY_ATTR_IPV6] = sizeof(struct odp_key_ipv6), - [ODP_KEY_ATTR_TCP] = sizeof(struct odp_key_tcp), - [ODP_KEY_ATTR_UDP] = sizeof(struct odp_key_udp), - [ODP_KEY_ATTR_ICMP] = sizeof(struct odp_key_icmp), - [ODP_KEY_ATTR_ICMPV6] = sizeof(struct odp_key_icmpv6), - [ODP_KEY_ATTR_ARP] = sizeof(struct odp_key_arp), - [ODP_KEY_ATTR_ND] = sizeof(struct odp_key_nd), - }; - const struct odp_key_ethernet *eth_key; const struct odp_key_8021q *q_key; const struct odp_key_ipv4 *ipv4_key; @@ -868,6 +869,62 @@ ok: return error; } +/** + * flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. + * @in_port: receives the extracted input port. + * @tun_id: receives the extracted tunnel ID. + * @key: Netlink attribute holding nested %ODP_KEY_ATTR_* Netlink attribute + * sequence. + * + * This parses a series of Netlink attributes that form a flow key, which must + * take the same form accepted by flow_from_nlattrs(), but only enough of it to + * get the metadata, that is, the parts of the flow key that cannot be + * extracted from the packet itself. + */ +int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id, + const struct nlattr *attr) +{ + const struct nlattr *nla; + u16 prev_type; + int rem; + + *tun_id = 0; + + prev_type = ODP_KEY_ATTR_UNSPEC; + nla_for_each_nested(nla, attr, rem) { + int type = nla_type(nla); + + if (type > ODP_KEY_ATTR_MAX || nla_len(nla) != key_lens[type]) + return -EINVAL; + + switch (TRANSITION(prev_type, type)) { + case TRANSITION(ODP_KEY_ATTR_UNSPEC, ODP_KEY_ATTR_TUN_ID): + *tun_id = nla_get_be64(nla); + break; + + case TRANSITION(ODP_KEY_ATTR_UNSPEC, ODP_KEY_ATTR_IN_PORT): + case TRANSITION(ODP_KEY_ATTR_TUN_ID, ODP_KEY_ATTR_IN_PORT): + if (nla_get_u32(nla) >= DP_MAX_PORTS) + return -EINVAL; + *in_port = nla_get_u32(nla); + break; + + default: + goto done; + } + + prev_type = type; + } + if (rem) + return -EINVAL; + +done: + if (prev_type == ODP_KEY_ATTR_UNSPEC || + prev_type == ODP_KEY_ATTR_TUN_ID) + return -EINVAL; + return 0; +} + int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) { struct odp_key_ethernet *eth_key; diff --git a/datapath/flow.h b/datapath/flow.h index 1d75a018..3cda5967 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -152,6 +152,8 @@ int flow_cmp(const struct tbl_node *, void *target, int len); int flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); +int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id, + const struct nlattr *); static inline struct sw_flow *flow_cast(const struct tbl_node *node) { diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index d84b5fa7..2eda329a 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -34,6 +34,7 @@ #include "bitmap.h" #include "dpif-provider.h" +#include "dynamic-string.h" #include "netdev.h" #include "netdev-linux.h" #include "netdev-vport.h" @@ -784,6 +785,7 @@ dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) static int dpif_linux_execute(struct dpif *dpif_, + const struct nlattr *key, size_t key_len, const struct nlattr *actions, size_t actions_len, const struct ofpbuf *packet) { @@ -801,6 +803,7 @@ dpif_linux_execute(struct dpif *dpif_, execute->dp_ifindex = dpif->dp_ifindex; nl_msg_put_unspec(buf, ODP_PACKET_ATTR_PACKET, packet->data, packet->size); + nl_msg_put_unspec(buf, ODP_PACKET_ATTR_KEY, key, key_len); nl_msg_put_unspec(buf, ODP_PACKET_ATTR_ACTIONS, actions, actions_len); error = nl_sock_transact(genl_sock, buf, NULL); diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 53574aa0..5f37197e 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -944,6 +944,7 @@ dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) static int dpif_netdev_execute(struct dpif *dpif, + const struct nlattr *key_attrs, size_t key_len, const struct nlattr *actions, size_t actions_len, const struct ofpbuf *packet) { @@ -975,7 +976,10 @@ dpif_netdev_execute(struct dpif *dpif, * if we don't. */ copy = *packet; } + flow_extract(©, 0, -1, &key); + dpif_netdev_flow_from_nlattrs(key_attrs, key_len, &key); + error = dp_netdev_execute_actions(dp, ©, &key, actions, actions_len); if (mutates) { ofpbuf_uninit(©); diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 8670906a..58d2f0b6 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -282,9 +282,14 @@ struct dpif_class { int (*flow_dump_done)(const struct dpif *dpif, void *state); /* Performs the 'actions_len' bytes of actions in 'actions' on the Ethernet - * frame specified in 'packet'. */ - int (*execute)(struct dpif *dpif, const struct nlattr *actions, - size_t actions_len, const struct ofpbuf *packet); + * frame specified in 'packet' taken from the flow specified in the + * 'key_len' bytes of 'key'. ('key' is mostly redundant with 'packet', but + * it contains some metadata that cannot be recovered from 'packet', such + * as tun_id and in_port.) */ + int (*execute)(struct dpif *dpif, + const struct nlattr *key, size_t key_len, + const struct nlattr *actions, size_t actions_len, + const struct ofpbuf *packet); /* Retrieves 'dpif''s "listen mask" into '*listen_mask'. A 1-bit of value * 2**X set in '*listen_mask' indicates that 'dpif' will receive messages diff --git a/lib/dpif.c b/lib/dpif.c index 74e98569..1106d6be 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -914,11 +914,15 @@ dpif_flow_dump_done(struct dpif_flow_dump *dump) } /* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on - * the Ethernet frame specified in 'packet'. + * the Ethernet frame specified in 'packet' taken from the flow specified in + * the 'key_len' bytes of 'key'. ('key' is mostly redundant with 'packet', but + * it contains some metadata that cannot be recovered from 'packet', such as + * tun_id and in_port.) * * Returns 0 if successful, otherwise a positive errno value. */ int dpif_execute(struct dpif *dpif, + const struct nlattr *key, size_t key_len, const struct nlattr *actions, size_t actions_len, const struct ofpbuf *buf) { @@ -926,7 +930,8 @@ dpif_execute(struct dpif *dpif, COVERAGE_INC(dpif_execute); if (actions_len > 0) { - error = dpif->dpif_class->execute(dpif, actions, actions_len, buf); + error = dpif->dpif_class->execute(dpif, key, key_len, + actions, actions_len, buf); } else { error = 0; } diff --git a/lib/dpif.h b/lib/dpif.h index 8452349b..60e3cb48 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -148,8 +148,10 @@ bool dpif_flow_dump_next(struct dpif_flow_dump *, const struct dpif_flow_stats **); int dpif_flow_dump_done(struct dpif_flow_dump *); -int dpif_execute(struct dpif *, const struct nlattr *actions, - size_t actions_len, const struct ofpbuf *); +int dpif_execute(struct dpif *, + const struct nlattr *key, size_t key_len, + const struct nlattr *actions, size_t actions_len, + const struct ofpbuf *); enum dpif_upcall_type { DPIF_UC_MISS, /* Miss in flow table. */ diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 2f8d404b..c5fe2820 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -2007,9 +2007,16 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, return true; } else { + struct odputil_keybuf keybuf; + struct ofpbuf key; int error; - error = dpif_execute(ofproto->dpif, odp_actions, actions_len, packet); + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, flow); + + error = dpif_execute(ofproto->dpif, key.data, key.size, + odp_actions, actions_len, packet); + ofpbuf_delete(packet); return !error; } @@ -2671,12 +2678,20 @@ static int send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port, const struct ofpbuf *packet) { - struct ofpbuf odp_actions; + struct ofpbuf key, odp_actions; + struct odputil_keybuf keybuf; + struct flow flow; int error; + flow_extract((struct ofpbuf *) packet, 0, 0, &flow); + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, &flow); + ofpbuf_init(&odp_actions, 32); nl_msg_put_u32(&odp_actions, ODP_ACTION_ATTR_OUTPUT, odp_port); - error = dpif_execute(ofproto->dpif, odp_actions.data, odp_actions.size, + error = dpif_execute(ofproto->dpif, + key.data, key.size, + odp_actions.data, odp_actions.size, packet); ofpbuf_uninit(&odp_actions); @@ -3721,13 +3736,18 @@ packet_out(struct ofproto *ofproto_, struct ofpbuf *packet, error = validate_actions(ofp_actions, n_ofp_actions, flow, ofproto->max_ports); if (!error) { + struct odputil_keybuf keybuf; struct action_xlate_ctx ctx; struct ofpbuf *odp_actions; + struct ofpbuf key; + + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, flow); action_xlate_ctx_init(&ctx, ofproto, flow, packet); odp_actions = xlate_actions(&ctx, ofp_actions, n_ofp_actions); - dpif_execute(ofproto->dpif, odp_actions->data, odp_actions->size, - packet); + dpif_execute(ofproto->dpif, key.data, key.size, + odp_actions->data, odp_actions->size, packet); ofpbuf_delete(odp_actions); } return error;