From: Pravin B Shelar Date: Tue, 1 Nov 2011 17:13:16 +0000 (-0700) Subject: datapath: Convert kernel priority actions into match/set. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=abff858b5ad310a529d5a5ac2a230ee4ac9736db;p=openvswitch datapath: Convert kernel priority actions into match/set. Following patch adds skb-priority to flow key. So userspace will know what was priority when packet arrived and we can remove the pop/reset priority action. It's no longer necessary to have a special action for pop that is based on the kernel remembering original skb->priority. Userspace can just emit a set priority action with the original value. Since the priority field is a match field with just a normal set action, we can convert it into the new model for actions that are based on matches. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Bug #7715 --- diff --git a/datapath/actions.c b/datapath/actions.c index 8ca243df..4db2563c 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -314,6 +314,10 @@ static int execute_set_action(struct sk_buff *skb, int err = 0; switch (nla_type(nested_attr)) { + case OVS_KEY_ATTR_PRIORITY: + skb->priority = nla_get_u32(nested_attr); + break; + case OVS_KEY_ATTR_TUN_ID: OVS_CB(skb)->tun_id = nla_get_be64(nested_attr); break; @@ -347,7 +351,6 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, * then freeing the original skbuff is wasteful. So the following code * is slightly obscure just to avoid that. */ int prev_port = -1; - u32 priority = skb->priority; const struct nlattr *a; int rem; @@ -385,14 +388,6 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = execute_set_action(skb, nla_data(a)); break; - case OVS_ACTION_ATTR_SET_PRIORITY: - skb->priority = nla_get_u32(a); - break; - - case OVS_ACTION_ATTR_POP_PRIORITY: - skb->priority = priority; - break; - case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, a); break; diff --git a/datapath/datapath.c b/datapath/datapath.c index be90d549..87056cf8 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -558,6 +558,7 @@ static int validate_action_key(const struct nlattr *a, const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_8021q *q_key; + case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_PRIORITY): case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_TUN_ID): case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_ETHERNET): break; @@ -652,8 +653,6 @@ static int validate_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_PUSH] = (u32)-1, [OVS_ACTION_ATTR_POP] = 2, [OVS_ACTION_ATTR_SET] = (u32)-1, - [OVS_ACTION_ATTR_SET_PRIORITY] = 4, - [OVS_ACTION_ATTR_POP_PRIORITY] = 0, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 }; int type = nla_type(a); @@ -667,11 +666,6 @@ static int validate_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_UNSPEC: return -EINVAL; - case OVS_ACTION_ATTR_SET_PRIORITY: - case OVS_ACTION_ATTR_POP_PRIORITY: - /* No validation needed. */ - break; - case OVS_ACTION_ATTR_USERSPACE: err = validate_userspace(a); if (err) @@ -770,8 +764,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_put; - err = flow_metadata_from_nlattrs(&flow->key.eth.in_port, - &flow->key.eth.tun_id, + err = flow_metadata_from_nlattrs(&flow->key.phy.priority, + &flow->key.phy.in_port, + &flow->key.phy.tun_id, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_put; @@ -789,6 +784,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); OVS_CB(packet)->flow = flow; + packet->priority = flow->key.phy.priority; rcu_read_lock(); dp = get_dp(ovs_header->dp_ifindex); @@ -796,9 +792,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (!dp) goto err_unlock; - if (flow->key.eth.in_port < DP_MAX_PORTS) + if (flow->key.phy.in_port < DP_MAX_PORTS) OVS_CB(packet)->vport = get_vport_protected(dp, - flow->key.eth.in_port); + flow->key.phy.in_port); local_bh_disable(); err = execute_actions(dp, packet); diff --git a/datapath/flow.c b/datapath/flow.c index b6023a08..0084ca20 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -638,8 +638,10 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, struct ethhdr *eth; memset(key, 0, sizeof(*key)); - key->eth.tun_id = OVS_CB(skb)->tun_id; - key->eth.in_port = in_port; + + key->phy.priority = skb->priority; + key->phy.tun_id = OVS_CB(skb)->tun_id; + key->phy.in_port = in_port; skb_reset_mac_header(skb); @@ -849,6 +851,7 @@ static int parse_tos_frag(struct sw_flow_key *swkey, u8 tos, u8 frag) /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ const u32 ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_PRIORITY] = 4, [OVS_KEY_ATTR_TUN_ID] = 8, [OVS_KEY_ATTR_IN_PORT] = 4, [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), @@ -874,7 +877,7 @@ const u32 ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { * This state machine accepts the following forms, with [] for optional * elements and | for alternatives: * - * [tun_id] [in_port] ethernet [8021q] [ethertype \ + * [priority] [tun_id] [in_port] ethernet [8021q] [ethertype \ * [IPv4 [TCP|UDP|ICMP] | IPv6 [TCP|UDP|ICMPv6 [ND]] | ARP]] * * except that IPv4 or IPv6 terminates the sequence if its @ipv4_frag or @@ -891,7 +894,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, int key_len; memset(swkey, 0, sizeof(*swkey)); - swkey->eth.in_port = USHRT_MAX; + swkey->phy.in_port = USHRT_MAX; swkey->eth.type = htons(ETH_P_802_2); key_len = SW_FLOW_KEY_OFFSET(eth); @@ -915,18 +918,25 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, #define TRANSITION(PREV_TYPE, TYPE) (((PREV_TYPE) << 16) | (TYPE)) switch (TRANSITION(prev_type, type)) { + case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_PRIORITY): + swkey->phy.priority = nla_get_u32(nla); + break; + case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_TUN_ID): - swkey->eth.tun_id = nla_get_be64(nla); + case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_TUN_ID): + swkey->phy.tun_id = nla_get_be64(nla); break; case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_IN_PORT): + case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_IN_PORT): case TRANSITION(OVS_KEY_ATTR_TUN_ID, OVS_KEY_ATTR_IN_PORT): if (nla_get_u32(nla) >= DP_MAX_PORTS) goto invalid; - swkey->eth.in_port = nla_get_u32(nla); + swkey->phy.in_port = nla_get_u32(nla); break; case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_ETHERNET): + case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_ETHERNET): case TRANSITION(OVS_KEY_ATTR_TUN_ID, OVS_KEY_ATTR_ETHERNET): case TRANSITION(OVS_KEY_ATTR_IN_PORT, OVS_KEY_ATTR_ETHERNET): eth_key = nla_data(nla); @@ -1073,6 +1083,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, case OVS_KEY_ATTR_UNSPEC: goto invalid; + case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_TUN_ID: case OVS_KEY_ATTR_IN_PORT: goto invalid; @@ -1148,7 +1159,7 @@ ok: * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. */ -int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id, +int flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, const struct nlattr *attr) { const struct nlattr *nla; @@ -1157,6 +1168,7 @@ int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id, *in_port = USHRT_MAX; *tun_id = 0; + *priority = 0; prev_type = OVS_KEY_ATTR_UNSPEC; nla_for_each_nested(nla, attr, rem) { @@ -1166,11 +1178,17 @@ int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id, return -EINVAL; switch (TRANSITION(prev_type, type)) { + case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_PRIORITY): + *priority = nla_get_u32(nla); + break; + case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_TUN_ID): + case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_TUN_ID): *tun_id = nla_get_be64(nla); break; case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_IN_PORT): + case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_IN_PORT): case TRANSITION(OVS_KEY_ATTR_TUN_ID, OVS_KEY_ATTR_IN_PORT): if (nla_get_u32(nla) >= DP_MAX_PORTS) return -EINVAL; @@ -1196,13 +1214,16 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) /* This is an imperfect sanity-check that FLOW_BUFSIZE doesn't need * to be updated, but will at least raise awareness when new * datapath key types are added. */ - BUILD_BUG_ON(__OVS_KEY_ATTR_MAX != 14); + BUILD_BUG_ON(__OVS_KEY_ATTR_MAX != 15); + + if (swkey->phy.priority) + NLA_PUT_U32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority); - if (swkey->eth.tun_id != cpu_to_be64(0)) - NLA_PUT_BE64(skb, OVS_KEY_ATTR_TUN_ID, swkey->eth.tun_id); + if (swkey->phy.tun_id != cpu_to_be64(0)) + NLA_PUT_BE64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id); - if (swkey->eth.in_port != USHRT_MAX) - NLA_PUT_U32(skb, OVS_KEY_ATTR_IN_PORT, swkey->eth.in_port); + if (swkey->phy.in_port != USHRT_MAX) + NLA_PUT_U32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port); nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) diff --git a/datapath/flow.h b/datapath/flow.h index 484ea626..e68269e0 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -36,8 +36,11 @@ struct sw_flow_actions { struct sw_flow_key { struct { - __be64 tun_id; /* Encapsulating tunnel ID. */ - u16 in_port; /* Input switch port (or USHRT_MAX). */ + __be64 tun_id; /* Encapsulating tunnel ID. */ + u32 priority; /* Packet QoS priority. */ + u16 in_port; /* Input switch port (or USHRT_MAX). */ + } phy; + struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ @@ -138,6 +141,7 @@ u64 flow_used_time(unsigned long flow_jiffies); * * struct pad nl hdr total * ------ --- ------ ----- + * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_TUN_ID 8 -- 4 12 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 @@ -147,14 +151,14 @@ u64 flow_used_time(unsigned long flow_jiffies); * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 132 + * total 140 */ -#define FLOW_BUFSIZE 132 +#define FLOW_BUFSIZE 140 int flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id, +int flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, const struct nlattr *); #define TBL_MIN_BUCKETS 1024 diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 087655c5..57ce4ef7 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -263,6 +263,7 @@ struct ovs_flow_stats { enum ovs_key_attr { OVS_KEY_ATTR_UNSPEC, + OVS_KEY_ATTR_PRIORITY, /* 32-bit skb->priority */ OVS_KEY_ATTR_TUN_ID, /* 64-bit tunnel ID */ OVS_KEY_ATTR_IN_PORT, /* 32-bit OVS dp port number */ OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */ @@ -449,9 +450,6 @@ enum ovs_userspace_attr { * header. * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. * The argument takes the same form as %OVS_ACTION_ATTR_PUSH. - * @OVS_ACTION_ATTR_SET_PRIORITY: Sets skb->priority to 32-bit number passed - * as argument. - * @OVS_ACTION_ATTR_POP_PRIORITY: Restore skb->priority to original value. * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in * the nested %OVS_SAMPLE_ATTR_* attributes. * @@ -466,8 +464,6 @@ enum ovs_action_attr { OVS_ACTION_ATTR_PUSH, /* One nested OVS_KEY_ATTR_*. */ OVS_ACTION_ATTR_POP, /* u16 OVS_KEY_ATTR_*. */ OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */ - OVS_ACTION_ATTR_SET_PRIORITY, /* u32 skb->priority value. */ - OVS_ACTION_ATTR_POP_PRIORITY, /* No argument. */ OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ __OVS_ACTION_ATTR_MAX }; diff --git a/lib/classifier.c b/lib/classifier.c index 869029f5..8ffc96f5 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -83,6 +83,7 @@ cls_rule_init_exact(const struct flow *flow, unsigned int priority, struct cls_rule *rule) { rule->flow = *flow; + rule->flow.priority = 0; flow_wildcards_init_exact(&rule->wc); rule->priority = priority; } diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index c5628b7e..7d0628c3 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -1275,7 +1275,7 @@ dpif_linux_vport_send(int dp_ifindex, uint32_t port_no, uint64_t action; ofpbuf_use_const(&packet, data, size); - flow_extract(&packet, htonll(0), 0, &flow); + flow_extract(&packet, 0, htonll(0), 0, &flow); ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); odp_flow_key_from_flow(&key, &flow); diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 1d05dd8d..b4e788ff 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -881,7 +881,7 @@ dpif_netdev_execute(struct dpif *dpif, ofpbuf_reserve(©, DP_NETDEV_HEADROOM); ofpbuf_put(©, packet->data, packet->size); - flow_extract(©, 0, -1, &key); + flow_extract(©, 0, 0, -1, &key); error = dpif_netdev_flow_from_nlattrs(key_attrs, key_len, &key); if (!error) { dp_netdev_execute_actions(dp, ©, &key, @@ -981,7 +981,7 @@ dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port, if (packet->size < ETH_HEADER_LEN) { return; } - flow_extract(packet, 0, port->port_no, &key); + flow_extract(packet, 0, 0, port->port_no, &key); flow = dp_netdev_lookup_flow(dp, &key); if (flow) { dp_netdev_flow_used(flow, &key, packet); @@ -1239,6 +1239,8 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a) enum ovs_key_attr type = nl_attr_type(a); switch (type) { case OVS_KEY_ATTR_TUN_ID: + case OVS_KEY_ATTR_PRIORITY: + /* not implemented */ break; case OVS_KEY_ATTR_ETHERNET: @@ -1319,11 +1321,6 @@ dp_netdev_execute_actions(struct dp_netdev *dp, dp_netdev_sample(dp, packet, key, a); break; - case OVS_ACTION_ATTR_SET_PRIORITY: - case OVS_ACTION_ATTR_POP_PRIORITY: - /* not implemented */ - break; - case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: NOT_REACHED(); diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 83d56d63..429cc9d7 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -318,8 +318,7 @@ struct dpif_class { int (*recv_set_mask)(struct dpif *dpif, int listen_mask); /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a - * priority value for use in the OVS_ACTION_ATTR_SET_PRIORITY action in - * '*priority'. */ + * priority value used for setting packet priority. */ int (*queue_to_priority)(const struct dpif *dpif, uint32_t queue_id, uint32_t *priority); diff --git a/lib/dpif.c b/lib/dpif.c index 68a95f67..cc6e805f 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1129,9 +1129,9 @@ dpif_get_netflow_ids(const struct dpif *dpif, } /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a priority - * value for use in the OVS_ACTION_ATTR_SET_PRIORITY action. On success, - * returns 0 and stores the priority into '*priority'. On failure, returns a - * positive errno value and stores 0 into '*priority'. */ + * value used for setting packet priority. + * On success, returns 0 and stores the priority into '*priority'. + * On failure, returns a positive errno value and stores 0 into '*priority'. */ int dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id, uint32_t *priority) diff --git a/lib/flow.c b/lib/flow.c index 06cc822a..1263734b 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -324,8 +324,8 @@ invalid: * present and has a correct length, and otherwise NULL. */ void -flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port, - struct flow *flow) +flow_extract(struct ofpbuf *packet, uint32_t priority, ovs_be64 tun_id, + uint16_t ofp_in_port, struct flow *flow) { struct ofpbuf b = *packet; struct eth_header *eth; @@ -335,6 +335,7 @@ flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port, memset(flow, 0, sizeof *flow); flow->tun_id = tun_id; flow->in_port = ofp_in_port; + flow->priority = priority; packet->l2 = b.data; packet->l3 = NULL; @@ -484,6 +485,7 @@ flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards) if (wc & FWW_ND_TARGET) { memset(&flow->nd_target, 0, sizeof flow->nd_target); } + flow->priority = 0; } char * @@ -499,8 +501,14 @@ flow_format(struct ds *ds, const struct flow *flow) { int frag; - ds_put_format(ds, "tunnel%#"PRIx64":in_port%04"PRIx16":tci(", - ntohll(flow->tun_id), flow->in_port); + ds_put_format(ds, "priority%"PRIu32 + ":tunnel%#"PRIx64 + ":in_port%04"PRIx16, + flow->priority, + ntohll(flow->tun_id), + flow->in_port); + + ds_put_format(ds, ":tci("); if (flow->vlan_tci) { ds_put_format(ds, "vlan%"PRIu16",pcp%d", vlan_tci_to_vid(flow->vlan_tci), diff --git a/lib/flow.h b/lib/flow.h index e9da2ad4..0abfe5a4 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -54,6 +54,7 @@ BUILD_ASSERT_DECL(FLOW_FRAG_LATER == NX_IP_FRAG_LATER); struct flow { ovs_be64 tun_id; /* Encapsulating tunnel ID. */ + uint32_t priority; /* Packet priority for QoS. */ uint32_t regs[FLOW_N_REGS]; /* Registers. */ ovs_be32 nw_src; /* IPv4 source address. */ ovs_be32 nw_dst; /* IPv4 destination address. */ @@ -71,21 +72,22 @@ struct flow { struct in6_addr ipv6_src; /* IPv6 source address. */ struct in6_addr ipv6_dst; /* IPv6 destination address. */ struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */ + uint32_t reserved; /* Reserved for 64-bit packing. */ }; /* Assert that there are FLOW_SIG_SIZE bytes of significant data in "struct * flow", followed by FLOW_PAD_SIZE bytes of padding. */ -#define FLOW_SIG_SIZE (100 + FLOW_N_REGS * 4) -#define FLOW_PAD_SIZE 0 +#define FLOW_SIG_SIZE (104 + FLOW_N_REGS * 4) +#define FLOW_PAD_SIZE 4 BUILD_ASSERT_DECL(offsetof(struct flow, nd_target) == FLOW_SIG_SIZE - 16); BUILD_ASSERT_DECL(sizeof(((struct flow *)0)->nd_target) == 16); BUILD_ASSERT_DECL(sizeof(struct flow) == FLOW_SIG_SIZE + FLOW_PAD_SIZE); /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */ -BUILD_ASSERT_DECL(FLOW_SIG_SIZE == 120 && FLOW_WC_SEQ == 3); +BUILD_ASSERT_DECL(FLOW_SIG_SIZE == 124 && FLOW_WC_SEQ == 3); -void flow_extract(struct ofpbuf *, ovs_be64 tun_id, uint16_t in_port, - struct flow *); +void flow_extract(struct ofpbuf *, uint32_t priority, ovs_be64 tun_id, + uint16_t in_port, struct flow *); void flow_zero_wildcards(struct flow *, const struct flow_wildcards *); char *flow_to_string(const struct flow *); diff --git a/lib/learning-switch.c b/lib/learning-switch.c index b2608629..ecc5509e 100644 --- a/lib/learning-switch.c +++ b/lib/learning-switch.c @@ -419,7 +419,7 @@ process_packet_in(struct lswitch *sw, struct rconn *rconn, pkt_ofs = offsetof(struct ofp_packet_in, data); pkt_len = ntohs(opi->header.length) - pkt_ofs; ofpbuf_use_const(&pkt, opi->data, pkt_len); - flow_extract(&pkt, 0, in_port, &flow); + flow_extract(&pkt, 0, 0, in_port, &flow); /* Choose output port. */ out_port = lswitch_choose_destination(sw, &flow); diff --git a/lib/odp-util.c b/lib/odp-util.c index 1e20f49d..1e9289a7 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -65,8 +65,6 @@ odp_action_len(uint16_t type) case OVS_ACTION_ATTR_PUSH: return -2; case OVS_ACTION_ATTR_POP: return 2; case OVS_ACTION_ATTR_SET: return -2; - case OVS_ACTION_ATTR_SET_PRIORITY: return 4; - case OVS_ACTION_ATTR_POP_PRIORITY: return 0; case OVS_ACTION_ATTR_SAMPLE: return -2; case OVS_ACTION_ATTR_UNSPEC: @@ -206,12 +204,6 @@ format_odp_action(struct ds *ds, const struct nlattr *a) ds_put_format(ds, "pop(key%"PRIu16")", nl_attr_get_u16(a)); } break; - case OVS_ACTION_ATTR_SET_PRIORITY: - ds_put_format(ds, "set_priority(%#"PRIx32")", nl_attr_get_u32(a)); - break; - case OVS_ACTION_ATTR_POP_PRIORITY: - ds_put_cstr(ds, "pop_priority"); - break; case OVS_ACTION_ATTR_SAMPLE: format_odp_sample_action(ds, a); break; @@ -258,6 +250,7 @@ odp_flow_key_attr_len(uint16_t type) } switch ((enum ovs_key_attr) type) { + case OVS_KEY_ATTR_PRIORITY: return 4; case OVS_KEY_ATTR_TUN_ID: return 8; case OVS_KEY_ATTR_IN_PORT: return 4; case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); @@ -339,6 +332,10 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) } switch (nl_attr_type(a)) { + case OVS_KEY_ATTR_PRIORITY: + ds_put_format(ds, "priority(%"PRIu32")", nl_attr_get_u32(a)); + break; + case OVS_KEY_ATTR_TUN_ID: ds_put_format(ds, "tun_id(%#"PRIx64")", ntohll(nl_attr_get_be64(a))); break; @@ -528,6 +525,16 @@ parse_odp_key_attr(const char *s, struct ofpbuf *key) * The tun_id parser has to use an alternative approach because there is no * type larger than 64 bits. */ + { + unsigned long long int priority; + int n = -1; + + if (sscanf(s, "priority(%lli)%n", &priority, &n) > 0 && n > 0) { + nl_msg_put_u32(key, OVS_KEY_ATTR_PRIORITY, priority); + return n; + } + } + { char tun_id_s[32]; int n = -1; @@ -825,6 +832,10 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow) { struct ovs_key_ethernet *eth_key; + if (flow->priority) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, flow->priority); + } + if (flow->tun_id != htonll(0)) { nl_msg_put_be64(buf, OVS_KEY_ATTR_TUN_ID, flow->tun_id); } @@ -992,11 +1003,17 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, #define TRANSITION(PREV_TYPE, TYPE) (((PREV_TYPE) << 16) | (TYPE)) switch (TRANSITION(prev_type, type)) { + case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_PRIORITY): + flow->priority = nl_attr_get_u32(nla); + break; + case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_TUN_ID): + case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_TUN_ID): flow->tun_id = nl_attr_get_be64(nla); break; case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_IN_PORT): + case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_IN_PORT): case TRANSITION(OVS_KEY_ATTR_TUN_ID, OVS_KEY_ATTR_IN_PORT): if (nl_attr_get_u32(nla) >= UINT16_MAX) { return EINVAL; @@ -1005,6 +1022,7 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, break; case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_ETHERNET): + case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_ETHERNET): case TRANSITION(OVS_KEY_ATTR_TUN_ID, OVS_KEY_ATTR_ETHERNET): case TRANSITION(OVS_KEY_ATTR_IN_PORT, OVS_KEY_ATTR_ETHERNET): eth_key = nl_attr_get(nla); @@ -1138,6 +1156,7 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, case OVS_KEY_ATTR_UNSPEC: return EINVAL; + case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_TUN_ID: case OVS_KEY_ATTR_IN_PORT: return EINVAL; diff --git a/lib/odp-util.h b/lib/odp-util.h index 8c7d4d14..eceeae6b 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -66,6 +66,7 @@ void format_odp_actions(struct ds *, const struct nlattr *odp_actions, * * struct pad nl hdr total * ------ --- ------ ----- + * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_TUN_ID 8 -- 4 12 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 @@ -75,14 +76,14 @@ void format_odp_actions(struct ds *, const struct nlattr *odp_actions, * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 132 + * total 140 */ -#define ODPUTIL_FLOW_KEY_BYTES 132 +#define ODPUTIL_FLOW_KEY_BYTES 140 /* This is an imperfect sanity-check that ODPUTIL_FLOW_KEY_BYTES doesn't * need to be updated, but will at least raise awareness when new OVS * datapath key types are added. */ -BUILD_ASSERT_DECL(__OVS_KEY_ATTR_MAX == 14); +BUILD_ASSERT_DECL(__OVS_KEY_ATTR_MAX == 15); /* A buffer with sufficient size and alignment to hold an nlattr-formatted flow * key. An array of "struct nlattr" might not, in theory, be sufficiently diff --git a/lib/ofp-print.c b/lib/ofp-print.c index 6278395d..fe852b4e 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -140,7 +140,7 @@ ofp_print_packet_in(struct ds *string, const struct ofp_packet_in *op, struct ofpbuf packet; ofpbuf_use_const(&packet, op->data, data_len); - flow_extract(&packet, 0, ntohs(op->in_port), &flow); + flow_extract(&packet, 0, 0, ntohs(op->in_port), &flow); flow_format(string, &flow); ds_put_char(string, '\n'); } diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index d0a5e004..3cceb6be 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -205,9 +205,8 @@ struct action_xlate_ctx { * reason to look at them. */ int recurse; /* Recursion level, via xlate_table_action. */ - uint32_t priority; /* Current flow priority. 0 if none. */ struct flow base_flow; /* Flow at the last commit. */ - uint32_t base_priority; /* Priority at the last commit. */ + uint32_t original_priority; /* Priority when packet arrived. */ uint8_t table_id; /* OpenFlow table ID where flow was found. */ uint32_t sflow_n_outputs; /* Number of output ports. */ uint16_t sflow_odp_port; /* Output port for composing sFlow action. */ @@ -2257,7 +2256,8 @@ handle_miss_upcalls(struct ofproto_dpif *ofproto, struct dpif_upcall *upcalls, /* Obtain in_port and tun_id, at least, then set 'flow''s header * pointers. */ odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); - flow_extract(upcall->packet, flow.tun_id, flow.in_port, &flow); + flow_extract(upcall->packet, flow.priority, flow.tun_id, + flow.in_port, &flow); /* Handle 802.1ag, LACP, and STP specially. */ if (process_special(ofproto, &flow, upcall->packet)) { @@ -3449,7 +3449,7 @@ send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port, struct flow flow; int error; - flow_extract((struct ofpbuf *) packet, 0, 0, &flow); + flow_extract((struct ofpbuf *) packet, 0, 0, 0, &flow); ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); odp_flow_key_from_flow(&key, &flow); @@ -3722,19 +3722,17 @@ commit_set_port_action(const struct flow *flow, struct flow *base, } static void -commit_priority_action(struct action_xlate_ctx *ctx) +commit_set_priority_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) { - if (ctx->base_priority == ctx->priority) { + if (base->priority == flow->priority) { return; } + base->priority = flow->priority; - if (ctx->priority) { - nl_msg_put_u32(ctx->odp_actions, - OVS_ACTION_ATTR_SET_PRIORITY, ctx->priority); - } else { - nl_msg_put_flag(ctx->odp_actions, OVS_ACTION_ATTR_POP_PRIORITY); - } - ctx->base_priority = ctx->priority; + commit_action__(odp_actions, OVS_ACTION_ATTR_SET, + OVS_KEY_ATTR_PRIORITY, &base->priority, + sizeof(base->priority)); } static void @@ -3749,7 +3747,7 @@ commit_odp_actions(struct action_xlate_ctx *ctx) commit_vlan_action(ctx, flow->vlan_tci); commit_set_nw_action(flow, base, odp_actions); commit_set_port_action(flow, base, odp_actions); - commit_priority_action(ctx); + commit_set_priority_action(flow, base, odp_actions); } static void @@ -3958,7 +3956,7 @@ xlate_enqueue_action(struct action_xlate_ctx *ctx, const struct ofp_action_enqueue *oae) { uint16_t ofp_port, odp_port; - uint32_t ctx_priority, priority; + uint32_t flow_priority, priority; int error; error = dpif_queue_to_priority(ctx->ofproto->dpif, ntohl(oae->queue_id), @@ -3979,10 +3977,10 @@ xlate_enqueue_action(struct action_xlate_ctx *ctx, odp_port = ofp_port_to_odp_port(ofp_port); /* Add datapath actions. */ - ctx_priority = ctx->priority; - ctx->priority = priority; + flow_priority = ctx->flow.priority; + ctx->flow.priority = priority; add_output_action(ctx, odp_port); - ctx->priority = ctx_priority; + ctx->flow.priority = flow_priority; /* Update NetFlow output port. */ if (ctx->nf_output_iface == NF_OUT_DROP) { @@ -4007,7 +4005,7 @@ xlate_set_queue_action(struct action_xlate_ctx *ctx, return; } - ctx->priority = priority; + ctx->flow.priority = priority; } struct xlate_reg_state { @@ -4205,7 +4203,7 @@ do_xlate_actions(const union ofp_action *in, size_t n_in, break; case OFPUTIL_NXAST_POP_QUEUE: - ctx->priority = 0; + ctx->flow.priority = ctx->original_priority; break; case OFPUTIL_NXAST_REG_MOVE: @@ -4304,8 +4302,7 @@ xlate_actions(struct action_xlate_ctx *ctx, ctx->has_normal = false; ctx->nf_output_iface = NF_OUT_DROP; ctx->recurse = 0; - ctx->priority = 0; - ctx->base_priority = 0; + ctx->original_priority = ctx->flow.priority; ctx->base_flow = ctx->flow; ctx->base_flow.tun_id = 0; ctx->table_id = 0; @@ -5215,7 +5212,7 @@ static void ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, void *aux OVS_UNUSED) { - char *dpname, *arg1, *arg2, *arg3; + char *dpname, *arg1, *arg2, *arg3, *arg4; char *args = xstrdup(args_); char *save_ptr = NULL; struct ofproto_dpif *ofproto; @@ -5233,7 +5230,8 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, dpname = strtok_r(args, " ", &save_ptr); arg1 = strtok_r(NULL, " ", &save_ptr); arg2 = strtok_r(NULL, " ", &save_ptr); - arg3 = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */ + arg3 = strtok_r(NULL, " ", &save_ptr); + arg4 = strtok_r(NULL, "", &save_ptr); /* Get entire rest of line. */ if (dpname && arg1 && (!arg2 || !strcmp(arg2, "-generate")) && !arg3) { /* ofproto/trace dpname flow [-generate] */ int error; @@ -5258,18 +5256,20 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, packet = ofpbuf_new(0); flow_compose(packet, &flow); } - } else if (dpname && arg1 && arg2 && arg3) { - /* ofproto/trace dpname tun_id in_port packet */ + } else if (dpname && arg1 && arg2 && arg3 && arg4) { + /* ofproto/trace dpname priority tun_id in_port packet */ uint16_t in_port; ovs_be64 tun_id; + uint32_t priority; - tun_id = htonll(strtoull(arg1, NULL, 0)); - in_port = ofp_port_to_odp_port(atoi(arg2)); + priority = atoi(arg1); + tun_id = htonll(strtoull(arg2, NULL, 0)); + in_port = ofp_port_to_odp_port(atoi(arg3)); packet = ofpbuf_new(strlen(args) / 2); - arg3 = ofpbuf_put_hex(packet, arg3, NULL); - arg3 += strspn(arg3, " "); - if (*arg3 != '\0') { + arg4 = ofpbuf_put_hex(packet, arg4, NULL); + arg4 += strspn(arg4, " "); + if (*arg4 != '\0') { unixctl_command_reply(conn, 501, "Trailing garbage in command"); goto exit; } @@ -5284,7 +5284,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, const char *args_, ds_put_cstr(&result, s); free(s); - flow_extract(packet, tun_id, in_port, &flow); + flow_extract(packet, priority, tun_id, in_port, &flow); } else { unixctl_command_reply(conn, 501, "Bad command syntax"); goto exit; diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man index 2cf7bfaa..5ae90f22 100644 --- a/ofproto/ofproto-unixctl.man +++ b/ofproto/ofproto-unixctl.man @@ -6,13 +6,15 @@ These commands manage the core OpenFlow switch implementation (called Lists the names of the running ofproto instances. These are the names that may be used on \fBofproto/trace\fR. . -.IP "\fBofproto/trace \fIswitch tun_id in_port packet\fR" +.IP "\fBofproto/trace \fIswitch priority tun_id in_port packet\fR" .IQ "\fBofproto/trace \fIswitch odp_flow \fB\-generate\fR" Traces the path of an imaginary packet through \fIswitch\fR. Both forms require \fIswitch\fR, the switch on which the packet arrived (one of those listed by \fBofproto/list\fR). The first form specifies a packet's contents explicitly: .RS +.IP "\fIpriority\fR" +Packet QoS priority. Use \fB0\fR if QoS is not setup. .IP "\fItun_id\fR" The tunnel ID on which the packet arrived. Use \fB0\fR if the packet did not arrive through a tunnel. diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 9a302d54..50c52983 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -1607,7 +1607,7 @@ rule_execute(struct rule *rule, uint16_t in_port, struct ofpbuf *packet) assert(ofpbuf_headroom(packet) >= sizeof(struct ofp_packet_in)); - flow_extract(packet, 0, in_port, &flow); + flow_extract(packet, 0, 0, in_port, &flow); return rule->ofproto->ofproto_class->rule_execute(rule, &flow, packet); } @@ -1769,7 +1769,7 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh) } /* Send out packet. */ - flow_extract(&payload, 0, ntohs(opo->in_port), &flow); + flow_extract(&payload, 0, 0, ntohs(opo->in_port), &flow); error = p->ofproto_class->packet_out(p, &payload, &flow, ofp_actions, n_ofp_actions); ofpbuf_delete(buffer); diff --git a/tests/odp.at b/tests/odp.at index 65a9fb93..7157ceaf 100644 --- a/tests/odp.at +++ b/tests/odp.at @@ -32,6 +32,10 @@ in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0806),arp echo '# Valid forms with VLAN header.' sed 's/eth([[^)]]*)/&,vlan(vid=99,pcp=7)/' odp-base.txt + echo + echo '# Valid forms with QoS priority.' + sed 's/^/priority(1234),/' odp-base.txt + echo echo '# Valid forms with tun_id and VLAN headers.' sed 's/^/tun_id(0xfedcba9876543210),/ diff --git a/tests/ofp-print.at b/tests/ofp-print.at index 4c190a11..1f5de782 100644 --- a/tests/ofp-print.at +++ b/tests/ofp-print.at @@ -237,7 +237,7 @@ dnl The tcpdump output format differs slightly from one version to another, dnl so trim off the end of the line where differences appear. AT_CHECK([sed 's/\(length 60:\).*/\1 .../' stdout], [0], [dnl OFPT_PACKET_IN (xid=0x0): total_len=60 in_port=3 data_len=60 buffer=0x00000111 -tunnel0:in_port0003:tci(0) mac50:54:00:00:00:05->50:54:00:00:00:06 type0800 proto6 tos0 ip192.168.0.1->192.168.0.2 port10031->0 +priority0:tunnel0:in_port0003:tci(0) mac50:54:00:00:00:05->50:54:00:00:00:06 type0800 proto6 tos0 ip192.168.0.1->192.168.0.2 port10031->0 50:54:00:00:00:05 > 50:54:00:00:00:06, ethertype IPv4 (0x0800), length 60: ... ]) AT_CLEANUP diff --git a/tests/test-flows.c b/tests/test-flows.c index 57157c97..9d36eb17 100644 --- a/tests/test-flows.c +++ b/tests/test-flows.c @@ -68,7 +68,7 @@ main(int argc OVS_UNUSED, char *argv[]) ovs_fatal(retval, "error reading pcap file"); } - flow_extract(packet, 0, 1, &flow); + flow_extract(packet, 0, 0, 1, &flow); cls_rule_init_exact(&flow, 0, &rule); ofputil_cls_rule_to_match(&rule, &extracted_match);