From: Ben Pfaff Date: Thu, 20 Oct 2011 04:33:44 +0000 (-0700) Subject: Implement new fragment handling policy. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7257b535ab8e5fafd811c5f6788205eefdd44948;p=openvswitch Implement new fragment handling policy. Until now, OVS has handled IP fragments more awkwardly than necessary. It has not been possible to match on L4 headers, even in fragments with offset 0 where they are actually present. This means that there was no way to implement ACLs that treat, say, different TCP ports differently, on fragmented traffic; instead, all decisions for fragment forwarding had to be made on the basis of L2 and L3 headers alone. This commit improves the situation significantly. It is still not possible to match on L4 headers in fragments with nonzero offset, because that information is simply not present in such fragments, but this commit adds the ability to match on L4 headers for fragments with zero offset. This means that it becomes possible to implement ACLs that drop such "first fragments" on the basis of L4 headers. In practice, that effectively blocks even fragmented traffic on an L4 basis, because the receiving IP stack cannot reassemble a full packet when the first fragment is missing. This commit works by adding a new "fragment type" to the kernel flow match and making it available through OpenFlow as a new NXM field named NXM_NX_IP_FRAG. Because OpenFlow 1.0 explicitly says that the L4 fields are always 0 for IP fragments, it adds a new OpenFlow fragment handling mode that fills in the L4 fields for "first fragments". It also enhances ovs-ofctl to allow users to configure this new fragment handling mode and to parse the new field. Signed-off-by: Ben Pfaff Bug #7557. --- diff --git a/NEWS b/NEWS index ff3bc441..a05c1979 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,14 @@ Post-v1.2.0 new NXAST_RESUBMIT_TABLE action can look up in additional tables. 
Tables 128 and above are reserved for use by the switch itself; please use only tables 0 through 127. + - Fragment handling extensions: + - New OFPC_FRAG_NX_MATCH fragment handling mode, in which L4 + fields are made available for matching in fragments with + offset 0. + - New NXM_NX_IP_FRAG match field for matching IP fragments (usable + via "ip_frag" in ovs-ofctl). + - New ovs-ofctl "get-frags" and "set-frags" commands to get and set + fragment handling policy. - CAPWAP tunneling now supports an extension to transport a 64-key. By default it remains compatible with the old version and other standards-based implementations. diff --git a/datapath/datapath.c b/datapath/datapath.c index a3be3254..10bf4b9e 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -68,8 +68,8 @@ EXPORT_SYMBOL(dp_ioctl_hook); * etc.) are protected by RTNL. * * Writes to other state (flow table modifications, set miscellaneous datapath - * parameters such as drop frags, etc.) are protected by genl_mutex. The RTNL - * lock nests inside genl_mutex. + * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside + * genl_mutex. * * Reads are protected by RCU. * @@ -84,8 +84,10 @@ EXPORT_SYMBOL(dp_ioctl_hook); static LIST_HEAD(dps); static struct vport *new_vport(const struct vport_parms *); -static int queue_userspace_packets(struct datapath *, struct sk_buff *, - const struct dp_upcall_info *); +static int queue_gso_packets(int dp_ifindex, struct sk_buff *, + const struct dp_upcall_info *); +static int queue_userspace_packet(int dp_ifindex, struct sk_buff *, + const struct dp_upcall_info *); /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ struct datapath *get_dp(int dp_ifindex) @@ -289,21 +291,14 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb) if (!OVS_CB(skb)->flow) { struct sw_flow_key key; int key_len; - bool is_frag; /* Extract flow from 'skb' into 'key'. 
*/ - error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag); + error = flow_extract(skb, p->port_no, &key, &key_len); if (unlikely(error)) { kfree_skb(skb); return; } - if (is_frag && dp->drop_frags) { - consume_skb(skb); - stats_counter = &stats->n_frags; - goto out; - } - /* Look up flow. */ flow = flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len); if (unlikely(!flow)) { @@ -360,8 +355,8 @@ static struct genl_family dp_packet_genl_family = { int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { - struct sk_buff *segs = NULL; struct dp_stats_percpu *stats; + int dp_ifindex; int err; if (upcall_info->pid == 0) { @@ -369,30 +364,18 @@ int dp_upcall(struct datapath *dp, struct sk_buff *skb, goto err; } - forward_ip_summed(skb, true); - - /* Break apart GSO packets into their component pieces. Otherwise - * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */ - if (skb_is_gso(skb)) { - segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); - - if (IS_ERR(segs)) { - err = PTR_ERR(segs); - goto err; - } - skb = segs; + dp_ifindex = get_dpifindex(dp); + if (!dp_ifindex) { + err = -ENODEV; + goto err; } - err = queue_userspace_packets(dp, skb, upcall_info); - if (segs) { - struct sk_buff *next; - /* Free GSO-segments */ - do { - next = segs->next; - kfree_skb(segs); - } while ((segs = next) != NULL); - } + forward_ip_summed(skb, true); + if (!skb_is_gso(skb)) + err = queue_userspace_packet(dp_ifindex, skb, upcall_info); + else + err = queue_gso_packets(dp_ifindex, skb, upcall_info); if (err) goto err; @@ -408,68 +391,97 @@ err: return err; } -/* Send each packet in the 'skb' list to userspace for 'dp' as directed by - * 'upcall_info'. There will be only one packet unless we broke up a GSO - * packet. 
- */ -static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb, - const struct dp_upcall_info *upcall_info) +static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, + const struct dp_upcall_info *upcall_info) { - int dp_ifindex; + struct dp_upcall_info later_info; + struct sw_flow_key later_key; + struct sk_buff *segs, *nskb; + int err; - dp_ifindex = get_dpifindex(dp); - if (!dp_ifindex) - return -ENODEV; + segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); + if (IS_ERR(segs)) + return PTR_ERR(segs); + /* Queue all of the segments. */ + skb = segs; do { - struct ovs_header *upcall; - struct sk_buff *user_skb; /* to be queued to userspace */ - struct nlattr *nla; - unsigned int len; - int err; - - err = vlan_deaccel_tag(skb); - if (unlikely(err)) - return err; - - if (nla_attr_size(skb->len) > USHRT_MAX) - return -EFBIG; - - len = sizeof(struct ovs_header); - len += nla_total_size(skb->len); - len += nla_total_size(FLOW_BUFSIZE); - if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) - len += nla_total_size(8); - - user_skb = genlmsg_new(len, GFP_ATOMIC); - if (!user_skb) - return -ENOMEM; - - upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, - 0, upcall_info->cmd); - upcall->dp_ifindex = dp_ifindex; - - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - flow_to_nlattrs(upcall_info->key, user_skb); - nla_nest_end(user_skb, nla); - - if (upcall_info->userdata) - nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, - nla_get_u64(upcall_info->userdata)); - - nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); - if (skb->ip_summed == CHECKSUM_PARTIAL) - copy_and_csum_skb(skb, nla_data(nla)); - else - skb_copy_bits(skb, 0, nla_data(nla), skb->len); - - err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid); + err = queue_userspace_packet(dp_ifindex, skb, upcall_info); if (err) - return err; + break; + if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) { + /* The initial flow key extracted by 
flow_extract() in + * this case is for a first fragment, so we need to + * properly mark later fragments. + */ + later_key = *upcall_info->key; + later_key.ip.tos_frag &= ~OVS_FRAG_TYPE_MASK; + later_key.ip.tos_frag |= OVS_FRAG_TYPE_LATER; + + later_info = *upcall_info; + later_info.key = &later_key; + upcall_info = &later_info; + } } while ((skb = skb->next)); - return 0; + /* Free all of the segments. */ + skb = segs; + do { + nskb = skb->next; + if (err) + kfree_skb(skb); + else + consume_skb(skb); + } while ((skb = nskb)); + return err; +} + +static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb, + const struct dp_upcall_info *upcall_info) +{ + struct ovs_header *upcall; + struct sk_buff *user_skb; /* to be queued to userspace */ + struct nlattr *nla; + unsigned int len; + int err; + + err = vlan_deaccel_tag(skb); + if (unlikely(err)) + return err; + + if (nla_attr_size(skb->len) > USHRT_MAX) + return -EFBIG; + + len = sizeof(struct ovs_header); + len += nla_total_size(skb->len); + len += nla_total_size(FLOW_BUFSIZE); + if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) + len += nla_total_size(8); + + user_skb = genlmsg_new(len, GFP_ATOMIC); + if (!user_skb) + return -ENOMEM; + + upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, + 0, upcall_info->cmd); + upcall->dp_ifindex = dp_ifindex; + + nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); + flow_to_nlattrs(upcall_info->key, user_skb); + nla_nest_end(user_skb, nla); + + if (upcall_info->userdata) + nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, + nla_get_u64(upcall_info->userdata)); + + nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); + if (skb->ip_summed == CHECKSUM_PARTIAL) + copy_and_csum_skb(skb, nla_data(nla)); + else + skb_copy_bits(skb, 0, nla_data(nla), skb->len); + + return genlmsg_unicast(&init_net, user_skb, upcall_info->pid); } /* Called with genl_mutex. 
*/ @@ -567,6 +579,11 @@ static int validate_action_key(const struct nlattr *a, if (ipv4_key->ipv4_tos & INET_ECN_MASK) return -EINVAL; + + if (ipv4_key->ipv4_frag != + (flow_key->ip.tos_frag & OVS_FRAG_TYPE_MASK)) + return -EINVAL; + break; case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_TCP): @@ -708,7 +725,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct datapath *dp; struct ethhdr *eth; - bool is_frag; int len; int err; int key_len; @@ -745,7 +761,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(flow)) goto err_kfree_skb; - err = flow_extract(packet, -1, &flow->key, &key_len, &is_frag); + err = flow_extract(packet, -1, &flow->key, &key_len); if (err) goto err_flow_put; @@ -818,7 +834,7 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) stats->n_flows = flow_tbl_count(table); - stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0; + stats->n_hit = stats->n_missed = stats->n_lost = 0; for_each_possible_cpu(i) { const struct dp_stats_percpu *percpu_stats; struct dp_stats_percpu local_stats; @@ -831,7 +847,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) local_stats = *percpu_stats; } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount)); - stats->n_frags += local_stats.n_frags; stats->n_hit += local_stats.n_hit; stats->n_missed += local_stats.n_missed; stats->n_lost += local_stats.n_lost; @@ -1231,7 +1246,6 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, #endif [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 }, }; static struct genl_family dp_datapath_genl_family = { @@ -1271,9 +1285,6 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, goto nla_put_failure; get_dp_stats(dp, nla_data(nla)); - NLA_PUT_U32(skb, 
OVS_DP_ATTR_IPV4_FRAGS, - dp->drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO); - return genlmsg_end(skb, ovs_header); nla_put_failure: @@ -1302,13 +1313,6 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1]) { - if (a[OVS_DP_ATTR_IPV4_FRAGS]) { - u32 frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]); - - if (frags != OVS_DP_FRAG_ZERO && frags != OVS_DP_FRAG_DROP) - return -EINVAL; - } - return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1); } @@ -1330,13 +1334,6 @@ static struct datapath *lookup_datapath(struct ovs_header *ovs_header, struct nl return dp ? dp : ERR_PTR(-ENODEV); } -/* Called with genl_mutex. */ -static void change_datapath(struct datapath *dp, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) -{ - if (a[OVS_DP_ATTR_IPV4_FRAGS]) - dp->drop_frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]) == OVS_DP_FRAG_DROP; -} - static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -1376,15 +1373,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!dp->table) goto err_free_dp; - dp->drop_frags = 0; dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); if (!dp->stats_percpu) { err = -ENOMEM; goto err_destroy_table; } - change_datapath(dp, a); - /* Set up our datapath device. 
*/ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); parms.type = OVS_VPORT_TYPE_INTERNAL; @@ -1497,8 +1491,6 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return PTR_ERR(dp); - change_datapath(dp, info->attrs); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); diff --git a/datapath/datapath.h b/datapath/datapath.h index b93665ce..4964a51c 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -33,7 +33,6 @@ struct vport; /** * struct dp_stats_percpu - per-cpu packet processing statistics for a given * datapath. - * @n_frags: Number of IP fragments processed by datapath. * @n_hit: Number of received packets for which a matching flow was found in * the flow table. * @n_miss: Number of received packets that had no matching flow in the flow @@ -44,7 +43,6 @@ struct vport; * one of the datapath's queues). */ struct dp_stats_percpu { - u64 n_frags; u64 n_hit; u64 n_missed; u64 n_lost; @@ -56,7 +54,6 @@ struct dp_stats_percpu { * @rcu: RCU callback head for deferred destruction. * @list_node: Element in global 'dps' list. * @ifobj: Represents /sys/class/net//brif. Protected by RTNL. - * @drop_frags: Drop all IP fragments if nonzero. * @n_flows: Number of flows currently in flow table. * @table: Current flow table. Protected by genl_lock and RCU. * @ports: Map from port number to &struct vport. %OVSP_LOCAL port @@ -73,8 +70,6 @@ struct datapath { struct list_head list_node; struct kobject ifobj; - int drop_frags; - /* Flow table. 
*/ struct flow_table __rcu *table; diff --git a/datapath/flow.c b/datapath/flow.c index 7b9cb611..b6023a08 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -119,6 +119,67 @@ u64 flow_used_time(unsigned long flow_jiffies) offsetof(struct sw_flow_key, field) + \ FIELD_SIZEOF(struct sw_flow_key, field) +/** + * skip_exthdr - skip any IPv6 extension headers + * @skb: skbuff to parse + * @start: offset of first extension header + * @nexthdrp: Initially, points to the type of the extension header at @start. + * This function updates it to point to the extension header at the final + * offset. + * @tos_frag: Points to the @tos_frag member in a &struct sw_flow_key. This + * function sets an appropriate %OVS_FRAG_TYPE_* value. + * + * This is based on ipv6_skip_exthdr() but adds the updates to *@tos_frag. + * + * When there is more than one fragment header, this version reports whether + * the final fragment header that it examines is a first fragment. + * + * Returns the final payload offset, or -1 on error. 
+ */ +static int skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, + u8 *tos_frag) +{ + u8 nexthdr = *nexthdrp; + + while (ipv6_ext_hdr(nexthdr)) { + struct ipv6_opt_hdr _hdr, *hp; + int hdrlen; + + if (nexthdr == NEXTHDR_NONE) + return -1; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (hp == NULL) + return -1; + if (nexthdr == NEXTHDR_FRAGMENT) { + __be16 _frag_off, *fp; + fp = skb_header_pointer(skb, + start+offsetof(struct frag_hdr, + frag_off), + sizeof(_frag_off), + &_frag_off); + if (fp == NULL) + return -1; + + *tos_frag &= ~OVS_FRAG_TYPE_MASK; + if (ntohs(*fp) & ~0x7) { + *tos_frag |= OVS_FRAG_TYPE_LATER; + break; + } + *tos_frag |= OVS_FRAG_TYPE_FIRST; + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hp->hdrlen+2)<<2; + else + hdrlen = ipv6_optlen(hp); + + nexthdr = hp->nexthdr; + start += hdrlen; + } + + *nexthdrp = nexthdr; + return start; +} + static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, int *key_lenp) { @@ -140,11 +201,11 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, payload_ofs = (u8 *)(nh + 1) - skb->data; key->ip.proto = NEXTHDR_NONE; - key->ip.tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK; + key->ip.tos_frag = ipv6_get_dsfield(nh) & ~INET_ECN_MASK; ipv6_addr_copy(&key->ipv6.addr.src, &nh->saddr); ipv6_addr_copy(&key->ipv6.addr.dst, &nh->daddr); - payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr); + payload_ofs = skip_exthdr(skb, payload_ofs, &nexthdr, &key->ip.tos_frag); if (unlikely(payload_ofs < 0)) return -EINVAL; @@ -552,8 +613,6 @@ out: * @in_port: port number on which @skb was received. * @key: output flow key * @key_lenp: length of output flow key - * @is_frag: set to 1 if @skb contains an IPv4 fragment, or to 0 if @skb does - * not contain an IPv4 packet or if it is not a fragment. * * The caller must ensure that skb->len >= ETH_HLEN. * @@ -572,7 +631,7 @@ out: * For other key->dl_type values it is left untouched. 
*/ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, - int *key_lenp, bool *is_frag) + int *key_lenp) { int error = 0; int key_len = SW_FLOW_KEY_OFFSET(eth); @@ -581,7 +640,6 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memset(key, 0, sizeof(*key)); key->eth.tun_id = OVS_CB(skb)->tun_id; key->eth.in_port = in_port; - *is_frag = false; skb_reset_mac_header(skb); @@ -610,6 +668,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, /* Network layer. */ if (key->eth.type == htons(ETH_P_IP)) { struct iphdr *nh; + __be16 offset; key_len = SW_FLOW_KEY_OFFSET(ipv4.addr); @@ -625,31 +684,37 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, nh = ip_hdr(skb); key->ipv4.addr.src = nh->saddr; key->ipv4.addr.dst = nh->daddr; - key->ip.tos = nh->tos & ~INET_ECN_MASK; + key->ip.proto = nh->protocol; + key->ip.tos_frag = nh->tos & ~INET_ECN_MASK; - /* Transport layer. */ - if ((nh->frag_off & htons(IP_MF | IP_OFFSET)) || - (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) - *is_frag = true; + offset = nh->frag_off & htons(IP_OFFSET); + if (offset) { + key->ip.tos_frag |= OVS_FRAG_TYPE_LATER; + goto out; + } + if (nh->frag_off & htons(IP_MF) || + skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST; + /* Transport layer. 
*/ if (key->ip.proto == IPPROTO_TCP) { key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); - if (!*is_frag && tcphdr_ok(skb)) { + if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); key->ipv4.tp.src = tcp->source; key->ipv4.tp.dst = tcp->dest; } } else if (key->ip.proto == IPPROTO_UDP) { key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); - if (!*is_frag && udphdr_ok(skb)) { + if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); key->ipv4.tp.src = udp->source; key->ipv4.tp.dst = udp->dest; } } else if (key->ip.proto == IPPROTO_ICMP) { key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); - if (!*is_frag && icmphdr_ok(skb)) { + if (icmphdr_ok(skb)) { struct icmphdr *icmp = icmp_hdr(skb); /* The ICMP type and code fields use the 16-bit * transport port fields, so we need to store them @@ -694,6 +759,11 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, goto out; } + if ((key->ip.tos_frag & OVS_FRAG_TYPE_MASK) == OVS_FRAG_TYPE_LATER) + goto out; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST; + /* Transport layer. */ if (key->ip.proto == NEXTHDR_TCP) { key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); @@ -768,6 +838,15 @@ void flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) } } +static int parse_tos_frag(struct sw_flow_key *swkey, u8 tos, u8 frag) +{ + if (tos & INET_ECN_MASK || frag > OVS_FRAG_TYPE_MAX) + return -EINVAL; + + swkey->ip.tos_frag = tos | frag; + return 0; +} + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ const u32 ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_TUN_ID] = 8, @@ -797,11 +876,15 @@ const u32 ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { * * [tun_id] [in_port] ethernet [8021q] [ethertype \ * [IPv4 [TCP|UDP|ICMP] | IPv6 [TCP|UDP|ICMPv6 [ND]] | ARP]] + * + * except that IPv4 or IPv6 terminates the sequence if its @ipv4_frag or + * @ipv6_frag member, respectively, equals %OVS_FRAG_TYPE_LATER. 
*/ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *attr) { int error = 0; + enum ovs_frag_type frag_type; const struct nlattr *nla; u16 prev_type; int rem; @@ -874,11 +957,11 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, goto invalid; ipv4_key = nla_data(nla); swkey->ip.proto = ipv4_key->ipv4_proto; - swkey->ip.tos = ipv4_key->ipv4_tos; + if (parse_tos_frag(swkey, ipv4_key->ipv4_tos, + ipv4_key->ipv4_frag)) + goto invalid; swkey->ipv4.addr.src = ipv4_key->ipv4_src; swkey->ipv4.addr.dst = ipv4_key->ipv4_dst; - if (swkey->ip.tos & INET_ECN_MASK) - goto invalid; break; case TRANSITION(OVS_KEY_ATTR_ETHERTYPE, OVS_KEY_ATTR_IPV6): @@ -887,13 +970,13 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, goto invalid; ipv6_key = nla_data(nla); swkey->ip.proto = ipv6_key->ipv6_proto; - swkey->ip.tos = ipv6_key->ipv6_tos; + if (parse_tos_frag(swkey, ipv6_key->ipv6_tos, + ipv6_key->ipv6_frag)) + goto invalid; memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src, sizeof(swkey->ipv6.addr.src)); memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst, sizeof(swkey->ipv6.addr.dst)); - if (swkey->ip.tos & INET_ECN_MASK) - goto invalid; break; case TRANSITION(OVS_KEY_ATTR_IPV4, OVS_KEY_ATTR_TCP): @@ -985,6 +1068,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, if (rem) goto invalid; + frag_type = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK; switch (prev_type) { case OVS_KEY_ATTR_UNSPEC: goto invalid; @@ -1004,6 +1088,8 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, goto ok; case OVS_KEY_ATTR_IPV4: + if (frag_type == OVS_FRAG_TYPE_LATER) + goto ok; if (swkey->ip.proto == IPPROTO_TCP || swkey->ip.proto == IPPROTO_UDP || swkey->ip.proto == IPPROTO_ICMP) @@ -1011,6 +1097,8 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, goto ok; case OVS_KEY_ATTR_IPV6: + if (frag_type == OVS_FRAG_TYPE_LATER) + goto ok; if (swkey->ip.proto == IPPROTO_TCP || swkey->ip.proto == IPPROTO_UDP || 
swkey->ip.proto == IPPROTO_ICMPV6) @@ -1019,15 +1107,20 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, case OVS_KEY_ATTR_ICMPV6: if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || - swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) + swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT) || + frag_type == OVS_FRAG_TYPE_LATER) goto invalid; goto ok; case OVS_KEY_ATTR_TCP: case OVS_KEY_ATTR_UDP: case OVS_KEY_ATTR_ICMP: - case OVS_KEY_ATTR_ARP: case OVS_KEY_ATTR_ND: + if (frag_type == OVS_FRAG_TYPE_LATER) + goto invalid; + goto ok; + + case OVS_KEY_ATTR_ARP: goto ok; default: @@ -1142,7 +1235,8 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) ipv4_key->ipv4_src = swkey->ipv4.addr.src; ipv4_key->ipv4_dst = swkey->ipv4.addr.dst; ipv4_key->ipv4_proto = swkey->ip.proto; - ipv4_key->ipv4_tos = swkey->ip.tos; + ipv4_key->ipv4_tos = swkey->ip.tos_frag & ~INET_ECN_MASK; + ipv4_key->ipv4_frag = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { struct ovs_key_ipv6 *ipv6_key; @@ -1156,7 +1250,8 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst, sizeof(ipv6_key->ipv6_dst)); ipv6_key->ipv6_proto = swkey->ip.proto; - ipv6_key->ipv6_tos = swkey->ip.tos; + ipv6_key->ipv6_tos = swkey->ip.tos_frag & ~INET_ECN_MASK; + ipv6_key->ipv6_frag = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK; } else if (swkey->eth.type == htons(ETH_P_ARP)) { struct ovs_key_arp *arp_key; @@ -1172,8 +1267,9 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN); } - if (swkey->eth.type == htons(ETH_P_IP) || - swkey->eth.type == htons(ETH_P_IPV6)) { + if ((swkey->eth.type == htons(ETH_P_IP) || + swkey->eth.type == htons(ETH_P_IPV6)) && + (swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK) != OVS_FRAG_TYPE_LATER) { if (swkey->ip.proto == IPPROTO_TCP) { struct 
ovs_key_tcp *tcp_key; diff --git a/datapath/flow.h b/datapath/flow.h index 96b3b4fe..484ea626 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -20,6 +20,7 @@ #include #include #include +#include struct sk_buff; @@ -29,6 +30,10 @@ struct sw_flow_actions { struct nlattr actions[]; }; +/* Mask for the OVS_FRAG_TYPE_* value in the low 2 bits of ip.tos_frag in + * struct sw_flow_key. */ +#define OVS_FRAG_TYPE_MASK INET_ECN_MASK + struct sw_flow_key { struct { __be64 tun_id; /* Encapsulating tunnel ID. */ @@ -40,7 +45,8 @@ struct sw_flow_key { } eth; struct { u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ - u8 tos; /* IP ToS (DSCP field, 6 bits). */ + u8 tos_frag; /* IP ToS DSCP in high 6 bits, + * OVS_FRAG_TYPE_* in low 2 bits. */ } ip; union { struct { @@ -123,7 +129,7 @@ void flow_hold(struct sw_flow *); void flow_put(struct sw_flow *); int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, - int *key_lenp, bool *is_frag); + int *key_lenp); void flow_used(struct sw_flow *, struct sk_buff *); u64 flow_used_time(unsigned long flow_jiffies); diff --git a/datapath/tunnel.c b/datapath/tunnel.c index 694e3471..f9138493 100644 --- a/datapath/tunnel.c +++ b/datapath/tunnel.c @@ -893,7 +893,6 @@ static struct tnl_cache *build_cache(struct vport *vport, struct sw_flow_key flow_key; struct vport *dst_vport; struct sk_buff *skb; - bool is_frag; int err; int flow_key_len; struct sw_flow *flow; @@ -910,10 +909,10 @@ static struct tnl_cache *build_cache(struct vport *vport, memcpy(skb->data, get_cached_header(cache), cache->len); err = flow_extract(skb, dst_vport->port_no, &flow_key, - &flow_key_len, &is_frag); + &flow_key_len); consume_skb(skb); - if (err || is_frag) + if (err) goto done; flow = flow_tbl_lookup(rcu_dereference(dst_vport->dp->table), diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index acc27693..190bf79e 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -80,9 +80,6 @@ struct 
ovs_header { * not be sent. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. - * @OVS_DP_ATTR_IPV4_FRAGS: One of %OVS_DP_FRAG_*. Always present in - * notifications. May be included in %OVS_DP_NEW or %OVS_DP_SET requests to - * change the fragment handling policy. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_DP_* commands. @@ -92,27 +89,12 @@ enum ovs_datapath_attr { OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ - OVS_DP_ATTR_IPV4_FRAGS, /* 32-bit enum ovs_datapath_frag */ __OVS_DP_ATTR_MAX }; #define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1) -/** - * enum ovs_datapath_frag - policy for handling received IPv4 fragments. - * @OVS_DP_FRAG_ZERO: Treat IP fragments as IP protocol 0 and transport ports - * zero. - * @OVS_DP_FRAG_DROP: Drop IP fragments. Do not pass them through the flow - * table or up to userspace. - */ -enum ovs_datapath_frag { - OVS_DP_FRAG_UNSPEC, - OVS_DP_FRAG_ZERO, /* Treat IP fragments as transport port 0. */ - OVS_DP_FRAG_DROP /* Drop IP fragments. */ -}; - struct ovs_dp_stats { - __u64 n_frags; /* Number of dropped IP fragments. */ __u64 n_hit; /* Number of flow table matches. */ __u64 n_missed; /* Number of flow table misses. */ __u64 n_lost; /* Number of misses not sent to userspace. */ @@ -290,6 +272,24 @@ enum ovs_key_attr { #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) +/** + * enum ovs_frag_type - IPv4 and IPv6 fragment type + * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. + * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0. + * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset. + * + * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag in &struct + * ovs_key_ipv6. 
+ */ +enum ovs_frag_type { + OVS_FRAG_TYPE_NONE, + OVS_FRAG_TYPE_FIRST, + OVS_FRAG_TYPE_LATER, + __OVS_FRAG_TYPE_MAX +}; + +#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) + struct ovs_key_ethernet { __u8 eth_src[6]; __u8 eth_dst[6]; @@ -305,6 +305,7 @@ struct ovs_key_ipv4 { __be32 ipv4_dst; __u8 ipv4_proto; __u8 ipv4_tos; + __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */ }; struct ovs_key_ipv6 { @@ -312,6 +313,7 @@ struct ovs_key_ipv6 { __be32 ipv6_dst[4]; __u8 ipv6_proto; __u8 ipv6_tos; + __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ }; struct ovs_key_tcp { diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h index a6d2db45..aeb1a31c 100644 --- a/include/openflow/nicira-ext.h +++ b/include/openflow/nicira-ext.h @@ -1531,6 +1531,55 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24); * Masking: Not maskable. */ #define NXM_NX_ND_TLL NXM_HEADER (0x0001, 25, 6) +/* IP fragment information. + * + * Prereqs: + * NXM_OF_ETH_TYPE must be either 0x0800 or 0x86dd. + * + * Format: 8-bit value with one of the values 0, 1, or 3, as described below. + * + * Masking: Fully maskable. + * + * This field has three possible values: + * + * - A packet that is not an IP fragment has value 0. + * + * - A packet that is an IP fragment with offset 0 (the first fragment) has + * bit 0 set and thus value 1. + * + * - A packet that is an IP fragment with nonzero offset has bits 0 and 1 set + * and thus value 3. + * + * NX_IP_FRAG_ANY and NX_IP_FRAG_LATER are declared to symbolically represent + * the meanings of bits 0 and 1. + * + * The switch may reject matches against values that can never appear. + * + * It is important to understand how this field interacts with the OpenFlow IP + * fragment handling mode: + * + * - In OFPC_FRAG_DROP mode, the OpenFlow switch drops all IP fragments + * before they reach the flow table, so every packet that is available for + * matching will have value 0 in this field. 
+ * + * - Open vSwitch does not implement OFPC_FRAG_REASM mode, but if it did then + * IP fragments would be reassembled before they reached the flow table and + * again every packet available for matching would always have value 0. + * + * - In OFPC_FRAG_NORMAL mode, all three values are possible, but OpenFlow + * 1.0 says that fragments' transport ports are always 0, even for the + * first fragment, so this does not provide much extra information. + * + * - In OFPC_FRAG_NX_MATCH mode, all three values are possible. For + * fragments with offset 0, Open vSwitch makes L4 header information + * available. + */ +#define NXM_NX_IP_FRAG NXM_HEADER (0x0001, 26, 1) +#define NXM_NX_IP_FRAG_W NXM_HEADER_W(0x0001, 26, 1) + +/* Bits in the value of NXM_NX_IP_FRAG. */ +#define NX_IP_FRAG_ANY (1 << 0) /* Is this a fragment? */ +#define NX_IP_FRAG_LATER (1 << 1) /* Is this a fragment with nonzero offset? */ /* ## --------------------- ## */ /* ## Requests and replies. ## */ diff --git a/include/openflow/openflow.h b/include/openflow/openflow.h index fd8fbeb1..cee62e8f 100644 --- a/include/openflow/openflow.h +++ b/include/openflow/openflow.h @@ -134,6 +134,7 @@ enum ofp_config_flags { OFPC_FRAG_NORMAL = 0, /* No special handling for fragments. */ OFPC_FRAG_DROP = 1, /* Drop fragments. */ OFPC_FRAG_REASM = 2, /* Reassemble (only if OFPC_IP_REASM set). */ + OFPC_FRAG_NX_MATCH = 3, /* Make first fragments available for matching. 
*/ OFPC_FRAG_MASK = 3 }; diff --git a/lib/classifier.c b/lib/classifier.c index 9f4c42b0..869029f5 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -319,8 +319,26 @@ cls_rule_set_nw_dst_masked(struct cls_rule *rule, ovs_be32 ip, ovs_be32 mask) void cls_rule_set_nw_tos(struct cls_rule *rule, uint8_t nw_tos) { - rule->wc.wildcards &= ~FWW_NW_TOS; - rule->flow.nw_tos = nw_tos & IP_DSCP_MASK; + rule->wc.tos_frag_mask |= IP_DSCP_MASK; + rule->flow.tos_frag &= ~IP_DSCP_MASK; + rule->flow.tos_frag |= nw_tos & IP_DSCP_MASK; +} + +void +cls_rule_set_frag(struct cls_rule *rule, uint8_t frag) +{ + rule->wc.tos_frag_mask |= FLOW_FRAG_MASK; + rule->flow.tos_frag &= ~FLOW_FRAG_MASK; + rule->flow.tos_frag |= frag & FLOW_FRAG_MASK; +} + +void +cls_rule_set_frag_masked(struct cls_rule *rule, uint8_t frag, uint8_t mask) +{ + mask &= FLOW_FRAG_MASK; + frag &= mask; + rule->wc.tos_frag_mask = (rule->wc.tos_frag_mask & ~FLOW_FRAG_MASK) | mask; + rule->flow.tos_frag = (rule->flow.tos_frag & ~FLOW_FRAG_MASK) | frag; } void @@ -450,7 +468,7 @@ cls_rule_format(const struct cls_rule *rule, struct ds *s) int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3); if (rule->priority != OFP_DEFAULT_PRIORITY) { ds_put_format(s, "priority=%d,", rule->priority); @@ -592,8 +610,26 @@ cls_rule_format(const struct cls_rule *rule, struct ds *s) ETH_ADDR_ARGS(f->arp_tha)); } } - if (!(w & FWW_NW_TOS)) { - ds_put_format(s, "nw_tos=%"PRIu8",", f->nw_tos); + if (wc->tos_frag_mask & IP_DSCP_MASK) { + ds_put_format(s, "nw_tos=%"PRIu8",", f->tos_frag & IP_DSCP_MASK); + } + switch (wc->tos_frag_mask & FLOW_FRAG_MASK) { + case FLOW_FRAG_ANY | FLOW_FRAG_LATER: + ds_put_format(s, "frag=%s,", + f->tos_frag & FLOW_FRAG_ANY + ? (f->tos_frag & FLOW_FRAG_LATER ? "later" : "first") + : (f->tos_frag & FLOW_FRAG_LATER ? "<error>" : "no")); + break; + + case FLOW_FRAG_ANY: + ds_put_format(s, "frag=%s,", + f->tos_frag & FLOW_FRAG_ANY ? 
"yes" : "no"); + break; + + case FLOW_FRAG_LATER: + ds_put_format(s, "frag=%s,", + f->tos_frag & FLOW_FRAG_LATER ? "later" : "not_later"); + break; } if (f->nw_proto == IPPROTO_ICMP) { if (!(w & FWW_TP_SRC)) { @@ -1123,7 +1159,7 @@ flow_equal_except(const struct flow *a, const struct flow *b, const flow_wildcards_t wc = wildcards->wildcards; int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3); for (i = 0; i < FLOW_N_REGS; i++) { if ((a->regs[i] ^ b->regs[i]) & wildcards->reg_masks[i]) { @@ -1150,7 +1186,7 @@ flow_equal_except(const struct flow *a, const struct flow *b, && (wc & FWW_ETH_MCAST || !((a->dl_dst[0] ^ b->dl_dst[0]) & 0x01)) && (wc & FWW_NW_PROTO || a->nw_proto == b->nw_proto) - && (wc & FWW_NW_TOS || a->nw_tos == b->nw_tos) + && !((a->tos_frag ^ b->tos_frag) & wildcards->tos_frag_mask) && (wc & FWW_ARP_SHA || eth_addr_equals(a->arp_sha, b->arp_sha)) && (wc & FWW_ARP_THA || eth_addr_equals(a->arp_tha, b->arp_tha)) && ipv6_equal_except(&a->ipv6_src, &b->ipv6_src, diff --git a/lib/classifier.h b/lib/classifier.h index db090225..d5c19f09 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -117,6 +117,8 @@ bool cls_rule_set_nw_src_masked(struct cls_rule *, ovs_be32 ip, ovs_be32 mask); void cls_rule_set_nw_dst(struct cls_rule *, ovs_be32); bool cls_rule_set_nw_dst_masked(struct cls_rule *, ovs_be32 ip, ovs_be32 mask); void cls_rule_set_nw_tos(struct cls_rule *, uint8_t); +void cls_rule_set_frag(struct cls_rule *, uint8_t frag); +void cls_rule_set_frag_masked(struct cls_rule *, uint8_t frag, uint8_t mask); void cls_rule_set_icmp_type(struct cls_rule *, uint8_t); void cls_rule_set_icmp_code(struct cls_rule *, uint8_t); void cls_rule_set_arp_sha(struct cls_rule *, const uint8_t[6]); diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index fa6a05e0..2466f915 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -79,7 +79,6 @@ struct dpif_linux_dp { const char *name; /* OVS_DP_ATTR_NAME. 
*/ const uint32_t *upcall_pid; /* OVS_DP_UPCALL_PID. */ struct ovs_dp_stats stats; /* OVS_DP_ATTR_STATS. */ - enum ovs_datapath_frag ipv4_frags; /* OVS_DP_ATTR_IPV4_FRAGS. */ }; static void dpif_linux_dp_init(struct dpif_linux_dp *); @@ -347,7 +346,6 @@ dpif_linux_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats) error = dpif_linux_dp_get(dpif_, &dp, &buf); if (!error) { - stats->n_frags = dp.stats.n_frags; stats->n_hit = dp.stats.n_hit; stats->n_missed = dp.stats.n_missed; stats->n_lost = dp.stats.n_lost; @@ -357,34 +355,6 @@ dpif_linux_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats) return error; } -static int -dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp) -{ - struct dpif_linux_dp dp; - struct ofpbuf *buf; - int error; - - error = dpif_linux_dp_get(dpif_, &dp, &buf); - if (!error) { - *drop_fragsp = dp.ipv4_frags == OVS_DP_FRAG_DROP; - ofpbuf_delete(buf); - } - return error; -} - -static int -dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags) -{ - struct dpif_linux *dpif = dpif_linux_cast(dpif_); - struct dpif_linux_dp dp; - - dpif_linux_dp_init(&dp); - dp.cmd = OVS_DP_CMD_SET; - dp.dp_ifindex = dpif->dp_ifindex; - dp.ipv4_frags = drop_frags ? 
OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO; - return dpif_linux_dp_transact(&dp, NULL, NULL); -} - static int dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev, uint16_t *port_nop) @@ -1206,8 +1176,6 @@ const struct dpif_class dpif_linux_class = { dpif_linux_run, dpif_linux_wait, dpif_linux_get_stats, - dpif_linux_get_drop_frags, - dpif_linux_set_drop_frags, dpif_linux_port_add, dpif_linux_port_del, dpif_linux_port_query_by_number, @@ -1540,7 +1508,6 @@ dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf) .min_len = sizeof(struct ovs_dp_stats), .max_len = sizeof(struct ovs_dp_stats), .optional = true }, - [OVS_DP_ATTR_IPV4_FRAGS] = { .type = NL_A_U32, .optional = true }, }; struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)]; @@ -1571,9 +1538,6 @@ dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf) memcpy(&dp->stats, nl_attr_get(a[OVS_DP_ATTR_STATS]), sizeof dp->stats); } - if (a[OVS_DP_ATTR_IPV4_FRAGS]) { - dp->ipv4_frags = nl_attr_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]); - } return 0; } @@ -1599,10 +1563,6 @@ dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf) } /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */ - - if (dp->ipv4_frags) { - nl_msg_put_u32(buf, OVS_DP_ATTR_IPV4_FRAGS, dp->ipv4_frags); - } } /* Clears 'dp' to "empty" values. */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index babbce04..1d05dd8d 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -81,12 +81,10 @@ struct dp_netdev { int open_cnt; bool destroyed; - bool drop_frags; /* Drop all IP fragments, if true. */ struct dp_netdev_queue queues[N_QUEUES]; struct hmap flow_table; /* Flow table. */ /* Statistics. */ - long long int n_frags; /* Number of dropped IP fragments. */ long long int n_hit; /* Number of flow table matches. */ long long int n_missed; /* Number of flow table misses. */ long long int n_lost; /* Number of misses not passed to client. 
*/ @@ -198,7 +196,6 @@ create_dp_netdev(const char *name, const struct dpif_class *class, dp->class = class; dp->name = xstrdup(name); dp->open_cnt = 0; - dp->drop_frags = false; for (i = 0; i < N_QUEUES; i++) { dp->queues[i].head = dp->queues[i].tail = 0; } @@ -302,29 +299,12 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) { struct dp_netdev *dp = get_dp_netdev(dpif); stats->n_flows = hmap_count(&dp->flow_table); - stats->n_frags = dp->n_frags; stats->n_hit = dp->n_hit; stats->n_missed = dp->n_missed; stats->n_lost = dp->n_lost; return 0; } -static int -dpif_netdev_get_drop_frags(const struct dpif *dpif, bool *drop_fragsp) -{ - struct dp_netdev *dp = get_dp_netdev(dpif); - *drop_fragsp = dp->drop_frags; - return 0; -} - -static int -dpif_netdev_set_drop_frags(struct dpif *dpif, bool drop_frags) -{ - struct dp_netdev *dp = get_dp_netdev(dpif); - dp->drop_frags = drop_frags; - return 0; -} - static int do_add_port(struct dp_netdev *dp, const char *devname, const char *type, uint16_t port_no) @@ -1001,11 +981,7 @@ dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port, if (packet->size < ETH_HEADER_LEN) { return; } - if (flow_extract(packet, 0, port->port_no, &key) && dp->drop_frags) { - dp->n_frags++; - return; - } - + flow_extract(packet, 0, port->port_no, &key); flow = dp_netdev_lookup_flow(dp, &key); if (flow) { dp_netdev_flow_used(flow, &key, packet); @@ -1364,8 +1340,6 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_run, dpif_netdev_wait, dpif_netdev_get_stats, - dpif_netdev_get_drop_frags, - dpif_netdev_set_drop_frags, dpif_netdev_port_add, dpif_netdev_port_del, dpif_netdev_port_query_by_number, diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index ec662e7d..83d56d63 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -110,16 +110,6 @@ struct dpif_class { /* Retrieves statistics for 'dpif' into 'stats'. 
*/ int (*get_stats)(const struct dpif *dpif, struct dpif_dp_stats *stats); - /* Retrieves 'dpif''s current treatment of IP fragments into '*drop_frags': - * true indicates that fragments are dropped, false indicates that - * fragments are treated in the same way as other IP packets (except that - * the L4 header cannot be read). */ - int (*get_drop_frags)(const struct dpif *dpif, bool *drop_frags); - - /* Changes 'dpif''s treatment of IP fragments to 'drop_frags', whose - * meaning is the same as for the get_drop_frags member function. */ - int (*set_drop_frags)(struct dpif *dpif, bool drop_frags); - /* Adds 'netdev' as a new port in 'dpif'. If successful, sets '*port_no' * to the new port's port number. */ int (*port_add)(struct dpif *dpif, struct netdev *netdev, diff --git a/lib/dpif.c b/lib/dpif.c index a95985a1..68a95f67 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -388,33 +388,6 @@ dpif_get_dp_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) return error; } -/* Retrieves the current IP fragment handling policy for 'dpif' into - * '*drop_frags': true indicates that fragments are dropped, false indicates - * that fragments are treated in the same way as other IP packets (except that - * the L4 header cannot be read). Returns 0 if successful, otherwise a - * positive errno value. */ -int -dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags) -{ - int error = dpif->dpif_class->get_drop_frags(dpif, drop_frags); - if (error) { - *drop_frags = false; - } - log_operation(dpif, "get_drop_frags", error); - return error; -} - -/* Changes 'dpif''s treatment of IP fragments to 'drop_frags', whose meaning is - * the same as for the get_drop_frags member function. Returns 0 if - * successful, otherwise a positive errno value. 
*/ -int -dpif_set_drop_frags(struct dpif *dpif, bool drop_frags) -{ - int error = dpif->dpif_class->set_drop_frags(dpif, drop_frags); - log_operation(dpif, "set_drop_frags", error); - return error; -} - /* Attempts to add 'netdev' as a port on 'dpif'. If successful, returns 0 and * sets '*port_nop' to the new port's port number (if 'port_nop' is non-null). * On failure, returns a positive errno value and sets '*port_nop' to diff --git a/lib/dpif.h b/lib/dpif.h index 404c05a1..223f990e 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -61,7 +61,6 @@ int dpif_delete(struct dpif *); /* Statistics for a dpif as a whole. */ struct dpif_dp_stats { - uint64_t n_frags; /* Number of dropped IP fragments. */ uint64_t n_hit; /* Number of flow table matches. */ uint64_t n_missed; /* Number of flow table misses. */ uint64_t n_lost; /* Number of misses not sent to userspace. */ @@ -69,8 +68,6 @@ struct dpif_dp_stats { }; int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *); -int dpif_get_drop_frags(const struct dpif *, bool *drop_frags); -int dpif_set_drop_frags(struct dpif *, bool drop_frags); /* Port operations. */ diff --git a/lib/flow.c b/lib/flow.c index ded98b26..06cc822a 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -148,7 +148,7 @@ parse_ipv6(struct ofpbuf *packet, struct flow *flow) flow->ipv6_dst = nh->ip6_dst; tc_flow = get_unaligned_be32(&nh->ip6_flow); - flow->nw_tos = (ntohl(tc_flow) >> 4) & IP_DSCP_MASK; + flow->tos_frag = (ntohl(tc_flow) >> 4) & IP_DSCP_MASK; flow->nw_proto = IPPROTO_NONE; while (1) { @@ -201,7 +201,10 @@ parse_ipv6(struct ofpbuf *packet, struct flow *flow) } /* We only process the first fragment. 
*/ + flow->tos_frag &= ~FLOW_FRAG_MASK; + flow->tos_frag |= FLOW_FRAG_ANY; if ((frag_hdr->ip6f_offlg & IP6F_OFF_MASK) != htons(0)) { + flow->tos_frag |= FLOW_FRAG_LATER; nexthdr = IPPROTO_FRAGMENT; break; } @@ -320,13 +323,12 @@ invalid: * - packet->l7 to just past the TCP or UDP or ICMP header, if one is * present and has a correct length, and otherwise NULL. */ -int +void flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port, struct flow *flow) { struct ofpbuf b = *packet; struct eth_header *eth; - int retval = 0; COVERAGE_INC(flow_extract); @@ -340,7 +342,7 @@ flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port, packet->l7 = NULL; if (b.size < sizeof *eth) { - return 0; + return; } /* Link layer. */ @@ -360,12 +362,21 @@ flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port, if (flow->dl_type == htons(ETH_TYPE_IP)) { const struct ip_header *nh = pull_ip(&b); if (nh) { + packet->l4 = b.data; + flow->nw_src = get_unaligned_be32(&nh->ip_src); flow->nw_dst = get_unaligned_be32(&nh->ip_dst); - flow->nw_tos = nh->ip_tos & IP_DSCP_MASK; flow->nw_proto = nh->ip_proto; - packet->l4 = b.data; - if (!IP_IS_FRAGMENT(nh->ip_frag_off)) { + + flow->tos_frag = nh->ip_tos & IP_DSCP_MASK; + if (IP_IS_FRAGMENT(nh->ip_frag_off)) { + flow->tos_frag |= FLOW_FRAG_ANY; + if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { + flow->tos_frag |= FLOW_FRAG_LATER; + } + } + + if (!(nh->ip_frag_off & htons(IP_FRAG_OFF_MASK))) { if (flow->nw_proto == IPPROTO_TCP) { parse_tcp(packet, &b, flow); } else if (flow->nw_proto == IPPROTO_UDP) { @@ -378,15 +389,11 @@ flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port, packet->l7 = b.data; } } - } else { - retval = 1; } } } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) { - - retval = parse_ipv6(&b, flow); - if (retval) { - return 0; + if (parse_ipv6(&b, flow)) { + return; } packet->l4 = b.data; @@ -419,8 +426,6 @@ flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, 
uint16_t ofp_in_port, } } } - - return retval; } /* For every bit of a field that is wildcarded in 'wildcards', sets the @@ -431,7 +436,7 @@ flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards) const flow_wildcards_t wc = wildcards->wildcards; int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3); for (i = 0; i < FLOW_N_REGS; i++) { flow->regs[i] &= wildcards->reg_masks[i]; @@ -465,9 +470,7 @@ flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards) if (wc & FWW_NW_PROTO) { flow->nw_proto = 0; } - if (wc & FWW_NW_TOS) { - flow->nw_tos = 0; - } + flow->tos_frag &= wildcards->tos_frag_mask; if (wc & FWW_ARP_SHA) { memset(flow->arp_sha, 0, sizeof flow->arp_sha); } @@ -494,6 +497,8 @@ flow_to_string(const struct flow *flow) void flow_format(struct ds *ds, const struct flow *flow) { + int frag; + ds_put_format(ds, "tunnel%#"PRIx64":in_port%04"PRIx16":tci(", ntohll(flow->tun_id), flow->in_port); if (flow->vlan_tci) { @@ -511,7 +516,7 @@ flow_format(struct ds *ds, const struct flow *flow) if (flow->dl_type == htons(ETH_TYPE_IPV6)) { ds_put_format(ds, " proto%"PRIu8" tos%"PRIu8" ipv6", - flow->nw_proto, flow->nw_tos); + flow->nw_proto, flow->tos_frag & IP_DSCP_MASK); print_ipv6_addr(ds, &flow->ipv6_src); ds_put_cstr(ds, "->"); print_ipv6_addr(ds, &flow->ipv6_dst); @@ -521,10 +526,17 @@ flow_format(struct ds *ds, const struct flow *flow) " tos%"PRIu8 " ip"IP_FMT"->"IP_FMT, flow->nw_proto, - flow->nw_tos, + flow->tos_frag & IP_DSCP_MASK, IP_ARGS(&flow->nw_src), IP_ARGS(&flow->nw_dst)); } + frag = flow->tos_frag & FLOW_FRAG_MASK; + if (frag) { + ds_put_format(ds, " frag(%s)", + frag == FLOW_FRAG_ANY ? "first" + : frag == (FLOW_FRAG_ANY | FLOW_FRAG_LATER) ? 
"later" + : "<error>"); + } if (flow->tp_src || flow->tp_dst) { ds_put_format(ds, " port%"PRIu16"->%"PRIu16, ntohs(flow->tp_src), ntohs(flow->tp_dst)); @@ -550,6 +562,8 @@ flow_print(FILE *stream, const struct flow *flow) void flow_wildcards_init_catchall(struct flow_wildcards *wc) { + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3); + wc->wildcards = FWW_ALL; wc->tun_id_mask = htonll(0); wc->nw_src_mask = htonl(0); @@ -558,6 +572,7 @@ flow_wildcards_init_catchall(struct flow_wildcards *wc) wc->ipv6_dst_mask = in6addr_any; memset(wc->reg_masks, 0, sizeof wc->reg_masks); wc->vlan_tci_mask = htons(0); + wc->tos_frag_mask = 0; memset(wc->zeros, 0, sizeof wc->zeros); } @@ -566,6 +581,8 @@ flow_wildcards_init_catchall(struct flow_wildcards *wc) void flow_wildcards_init_exact(struct flow_wildcards *wc) { + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3); + wc->wildcards = 0; wc->tun_id_mask = htonll(UINT64_MAX); wc->nw_src_mask = htonl(UINT32_MAX); @@ -574,6 +591,7 @@ flow_wildcards_init_exact(struct flow_wildcards *wc) wc->ipv6_dst_mask = in6addr_exact; memset(wc->reg_masks, 0xff, sizeof wc->reg_masks); wc->vlan_tci_mask = htons(UINT16_MAX); + wc->tos_frag_mask = UINT8_MAX; memset(wc->zeros, 0, sizeof wc->zeros); } @@ -584,7 +602,7 @@ flow_wildcards_is_exact(const struct flow_wildcards *wc) { int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3); if (wc->wildcards || wc->tun_id_mask != htonll(UINT64_MAX) @@ -592,7 +610,8 @@ flow_wildcards_is_exact(const struct flow_wildcards *wc) || wc->nw_dst_mask != htonl(UINT32_MAX) || wc->vlan_tci_mask != htons(UINT16_MAX) || !ipv6_mask_is_exact(&wc->ipv6_src_mask) - || !ipv6_mask_is_exact(&wc->ipv6_dst_mask)) { + || !ipv6_mask_is_exact(&wc->ipv6_dst_mask) + || wc->tos_frag_mask != UINT8_MAX) { return false; } @@ -612,7 +631,7 @@ flow_wildcards_is_catchall(const struct flow_wildcards *wc) { int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3); if (wc->wildcards != FWW_ALL || wc->tun_id_mask != htonll(0) 
@@ -620,7 +639,8 @@ flow_wildcards_is_catchall(const struct flow_wildcards *wc) || wc->nw_dst_mask != htonl(0) || wc->vlan_tci_mask != htons(0) || !ipv6_mask_is_any(&wc->ipv6_src_mask) - || !ipv6_mask_is_any(&wc->ipv6_dst_mask)) { + || !ipv6_mask_is_any(&wc->ipv6_dst_mask) + || wc->tos_frag_mask != 0) { return false; } @@ -982,29 +1002,38 @@ flow_compose(struct ofpbuf *b, const struct flow *flow) b->l3 = ip = ofpbuf_put_zeros(b, sizeof *ip); ip->ip_ihl_ver = IP_IHL_VER(5, 4); - ip->ip_tos = flow->nw_tos; + ip->ip_tos = flow->tos_frag & IP_DSCP_MASK; ip->ip_proto = flow->nw_proto; ip->ip_src = flow->nw_src; ip->ip_dst = flow->nw_dst; - if (flow->nw_proto == IPPROTO_TCP) { - struct tcp_header *tcp; - - b->l4 = tcp = ofpbuf_put_zeros(b, sizeof *tcp); - tcp->tcp_src = flow->tp_src; - tcp->tcp_dst = flow->tp_dst; - } else if (flow->nw_proto == IPPROTO_UDP) { - struct udp_header *udp; - - b->l4 = udp = ofpbuf_put_zeros(b, sizeof *udp); - udp->udp_src = flow->tp_src; - udp->udp_dst = flow->tp_dst; - } else if (flow->nw_proto == IPPROTO_ICMP) { - struct icmp_header *icmp; - - b->l4 = icmp = ofpbuf_put_zeros(b, sizeof *icmp); - icmp->icmp_type = ntohs(flow->tp_src); - icmp->icmp_code = ntohs(flow->tp_dst); + if (flow->tos_frag & FLOW_FRAG_ANY) { + ip->ip_frag_off |= htons(IP_MORE_FRAGMENTS); + if (flow->tos_frag & FLOW_FRAG_LATER) { + ip->ip_frag_off |= htons(100); + } + } + if (!(flow->tos_frag & FLOW_FRAG_ANY) + || !(flow->tos_frag & FLOW_FRAG_LATER)) { + if (flow->nw_proto == IPPROTO_TCP) { + struct tcp_header *tcp; + + b->l4 = tcp = ofpbuf_put_zeros(b, sizeof *tcp); + tcp->tcp_src = flow->tp_src; + tcp->tcp_dst = flow->tp_dst; + } else if (flow->nw_proto == IPPROTO_UDP) { + struct udp_header *udp; + + b->l4 = udp = ofpbuf_put_zeros(b, sizeof *udp); + udp->udp_src = flow->tp_src; + udp->udp_dst = flow->tp_dst; + } else if (flow->nw_proto == IPPROTO_ICMP) { + struct icmp_header *icmp; + + b->l4 = icmp = ofpbuf_put_zeros(b, sizeof *icmp); + icmp->icmp_type = 
ntohs(flow->tp_src); + icmp->icmp_code = ntohs(flow->tp_dst); + } } } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) { /* XXX */ diff --git a/lib/flow.h b/lib/flow.h index 736890a3..e9da2ad4 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -35,7 +35,7 @@ struct ofpbuf; /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion * failures in places which likely need to be updated. */ -#define FLOW_WC_SEQ 2 +#define FLOW_WC_SEQ 3 #define FLOW_N_REGS 5 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS); @@ -44,6 +44,14 @@ BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS); * type, that is, pure 802.2 frames. */ #define FLOW_DL_TYPE_NONE 0x5ff +/* Fragment bits, used for IPv4 and IPv6, always zero for non-IP flows. */ +#define FLOW_FRAG_ANY (1 << 0) /* Set for any IP fragment. */ +#define FLOW_FRAG_LATER (1 << 1) /* Set for IP fragment with nonzero offset. */ +#define FLOW_FRAG_MASK (FLOW_FRAG_ANY | FLOW_FRAG_LATER) + +BUILD_ASSERT_DECL(FLOW_FRAG_ANY == NX_IP_FRAG_ANY); +BUILD_ASSERT_DECL(FLOW_FRAG_LATER == NX_IP_FRAG_LATER); + struct flow { ovs_be64 tun_id; /* Encapsulating tunnel ID. */ uint32_t regs[FLOW_N_REGS]; /* Registers. */ @@ -57,7 +65,7 @@ struct flow { uint8_t dl_src[6]; /* Ethernet source address. */ uint8_t dl_dst[6]; /* Ethernet destination address. */ uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */ - uint8_t nw_tos; /* IP ToS (DSCP field, 6 bits). */ + uint8_t tos_frag; /* IP ToS in top bits, FLOW_FRAG_* in low. */ uint8_t arp_sha[6]; /* ARP/ND source hardware address. */ uint8_t arp_tha[6]; /* ARP/ND target hardware address. */ struct in6_addr ipv6_src; /* IPv6 source address. */ @@ -74,10 +82,10 @@ BUILD_ASSERT_DECL(sizeof(((struct flow *)0)->nd_target) == 16); BUILD_ASSERT_DECL(sizeof(struct flow) == FLOW_SIG_SIZE + FLOW_PAD_SIZE); /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. 
*/ -BUILD_ASSERT_DECL(FLOW_SIG_SIZE == 120 && FLOW_WC_SEQ == 2); +BUILD_ASSERT_DECL(FLOW_SIG_SIZE == 120 && FLOW_WC_SEQ == 3); -int flow_extract(struct ofpbuf *, ovs_be64 tun_id, uint16_t in_port, - struct flow *); +void flow_extract(struct ofpbuf *, ovs_be64 tun_id, uint16_t in_port, + struct flow *); void flow_zero_wildcards(struct flow *, const struct flow_wildcards *); char *flow_to_string(const struct flow *); @@ -124,18 +132,16 @@ typedef unsigned int OVS_BITWISE flow_wildcards_t; #define FWW_NW_PROTO ((OVS_FORCE flow_wildcards_t) (1 << 5)) #define FWW_TP_SRC ((OVS_FORCE flow_wildcards_t) (1 << 6)) #define FWW_TP_DST ((OVS_FORCE flow_wildcards_t) (1 << 7)) -/* Same meanings as corresponding OFPFW_* bits, but differ in value. */ -#define FWW_NW_TOS ((OVS_FORCE flow_wildcards_t) (1 << 1)) /* No corresponding OFPFW_* bits. */ -#define FWW_ETH_MCAST ((OVS_FORCE flow_wildcards_t) (1 << 8)) +#define FWW_ETH_MCAST ((OVS_FORCE flow_wildcards_t) (1 << 1)) /* multicast bit only */ -#define FWW_ARP_SHA ((OVS_FORCE flow_wildcards_t) (1 << 9)) -#define FWW_ARP_THA ((OVS_FORCE flow_wildcards_t) (1 << 10)) -#define FWW_ND_TARGET ((OVS_FORCE flow_wildcards_t) (1 << 11)) -#define FWW_ALL ((OVS_FORCE flow_wildcards_t) (((1 << 12)) - 1)) +#define FWW_ARP_SHA ((OVS_FORCE flow_wildcards_t) (1 << 8)) +#define FWW_ARP_THA ((OVS_FORCE flow_wildcards_t) (1 << 9)) +#define FWW_ND_TARGET ((OVS_FORCE flow_wildcards_t) (1 << 10)) +#define FWW_ALL ((OVS_FORCE flow_wildcards_t) (((1 << 11)) - 1)) /* Remember to update FLOW_WC_SEQ when adding or removing FWW_*. */ -BUILD_ASSERT_DECL(FWW_ALL == ((1 << 12) - 1) && FLOW_WC_SEQ == 2); +BUILD_ASSERT_DECL(FWW_ALL == ((1 << 11) - 1) && FLOW_WC_SEQ == 3); /* Information on wildcards for a flow, as a supplement to "struct flow". * @@ -150,11 +156,12 @@ struct flow_wildcards { struct in6_addr ipv6_src_mask; /* 1-bit in each significant ipv6_src bit. */ struct in6_addr ipv6_dst_mask; /* 1-bit in each significant ipv6_dst bit. 
*/ ovs_be16 vlan_tci_mask; /* 1-bit in each significant vlan_tci bit. */ - uint8_t zeros[6]; /* Padding field set to zero. */ + uint8_t tos_frag_mask; /* 1-bit in each significant tos_frag bit. */ + uint8_t zeros[5]; /* Padding field set to zero. */ }; /* Remember to update FLOW_WC_SEQ when updating struct flow_wildcards. */ -BUILD_ASSERT_DECL(sizeof(struct flow_wildcards) == 80 && FLOW_WC_SEQ == 2); +BUILD_ASSERT_DECL(sizeof(struct flow_wildcards) == 80 && FLOW_WC_SEQ == 3); void flow_wildcards_init_catchall(struct flow_wildcards *); void flow_wildcards_init_exact(struct flow_wildcards *); diff --git a/lib/meta-flow.c b/lib/meta-flow.c index d5226f02..0f00996d 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -182,10 +182,17 @@ static const struct mf_field mf_fields[MFF_N_IDS] = { }, { MFF_IP_TOS, "nw_tos", NULL, MF_FIELD_SIZES(u8), - MFM_NONE, FWW_NW_TOS, + MFM_NONE, 0, MFS_DECIMAL, MFP_IP_ANY, NXM_OF_IP_TOS, + }, { + MFF_IP_FRAG, "ip_frag", NULL, + 1, 2, + MFM_FULLY, 0, + MFS_FRAG, + MFP_IP_ANY, + NXM_NX_IP_FRAG, }, { @@ -347,7 +354,6 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc) case MFF_ETH_SRC: case MFF_ETH_TYPE: case MFF_IP_PROTO: - case MFF_IP_TOS: case MFF_ARP_OP: case MFF_ARP_SHA: case MFF_ARP_THA: @@ -407,6 +413,11 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc) case MFF_IPV6_DST: return ipv6_mask_is_any(&wc->ipv6_dst_mask); + case MFF_IP_TOS: + return !(wc->tos_frag_mask & IP_DSCP_MASK); + case MFF_IP_FRAG: + return !(wc->tos_frag_mask & FLOW_FRAG_MASK); + case MFF_ARP_SPA: return !wc->nw_src_mask; case MFF_ARP_TPA: @@ -433,7 +444,6 @@ mf_get_mask(const struct mf_field *mf, const struct flow_wildcards *wc, case MFF_ETH_SRC: case MFF_ETH_TYPE: case MFF_IP_PROTO: - case MFF_IP_TOS: case MFF_ARP_OP: case MFF_ARP_SHA: case MFF_ARP_THA: @@ -504,6 +514,13 @@ mf_get_mask(const struct mf_field *mf, const struct flow_wildcards *wc, mask->ipv6 = wc->ipv6_dst_mask; break; + case MFF_IP_TOS: + 
mask->u8 = wc->tos_frag_mask & IP_DSCP_MASK; + break; + case MFF_IP_FRAG: + mask->u8 = wc->tos_frag_mask & FLOW_FRAG_MASK; + break; + case MFF_ARP_SPA: mask->be32 = wc->nw_src_mask; break; @@ -666,7 +683,9 @@ mf_is_value_valid(const struct mf_field *mf, const union mf_value *value) return true; case MFF_IP_TOS: - return !(value->u8 & 0x03); + return !(value->u8 & ~IP_DSCP_MASK); + case MFF_IP_FRAG: + return !(value->u8 & ~FLOW_FRAG_MASK); case MFF_ARP_OP: return !(value->be16 & htons(0xff00)); @@ -764,7 +783,11 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow, break; case MFF_IP_TOS: - value->u8 = flow->nw_tos; + value->u8 = flow->tos_frag & IP_DSCP_MASK; + break; + + case MFF_IP_FRAG: + value->u8 = flow->tos_frag & FLOW_FRAG_MASK; break; case MFF_ARP_OP: @@ -910,6 +933,10 @@ mf_set_value(const struct mf_field *mf, cls_rule_set_nw_tos(rule, value->u8); break; + case MFF_IP_FRAG: + cls_rule_set_frag(rule, value->u8); + break; + case MFF_ARP_OP: cls_rule_set_nw_proto(rule, ntohs(value->be16)); break; @@ -1065,8 +1092,13 @@ mf_set_wild(const struct mf_field *mf, struct cls_rule *rule) break; case MFF_IP_TOS: - rule->wc.wildcards |= FWW_NW_TOS; - rule->flow.nw_tos = 0; + rule->wc.tos_frag_mask &= ~IP_DSCP_MASK; /* 0-bits are wildcarded. */ + rule->flow.tos_frag &= ~IP_DSCP_MASK; + break; + + case MFF_IP_FRAG: + rule->wc.tos_frag_mask &= ~FLOW_FRAG_MASK; /* 0-bits are wildcarded. */ + rule->flow.tos_frag &= ~FLOW_FRAG_MASK; break; case MFF_ARP_OP: @@ -1209,6 +1241,10 @@ mf_set(const struct mf_field *mf, cls_rule_set_ipv6_dst_masked(rule, &value->ipv6, &mask->ipv6); break; + case MFF_IP_FRAG: + cls_rule_set_frag_masked(rule, value->u8, mask->u8); + break; + case MFF_ARP_SPA: cls_rule_set_nw_src_masked(rule, value->be32, mask->be32); break; @@ -1361,6 +1397,10 @@ mf_random_value(const struct mf_field *mf, union mf_value *value) value->u8 &= ~0x03; break; + case MFF_IP_FRAG: + value->u8 &= FLOW_FRAG_MASK; + break; + case MFF_ARP_OP: value->be16 &= htons(0xff); break; @@ -1524,6 +1564,49 @@ 
mf_from_ofp_port_string(const struct mf_field *mf, const char *s, } } +struct frag_handling { + const char *name; + uint8_t mask; + uint8_t value; +}; + +static const struct frag_handling all_frags[] = { +#define A FLOW_FRAG_ANY +#define L FLOW_FRAG_LATER + /* name mask value */ + + { "no", A|L, 0 }, + { "first", A|L, A }, + { "later", A|L, A|L }, + + { "no", A, 0 }, + { "yes", A, A }, + + { "not_later", L, 0 }, + { "later", L, L }, +#undef A +#undef L +}; + +static char * +mf_from_frag_string(const char *s, uint8_t *valuep, uint8_t *maskp) +{ + const struct frag_handling *h; + + for (h = all_frags; h < &all_frags[ARRAY_SIZE(all_frags)]; h++) { + if (!strcasecmp(s, h->name)) { + /* We force the upper bits of the mask on to make mf_parse_value() + * happy (otherwise it will never think it's an exact match.) */ + *maskp = h->mask | ~FLOW_FRAG_MASK; + *valuep = h->value; + return NULL; + } + } + + return xasprintf("%s: unknown fragment type (valid types are \"no\", " + "\"yes\", \"first\", \"later\", \"not_later\")", s); +} + /* Parses 's', a string value for field 'mf', into 'value' and 'mask'. Returns * NULL if successful, otherwise a malloc()'d string describing the error. 
*/ char * @@ -1553,6 +1636,9 @@ mf_parse(const struct mf_field *mf, const char *s, case MFS_OFP_PORT: return mf_from_ofp_port_string(mf, s, &value->be16, &mask->be16); + + case MFS_FRAG: + return mf_from_frag_string(s, &value->u8, &mask->u8); } NOT_REACHED(); } @@ -1610,6 +1696,26 @@ mf_format_integer_string(const struct mf_field *mf, const uint8_t *valuep, } } +static void +mf_format_frag_string(const uint8_t *valuep, const uint8_t *maskp, + struct ds *s) +{ + const struct frag_handling *h; + uint8_t value = *valuep; + uint8_t mask = maskp ? *maskp : UINT8_MAX; /* NULL: exact match. */ + + value &= mask; + mask &= FLOW_FRAG_MASK; + + for (h = all_frags; h < &all_frags[ARRAY_SIZE(all_frags)]; h++) { + if (value == h->value && mask == h->mask) { + ds_put_cstr(s, h->name); + return; + } + } + ds_put_cstr(s, "<error>"); +} + /* Appends to 's' a string representation of field 'mf' whose value is in * 'value' and 'mask'. 'mask' may be NULL to indicate an exact match. */ void @@ -1654,6 +1760,10 @@ mf_format(const struct mf_field *mf, print_ipv6_masked(s, &value->ipv6, mask ? &mask->ipv6 : NULL); break; + case MFS_FRAG: + mf_format_frag_string(&value->u8, mask ? &mask->u8 : NULL, s); + break; + default: NOT_REACHED(); } diff --git a/lib/meta-flow.h b/lib/meta-flow.h index f2508d52..696b8ca6 100644 --- a/lib/meta-flow.h +++ b/lib/meta-flow.h @@ -68,6 +68,7 @@ enum mf_field_id { MFF_IP_PROTO, /* u8 (used for IPv4 or IPv6) */ MFF_IP_TOS, /* u8 (used for IPv4 or IPv6) */ + MFF_IP_FRAG, /* u8 (used for IPv4 or IPv6) */ MFF_ARP_OP, /* be16 */ MFF_ARP_SPA, /* be32 */ @@ -142,7 +143,8 @@ enum mf_string { MFS_ETHERNET, MFS_IPV4, MFS_IPV6, - MFS_OFP_PORT /* An OpenFlow port number or name. */ + MFS_OFP_PORT, /* An OpenFlow port number or name. 
*/ + MFS_FRAG /* no, yes, first, later, not_later */ }; struct mf_field { diff --git a/lib/nx-match.c b/lib/nx-match.c index a850be2a..84a14de3 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -265,6 +265,24 @@ nxm_put_8(struct ofpbuf *b, uint32_t header, uint8_t value) ofpbuf_put(b, &value, sizeof value); } +static void +nxm_put_8m(struct ofpbuf *b, uint32_t header, uint8_t value, uint8_t mask) +{ + switch (mask) { + case 0: + break; + + case UINT8_MAX: + nxm_put_8(b, header, value); + break; + + default: + nxm_put_header(b, NXM_MAKE_WILD_HEADER(header)); + ofpbuf_put(b, &value, sizeof value); + ofpbuf_put(b, &mask, sizeof mask); + } +} + static void nxm_put_16(struct ofpbuf *b, uint32_t header, ovs_be16 value) { @@ -403,6 +421,32 @@ nxm_put_ipv6(struct ofpbuf *b, uint32_t header, } } +static void +nxm_put_tos_frag(struct ofpbuf *b, const struct cls_rule *cr) +{ + uint8_t tos_frag = cr->flow.tos_frag; + uint8_t tos_frag_mask = cr->wc.tos_frag_mask; + + if (tos_frag_mask & IP_DSCP_MASK) { + nxm_put_8(b, NXM_OF_IP_TOS, tos_frag & IP_DSCP_MASK); + } + + switch (tos_frag_mask & FLOW_FRAG_MASK) { + case 0: + break; + + case FLOW_FRAG_MASK: + /* Output it as exact-match even though only the low 2 bits matter. */ + nxm_put_8(b, NXM_NX_IP_FRAG, tos_frag & FLOW_FRAG_MASK); + break; + + default: + nxm_put_8m(b, NXM_NX_IP_FRAG, tos_frag & FLOW_FRAG_MASK, + tos_frag_mask & FLOW_FRAG_MASK); + break; + } +} + /* Appends to 'b' the nx_match format that expresses 'cr' (except for * 'cr->priority', because priority is not part of nx_match), plus enough * zero bytes to pad the nx_match out to a multiple of 8. @@ -422,7 +466,7 @@ nx_put_match(struct ofpbuf *b, const struct cls_rule *cr) int match_len; int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3); /* Metadata. */ if (!(wc & FWW_IN_PORT)) { @@ -446,9 +490,7 @@ nx_put_match(struct ofpbuf *b, const struct cls_rule *cr) /* L3. 
*/ if (!(wc & FWW_DL_TYPE) && flow->dl_type == htons(ETH_TYPE_IP)) { /* IP. */ - if (!(wc & FWW_NW_TOS)) { - nxm_put_8(b, NXM_OF_IP_TOS, flow->nw_tos & 0xfc); - } + nxm_put_tos_frag(b, cr); nxm_put_32m(b, NXM_OF_IP_SRC, flow->nw_src, cr->wc.nw_src_mask); nxm_put_32m(b, NXM_OF_IP_DST, flow->nw_dst, cr->wc.nw_dst_mask); @@ -488,10 +530,7 @@ nx_put_match(struct ofpbuf *b, const struct cls_rule *cr) } } else if (!(wc & FWW_DL_TYPE) && flow->dl_type == htons(ETH_TYPE_IPV6)) { /* IPv6. */ - - if (!(wc & FWW_NW_TOS)) { - nxm_put_8(b, NXM_OF_IP_TOS, flow->nw_tos & 0xfc); - } + nxm_put_tos_frag(b, cr); nxm_put_ipv6(b, NXM_NX_IPV6_SRC, &flow->ipv6_src, &cr->wc.ipv6_src_mask); nxm_put_ipv6(b, NXM_NX_IPV6_DST, &flow->ipv6_dst, @@ -1000,7 +1039,10 @@ nxm_read_field(const struct nxm_field *src, const struct flow *flow) return ntohs(flow->vlan_tci); case NFI_NXM_OF_IP_TOS: - return flow->nw_tos; + return flow->tos_frag & IP_DSCP_MASK; + + case NFI_NXM_NX_IP_FRAG: + return flow->tos_frag & FLOW_FRAG_MASK; case NFI_NXM_OF_IP_PROTO: case NFI_NXM_OF_ARP_OP: @@ -1075,6 +1117,7 @@ nxm_read_field(const struct nxm_field *src, const struct flow *flow) case NFI_NXM_NX_IPV6_SRC_W: case NFI_NXM_NX_IPV6_DST: case NFI_NXM_NX_IPV6_DST_W: + case NFI_NXM_NX_IP_FRAG_W: case NFI_NXM_NX_ND_TARGET: case N_NXM_FIELDS: NOT_REACHED(); @@ -1146,7 +1189,13 @@ nxm_write_field(const struct nxm_field *dst, struct flow *flow, #endif case NFI_NXM_OF_IP_TOS: - flow->nw_tos = new_value & IP_DSCP_MASK; + flow->tos_frag &= ~IP_DSCP_MASK; + flow->tos_frag |= new_value & IP_DSCP_MASK; + break; + + case NFI_NXM_NX_IP_FRAG: + flow->tos_frag &= ~FLOW_FRAG_MASK; + flow->tos_frag |= new_value & FLOW_FRAG_MASK; break; case NFI_NXM_OF_IP_SRC: @@ -1188,6 +1237,7 @@ nxm_write_field(const struct nxm_field *dst, struct flow *flow, case NFI_NXM_NX_IPV6_SRC_W: case NFI_NXM_NX_IPV6_DST: case NFI_NXM_NX_IPV6_DST_W: + case NFI_NXM_NX_IP_FRAG_W: case NFI_NXM_NX_ICMPV6_TYPE: case NFI_NXM_NX_ICMPV6_CODE: case NFI_NXM_NX_ND_TARGET: 
diff --git a/lib/nx-match.def b/lib/nx-match.def index 3f2882c1..5c0a2383 100644 --- a/lib/nx-match.def +++ b/lib/nx-match.def @@ -30,6 +30,7 @@ DEFINE_FIELD (OF_IP_TOS, MFF_IP_TOS, true) DEFINE_FIELD (OF_IP_PROTO, MFF_IP_PROTO, false) DEFINE_FIELD_M(OF_IP_SRC, MFF_IPV4_SRC, true) DEFINE_FIELD_M(OF_IP_DST, MFF_IPV4_DST, true) +DEFINE_FIELD_M(NX_IP_FRAG, MFF_IP_FRAG, false) DEFINE_FIELD (OF_TCP_SRC, MFF_TCP_SRC, true) DEFINE_FIELD (OF_TCP_DST, MFF_TCP_DST, true) DEFINE_FIELD (OF_UDP_SRC, MFF_UDP_SRC, true) diff --git a/lib/odp-util.c b/lib/odp-util.c index db3535db..33672c8c 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -299,6 +299,22 @@ format_generic_odp_key(const struct nlattr *a, struct ds *ds) } } +static const char * +ovs_frag_type_to_string(enum ovs_frag_type type) +{ + switch (type) { + case OVS_FRAG_TYPE_NONE: + return "no"; + case OVS_FRAG_TYPE_FIRST: + return "first"; + case OVS_FRAG_TYPE_LATER: + return "later"; + case __OVS_FRAG_TYPE_MAX: + default: + return ""; + } +} + static void format_odp_key_attr(const struct nlattr *a, struct ds *ds) { @@ -356,10 +372,11 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) case OVS_KEY_ATTR_IPV4: ipv4_key = nl_attr_get(a); ds_put_format(ds, "ipv4(src="IP_FMT",dst="IP_FMT"," - "proto=%"PRId8",tos=%"PRIu8")", + "proto=%"PRId8",tos=%"PRIu8",frag=%s)", IP_ARGS(&ipv4_key->ipv4_src), IP_ARGS(&ipv4_key->ipv4_dst), - ipv4_key->ipv4_proto, ipv4_key->ipv4_tos); + ipv4_key->ipv4_proto, ipv4_key->ipv4_tos, + ovs_frag_type_to_string(ipv4_key->ipv4_frag)); break; case OVS_KEY_ATTR_IPV6: { @@ -370,9 +387,11 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) inet_ntop(AF_INET6, ipv6_key->ipv6_src, src_str, sizeof src_str); inet_ntop(AF_INET6, ipv6_key->ipv6_dst, dst_str, sizeof dst_str); - ds_put_format(ds, "ipv6(src=%s,dst=%s,proto=%"PRId8",tos=%"PRIu8")", + ds_put_format(ds, "ipv6(src=%s,dst=%s,proto=%"PRId8",tos=%"PRIu8"," + "frag=%s)", src_str, dst_str, ipv6_key->ipv6_proto, - ipv6_key->ipv6_tos); + 
ipv6_key->ipv6_tos, + ovs_frag_type_to_string(ipv6_key->ipv6_frag)); break; } @@ -480,6 +499,21 @@ put_nd_key(int n, const char *nd_target_s, return n; } +static bool +ovs_frag_type_from_string(const char *s, enum ovs_frag_type *type) +{ + if (!strcasecmp(s, "no")) { + *type = OVS_FRAG_TYPE_NONE; + } else if (!strcasecmp(s, "first")) { + *type = OVS_FRAG_TYPE_FIRST; + } else if (!strcasecmp(s, "later")) { + *type = OVS_FRAG_TYPE_LATER; + } else { + return false; + } + return true; +} + static int parse_odp_key_attr(const char *s, struct ofpbuf *key) { @@ -564,13 +598,16 @@ parse_odp_key_attr(const char *s, struct ofpbuf *key) ovs_be32 ipv4_dst; int ipv4_proto; int ipv4_tos; + char frag[8]; + enum ovs_frag_type ipv4_frag; int n = -1; if (sscanf(s, "ipv4(src="IP_SCAN_FMT",dst="IP_SCAN_FMT"," - "proto=%i,tos=%i)%n", - IP_SCAN_ARGS(&ipv4_src), - IP_SCAN_ARGS(&ipv4_dst), &ipv4_proto, &ipv4_tos, &n) > 0 - && n > 0) { + "proto=%i,tos=%i,frag=%7[a-z])%n", + IP_SCAN_ARGS(&ipv4_src), IP_SCAN_ARGS(&ipv4_dst), + &ipv4_proto, &ipv4_tos, frag, &n) > 0 + && n > 0 + && ovs_frag_type_from_string(frag, &ipv4_frag)) { struct ovs_key_ipv4 ipv4_key; memset(&ipv4_key, 0, sizeof ipv4_key); @@ -578,6 +615,7 @@ parse_odp_key_attr(const char *s, struct ofpbuf *key) ipv4_key.ipv4_dst = ipv4_dst; ipv4_key.ipv4_proto = ipv4_proto; ipv4_key.ipv4_tos = ipv4_tos; + ipv4_key.ipv4_frag = ipv4_frag; nl_msg_put_unspec(key, OVS_KEY_ATTR_IPV4, &ipv4_key, sizeof ipv4_key); return n; @@ -589,12 +627,16 @@ parse_odp_key_attr(const char *s, struct ofpbuf *key) char ipv6_dst_s[IPV6_SCAN_LEN + 1]; int ipv6_proto; int ipv6_tos; + char frag[8]; + enum ovs_frag_type ipv6_frag; int n = -1; if (sscanf(s, "ipv6(src="IPV6_SCAN_FMT",dst="IPV6_SCAN_FMT"," - "proto=%i,tos=%i)%n", + "proto=%i,tos=%i,frag=%7[a-z])%n", ipv6_src_s, ipv6_dst_s, - &ipv6_proto, &ipv6_tos, &n) > 0 && n > 0) { + &ipv6_proto, &ipv6_tos, frag, &n) > 0 + && n > 0 + && ovs_frag_type_from_string(frag, &ipv6_frag)) { struct ovs_key_ipv6 ipv6_key; 
memset(&ipv6_key, 0, sizeof ipv6_key); @@ -604,6 +646,7 @@ parse_odp_key_attr(const char *s, struct ofpbuf *key) } ipv6_key.ipv6_proto = ipv6_proto; ipv6_key.ipv6_tos = ipv6_tos; + ipv6_key.ipv6_frag = ipv6_frag; nl_msg_put_unspec(key, OVS_KEY_ATTR_IPV6, &ipv6_key, sizeof ipv6_key); return n; @@ -767,6 +810,14 @@ odp_flow_key_from_string(const char *s, struct ofpbuf *key) return 0; } +static uint8_t +tos_frag_to_odp_frag(uint8_t tos_frag) +{ + return (tos_frag & FLOW_FRAG_LATER ? OVS_FRAG_TYPE_LATER + : tos_frag & FLOW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST + : OVS_FRAG_TYPE_NONE); +} + /* Appends a representation of 'flow' as OVS_KEY_ATTR_* attributes to 'buf'. */ void odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow) @@ -811,7 +862,8 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow) ipv4_key->ipv4_src = flow->nw_src; ipv4_key->ipv4_dst = flow->nw_dst; ipv4_key->ipv4_proto = flow->nw_proto; - ipv4_key->ipv4_tos = flow->nw_tos; + ipv4_key->ipv4_tos = flow->tos_frag & IP_DSCP_MASK; + ipv4_key->ipv4_frag = tos_frag_to_odp_frag(flow->tos_frag); } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) { struct ovs_key_ipv6 *ipv6_key; @@ -821,7 +873,8 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow) memcpy(ipv6_key->ipv6_src, &flow->ipv6_src, sizeof ipv6_key->ipv6_src); memcpy(ipv6_key->ipv6_dst, &flow->ipv6_dst, sizeof ipv6_key->ipv6_dst); ipv6_key->ipv6_proto = flow->nw_proto; - ipv6_key->ipv6_tos = flow->nw_tos; + ipv6_key->ipv6_tos = flow->tos_frag & IP_DSCP_MASK; + ipv6_key->ipv6_frag = tos_frag_to_odp_frag(flow->tos_frag); } else if (flow->dl_type == htons(ETH_TYPE_ARP)) { struct ovs_key_arp *arp_key; @@ -835,8 +888,9 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow) memcpy(arp_key->arp_tha, flow->arp_tha, ETH_ADDR_LEN); } - if (flow->dl_type == htons(ETH_TYPE_IP) - || flow->dl_type == htons(ETH_TYPE_IPV6)) { + if ((flow->dl_type == htons(ETH_TYPE_IP) + || flow->dl_type == htons(ETH_TYPE_IPV6)) + 
&& !(flow->tos_frag & FLOW_FRAG_LATER)) { if (flow->nw_proto == IPPROTO_TCP) { struct ovs_key_tcp *tcp_key; @@ -884,6 +938,23 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow) } } +static bool +odp_to_tos_frag(uint8_t odp_tos, uint8_t odp_frag, struct flow *flow) +{ + if (odp_tos & ~IP_DSCP_MASK || odp_frag > OVS_FRAG_TYPE_LATER) { + return false; + } + + flow->tos_frag = odp_tos; + if (odp_frag != OVS_FRAG_TYPE_NONE) { + flow->tos_frag |= FLOW_FRAG_ANY; + if (odp_frag == OVS_FRAG_TYPE_LATER) { + flow->tos_frag |= FLOW_FRAG_LATER; + } + } + return true; +} + /* Converts the 'key_len' bytes of OVS_KEY_ATTR_* attributes in 'key' to a flow * structure in 'flow'. Returns 0 if successful, otherwise EINVAL. */ int @@ -968,8 +1039,8 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, flow->nw_src = ipv4_key->ipv4_src; flow->nw_dst = ipv4_key->ipv4_dst; flow->nw_proto = ipv4_key->ipv4_proto; - flow->nw_tos = ipv4_key->ipv4_tos; - if (flow->nw_tos & IP_ECN_MASK) { + if (!odp_to_tos_frag(ipv4_key->ipv4_tos, ipv4_key->ipv4_frag, + flow)) { return EINVAL; } break; @@ -982,8 +1053,8 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, memcpy(&flow->ipv6_src, ipv6_key->ipv6_src, sizeof flow->ipv6_src); memcpy(&flow->ipv6_dst, ipv6_key->ipv6_dst, sizeof flow->ipv6_dst); flow->nw_proto = ipv6_key->ipv6_proto; - flow->nw_tos = ipv6_key->ipv6_tos; - if (flow->nw_tos & IP_ECN_MASK) { + if (!odp_to_tos_frag(ipv6_key->ipv6_tos, ipv6_key->ipv6_frag, + flow)) { return EINVAL; } break; @@ -1083,6 +1154,9 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, return 0; case OVS_KEY_ATTR_IPV4: + if (flow->tos_frag & FLOW_FRAG_LATER) { + return 0; + } if (flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_UDP || flow->nw_proto == IPPROTO_ICMP) { @@ -1091,6 +1165,9 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, return 0; case OVS_KEY_ATTR_IPV6: + if (flow->tos_frag & FLOW_FRAG_LATER) { + return 0; + } if 
(flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_UDP || flow->nw_proto == IPPROTO_ICMPV6) { @@ -1100,7 +1177,8 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, case OVS_KEY_ATTR_ICMPV6: if (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT) - || flow->tp_src == htons(ND_NEIGHBOR_ADVERT)) { + || flow->tp_src == htons(ND_NEIGHBOR_ADVERT) + || flow->tos_frag & FLOW_FRAG_LATER) { return EINVAL; } return 0; @@ -1108,8 +1186,13 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, case OVS_KEY_ATTR_TCP: case OVS_KEY_ATTR_UDP: case OVS_KEY_ATTR_ICMP: - case OVS_KEY_ATTR_ARP: case OVS_KEY_ATTR_ND: + if (flow->tos_frag & FLOW_FRAG_LATER) { + return EINVAL; + } + return 0; + + case OVS_KEY_ATTR_ARP: return 0; case __OVS_KEY_ATTR_MAX: diff --git a/lib/ofp-print.c b/lib/ofp-print.c index 64712b52..7bdbc220 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -604,21 +604,9 @@ ofp_print_switch_config(struct ds *string, const struct ofp_switch_config *osc) flags = ntohs(osc->flags); - ds_put_cstr(string, " frags="); - switch (flags & OFPC_FRAG_MASK) { - case OFPC_FRAG_NORMAL: - ds_put_cstr(string, "normal"); - flags &= ~OFPC_FRAG_MASK; - break; - case OFPC_FRAG_DROP: - ds_put_cstr(string, "drop"); - flags &= ~OFPC_FRAG_MASK; - break; - case OFPC_FRAG_REASM: - ds_put_cstr(string, "reassemble"); - flags &= ~OFPC_FRAG_MASK; - break; - } + ds_put_format(string, " frags=%s", ofputil_frag_handling_to_string(flags)); + flags &= ~OFPC_FRAG_MASK; + if (flags) { ds_put_format(string, " ***unknown flags 0x%04"PRIx16"***", flags); } diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 09301962..328d0df8 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -99,7 +99,7 @@ static const flow_wildcards_t WC_INVARIANTS = 0 void ofputil_wildcard_from_openflow(uint32_t ofpfw, struct flow_wildcards *wc) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3); /* Initialize most of rule->wc. 
*/ flow_wildcards_init_catchall(wc); @@ -108,9 +108,10 @@ ofputil_wildcard_from_openflow(uint32_t ofpfw, struct flow_wildcards *wc) /* Wildcard fields that aren't defined by ofp_match or tun_id. */ wc->wildcards |= (FWW_ARP_SHA | FWW_ARP_THA | FWW_ND_TARGET); - if (ofpfw & OFPFW_NW_TOS) { - wc->wildcards |= FWW_NW_TOS; + if (!(ofpfw & OFPFW_NW_TOS)) { + wc->tos_frag_mask |= IP_DSCP_MASK; } + wc->nw_src_mask = ofputil_wcbits_to_netmask(ofpfw >> OFPFW_NW_SRC_SHIFT); wc->nw_dst_mask = ofputil_wcbits_to_netmask(ofpfw >> OFPFW_NW_DST_SHIFT); @@ -151,7 +152,7 @@ ofputil_cls_rule_from_match(const struct ofp_match *match, rule->flow.tp_dst = match->tp_dst; memcpy(rule->flow.dl_src, match->dl_src, ETH_ADDR_LEN); memcpy(rule->flow.dl_dst, match->dl_dst, ETH_ADDR_LEN); - rule->flow.nw_tos = match->nw_tos; + rule->flow.tos_frag = match->nw_tos & IP_DSCP_MASK; rule->flow.nw_proto = match->nw_proto; /* Translate VLANs. */ @@ -190,7 +191,7 @@ ofputil_cls_rule_to_match(const struct cls_rule *rule, struct ofp_match *match) ofpfw = (OVS_FORCE uint32_t) (wc->wildcards & WC_INVARIANTS); ofpfw |= ofputil_netmask_to_wcbits(wc->nw_src_mask) << OFPFW_NW_SRC_SHIFT; ofpfw |= ofputil_netmask_to_wcbits(wc->nw_dst_mask) << OFPFW_NW_DST_SHIFT; - if (wc->wildcards & FWW_NW_TOS) { + if (!(wc->tos_frag_mask & IP_DSCP_MASK)) { ofpfw |= OFPFW_NW_TOS; } @@ -224,7 +225,7 @@ ofputil_cls_rule_to_match(const struct cls_rule *rule, struct ofp_match *match) match->dl_type = ofputil_dl_type_to_openflow(rule->flow.dl_type); match->nw_src = rule->flow.nw_src; match->nw_dst = rule->flow.nw_dst; - match->nw_tos = rule->flow.nw_tos; + match->nw_tos = rule->flow.tos_frag & IP_DSCP_MASK; match->nw_proto = rule->flow.nw_proto; match->tp_src = rule->flow.tp_src; match->tp_dst = rule->flow.tp_dst; @@ -790,7 +791,7 @@ ofputil_min_flow_format(const struct cls_rule *rule) { const struct flow_wildcards *wc = &rule->wc; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3); /* Only NXM supports 
separately wildcards the Ethernet multicast bit. */ if (!(wc->wildcards & FWW_DL_DST) != !(wc->wildcards & FWW_ETH_MCAST)) { @@ -818,6 +819,11 @@ ofputil_min_flow_format(const struct cls_rule *rule) return NXFF_NXM; } + /* Only NXM supports matching fragments. */ + if (wc->tos_frag_mask & FLOW_FRAG_MASK) { + return NXFF_NXM; + } + /* Other formats can express this rule. */ return NXFF_OPENFLOW10; } @@ -1932,6 +1938,36 @@ make_echo_reply(const struct ofp_header *rq) return out; } +const char * +ofputil_frag_handling_to_string(enum ofp_config_flags flags) +{ + switch (flags & OFPC_FRAG_MASK) { + case OFPC_FRAG_NORMAL: return "normal"; + case OFPC_FRAG_DROP: return "drop"; + case OFPC_FRAG_REASM: return "reassemble"; + case OFPC_FRAG_NX_MATCH: return "nx-match"; + } + + NOT_REACHED(); +} + +bool +ofputil_frag_handling_from_string(const char *s, enum ofp_config_flags *flags) +{ + if (!strcasecmp(s, "normal")) { + *flags = OFPC_FRAG_NORMAL; + } else if (!strcasecmp(s, "drop")) { + *flags = OFPC_FRAG_DROP; + } else if (!strcasecmp(s, "reassemble")) { + *flags = OFPC_FRAG_REASM; + } else if (!strcasecmp(s, "nx-match")) { + *flags = OFPC_FRAG_NX_MATCH; + } else { + return false; + } + return true; +} + /* Checks that 'port' is a valid output port for the OFPAT_OUTPUT action, given * that the switch will never have more than 'max_ports' ports. Returns 0 if * 'port' is valid, otherwise an ofp_mkerr() return code. */ @@ -2401,7 +2437,7 @@ ofputil_normalize_rule(struct cls_rule *rule, enum nx_flow_format flow_format) MAY_NW_ADDR = 1 << 0, /* nw_src, nw_dst */ MAY_TP_ADDR = 1 << 1, /* tp_src, tp_dst */ MAY_NW_PROTO = 1 << 2, /* nw_proto */ - MAY_NW_TOS = 1 << 3, /* nw_tos */ + MAY_TOS_FRAG = 1 << 3, /* tos_frag */ MAY_ARP_SHA = 1 << 4, /* arp_sha */ MAY_ARP_THA = 1 << 5, /* arp_tha */ MAY_IPV6_ADDR = 1 << 6, /* ipv6_src, ipv6_dst */ @@ -2412,7 +2448,7 @@ ofputil_normalize_rule(struct cls_rule *rule, enum nx_flow_format flow_format) /* Figure out what fields may be matched. 
*/ if (rule->flow.dl_type == htons(ETH_TYPE_IP)) { - may_match = MAY_NW_PROTO | MAY_NW_TOS | MAY_NW_ADDR; + may_match = MAY_NW_PROTO | MAY_TOS_FRAG | MAY_NW_ADDR; if (rule->flow.nw_proto == IPPROTO_TCP || rule->flow.nw_proto == IPPROTO_UDP || rule->flow.nw_proto == IPPROTO_ICMP) { @@ -2420,7 +2456,7 @@ ofputil_normalize_rule(struct cls_rule *rule, enum nx_flow_format flow_format) } } else if (rule->flow.dl_type == htons(ETH_TYPE_IPV6) && flow_format == NXFF_NXM) { - may_match = MAY_NW_PROTO | MAY_NW_TOS | MAY_IPV6_ADDR; + may_match = MAY_NW_PROTO | MAY_TOS_FRAG | MAY_IPV6_ADDR; if (rule->flow.nw_proto == IPPROTO_TCP || rule->flow.nw_proto == IPPROTO_UDP) { may_match |= MAY_TP_ADDR; @@ -2452,8 +2488,8 @@ ofputil_normalize_rule(struct cls_rule *rule, enum nx_flow_format flow_format) if (!(may_match & MAY_NW_PROTO)) { wc.wildcards |= FWW_NW_PROTO; } - if (!(may_match & MAY_NW_TOS)) { - wc.wildcards |= FWW_NW_TOS; + if (!(may_match & MAY_TOS_FRAG)) { + wc.tos_frag_mask = 0; } if (!(may_match & MAY_ARP_SHA)) { wc.wildcards |= FWW_ARP_SHA; diff --git a/lib/ofp-util.h b/lib/ofp-util.h index ecd77cc4..5af9d2ba 100644 --- a/lib/ofp-util.h +++ b/lib/ofp-util.h @@ -277,6 +277,9 @@ struct ofpbuf *make_unbuffered_packet_out(const struct ofpbuf *packet, uint16_t in_port, uint16_t out_port); struct ofpbuf *make_echo_request(void); struct ofpbuf *make_echo_reply(const struct ofp_header *rq); + +const char *ofputil_frag_handling_to_string(enum ofp_config_flags); +bool ofputil_frag_handling_from_string(const char *, enum ofp_config_flags *); /* Actions. 
*/ diff --git a/ofproto/netflow.c b/ofproto/netflow.c index f0af436d..9d9ef198 100644 --- a/ofproto/netflow.c +++ b/ofproto/netflow.c @@ -160,7 +160,7 @@ gen_netflow_rec(struct netflow *nf, struct netflow_flow *nf_flow, } nf_rec->tcp_flags = nf_flow->tcp_flags; nf_rec->ip_proto = expired->flow.nw_proto; - nf_rec->ip_tos = expired->flow.nw_tos; + nf_rec->ip_tos = expired->flow.tos_frag & IP_DSCP_MASK; /* NetFlow messages are limited to 30 records. */ if (ntohs(nf_hdr->count) >= 30) { diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index ff607f78..d1087bf0 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -2955,13 +2955,26 @@ static struct rule_dpif * rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow, uint8_t table_id) { + struct cls_rule *cls_rule; + struct classifier *cls; + if (table_id >= N_TABLES) { return NULL; } - return rule_dpif_cast(rule_from_cls_rule( - classifier_lookup(&ofproto->up.tables[table_id], - flow))); + cls = &ofproto->up.tables[table_id]; + if (flow->tos_frag & FLOW_FRAG_ANY + && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) { + /* For OFPC_NORMAL frag_handling, we must pretend that transport ports + * are unavailable. 
*/ + struct flow ofpc_normal_flow = *flow; + ofpc_normal_flow.tp_src = htons(0); + ofpc_normal_flow.tp_dst = htons(0); + cls_rule = classifier_lookup(cls, &ofpc_normal_flow); + } else { + cls_rule = classifier_lookup(cls, flow); + } + return rule_dpif_cast(rule_from_cls_rule(cls_rule)); } static void @@ -3363,6 +3376,7 @@ static void commit_set_nw_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions) { + int frag = base->tos_frag & FLOW_FRAG_MASK; struct ovs_key_ipv4 ipv4_key; if (base->dl_type != htons(ETH_TYPE_IP) || @@ -3372,16 +3386,19 @@ commit_set_nw_action(const struct flow *flow, struct flow *base, if (base->nw_src == flow->nw_src && base->nw_dst == flow->nw_dst && - base->nw_tos == flow->nw_tos) { + base->tos_frag == flow->tos_frag) { return; } + memset(&ipv4_key, 0, sizeof(ipv4_key)); ipv4_key.ipv4_src = base->nw_src = flow->nw_src; ipv4_key.ipv4_dst = base->nw_dst = flow->nw_dst; - ipv4_key.ipv4_tos = base->nw_tos = flow->nw_tos; - ipv4_key.ipv4_proto = base->nw_proto; + ipv4_key.ipv4_tos = flow->tos_frag & IP_DSCP_MASK; + ipv4_key.ipv4_frag = (frag == 0 ? OVS_FRAG_TYPE_NONE + : frag == FLOW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST + : OVS_FRAG_TYPE_LATER); commit_action__(odp_actions, OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_IPV4, &ipv4_key, sizeof(ipv4_key)); @@ -3843,7 +3860,8 @@ do_xlate_actions(const union ofp_action *in, size_t n_in, break; case OFPUTIL_OFPAT_SET_NW_TOS: - ctx->flow.nw_tos = ia->nw_tos.nw_tos & IP_DSCP_MASK; + ctx->flow.tos_frag &= ~IP_DSCP_MASK; + ctx->flow.tos_frag |= ia->nw_tos.nw_tos & IP_DSCP_MASK; break; case OFPUTIL_OFPAT_SET_TP_SRC: @@ -3961,6 +3979,27 @@ xlate_actions(struct action_xlate_ctx *ctx, ctx->odp_actions = ofpbuf_new(512); ofpbuf_reserve(ctx->odp_actions, NL_A_U32_SIZE); + + if (ctx->flow.tos_frag & FLOW_FRAG_ANY) { + switch (ctx->ofproto->up.frag_handling) { + case OFPC_FRAG_NORMAL: + /* We must pretend that transport ports are unavailable. 
*/ + ctx->flow.tp_src = htons(0); + ctx->flow.tp_dst = htons(0); + break; + + case OFPC_FRAG_DROP: + return ctx->odp_actions; + + case OFPC_FRAG_REASM: + NOT_REACHED(); + + case OFPC_FRAG_NX_MATCH: + /* Nothing to do. */ + break; + } + } + ctx->tags = 0; ctx->may_set_up_flow = true; ctx->has_learn = false; @@ -4702,21 +4741,17 @@ rule_invalidate(const struct rule_dpif *rule) } static bool -get_drop_frags(struct ofproto *ofproto_) +set_frag_handling(struct ofproto *ofproto_, + enum ofp_config_flags frag_handling) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - bool drop_frags; - - dpif_get_drop_frags(ofproto->dpif, &drop_frags); - return drop_frags; -} -static void -set_drop_frags(struct ofproto *ofproto_, bool drop_frags) -{ - struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - - dpif_set_drop_frags(ofproto->dpif, drop_frags); + if (frag_handling != OFPC_FRAG_REASM) { + ofproto->need_revalidate = true; + return true; + } else { + return false; + } } static int @@ -5056,8 +5091,7 @@ const struct ofproto_class ofproto_dpif_class = { rule_get_stats, rule_execute, rule_modify_actions, - get_drop_frags, - set_drop_frags, + set_frag_handling, packet_out, set_netflow, get_netflow_ids, diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index f596abcb..38dbd2d7 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -51,6 +51,7 @@ struct ofproto { char *sw_desc; /* Software version. */ char *serial_desc; /* Serial number. */ char *dp_desc; /* Datapath description. */ + enum ofp_config_flags frag_handling; /* One of OFPC_*. */ /* Datapath. */ struct hmap ports; /* Contains "struct ofport"s. */ @@ -81,7 +82,6 @@ struct ofport *ofproto_get_port(const struct ofproto *, uint16_t ofp_port); (CLS) < &(OFPROTO)->tables[(OFPROTO)->n_tables]; \ (CLS)++) - /* An OpenFlow port within a "struct ofproto". 
* * With few exceptions, ofproto implementations may look at these fields but @@ -807,14 +807,36 @@ struct ofproto_class { * rule. */ void (*rule_modify_actions)(struct rule *rule); - /* These functions implement the OpenFlow IP fragment handling policy. By - * default ('drop_frags' == false), an OpenFlow switch should treat IP - * fragments the same way as other packets (although TCP and UDP port - * numbers cannot be determined). With 'drop_frags' == true, the switch - * should drop all IP fragments without passing them through the flow - * table. */ - bool (*get_drop_frags)(struct ofproto *ofproto); - void (*set_drop_frags)(struct ofproto *ofproto, bool drop_frags); + /* Changes the OpenFlow IP fragment handling policy to 'frag_handling', + * which takes one of the following values, with the corresponding + * meanings: + * + * - OFPC_FRAG_NORMAL: The switch should treat IP fragments the same way + * as other packets, omitting TCP and UDP port numbers (always setting + * them to 0). + * + * - OFPC_FRAG_DROP: The switch should drop all IP fragments without + * passing them through the flow table. + * + * - OFPC_FRAG_REASM: The switch should reassemble IP fragments before + * passing packets through the flow table. + * + * - OFPC_FRAG_NX_MATCH (a Nicira extension): Similar to OFPC_FRAG_NORMAL, + * except that TCP and UDP port numbers should be included in fragments + * with offset 0. + * + * Implementations are not required to support every mode. + * OFPC_FRAG_NORMAL is the default mode when an ofproto is created. + * + * At the time of the call to ->set_frag_handling(), the current mode is + * available in 'ofproto->frag_handling'. ->set_frag_handling() returns + * true if the requested mode was set, false if it is not supported. + * + * Upon successful return, the caller changes 'ofproto->frag_handling' to + * reflect the new mode. 
+ */ + bool (*set_frag_handling)(struct ofproto *ofproto, + enum ofp_config_flags frag_handling); /* Implements the OpenFlow OFPT_PACKET_OUT command. The datapath should * execute the 'n_actions' in the 'actions' array on 'packet'. diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 1cc1e4e1..d64901e2 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -327,6 +327,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type, ofproto->sw_desc = xstrdup(DEFAULT_SW_DESC); ofproto->serial_desc = xstrdup(DEFAULT_SERIAL_DESC); ofproto->dp_desc = xstrdup(DEFAULT_DP_DESC); + ofproto->frag_handling = OFPC_FRAG_NORMAL; hmap_init(&ofproto->ports); shash_init(&ofproto->port_by_name); ofproto->tables = NULL; @@ -1580,18 +1581,12 @@ static int handle_get_config_request(struct ofconn *ofconn, const struct ofp_header *oh) { struct ofproto *ofproto = ofconn_get_ofproto(ofconn); - struct ofpbuf *buf; struct ofp_switch_config *osc; - uint16_t flags; - bool drop_frags; - - /* Figure out flags. */ - drop_frags = ofproto->ofproto_class->get_drop_frags(ofproto); - flags = drop_frags ? OFPC_FRAG_DROP : OFPC_FRAG_NORMAL; + struct ofpbuf *buf; /* Send reply. 
*/ osc = make_openflow_xid(sizeof *osc, OFPT_GET_CONFIG_REPLY, oh->xid, &buf); - osc->flags = htons(flags); + osc->flags = htons(ofproto->frag_handling); osc->miss_send_len = htons(ofconn_get_miss_send_len(ofconn)); ofconn_send_reply(ofconn, buf); @@ -1604,19 +1599,20 @@ handle_set_config(struct ofconn *ofconn, const struct ofp_switch_config *osc) struct ofproto *ofproto = ofconn_get_ofproto(ofconn); uint16_t flags = ntohs(osc->flags); - if (ofconn_get_type(ofconn) == OFCONN_PRIMARY - && ofconn_get_role(ofconn) != NX_ROLE_SLAVE) { - switch (flags & OFPC_FRAG_MASK) { - case OFPC_FRAG_NORMAL: - ofproto->ofproto_class->set_drop_frags(ofproto, false); - break; - case OFPC_FRAG_DROP: - ofproto->ofproto_class->set_drop_frags(ofproto, true); - break; - default: - VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")", - osc->flags); - break; + if (ofconn_get_type(ofconn) != OFCONN_PRIMARY + || ofconn_get_role(ofconn) != NX_ROLE_SLAVE) { + enum ofp_config_flags cur = ofproto->frag_handling; + enum ofp_config_flags next = flags & OFPC_FRAG_MASK; + + assert((cur & OFPC_FRAG_MASK) == cur); + if (cur != next) { + if (ofproto->ofproto_class->set_frag_handling(ofproto, next)) { + ofproto->frag_handling = next; + } else { + VLOG_WARN_RL(&rl, "%s: unsupported fragment handling mode %s", + ofproto->name, + ofputil_frag_handling_to_string(next)); + } } } diff --git a/tests/flowgen.pl b/tests/flowgen.pl index d397515a..95c8f48d 100755 --- a/tests/flowgen.pl +++ b/tests/flowgen.pl @@ -1,6 +1,6 @@ #! /usr/bin/perl -# Copyright (c) 2009, 2010 Nicira Networks. +# Copyright (c) 2009, 2010, 2011 Nicira Networks. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -98,7 +98,7 @@ sub output { } else { die; } - if ($attrs{IP_FRAGMENT} ne 'no') { + if ($attrs{IP_FRAGMENT} ne 'no' && $attrs{IP_FRAGMENT} ne 'first') { $flow{TP_SRC} = $flow{TP_DST} = 0; } } elsif ($attrs{DL_TYPE} eq 'non-ip') { @@ -158,14 +158,14 @@ sub output { if ($attrs{TP_PROTO} =~ '^TCP') { my $tcp = pack('nnNNnnnn', - $flow{TP_SRC}, # source port - $flow{TP_DST}, # dest port - 87123455, # seqno - 712378912, # ackno + $flow{TP_SRC}, # source port + $flow{TP_DST}, # dest port + 87123455, # seqno + 712378912, # ackno (5 << 12) | 0x02 | 0x10, # hdrlen, SYN, ACK 5823, # window size 18923, # checksum - 12893); # urgent pointer + 12893); # urgent pointer if ($attrs{TP_PROTO} eq 'TCP+options') { substr($tcp, 12, 2) = pack('n', (6 << 12) | 0x02 | 0x10); $tcp .= pack('CCn', 2, 4, 1975); # MSS option @@ -179,17 +179,16 @@ sub output { $ip .= $udp; } elsif ($attrs{TP_PROTO} eq 'ICMP') { $ip .= pack('CCnnn', - 8, # echo request - 0, # code - 0, # checksum - 736, # identifier - 931); # sequence number + 8, # echo request + 0, # code + 0, # checksum + 736, # identifier + 931); # sequence number } elsif ($attrs{TP_PROTO} eq 'other') { $ip .= 'other header'; } else { die; } - substr($ip, 2, 2) = pack('n', length($ip)); $packet .= $ip; } diff --git a/tests/odp.at b/tests/odp.at index 21aa8973..65a9fb93 100644 --- a/tests/odp.at +++ b/tests/odp.at @@ -2,18 +2,22 @@ AT_SETUP([OVS datapath parsing and formatting - valid forms]) AT_DATA([odp-base.txt], [dnl in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x1234) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0),tcp(src=80,dst=8080) 
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0),udp(src=81,dst=6632) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0),icmp(type=1,code=2) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=6,tos=0),tcp(src=80,dst=8080) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=17,tos=0),udp(src=6630,dst=22) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=1,code=2) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=135,code=0),nd(target=::3) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=135,code=0),nd(target=::3,sll=00:05:06:07:08:09) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=136,code=0),nd(target=::3,tll=00:0a:0b:0c:0d:0e) -in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=no) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=first) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=later) 
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0,frag=no),tcp(src=80,dst=8080) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0,frag=no),udp(src=81,dst=6632) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0,frag=no),icmp(type=1,code=2) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112,frag=no) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112,frag=first) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112,frag=later) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=6,tos=0,frag=no),tcp(src=80,dst=8080) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=17,tos=0,frag=no),udp(src=6630,dst=22) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=1,code=2) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=135,code=0),nd(target=::3) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=135,code=0),nd(target=::3,sll=00:05:06:07:08:09) +in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=136,code=0),nd(target=::3,tll=00:0a:0b:0c:0d:0e) 
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0806),arp(sip=1.2.3.4,tip=5.6.7.8,op=1,sha=00:0f:10:11:12:13,tha=00:14:15:16:17:18) ]) @@ -31,7 +35,15 @@ in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0806),arp echo echo '# Valid forms with tun_id and VLAN headers.' sed 's/^/tun_id(0xfedcba9876543210),/ -s/eth([[^)]]*)/&,vlan(vid=99,pcp=7)/' odp-base.txt) > odp.txt +s/eth([[^)]]*)/&,vlan(vid=99,pcp=7)/' odp-base.txt + + echo + echo '# Valid forms with IP first fragment.' +sed -n 's/,frag=no),/,frag=first),/p' odp-base.txt + + echo + echo '# Valid forms with IP later fragment.' +sed -n 's/,frag=no),.*/,frag=later)/p' odp-base.txt) > odp.txt AT_CAPTURE_FILE([odp.txt]) AT_CHECK_UNQUOTED([test-odp < odp.txt], [0], [`cat odp.txt` ]) diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 59b14e49..b97fc9b1 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -11,7 +11,7 @@ table=1 in_port=2 priority=1500 icmp actions=output(17),resubmit(,2) table=1 in_port=3 priority=1500 icmp actions=output(14),resubmit(,2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout]) +AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 10,11,12,13,14,15,16,17,18,19,20,21 ]) @@ -36,7 +36,7 @@ in_port=10,reg1=0xdeadbeef actions=output:21 in_port=11,reg2=0xeef22dea 
actions=output:22 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout]) +AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 20,21,22 ]) @@ -55,7 +55,7 @@ in_port=6 actions=output:NXM_NX_REG0[[0..15]],output:NXM_NX_REG0[[16..31]] in_port=7 actions=load:0x110000ff->NXM_NX_REG0[[]],output:NXM_NX_REG0[[]] ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout]) +AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 9,55,10,55,66,11,77,88 ]) @@ -73,7 +73,7 @@ in_port=4 actions=set_tunnel:4,set_tunnel:3,output:4 in_port=5 actions=set_tunnel:5 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'tun_id(0x1),in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout]) +AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'tun_id(0x1),in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 
stdout], [0], [Datapath actions: set(tun_id(0x1)),1,2,set(tun_id(0x3)),3,4 ]) @@ -225,3 +225,44 @@ done OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([ofproto-dpif - fragment handling]) +OFPROTO_START +AT_DATA([flows.txt], [dnl +priority=75 tcp ip_frag=no tp_dst=80 actions=output:1 +priority=75 tcp ip_frag=first tp_dst=80 actions=output:2 +priority=75 tcp ip_frag=later tp_dst=80 actions=output:3 +priority=50 tcp ip_frag=no actions=output:4 +priority=50 tcp ip_frag=first actions=output:5 +priority=50 tcp ip_frag=later actions=output:6 +]) +AT_CHECK([ovs-ofctl replace-flows br0 flows.txt]) + +base_flow="in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0" +no_flow="$base_flow,frag=no),tcp(src=12345,dst=80)" +first_flow="$base_flow,frag=first),tcp(src=12345,dst=80)" +later_flow="$base_flow,frag=later)" + + # mode no first later +for tuple in \ + 'normal 1 5 6' \ + 'drop 1 drop drop' \ + 'nx-match 1 2 6' +do + set $tuple + mode=$1 + no=$2 + first=$3 + later=$4 + + AT_CHECK([ovs-ofctl set-frags br0 $mode]) + for type in no first later; do + eval flow=\$${type}_flow exp_output=\$$type + AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 "$flow"], + [0], [stdout]) + AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: $exp_output +]) + done +done +OFPROTO_STOP +AT_CLEANUP diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at index 9dcd2493..f3feff36 100644 --- a/tests/ovs-ofctl.at +++ b/tests/ovs-ofctl.at @@ -301,6 +301,32 @@ NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGE NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3b) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_TLL(0002e30f80a4) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(88) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_TLL(0002e30f80a4) +# IPv4 fragments. 
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(00) +NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(01) +NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(02) +NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(03) +NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/03) +NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/fd) +NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/02) +NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(01/01) +NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(02/02) +NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(03/03) +NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(f3) + +# IPv6 fragments. +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(00) +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(01) +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(02) +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(03) +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/03) +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/01) +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/02) +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(01/01) +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(02/02) +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(03/03) +NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(f3) + # Tunnel ID. NXM_NX_TUN_ID(00000000abcdef01) NXM_NX_TUN_ID_W(84200000abcdef01/84200000FFFFFFFF) @@ -453,6 +479,32 @@ nx_pull_match() returned error 44010104 (type OFPET_BAD_REQUEST, code NXBRC_NXM_ nx_pull_match() returned error 44010104 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_PREREQ) nx_pull_match() returned error 44010104 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_PREREQ) +# IPv4 fragments. +NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(00) +NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(01) +NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(02) +NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(03) +NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(00) +NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(00/01) +NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(00/02) +NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(01/01) +NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(02/02) +NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(03) +nx_pull_match() returned error 44010102 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_VALUE) + +# IPv6 fragments. 
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(00) +NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(01) +NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(02) +NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(03) +NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(00) +NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(00/01) +NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(00/02) +NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(01/01) +NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(02/02) +NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(03) +nx_pull_match() returned error 44010102 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_VALUE) + # Tunnel ID. NXM_NX_TUN_ID(00000000abcdef01) NXM_NX_TUN_ID_W(84200000abcdef01/84200000ffffffff) diff --git a/tests/test-classifier.c b/tests/test-classifier.c index 5f5d7971..0e2b13f0 100644 --- a/tests/test-classifier.c +++ b/tests/test-classifier.c @@ -55,7 +55,7 @@ CLS_FIELD(FWW_DL_SRC, dl_src, DL_SRC) \ CLS_FIELD(FWW_DL_DST | FWW_ETH_MCAST, dl_dst, DL_DST) \ CLS_FIELD(FWW_NW_PROTO, nw_proto, NW_PROTO) \ - CLS_FIELD(FWW_NW_TOS, nw_tos, NW_TOS) + CLS_FIELD(0, tos_frag, TOS_FRAG) /* Field indexes. 
* @@ -203,6 +203,9 @@ match(const struct cls_rule *wild, const struct flow *fixed) & wild->wc.vlan_tci_mask); } else if (f_idx == CLS_F_IDX_TUN_ID) { eq = !((fixed->tun_id ^ wild->flow.tun_id) & wild->wc.tun_id_mask); + } else if (f_idx == CLS_F_IDX_TOS_FRAG) { + eq = !((fixed->tos_frag ^ wild->flow.tos_frag) + & wild->wc.tos_frag_mask); } else { NOT_REACHED(); } @@ -263,7 +266,7 @@ static uint8_t dl_src_values[][6] = { { 0x00, 0x02, 0xe3, 0x0f, 0x80, 0xa4 }, static uint8_t dl_dst_values[][6] = { { 0x4a, 0x27, 0x71, 0xae, 0x64, 0xc1 }, { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } }; static uint8_t nw_proto_values[] = { IPPROTO_TCP, IPPROTO_ICMP }; -static uint8_t nw_tos_values[] = { 49, 0 }; +static uint8_t tos_frag_values[] = { 48, 0 }; static void *values[CLS_N_FIELDS][2]; @@ -297,8 +300,8 @@ init_values(void) values[CLS_F_IDX_NW_PROTO][0] = &nw_proto_values[0]; values[CLS_F_IDX_NW_PROTO][1] = &nw_proto_values[1]; - values[CLS_F_IDX_NW_TOS][0] = &nw_tos_values[0]; - values[CLS_F_IDX_NW_TOS][1] = &nw_tos_values[1]; + values[CLS_F_IDX_TOS_FRAG][0] = &tos_frag_values[0]; + values[CLS_F_IDX_TOS_FRAG][1] = &tos_frag_values[1]; values[CLS_F_IDX_TP_SRC][0] = &tp_src_values[0]; values[CLS_F_IDX_TP_SRC][1] = &tp_src_values[1]; @@ -318,7 +321,7 @@ init_values(void) #define N_DL_SRC_VALUES ARRAY_SIZE(dl_src_values) #define N_DL_DST_VALUES ARRAY_SIZE(dl_dst_values) #define N_NW_PROTO_VALUES ARRAY_SIZE(nw_proto_values) -#define N_NW_TOS_VALUES ARRAY_SIZE(nw_tos_values) +#define N_TOS_FRAG_VALUES ARRAY_SIZE(tos_frag_values) #define N_FLOW_VALUES (N_NW_SRC_VALUES * \ N_NW_DST_VALUES * \ @@ -331,7 +334,7 @@ init_values(void) N_DL_SRC_VALUES * \ N_DL_DST_VALUES * \ N_NW_PROTO_VALUES * \ - N_NW_TOS_VALUES) + N_TOS_FRAG_VALUES) static unsigned int get_value(unsigned int *x, unsigned n_values) @@ -367,7 +370,7 @@ compare_classifiers(struct classifier *cls, struct tcls *tcls) memcpy(flow.dl_dst, dl_dst_values[get_value(&x, N_DL_DST_VALUES)], ETH_ADDR_LEN); flow.nw_proto = 
nw_proto_values[get_value(&x, N_NW_PROTO_VALUES)]; - flow.nw_tos = nw_tos_values[get_value(&x, N_NW_TOS_VALUES)]; + flow.tos_frag = tos_frag_values[get_value(&x, N_TOS_FRAG_VALUES)]; cr0 = classifier_lookup(cls, &flow); cr1 = tcls_lookup(tcls, &flow); @@ -465,6 +468,8 @@ make_rule(int wc_fields, unsigned int priority, int value_pat) rule->cls_rule.wc.vlan_tci_mask = htons(UINT16_MAX); } else if (f_idx == CLS_F_IDX_TUN_ID) { rule->cls_rule.wc.tun_id_mask = htonll(UINT64_MAX); + } else if (f_idx == CLS_F_IDX_TOS_FRAG) { + rule->cls_rule.wc.tos_frag_mask = UINT8_MAX; } else { NOT_REACHED(); } diff --git a/tests/test-flows.c b/tests/test-flows.c index 559af3a3..57157c97 100644 --- a/tests/test-flows.c +++ b/tests/test-flows.c @@ -80,8 +80,11 @@ main(int argc OVS_UNUSED, char *argv[]) printf("Packet:\n"); ofp_print_packet(stdout, packet->data, packet->size, packet->size); ovs_hex_dump(stdout, packet->data, packet->size, 0, true); + cls_rule_print(&rule); printf("Expected flow:\n%s\n", exp_s); printf("Actually extracted flow:\n%s\n", got_s); + ovs_hex_dump(stdout, &expected_match, sizeof expected_match, 0, false); + ovs_hex_dump(stdout, &extracted_match, sizeof extracted_match, 0, false); printf("\n"); free(exp_s); free(got_s); diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c index 4d0d3c2d..d78fb317 100644 --- a/utilities/ovs-dpctl.c +++ b/utilities/ovs-dpctl.c @@ -371,12 +371,9 @@ show_dpif(struct dpif *dpif) printf("%s:\n", dpif_name(dpif)); if (!dpif_get_dp_stats(dpif, &stats)) { - printf("\tlookups: frags:%"PRIu64, stats.n_frags); - printf(" hit:%"PRIu64, stats.n_hit); - printf(" missed:%"PRIu64, stats.n_missed); - printf(" lost:%"PRIu64"\n", stats.n_lost); - - printf("\tflows: %"PRIu64"\n", stats.n_flows); + printf("\tlookups: hit:%"PRIu64" missed:%"PRIu64" lost:%"PRIu64"\n" + "\tflows: %"PRIu64"\n", + stats.n_hit, stats.n_missed, stats.n_lost, stats.n_flows); } DPIF_PORT_FOR_EACH (&dpif_port, &dump, dpif) { printf("\tport %u: %s", dpif_port.port_no, 
dpif_port.name); diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in index f2ed8a48..215f8f92 100644 --- a/utilities/ovs-ofctl.8.in +++ b/utilities/ovs-ofctl.8.in @@ -92,6 +92,39 @@ spanning tree protocol is not in use. . .RE . +.IP "\fBget\-frags \fIswitch\fR" +Prints \fIswitch\fR's fragment handling mode. See \fBset\-frags\fR, +below, for a description of each fragment handling mode. +.IP +The \fBshow\fR command also prints the fragment handling mode among +its other output. +. +.IP "\fBset\-frags \fIswitch frag_mode\fR" +Configures \fIswitch\fR's treatment of IPv4 and IPv6 fragments. The +choices for \fIfrag_mode\fR are: +.RS +.IP "\fBnormal\fR" +Fragments pass through the flow table like non-fragmented packets. +The TCP ports, UDP ports, and ICMP type and code fields are always set +to 0, even for fragments where that information would otherwise be +available (fragments with offset 0). This is the default fragment +handling mode for an OpenFlow switch. +.IP "\fBdrop\fR" +Fragments are dropped without passing through the flow table. +.IP "\fBreassemble\fR" +The switch reassembles fragments into full IP packets before passing +them through the flow table. Open vSwitch does not implement this +fragment handling mode. +.IP "\fBnx\-match\fR" +Fragments pass through the flow table like non-fragmented packets. +The TCP ports, UDP ports, and ICMP type and code fields are available +for matching for fragments with offset 0, and set to 0 in fragments +with nonzero offset. This mode is a Nicira extension. +.RE +.IP +See the description of \fBip_frag\fR, below, for a way to match on +whether a packet is a fragment and on its fragment offset. +. .TP \fBdump\-flows \fIswitch \fR[\fIflows\fR] Prints to the console all flow entries in \fIswitch\fR's @@ -476,6 +509,27 @@ Match packets with no 802.1Q header or tagged with VLAN 0 and priority Some of these matching possibilities can also be achieved with \fBdl_vlan\fR and \fBdl_vlan_pcp\fR. . 
+.IP \fBip_frag=\fIfrag_type\fR +When \fBdl_type\fR specifies IP or IPv6, \fIfrag_type\fR +specifies what kind of IP fragments or non-fragments to match. The +following values of \fIfrag_type\fR are supported: +.RS +.IP "\fBno\fR" +Matches only non-fragmented packets. +.IP "\fByes\fR" +Matches all fragments. +.IP "\fBfirst\fR" +Matches only fragments with offset 0. +.IP "\fBlater\fR" +Matches only fragments with nonzero offset. +.IP "\fBnot_later\fR" +Matches non-fragmented packets and fragments with zero offset. +.RE +.IP +The \fBip_frag\fR match type is likely to be most useful in +\fBnx\-match\fR mode. See the description of the \fBset\-frags\fR +command, above, for more details. +. .IP \fBarp_sha=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR .IQ \fBarp_tha=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR When \fBdl_type\fR specifies ARP, \fBarp_sha\fR and \fBarp_tha\fR match diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 164d0830..ce9723b9 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -172,6 +172,8 @@ usage(void) " dump-desc SWITCH print switch description\n" " dump-tables SWITCH print table stats\n" " mod-port SWITCH IFACE ACT modify port behavior\n" + " get-frags SWITCH print fragment handling behavior\n" + " set-frags SWITCH FRAG_MODE set fragment handling behavior\n" " dump-ports SWITCH [PORT] print port statistics\n" " dump-flows SWITCH print all flow entries\n" " dump-flows SWITCH FLOW print matching FLOWs\n" @@ -351,7 +353,9 @@ dump_trivial_stats_transaction(const char *vconn_name, uint8_t stats_type) /* Sends 'request', which should be a request that only has a reply if an error * occurs, and waits for it to succeed or fail. If an error does occur, prints - * it and exits with an error. */ + * it and exits with an error. + * + * Destroys all of the 'requests'. 
*/ static void transact_multiple_noreply(struct vconn *vconn, struct list *requests) { @@ -372,7 +376,9 @@ transact_multiple_noreply(struct vconn *vconn, struct list *requests) /* Sends 'request', which should be a request that only has a reply if an error * occurs, and waits for it to succeed or fail. If an error does occur, prints - * it and exits with an error. */ + * it and exits with an error. + * + * Destroys 'request'. */ static void transact_noreply(struct vconn *vconn, struct ofpbuf *request) { @@ -383,6 +389,44 @@ transact_noreply(struct vconn *vconn, struct ofpbuf *request) transact_multiple_noreply(vconn, &requests); } +static void +fetch_switch_config(struct vconn *vconn, struct ofp_switch_config *config_) +{ + struct ofp_switch_config *config; + struct ofp_header *header; + struct ofpbuf *request; + struct ofpbuf *reply; + + make_openflow(sizeof(struct ofp_header), OFPT_GET_CONFIG_REQUEST, + &request); + run(vconn_transact(vconn, request, &reply), + "talking to %s", vconn_get_name(vconn)); + + header = reply->data; + if (header->type != OFPT_GET_CONFIG_REPLY || + header->length != htons(sizeof *config)) { + ovs_fatal(0, "%s: bad reply to config request", vconn_get_name(vconn)); + } + + config = reply->data; + *config_ = *config; +} + +static void +set_switch_config(struct vconn *vconn, struct ofp_switch_config *config_) +{ + struct ofp_switch_config *config; + struct ofp_header save_header; + struct ofpbuf *request; + + config = make_openflow(sizeof *config, OFPT_SET_CONFIG, &request); + save_header = config->header; + *config = *config_; + config->header = save_header; + + transact_noreply(vconn, request); +} + static void do_show(int argc OVS_UNUSED, char *argv[]) { @@ -720,13 +764,11 @@ do_monitor(int argc, char *argv[]) open_vconn(argv[1], &vconn); if (argc > 2) { - int miss_send_len = atoi(argv[2]); - struct ofp_switch_config *osc; - struct ofpbuf *buf; + struct ofp_switch_config config; - osc = make_openflow(sizeof *osc, OFPT_SET_CONFIG, &buf); 
- osc->miss_send_len = htons(miss_send_len); - transact_noreply(vconn, buf); + fetch_switch_config(vconn, &config); + config.miss_send_len = htons(atoi(argv[2])); + set_switch_config(vconn, &config); } monitor_vconn(vconn); } @@ -806,6 +848,51 @@ do_mod_port(int argc OVS_UNUSED, char *argv[]) vconn_close(vconn); } +static void +do_get_frags(int argc OVS_UNUSED, char *argv[]) +{ + struct ofp_switch_config config; + struct vconn *vconn; + + open_vconn(argv[1], &vconn); + fetch_switch_config(vconn, &config); + puts(ofputil_frag_handling_to_string(ntohs(config.flags))); + vconn_close(vconn); +} + +static void +do_set_frags(int argc OVS_UNUSED, char *argv[]) +{ + struct ofp_switch_config config; + enum ofp_config_flags mode; + struct vconn *vconn; + ovs_be16 flags; + + if (!ofputil_frag_handling_from_string(argv[2], &mode)) { + ovs_fatal(0, "%s: unknown fragment handling mode", argv[2]); + } + + open_vconn(argv[1], &vconn); + fetch_switch_config(vconn, &config); + flags = htons(mode) | (config.flags & htons(~OFPC_FRAG_MASK)); + if (flags != config.flags) { + /* Set the configuration. */ + config.flags = flags; + set_switch_config(vconn, &config); + + /* Then retrieve the configuration to see if it really took. OpenFlow + * doesn't define error reporting for bad modes, so this is all we can + * do. */ + fetch_switch_config(vconn, &config); + if (flags != config.flags) { + ovs_fatal(0, "%s: setting fragment handling mode failed (this " + "switch probably doesn't support mode \"%s\")", + argv[1], ofputil_frag_handling_to_string(mode)); + } + } + vconn_close(vconn); +} + static void do_ping(int argc, char *argv[]) { @@ -1442,6 +1529,8 @@ static const struct command all_commands[] = { { "diff-flows", 2, 2, do_diff_flows }, { "dump-ports", 1, 2, do_dump_ports }, { "mod-port", 3, 3, do_mod_port }, + { "get-frags", 1, 1, do_get_frags }, + { "set-frags", 2, 2, do_set_frags }, { "probe", 1, 1, do_probe }, { "ping", 1, 2, do_ping }, { "benchmark", 3, 3, do_benchmark },