new NXAST_RESUBMIT_TABLE action can look up in additional
tables. Tables 128 and above are reserved for use by the
switch itself; please use only tables 0 through 127.
+ - Fragment handling extensions:
+ - New OFPC_FRAG_NX_MATCH fragment handling mode, in which L4
+ fields are made available for matching in fragments with
+ offset 0.
+ - New NXM_NX_IP_FRAG match field for matching IP fragments (usable
+ via "ip_frag" in ovs-ofctl).
+ - New ovs-ofctl "get-frags" and "set-frags" commands to get and set
+ fragment handling policy.
- CAPWAP tunneling now supports an extension to transport a 64-bit key. By
default it remains compatible with the old version and other
standards-based implementations.
* etc.) are protected by RTNL.
*
* Writes to other state (flow table modifications, set miscellaneous datapath
- * parameters such as drop frags, etc.) are protected by genl_mutex. The RTNL
- * lock nests inside genl_mutex.
+ * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
+ * genl_mutex.
*
* Reads are protected by RCU.
*
static LIST_HEAD(dps);
static struct vport *new_vport(const struct vport_parms *);
-static int queue_userspace_packets(struct datapath *, struct sk_buff *,
- const struct dp_upcall_info *);
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *,
+ const struct dp_upcall_info *);
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *,
+ const struct dp_upcall_info *);
/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
if (!OVS_CB(skb)->flow) {
struct sw_flow_key key;
int key_len;
- bool is_frag;
/* Extract flow from 'skb' into 'key'. */
- error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
+ error = flow_extract(skb, p->port_no, &key, &key_len);
if (unlikely(error)) {
kfree_skb(skb);
return;
}
- if (is_frag && dp->drop_frags) {
- consume_skb(skb);
- stats_counter = &stats->n_frags;
- goto out;
- }
-
/* Look up flow. */
flow = flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
if (unlikely(!flow)) {
int dp_upcall(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
{
- struct sk_buff *segs = NULL;
struct dp_stats_percpu *stats;
+ int dp_ifindex;
int err;
if (upcall_info->pid == 0) {
goto err;
}
- forward_ip_summed(skb, true);
-
- /* Break apart GSO packets into their component pieces. Otherwise
- * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
- if (skb_is_gso(skb)) {
- segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
-
- if (IS_ERR(segs)) {
- err = PTR_ERR(segs);
- goto err;
- }
- skb = segs;
+ dp_ifindex = get_dpifindex(dp);
+ if (!dp_ifindex) {
+ err = -ENODEV;
+ goto err;
}
- err = queue_userspace_packets(dp, skb, upcall_info);
- if (segs) {
- struct sk_buff *next;
- /* Free GSO-segments */
- do {
- next = segs->next;
- kfree_skb(segs);
- } while ((segs = next) != NULL);
- }
+ forward_ip_summed(skb, true);
+ if (!skb_is_gso(skb))
+ err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+ else
+ err = queue_gso_packets(dp_ifindex, skb, upcall_info);
if (err)
goto err;
return err;
}
-/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
- * 'upcall_info'. There will be only one packet unless we broke up a GSO
- * packet.
- */
-static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
- const struct dp_upcall_info *upcall_info)
+/* Segments GSO packet 'skb' and queues each resulting segment to userspace
+ * with queue_userspace_packet(), as directed by 'upcall_info'.
+ *
+ * For a UDP GSO packet, the first segment is sent with the caller's flow key
+ * and every later segment with a copy of that key whose fragment type is
+ * OVS_FRAG_TYPE_LATER.
+ *
+ * All segments are freed before returning; 'skb' itself is left for the
+ * caller. Returns 0 on success, otherwise a negative errno value. */
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
+ const struct dp_upcall_info *upcall_info)
{
- int dp_ifindex;
+ struct dp_upcall_info later_info;
+ struct sw_flow_key later_key;
+ struct sk_buff *segs, *nskb;
+ int err;
+ /* Sample the GSO type from the original packet before segmenting.
+ * NOTE(review): the segments are presumably new skbs that need not carry
+ * the original gso_type -- confirm against skb_gso_segment(). */
+ unsigned int gso_type = skb_shinfo(skb)->gso_type;
- dp_ifindex = get_dpifindex(dp);
- if (!dp_ifindex)
- return -ENODEV;
+ segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+ /* Check the segmentation result, not the input packet: 'skb' is a valid
+ * pointer here, so testing it would never catch a failure. */
+ if (IS_ERR(segs))
+ return PTR_ERR(segs);
+ /* NOTE(review): skb_gso_segment() may return NULL when no segmentation
+ * is needed; bail out rather than dereference it below. */
+ if (!segs)
+ return -EINVAL;
+ /* Queue all of the segments. */
+ skb = segs;
do {
- struct ovs_header *upcall;
- struct sk_buff *user_skb; /* to be queued to userspace */
- struct nlattr *nla;
- unsigned int len;
- int err;
-
- err = vlan_deaccel_tag(skb);
- if (unlikely(err))
- return err;
-
- if (nla_attr_size(skb->len) > USHRT_MAX)
- return -EFBIG;
-
- len = sizeof(struct ovs_header);
- len += nla_total_size(skb->len);
- len += nla_total_size(FLOW_BUFSIZE);
- if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
- len += nla_total_size(8);
-
- user_skb = genlmsg_new(len, GFP_ATOMIC);
- if (!user_skb)
- return -ENOMEM;
-
- upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
- 0, upcall_info->cmd);
- upcall->dp_ifindex = dp_ifindex;
-
- nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- flow_to_nlattrs(upcall_info->key, user_skb);
- nla_nest_end(user_skb, nla);
-
- if (upcall_info->userdata)
- nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
- nla_get_u64(upcall_info->userdata));
-
- nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- copy_and_csum_skb(skb, nla_data(nla));
- else
- skb_copy_bits(skb, 0, nla_data(nla), skb->len);
-
- err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
+ err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
if (err)
- return err;
+ break;
+ if (skb == segs && gso_type & SKB_GSO_UDP) {
+ /* The initial flow key extracted by flow_extract() in
+ * this case is for a first fragment, so we need to
+ * properly mark later fragments.
+ */
+ later_key = *upcall_info->key;
+ later_key.ip.tos_frag &= ~OVS_FRAG_TYPE_MASK;
+ later_key.ip.tos_frag |= OVS_FRAG_TYPE_LATER;
+
+ later_info = *upcall_info;
+ later_info.key = &later_key;
+ upcall_info = &later_info;
+ }
} while ((skb = skb->next));
- return 0;
+ /* Free all of the segments. */
+ skb = segs;
+ do {
+ nskb = skb->next;
+ if (err)
+ kfree_skb(skb);
+ else
+ consume_skb(skb);
+ } while ((skb = nskb));
+ return err;
+}
+
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
+ const struct dp_upcall_info *upcall_info)
+{
+ struct ovs_header *upcall;
+ struct sk_buff *user_skb; /* to be queued to userspace */
+ struct nlattr *nla;
+ unsigned int len;
+ int err;
+
+ err = vlan_deaccel_tag(skb);
+ if (unlikely(err))
+ return err;
+
+ if (nla_attr_size(skb->len) > USHRT_MAX)
+ return -EFBIG;
+
+ len = sizeof(struct ovs_header);
+ len += nla_total_size(skb->len);
+ len += nla_total_size(FLOW_BUFSIZE);
+ if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
+ len += nla_total_size(8);
+
+ user_skb = genlmsg_new(len, GFP_ATOMIC);
+ if (!user_skb)
+ return -ENOMEM;
+
+ upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
+ 0, upcall_info->cmd);
+ upcall->dp_ifindex = dp_ifindex;
+
+ nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
+ flow_to_nlattrs(upcall_info->key, user_skb);
+ nla_nest_end(user_skb, nla);
+
+ if (upcall_info->userdata)
+ nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
+ nla_get_u64(upcall_info->userdata));
+
+ nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ copy_and_csum_skb(skb, nla_data(nla));
+ else
+ skb_copy_bits(skb, 0, nla_data(nla), skb->len);
+
+ return genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
}
/* Called with genl_mutex. */
if (ipv4_key->ipv4_tos & INET_ECN_MASK)
return -EINVAL;
+
+ if (ipv4_key->ipv4_frag !=
+ (flow_key->ip.tos_frag & OVS_FRAG_TYPE_MASK))
+ return -EINVAL;
+
break;
case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_TCP):
struct sw_flow *flow;
struct datapath *dp;
struct ethhdr *eth;
- bool is_frag;
int len;
int err;
int key_len;
if (IS_ERR(flow))
goto err_kfree_skb;
- err = flow_extract(packet, -1, &flow->key, &key_len, &is_frag);
+ err = flow_extract(packet, -1, &flow->key, &key_len);
if (err)
goto err_flow_put;
stats->n_flows = flow_tbl_count(table);
- stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
+ stats->n_hit = stats->n_missed = stats->n_lost = 0;
for_each_possible_cpu(i) {
const struct dp_stats_percpu *percpu_stats;
struct dp_stats_percpu local_stats;
local_stats = *percpu_stats;
} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
- stats->n_frags += local_stats.n_frags;
stats->n_hit += local_stats.n_hit;
stats->n_missed += local_stats.n_missed;
stats->n_lost += local_stats.n_lost;
[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
- [OVS_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
};
static struct genl_family dp_datapath_genl_family = {
goto nla_put_failure;
get_dp_stats(dp, nla_data(nla));
- NLA_PUT_U32(skb, OVS_DP_ATTR_IPV4_FRAGS,
- dp->drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO);
-
return genlmsg_end(skb, ovs_header);
nla_put_failure:
static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
- if (a[OVS_DP_ATTR_IPV4_FRAGS]) {
- u32 frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]);
-
- if (frags != OVS_DP_FRAG_ZERO && frags != OVS_DP_FRAG_DROP)
- return -EINVAL;
- }
-
return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
}
return dp ? dp : ERR_PTR(-ENODEV);
}
-/* Called with genl_mutex. */
-static void change_datapath(struct datapath *dp, struct nlattr *a[OVS_DP_ATTR_MAX + 1])
-{
- if (a[OVS_DP_ATTR_IPV4_FRAGS])
- dp->drop_frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]) == OVS_DP_FRAG_DROP;
-}
-
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
if (!dp->table)
goto err_free_dp;
- dp->drop_frags = 0;
dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
if (!dp->stats_percpu) {
err = -ENOMEM;
goto err_destroy_table;
}
- change_datapath(dp, a);
-
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
parms.type = OVS_VPORT_TYPE_INTERNAL;
if (IS_ERR(dp))
return PTR_ERR(dp);
- change_datapath(dp, info->attrs);
-
reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
/**
* struct dp_stats_percpu - per-cpu packet processing statistics for a given
* datapath.
- * @n_frags: Number of IP fragments processed by datapath.
* @n_hit: Number of received packets for which a matching flow was found in
* the flow table.
* @n_missed: Number of received packets that had no matching flow in the flow
* one of the datapath's queues).
*/
struct dp_stats_percpu {
- u64 n_frags;
u64 n_hit;
u64 n_missed;
u64 n_lost;
* @rcu: RCU callback head for deferred destruction.
* @list_node: Element in global 'dps' list.
* @ifobj: Represents /sys/class/net/<devname>/brif. Protected by RTNL.
- * @drop_frags: Drop all IP fragments if nonzero.
* @n_flows: Number of flows currently in flow table.
* @table: Current flow table. Protected by genl_lock and RCU.
* @ports: Map from port number to &struct vport. %OVSP_LOCAL port
struct list_head list_node;
struct kobject ifobj;
- int drop_frags;
-
/* Flow table. */
struct flow_table __rcu *table;
offsetof(struct sw_flow_key, field) + \
FIELD_SIZEOF(struct sw_flow_key, field)
+/**
+ * skip_exthdr - skip any IPv6 extension headers
+ * @skb: skbuff to parse
+ * @start: offset of first extension header
+ * @nexthdrp: Initially, points to the type of the extension header at @start.
+ * This function updates it to point to the extension header at the final
+ * offset.
+ * @tos_frag: Points to the @tos_frag member in a &struct sw_flow_key. This
+ * function sets an appropriate %OVS_FRAG_TYPE_* value.
+ *
+ * This is based on ipv6_skip_exthdr() but adds the updates to *@tos_frag.
+ *
+ * When there is more than one fragment header, this version reports whether
+ * the final fragment header that it examines is a first fragment.
+ *
+ * Returns the final payload offset, or -1 on error.
+ */
+static int skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
+ u8 *tos_frag)
+{
+ u8 nexthdr = *nexthdrp;
+
+ while (ipv6_ext_hdr(nexthdr)) {
+ struct ipv6_opt_hdr _hdr, *hp;
+ int hdrlen;
+
+ if (nexthdr == NEXTHDR_NONE)
+ return -1;
+ hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return -1;
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ __be16 _frag_off, *fp;
+ fp = skb_header_pointer(skb,
+ start+offsetof(struct frag_hdr,
+ frag_off),
+ sizeof(_frag_off),
+ &_frag_off);
+ if (fp == NULL)
+ return -1;
+
+ *tos_frag &= ~OVS_FRAG_TYPE_MASK;
+ if (ntohs(*fp) & ~0x7) {
+ *tos_frag |= OVS_FRAG_TYPE_LATER;
+ break;
+ }
+ *tos_frag |= OVS_FRAG_TYPE_FIRST;
+ hdrlen = 8;
+ } else if (nexthdr == NEXTHDR_AUTH)
+ hdrlen = (hp->hdrlen+2)<<2;
+ else
+ hdrlen = ipv6_optlen(hp);
+
+ nexthdr = hp->nexthdr;
+ start += hdrlen;
+ }
+
+ *nexthdrp = nexthdr;
+ return start;
+}
+
static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
int *key_lenp)
{
payload_ofs = (u8 *)(nh + 1) - skb->data;
key->ip.proto = NEXTHDR_NONE;
- key->ip.tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
+ key->ip.tos_frag = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
ipv6_addr_copy(&key->ipv6.addr.src, &nh->saddr);
ipv6_addr_copy(&key->ipv6.addr.dst, &nh->daddr);
- payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr);
+ payload_ofs = skip_exthdr(skb, payload_ofs, &nexthdr, &key->ip.tos_frag);
if (unlikely(payload_ofs < 0))
return -EINVAL;
* @in_port: port number on which @skb was received.
* @key: output flow key
* @key_lenp: length of output flow key
- * @is_frag: set to 1 if @skb contains an IPv4 fragment, or to 0 if @skb does
- * not contain an IPv4 packet or if it is not a fragment.
*
* The caller must ensure that skb->len >= ETH_HLEN.
*
* For other key->dl_type values it is left untouched.
*/
int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
- int *key_lenp, bool *is_frag)
+ int *key_lenp)
{
int error = 0;
int key_len = SW_FLOW_KEY_OFFSET(eth);
memset(key, 0, sizeof(*key));
key->eth.tun_id = OVS_CB(skb)->tun_id;
key->eth.in_port = in_port;
- *is_frag = false;
skb_reset_mac_header(skb);
/* Network layer. */
if (key->eth.type == htons(ETH_P_IP)) {
struct iphdr *nh;
+ __be16 offset;
key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
nh = ip_hdr(skb);
key->ipv4.addr.src = nh->saddr;
key->ipv4.addr.dst = nh->daddr;
- key->ip.tos = nh->tos & ~INET_ECN_MASK;
+
key->ip.proto = nh->protocol;
+ key->ip.tos_frag = nh->tos & ~INET_ECN_MASK;
- /* Transport layer. */
- if ((nh->frag_off & htons(IP_MF | IP_OFFSET)) ||
- (skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
- *is_frag = true;
+ offset = nh->frag_off & htons(IP_OFFSET);
+ if (offset) {
+ key->ip.tos_frag |= OVS_FRAG_TYPE_LATER;
+ goto out;
+ }
+ if (nh->frag_off & htons(IP_MF) ||
+ skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST;
+ /* Transport layer. */
if (key->ip.proto == IPPROTO_TCP) {
key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
- if (!*is_frag && tcphdr_ok(skb)) {
+ if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv4.tp.src = tcp->source;
key->ipv4.tp.dst = tcp->dest;
}
} else if (key->ip.proto == IPPROTO_UDP) {
key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
- if (!*is_frag && udphdr_ok(skb)) {
+ if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv4.tp.src = udp->source;
key->ipv4.tp.dst = udp->dest;
}
} else if (key->ip.proto == IPPROTO_ICMP) {
key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
- if (!*is_frag && icmphdr_ok(skb)) {
+ if (icmphdr_ok(skb)) {
struct icmphdr *icmp = icmp_hdr(skb);
/* The ICMP type and code fields use the 16-bit
* transport port fields, so we need to store them
goto out;
}
+ if ((key->ip.tos_frag & OVS_FRAG_TYPE_MASK) == OVS_FRAG_TYPE_LATER)
+ goto out;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST;
+
/* Transport layer. */
if (key->ip.proto == NEXTHDR_TCP) {
key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
}
}
+static int parse_tos_frag(struct sw_flow_key *swkey, u8 tos, u8 frag)
+{
+ if (tos & INET_ECN_MASK || frag > OVS_FRAG_TYPE_MAX)
+ return -EINVAL;
+
+ swkey->ip.tos_frag = tos | frag;
+ return 0;
+}
+
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
const u32 ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_TUN_ID] = 8,
*
* [tun_id] [in_port] ethernet [8021q] [ethertype \
* [IPv4 [TCP|UDP|ICMP] | IPv6 [TCP|UDP|ICMPv6 [ND]] | ARP]]
+ *
+ * except that IPv4 or IPv6 terminates the sequence if its @ipv4_frag or
+ * @ipv6_frag member, respectively, equals %OVS_FRAG_TYPE_LATER.
*/
int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
const struct nlattr *attr)
{
int error = 0;
+ enum ovs_frag_type frag_type;
const struct nlattr *nla;
u16 prev_type;
int rem;
goto invalid;
ipv4_key = nla_data(nla);
swkey->ip.proto = ipv4_key->ipv4_proto;
- swkey->ip.tos = ipv4_key->ipv4_tos;
+ if (parse_tos_frag(swkey, ipv4_key->ipv4_tos,
+ ipv4_key->ipv4_frag))
+ goto invalid;
swkey->ipv4.addr.src = ipv4_key->ipv4_src;
swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
- if (swkey->ip.tos & INET_ECN_MASK)
- goto invalid;
break;
case TRANSITION(OVS_KEY_ATTR_ETHERTYPE, OVS_KEY_ATTR_IPV6):
goto invalid;
ipv6_key = nla_data(nla);
swkey->ip.proto = ipv6_key->ipv6_proto;
- swkey->ip.tos = ipv6_key->ipv6_tos;
+ if (parse_tos_frag(swkey, ipv6_key->ipv6_tos,
+ ipv6_key->ipv6_frag))
+ goto invalid;
memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
sizeof(swkey->ipv6.addr.src));
memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
sizeof(swkey->ipv6.addr.dst));
- if (swkey->ip.tos & INET_ECN_MASK)
- goto invalid;
break;
case TRANSITION(OVS_KEY_ATTR_IPV4, OVS_KEY_ATTR_TCP):
if (rem)
goto invalid;
+ frag_type = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK;
switch (prev_type) {
case OVS_KEY_ATTR_UNSPEC:
goto invalid;
goto ok;
case OVS_KEY_ATTR_IPV4:
+ if (frag_type == OVS_FRAG_TYPE_LATER)
+ goto ok;
if (swkey->ip.proto == IPPROTO_TCP ||
swkey->ip.proto == IPPROTO_UDP ||
swkey->ip.proto == IPPROTO_ICMP)
goto ok;
case OVS_KEY_ATTR_IPV6:
+ if (frag_type == OVS_FRAG_TYPE_LATER)
+ goto ok;
if (swkey->ip.proto == IPPROTO_TCP ||
swkey->ip.proto == IPPROTO_UDP ||
swkey->ip.proto == IPPROTO_ICMPV6)
case OVS_KEY_ATTR_ICMPV6:
if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
- swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
+ swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT) ||
+ frag_type == OVS_FRAG_TYPE_LATER)
goto invalid;
goto ok;
case OVS_KEY_ATTR_TCP:
case OVS_KEY_ATTR_UDP:
case OVS_KEY_ATTR_ICMP:
- case OVS_KEY_ATTR_ARP:
case OVS_KEY_ATTR_ND:
+ if (frag_type == OVS_FRAG_TYPE_LATER)
+ goto invalid;
+ goto ok;
+
+ case OVS_KEY_ATTR_ARP:
goto ok;
default:
ipv4_key->ipv4_src = swkey->ipv4.addr.src;
ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
ipv4_key->ipv4_proto = swkey->ip.proto;
- ipv4_key->ipv4_tos = swkey->ip.tos;
+ ipv4_key->ipv4_tos = swkey->ip.tos_frag & ~INET_ECN_MASK;
+ ipv4_key->ipv4_frag = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
struct ovs_key_ipv6 *ipv6_key;
memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
sizeof(ipv6_key->ipv6_dst));
ipv6_key->ipv6_proto = swkey->ip.proto;
- ipv6_key->ipv6_tos = swkey->ip.tos;
+ ipv6_key->ipv6_tos = swkey->ip.tos_frag & ~INET_ECN_MASK;
+ ipv6_key->ipv6_frag = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK;
} else if (swkey->eth.type == htons(ETH_P_ARP)) {
struct ovs_key_arp *arp_key;
memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
}
- if (swkey->eth.type == htons(ETH_P_IP) ||
- swkey->eth.type == htons(ETH_P_IPV6)) {
+ if ((swkey->eth.type == htons(ETH_P_IP) ||
+ swkey->eth.type == htons(ETH_P_IPV6)) &&
+ (swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK) != OVS_FRAG_TYPE_LATER) {
if (swkey->ip.proto == IPPROTO_TCP) {
struct ovs_key_tcp *tcp_key;
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/flex_array.h>
+#include <net/inet_ecn.h>
struct sk_buff;
struct nlattr actions[];
};
+/* Mask for the OVS_FRAG_TYPE_* value in the low 2 bits of ip.tos_frag in
+ * struct sw_flow_key. */
+#define OVS_FRAG_TYPE_MASK INET_ECN_MASK
+
struct sw_flow_key {
struct {
__be64 tun_id; /* Encapsulating tunnel ID. */
} eth;
struct {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
- u8 tos; /* IP ToS (DSCP field, 6 bits). */
+ u8 tos_frag; /* IP ToS DSCP in high 6 bits,
+ * OVS_FRAG_TYPE_* in low 2 bits. */
} ip;
union {
struct {
void flow_put(struct sw_flow *);
int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
- int *key_lenp, bool *is_frag);
+ int *key_lenp);
void flow_used(struct sw_flow *, struct sk_buff *);
u64 flow_used_time(unsigned long flow_jiffies);
struct sw_flow_key flow_key;
struct vport *dst_vport;
struct sk_buff *skb;
- bool is_frag;
int err;
int flow_key_len;
struct sw_flow *flow;
memcpy(skb->data, get_cached_header(cache), cache->len);
err = flow_extract(skb, dst_vport->port_no, &flow_key,
- &flow_key_len, &is_frag);
+ &flow_key_len);
consume_skb(skb);
- if (err || is_frag)
+ if (err)
goto done;
flow = flow_tbl_lookup(rcu_dereference(dst_vport->dp->table),
* not be sent.
* @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
* datapath. Always present in notifications.
- * @OVS_DP_ATTR_IPV4_FRAGS: One of %OVS_DP_FRAG_*. Always present in
- * notifications. May be included in %OVS_DP_NEW or %OVS_DP_SET requests to
- * change the fragment handling policy.
*
* These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_DP_* commands.
OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */
OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */
- OVS_DP_ATTR_IPV4_FRAGS, /* 32-bit enum ovs_datapath_frag */
__OVS_DP_ATTR_MAX
};
#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
-/**
- * enum ovs_datapath_frag - policy for handling received IPv4 fragments.
- * @OVS_DP_FRAG_ZERO: Treat IP fragments as IP protocol 0 and transport ports
- * zero.
- * @OVS_DP_FRAG_DROP: Drop IP fragments. Do not pass them through the flow
- * table or up to userspace.
- */
-enum ovs_datapath_frag {
- OVS_DP_FRAG_UNSPEC,
- OVS_DP_FRAG_ZERO, /* Treat IP fragments as transport port 0. */
- OVS_DP_FRAG_DROP /* Drop IP fragments. */
-};
-
struct ovs_dp_stats {
- __u64 n_frags; /* Number of dropped IP fragments. */
__u64 n_hit; /* Number of flow table matches. */
__u64 n_missed; /* Number of flow table misses. */
__u64 n_lost; /* Number of misses not sent to userspace. */
#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
+/**
+ * enum ovs_frag_type - IPv4 and IPv6 fragment type
+ * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
+ * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
+ * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
+ *
+ * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as the @ipv6_frag in
+ * &struct ovs_key_ipv6.
+ */
+enum ovs_frag_type {
+ OVS_FRAG_TYPE_NONE,
+ OVS_FRAG_TYPE_FIRST,
+ OVS_FRAG_TYPE_LATER,
+ __OVS_FRAG_TYPE_MAX
+};
+
+#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
+
struct ovs_key_ethernet {
__u8 eth_src[6];
__u8 eth_dst[6];
__be32 ipv4_dst;
__u8 ipv4_proto;
__u8 ipv4_tos;
+ __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */
};
struct ovs_key_ipv6 {
__be32 ipv6_dst[4];
__u8 ipv6_proto;
__u8 ipv6_tos;
+ __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */
};
struct ovs_key_tcp {
* Masking: Not maskable. */
#define NXM_NX_ND_TLL NXM_HEADER (0x0001, 25, 6)
+/* IP fragment information.
+ *
+ * Prereqs:
+ * NXM_OF_ETH_TYPE must be either 0x0800 or 0x86dd.
+ *
+ * Format: 8-bit value with one of the values 0, 1, or 3, as described below.
+ *
+ * Masking: Fully maskable.
+ *
+ * This field has three possible values:
+ *
+ * - A packet that is not an IP fragment has value 0.
+ *
+ * - A packet that is an IP fragment with offset 0 (the first fragment) has
+ * bit 0 set and thus value 1.
+ *
+ * - A packet that is an IP fragment with nonzero offset has bits 0 and 1 set
+ * and thus value 3.
+ *
+ * NX_IP_FRAG_ANY and NX_IP_FRAG_LATER are declared to symbolically represent
+ * the meanings of bits 0 and 1.
+ *
+ * The switch may reject matches against values that can never appear.
+ *
+ * It is important to understand how this field interacts with the OpenFlow IP
+ * fragment handling mode:
+ *
+ * - In OFPC_FRAG_DROP mode, the OpenFlow switch drops all IP fragments
+ * before they reach the flow table, so every packet that is available for
+ * matching will have value 0 in this field.
+ *
+ * - Open vSwitch does not implement OFPC_FRAG_REASM mode, but if it did then
+ * IP fragments would be reassembled before they reached the flow table and
+ * again every packet available for matching would always have value 0.
+ *
+ * - In OFPC_FRAG_NORMAL mode, all three values are possible, but OpenFlow
+ * 1.0 says that fragments' transport ports are always 0, even for the
+ * first fragment, so this does not provide much extra information.
+ *
+ * - In OFPC_FRAG_NX_MATCH mode, all three values are possible. For
+ * fragments with offset 0, Open vSwitch makes L4 header information
+ * available.
+ */
+#define NXM_NX_IP_FRAG NXM_HEADER (0x0001, 26, 1)
+#define NXM_NX_IP_FRAG_W NXM_HEADER_W(0x0001, 26, 1)
+
+/* Bits in the value of NXM_NX_IP_FRAG. */
+#define NX_IP_FRAG_ANY (1 << 0) /* Is this a fragment? */
+#define NX_IP_FRAG_LATER (1 << 1) /* Is this a fragment with nonzero offset? */
/* ## --------------------- ## */
/* ## Requests and replies. ## */
OFPC_FRAG_NORMAL = 0, /* No special handling for fragments. */
OFPC_FRAG_DROP = 1, /* Drop fragments. */
OFPC_FRAG_REASM = 2, /* Reassemble (only if OFPC_IP_REASM set). */
+ OFPC_FRAG_NX_MATCH = 3, /* Make first fragments available for matching. */
OFPC_FRAG_MASK = 3
};
void
cls_rule_set_nw_tos(struct cls_rule *rule, uint8_t nw_tos)
{
- rule->wc.wildcards &= ~FWW_NW_TOS;
- rule->flow.nw_tos = nw_tos & IP_DSCP_MASK;
+ rule->wc.tos_frag_mask |= IP_DSCP_MASK;
+ rule->flow.tos_frag &= ~IP_DSCP_MASK;
+ rule->flow.tos_frag |= nw_tos & IP_DSCP_MASK;
+}
+
+void
+cls_rule_set_frag(struct cls_rule *rule, uint8_t frag)
+{
+ rule->wc.tos_frag_mask |= FLOW_FRAG_MASK;
+ rule->flow.tos_frag &= ~FLOW_FRAG_MASK;
+ rule->flow.tos_frag |= frag & FLOW_FRAG_MASK;
+}
+
+void
+cls_rule_set_frag_masked(struct cls_rule *rule, uint8_t frag, uint8_t mask)
+{
+ mask &= FLOW_FRAG_MASK;
+ frag &= mask;
+ rule->wc.tos_frag_mask = (rule->wc.tos_frag_mask & ~FLOW_FRAG_MASK) | mask;
+ rule->flow.tos_frag = (rule->flow.tos_frag & ~FLOW_FRAG_MASK) | frag;
}
void
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
if (rule->priority != OFP_DEFAULT_PRIORITY) {
ds_put_format(s, "priority=%d,", rule->priority);
ETH_ADDR_ARGS(f->arp_tha));
}
}
- if (!(w & FWW_NW_TOS)) {
- ds_put_format(s, "nw_tos=%"PRIu8",", f->nw_tos);
+ if (wc->tos_frag_mask & IP_DSCP_MASK) {
+ ds_put_format(s, "nw_tos=%"PRIu8",", f->tos_frag & IP_DSCP_MASK);
+ }
+ switch (wc->tos_frag_mask & FLOW_FRAG_MASK) {
+ case FLOW_FRAG_ANY | FLOW_FRAG_LATER:
+ ds_put_format(s, "frag=%s,",
+ f->tos_frag & FLOW_FRAG_ANY
+ ? (f->tos_frag & FLOW_FRAG_LATER ? "later" : "first")
+ : (f->tos_frag & FLOW_FRAG_LATER ? "<error>" : "no"));
+ break;
+
+ case FLOW_FRAG_ANY:
+ ds_put_format(s, "frag=%s,",
+ f->tos_frag & FLOW_FRAG_ANY ? "yes" : "no");
+ break;
+
+ case FLOW_FRAG_LATER:
+ ds_put_format(s, "frag=%s,",
+ f->tos_frag & FLOW_FRAG_LATER ? "later" : "not_later");
+ break;
}
if (f->nw_proto == IPPROTO_ICMP) {
if (!(w & FWW_TP_SRC)) {
const flow_wildcards_t wc = wildcards->wildcards;
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
for (i = 0; i < FLOW_N_REGS; i++) {
if ((a->regs[i] ^ b->regs[i]) & wildcards->reg_masks[i]) {
&& (wc & FWW_ETH_MCAST
|| !((a->dl_dst[0] ^ b->dl_dst[0]) & 0x01))
&& (wc & FWW_NW_PROTO || a->nw_proto == b->nw_proto)
- && (wc & FWW_NW_TOS || a->nw_tos == b->nw_tos)
+ && !((a->tos_frag ^ b->tos_frag) & wildcards->tos_frag_mask)
&& (wc & FWW_ARP_SHA || eth_addr_equals(a->arp_sha, b->arp_sha))
&& (wc & FWW_ARP_THA || eth_addr_equals(a->arp_tha, b->arp_tha))
&& ipv6_equal_except(&a->ipv6_src, &b->ipv6_src,
void cls_rule_set_nw_dst(struct cls_rule *, ovs_be32);
bool cls_rule_set_nw_dst_masked(struct cls_rule *, ovs_be32 ip, ovs_be32 mask);
void cls_rule_set_nw_tos(struct cls_rule *, uint8_t);
+void cls_rule_set_frag(struct cls_rule *, uint8_t frag);
+void cls_rule_set_frag_masked(struct cls_rule *, uint8_t frag, uint8_t mask);
void cls_rule_set_icmp_type(struct cls_rule *, uint8_t);
void cls_rule_set_icmp_code(struct cls_rule *, uint8_t);
void cls_rule_set_arp_sha(struct cls_rule *, const uint8_t[6]);
const char *name; /* OVS_DP_ATTR_NAME. */
const uint32_t *upcall_pid; /* OVS_DP_UPCALL_PID. */
struct ovs_dp_stats stats; /* OVS_DP_ATTR_STATS. */
- enum ovs_datapath_frag ipv4_frags; /* OVS_DP_ATTR_IPV4_FRAGS. */
};
static void dpif_linux_dp_init(struct dpif_linux_dp *);
error = dpif_linux_dp_get(dpif_, &dp, &buf);
if (!error) {
- stats->n_frags = dp.stats.n_frags;
stats->n_hit = dp.stats.n_hit;
stats->n_missed = dp.stats.n_missed;
stats->n_lost = dp.stats.n_lost;
return error;
}
-static int
-dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp)
-{
- struct dpif_linux_dp dp;
- struct ofpbuf *buf;
- int error;
-
- error = dpif_linux_dp_get(dpif_, &dp, &buf);
- if (!error) {
- *drop_fragsp = dp.ipv4_frags == OVS_DP_FRAG_DROP;
- ofpbuf_delete(buf);
- }
- return error;
-}
-
-static int
-dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags)
-{
- struct dpif_linux *dpif = dpif_linux_cast(dpif_);
- struct dpif_linux_dp dp;
-
- dpif_linux_dp_init(&dp);
- dp.cmd = OVS_DP_CMD_SET;
- dp.dp_ifindex = dpif->dp_ifindex;
- dp.ipv4_frags = drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO;
- return dpif_linux_dp_transact(&dp, NULL, NULL);
-}
-
static int
dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,
uint16_t *port_nop)
dpif_linux_run,
dpif_linux_wait,
dpif_linux_get_stats,
- dpif_linux_get_drop_frags,
- dpif_linux_set_drop_frags,
dpif_linux_port_add,
dpif_linux_port_del,
dpif_linux_port_query_by_number,
.min_len = sizeof(struct ovs_dp_stats),
.max_len = sizeof(struct ovs_dp_stats),
.optional = true },
- [OVS_DP_ATTR_IPV4_FRAGS] = { .type = NL_A_U32, .optional = true },
};
struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
memcpy(&dp->stats, nl_attr_get(a[OVS_DP_ATTR_STATS]),
sizeof dp->stats);
}
- if (a[OVS_DP_ATTR_IPV4_FRAGS]) {
- dp->ipv4_frags = nl_attr_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]);
- }
return 0;
}
}
/* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
-
- if (dp->ipv4_frags) {
- nl_msg_put_u32(buf, OVS_DP_ATTR_IPV4_FRAGS, dp->ipv4_frags);
- }
}
/* Clears 'dp' to "empty" values. */
int open_cnt;
bool destroyed;
- bool drop_frags; /* Drop all IP fragments, if true. */
struct dp_netdev_queue queues[N_QUEUES];
struct hmap flow_table; /* Flow table. */
/* Statistics. */
- long long int n_frags; /* Number of dropped IP fragments. */
long long int n_hit; /* Number of flow table matches. */
long long int n_missed; /* Number of flow table misses. */
long long int n_lost; /* Number of misses not passed to client. */
dp->class = class;
dp->name = xstrdup(name);
dp->open_cnt = 0;
- dp->drop_frags = false;
for (i = 0; i < N_QUEUES; i++) {
dp->queues[i].head = dp->queues[i].tail = 0;
}
{
struct dp_netdev *dp = get_dp_netdev(dpif);
stats->n_flows = hmap_count(&dp->flow_table);
- stats->n_frags = dp->n_frags;
stats->n_hit = dp->n_hit;
stats->n_missed = dp->n_missed;
stats->n_lost = dp->n_lost;
return 0;
}
-static int
-dpif_netdev_get_drop_frags(const struct dpif *dpif, bool *drop_fragsp)
-{
- struct dp_netdev *dp = get_dp_netdev(dpif);
- *drop_fragsp = dp->drop_frags;
- return 0;
-}
-
-static int
-dpif_netdev_set_drop_frags(struct dpif *dpif, bool drop_frags)
-{
- struct dp_netdev *dp = get_dp_netdev(dpif);
- dp->drop_frags = drop_frags;
- return 0;
-}
-
static int
do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
uint16_t port_no)
if (packet->size < ETH_HEADER_LEN) {
return;
}
- if (flow_extract(packet, 0, port->port_no, &key) && dp->drop_frags) {
- dp->n_frags++;
- return;
- }
-
+ flow_extract(packet, 0, port->port_no, &key);
flow = dp_netdev_lookup_flow(dp, &key);
if (flow) {
dp_netdev_flow_used(flow, &key, packet);
dpif_netdev_run,
dpif_netdev_wait,
dpif_netdev_get_stats,
- dpif_netdev_get_drop_frags,
- dpif_netdev_set_drop_frags,
dpif_netdev_port_add,
dpif_netdev_port_del,
dpif_netdev_port_query_by_number,
/* Retrieves statistics for 'dpif' into 'stats'. */
int (*get_stats)(const struct dpif *dpif, struct dpif_dp_stats *stats);
- /* Retrieves 'dpif''s current treatment of IP fragments into '*drop_frags':
- * true indicates that fragments are dropped, false indicates that
- * fragments are treated in the same way as other IP packets (except that
- * the L4 header cannot be read). */
- int (*get_drop_frags)(const struct dpif *dpif, bool *drop_frags);
-
- /* Changes 'dpif''s treatment of IP fragments to 'drop_frags', whose
- * meaning is the same as for the get_drop_frags member function. */
- int (*set_drop_frags)(struct dpif *dpif, bool drop_frags);
-
/* Adds 'netdev' as a new port in 'dpif'. If successful, sets '*port_no'
* to the new port's port number. */
int (*port_add)(struct dpif *dpif, struct netdev *netdev,
return error;
}
-/* Retrieves the current IP fragment handling policy for 'dpif' into
- * '*drop_frags': true indicates that fragments are dropped, false indicates
- * that fragments are treated in the same way as other IP packets (except that
- * the L4 header cannot be read). Returns 0 if successful, otherwise a
- * positive errno value. */
-int
-dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags)
-{
- int error = dpif->dpif_class->get_drop_frags(dpif, drop_frags);
- if (error) {
- *drop_frags = false;
- }
- log_operation(dpif, "get_drop_frags", error);
- return error;
-}
-
-/* Changes 'dpif''s treatment of IP fragments to 'drop_frags', whose meaning is
- * the same as for the get_drop_frags member function. Returns 0 if
- * successful, otherwise a positive errno value. */
-int
-dpif_set_drop_frags(struct dpif *dpif, bool drop_frags)
-{
- int error = dpif->dpif_class->set_drop_frags(dpif, drop_frags);
- log_operation(dpif, "set_drop_frags", error);
- return error;
-}
-
/* Attempts to add 'netdev' as a port on 'dpif'. If successful, returns 0 and
* sets '*port_nop' to the new port's port number (if 'port_nop' is non-null).
* On failure, returns a positive errno value and sets '*port_nop' to
/* Statisticss for a dpif as a whole. */
struct dpif_dp_stats {
- uint64_t n_frags; /* Number of dropped IP fragments. */
uint64_t n_hit; /* Number of flow table matches. */
uint64_t n_missed; /* Number of flow table misses. */
uint64_t n_lost; /* Number of misses not sent to userspace. */
};
int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *);
-int dpif_get_drop_frags(const struct dpif *, bool *drop_frags);
-int dpif_set_drop_frags(struct dpif *, bool drop_frags);
\f
/* Port operations. */
flow->ipv6_dst = nh->ip6_dst;
tc_flow = get_unaligned_be32(&nh->ip6_flow);
- flow->nw_tos = (ntohl(tc_flow) >> 4) & IP_DSCP_MASK;
+ flow->tos_frag = (ntohl(tc_flow) >> 4) & IP_DSCP_MASK;
flow->nw_proto = IPPROTO_NONE;
while (1) {
}
/* We only process the first fragment. */
+ flow->tos_frag &= ~FLOW_FRAG_MASK;
+ flow->tos_frag |= FLOW_FRAG_ANY;
if ((frag_hdr->ip6f_offlg & IP6F_OFF_MASK) != htons(0)) {
+ flow->tos_frag |= FLOW_FRAG_LATER;
nexthdr = IPPROTO_FRAGMENT;
break;
}
* - packet->l7 to just past the TCP or UDP or ICMP header, if one is
* present and has a correct length, and otherwise NULL.
*/
-int
+void
flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port,
struct flow *flow)
{
struct ofpbuf b = *packet;
struct eth_header *eth;
- int retval = 0;
COVERAGE_INC(flow_extract);
packet->l7 = NULL;
if (b.size < sizeof *eth) {
- return 0;
+ return;
}
/* Link layer. */
if (flow->dl_type == htons(ETH_TYPE_IP)) {
const struct ip_header *nh = pull_ip(&b);
if (nh) {
+ packet->l4 = b.data;
+
flow->nw_src = get_unaligned_be32(&nh->ip_src);
flow->nw_dst = get_unaligned_be32(&nh->ip_dst);
- flow->nw_tos = nh->ip_tos & IP_DSCP_MASK;
flow->nw_proto = nh->ip_proto;
- packet->l4 = b.data;
- if (!IP_IS_FRAGMENT(nh->ip_frag_off)) {
+
+ flow->tos_frag = nh->ip_tos & IP_DSCP_MASK;
+ if (IP_IS_FRAGMENT(nh->ip_frag_off)) {
+ flow->tos_frag |= FLOW_FRAG_ANY;
+ if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
+ flow->tos_frag |= FLOW_FRAG_LATER;
+ }
+ }
+
+ if (!(nh->ip_frag_off & htons(IP_FRAG_OFF_MASK))) {
if (flow->nw_proto == IPPROTO_TCP) {
parse_tcp(packet, &b, flow);
} else if (flow->nw_proto == IPPROTO_UDP) {
packet->l7 = b.data;
}
}
- } else {
- retval = 1;
}
}
} else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
-
- retval = parse_ipv6(&b, flow);
- if (retval) {
- return 0;
+ if (parse_ipv6(&b, flow)) {
+ return;
}
packet->l4 = b.data;
}
}
}
-
- return retval;
}
/* For every bit of a field that is wildcarded in 'wildcards', sets the
const flow_wildcards_t wc = wildcards->wildcards;
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
for (i = 0; i < FLOW_N_REGS; i++) {
flow->regs[i] &= wildcards->reg_masks[i];
if (wc & FWW_NW_PROTO) {
flow->nw_proto = 0;
}
- if (wc & FWW_NW_TOS) {
- flow->nw_tos = 0;
- }
+ flow->tos_frag &= wildcards->tos_frag_mask;
if (wc & FWW_ARP_SHA) {
memset(flow->arp_sha, 0, sizeof flow->arp_sha);
}
void
flow_format(struct ds *ds, const struct flow *flow)
{
+ int frag;
+
ds_put_format(ds, "tunnel%#"PRIx64":in_port%04"PRIx16":tci(",
ntohll(flow->tun_id), flow->in_port);
if (flow->vlan_tci) {
if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
ds_put_format(ds, " proto%"PRIu8" tos%"PRIu8" ipv6",
- flow->nw_proto, flow->nw_tos);
+ flow->nw_proto, flow->tos_frag & IP_DSCP_MASK);
print_ipv6_addr(ds, &flow->ipv6_src);
ds_put_cstr(ds, "->");
print_ipv6_addr(ds, &flow->ipv6_dst);
" tos%"PRIu8
" ip"IP_FMT"->"IP_FMT,
flow->nw_proto,
- flow->nw_tos,
+ flow->tos_frag & IP_DSCP_MASK,
IP_ARGS(&flow->nw_src),
IP_ARGS(&flow->nw_dst));
}
+ frag = flow->tos_frag & FLOW_FRAG_MASK;
+ if (frag) {
+ ds_put_format(ds, " frag(%s)",
+ frag == FLOW_FRAG_ANY ? "first"
+ : frag == (FLOW_FRAG_ANY | FLOW_FRAG_LATER) ? "later"
+ : "<error>");
+ }
if (flow->tp_src || flow->tp_dst) {
ds_put_format(ds, " port%"PRIu16"->%"PRIu16,
ntohs(flow->tp_src), ntohs(flow->tp_dst));
void
flow_wildcards_init_catchall(struct flow_wildcards *wc)
{
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
+
wc->wildcards = FWW_ALL;
wc->tun_id_mask = htonll(0);
wc->nw_src_mask = htonl(0);
wc->ipv6_dst_mask = in6addr_any;
memset(wc->reg_masks, 0, sizeof wc->reg_masks);
wc->vlan_tci_mask = htons(0);
+ wc->tos_frag_mask = 0;
memset(wc->zeros, 0, sizeof wc->zeros);
}
void
flow_wildcards_init_exact(struct flow_wildcards *wc)
{
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
+
wc->wildcards = 0;
wc->tun_id_mask = htonll(UINT64_MAX);
wc->nw_src_mask = htonl(UINT32_MAX);
wc->ipv6_dst_mask = in6addr_exact;
memset(wc->reg_masks, 0xff, sizeof wc->reg_masks);
wc->vlan_tci_mask = htons(UINT16_MAX);
+ wc->tos_frag_mask = UINT8_MAX;
memset(wc->zeros, 0, sizeof wc->zeros);
}
{
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
if (wc->wildcards
|| wc->tun_id_mask != htonll(UINT64_MAX)
|| wc->nw_dst_mask != htonl(UINT32_MAX)
|| wc->vlan_tci_mask != htons(UINT16_MAX)
|| !ipv6_mask_is_exact(&wc->ipv6_src_mask)
- || !ipv6_mask_is_exact(&wc->ipv6_dst_mask)) {
+ || !ipv6_mask_is_exact(&wc->ipv6_dst_mask)
+ || wc->tos_frag_mask != UINT8_MAX) {
return false;
}
{
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
if (wc->wildcards != FWW_ALL
|| wc->tun_id_mask != htonll(0)
|| wc->nw_dst_mask != htonl(0)
|| wc->vlan_tci_mask != htons(0)
|| !ipv6_mask_is_any(&wc->ipv6_src_mask)
- || !ipv6_mask_is_any(&wc->ipv6_dst_mask)) {
+ || !ipv6_mask_is_any(&wc->ipv6_dst_mask)
+ || wc->tos_frag_mask != 0) {
return false;
}
b->l3 = ip = ofpbuf_put_zeros(b, sizeof *ip);
ip->ip_ihl_ver = IP_IHL_VER(5, 4);
- ip->ip_tos = flow->nw_tos;
+ ip->ip_tos = flow->tos_frag & IP_DSCP_MASK;
ip->ip_proto = flow->nw_proto;
ip->ip_src = flow->nw_src;
ip->ip_dst = flow->nw_dst;
- if (flow->nw_proto == IPPROTO_TCP) {
- struct tcp_header *tcp;
-
- b->l4 = tcp = ofpbuf_put_zeros(b, sizeof *tcp);
- tcp->tcp_src = flow->tp_src;
- tcp->tcp_dst = flow->tp_dst;
- } else if (flow->nw_proto == IPPROTO_UDP) {
- struct udp_header *udp;
-
- b->l4 = udp = ofpbuf_put_zeros(b, sizeof *udp);
- udp->udp_src = flow->tp_src;
- udp->udp_dst = flow->tp_dst;
- } else if (flow->nw_proto == IPPROTO_ICMP) {
- struct icmp_header *icmp;
-
- b->l4 = icmp = ofpbuf_put_zeros(b, sizeof *icmp);
- icmp->icmp_type = ntohs(flow->tp_src);
- icmp->icmp_code = ntohs(flow->tp_dst);
+ if (flow->tos_frag & FLOW_FRAG_ANY) {
+ ip->ip_frag_off |= htons(IP_MORE_FRAGMENTS);
+ if (flow->tos_frag & FLOW_FRAG_LATER) {
+ ip->ip_frag_off |= htons(100);
+ }
+ }
+ if (!(flow->tos_frag & FLOW_FRAG_ANY)
+ || !(flow->tos_frag & FLOW_FRAG_LATER)) {
+ if (flow->nw_proto == IPPROTO_TCP) {
+ struct tcp_header *tcp;
+
+ b->l4 = tcp = ofpbuf_put_zeros(b, sizeof *tcp);
+ tcp->tcp_src = flow->tp_src;
+ tcp->tcp_dst = flow->tp_dst;
+ } else if (flow->nw_proto == IPPROTO_UDP) {
+ struct udp_header *udp;
+
+ b->l4 = udp = ofpbuf_put_zeros(b, sizeof *udp);
+ udp->udp_src = flow->tp_src;
+ udp->udp_dst = flow->tp_dst;
+ } else if (flow->nw_proto == IPPROTO_ICMP) {
+ struct icmp_header *icmp;
+
+ b->l4 = icmp = ofpbuf_put_zeros(b, sizeof *icmp);
+ icmp->icmp_type = ntohs(flow->tp_src);
+ icmp->icmp_code = ntohs(flow->tp_dst);
+ }
}
} else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
/* XXX */
/* This sequence number should be incremented whenever anything involving flows
* or the wildcarding of flows changes. This will cause build assertion
* failures in places which likely need to be updated. */
-#define FLOW_WC_SEQ 2
+#define FLOW_WC_SEQ 3
#define FLOW_N_REGS 5
BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
* type, that is, pure 802.2 frames. */
#define FLOW_DL_TYPE_NONE 0x5ff
+/* Fragment bits, used for IPv4 and IPv6, always zero for non-IP flows. */
+#define FLOW_FRAG_ANY (1 << 0) /* Set for any IP fragment. */
+#define FLOW_FRAG_LATER (1 << 1) /* Set for IP fragment with nonzero offset. */
+#define FLOW_FRAG_MASK (FLOW_FRAG_ANY | FLOW_FRAG_LATER)
+
+BUILD_ASSERT_DECL(FLOW_FRAG_ANY == NX_IP_FRAG_ANY);
+BUILD_ASSERT_DECL(FLOW_FRAG_LATER == NX_IP_FRAG_LATER);
+
struct flow {
ovs_be64 tun_id; /* Encapsulating tunnel ID. */
uint32_t regs[FLOW_N_REGS]; /* Registers. */
uint8_t dl_src[6]; /* Ethernet source address. */
uint8_t dl_dst[6]; /* Ethernet destination address. */
uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */
- uint8_t nw_tos; /* IP ToS (DSCP field, 6 bits). */
+ uint8_t tos_frag; /* IP ToS in top bits, FLOW_FRAG_* in low. */
uint8_t arp_sha[6]; /* ARP/ND source hardware address. */
uint8_t arp_tha[6]; /* ARP/ND target hardware address. */
struct in6_addr ipv6_src; /* IPv6 source address. */
BUILD_ASSERT_DECL(sizeof(struct flow) == FLOW_SIG_SIZE + FLOW_PAD_SIZE);
/* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
-BUILD_ASSERT_DECL(FLOW_SIG_SIZE == 120 && FLOW_WC_SEQ == 2);
+BUILD_ASSERT_DECL(FLOW_SIG_SIZE == 120 && FLOW_WC_SEQ == 3);
-int flow_extract(struct ofpbuf *, ovs_be64 tun_id, uint16_t in_port,
- struct flow *);
+void flow_extract(struct ofpbuf *, ovs_be64 tun_id, uint16_t in_port,
+ struct flow *);
void flow_zero_wildcards(struct flow *, const struct flow_wildcards *);
char *flow_to_string(const struct flow *);
#define FWW_NW_PROTO ((OVS_FORCE flow_wildcards_t) (1 << 5))
#define FWW_TP_SRC ((OVS_FORCE flow_wildcards_t) (1 << 6))
#define FWW_TP_DST ((OVS_FORCE flow_wildcards_t) (1 << 7))
-/* Same meanings as corresponding OFPFW_* bits, but differ in value. */
-#define FWW_NW_TOS ((OVS_FORCE flow_wildcards_t) (1 << 1))
/* No corresponding OFPFW_* bits. */
-#define FWW_ETH_MCAST ((OVS_FORCE flow_wildcards_t) (1 << 8))
+#define FWW_ETH_MCAST ((OVS_FORCE flow_wildcards_t) (1 << 1))
/* multicast bit only */
-#define FWW_ARP_SHA ((OVS_FORCE flow_wildcards_t) (1 << 9))
-#define FWW_ARP_THA ((OVS_FORCE flow_wildcards_t) (1 << 10))
-#define FWW_ND_TARGET ((OVS_FORCE flow_wildcards_t) (1 << 11))
-#define FWW_ALL ((OVS_FORCE flow_wildcards_t) (((1 << 12)) - 1))
+#define FWW_ARP_SHA ((OVS_FORCE flow_wildcards_t) (1 << 8))
+#define FWW_ARP_THA ((OVS_FORCE flow_wildcards_t) (1 << 9))
+#define FWW_ND_TARGET ((OVS_FORCE flow_wildcards_t) (1 << 10))
+#define FWW_ALL ((OVS_FORCE flow_wildcards_t) (((1 << 11)) - 1))
/* Remember to update FLOW_WC_SEQ when adding or removing FWW_*. */
-BUILD_ASSERT_DECL(FWW_ALL == ((1 << 12) - 1) && FLOW_WC_SEQ == 2);
+BUILD_ASSERT_DECL(FWW_ALL == ((1 << 11) - 1) && FLOW_WC_SEQ == 3);
/* Information on wildcards for a flow, as a supplement to "struct flow".
*
struct in6_addr ipv6_src_mask; /* 1-bit in each signficant ipv6_src bit. */
struct in6_addr ipv6_dst_mask; /* 1-bit in each signficant ipv6_dst bit. */
ovs_be16 vlan_tci_mask; /* 1-bit in each significant vlan_tci bit. */
- uint8_t zeros[6]; /* Padding field set to zero. */
+ uint8_t tos_frag_mask; /* 1-bit in each significant tos_frag bit. */
+ uint8_t zeros[5]; /* Padding field set to zero. */
};
/* Remember to update FLOW_WC_SEQ when updating struct flow_wildcards. */
-BUILD_ASSERT_DECL(sizeof(struct flow_wildcards) == 80 && FLOW_WC_SEQ == 2);
+BUILD_ASSERT_DECL(sizeof(struct flow_wildcards) == 80 && FLOW_WC_SEQ == 3);
void flow_wildcards_init_catchall(struct flow_wildcards *);
void flow_wildcards_init_exact(struct flow_wildcards *);
}, {
MFF_IP_TOS, "nw_tos", NULL,
MF_FIELD_SIZES(u8),
- MFM_NONE, FWW_NW_TOS,
+ MFM_NONE, 0,
MFS_DECIMAL,
MFP_IP_ANY,
NXM_OF_IP_TOS,
+ }, {
+ MFF_IP_FRAG, "ip_frag", NULL,
+ 1, 2,
+ MFM_FULLY, 0,
+ MFS_FRAG,
+ MFP_IP_ANY,
+ NXM_NX_IP_FRAG,
},
{
case MFF_ETH_SRC:
case MFF_ETH_TYPE:
case MFF_IP_PROTO:
- case MFF_IP_TOS:
case MFF_ARP_OP:
case MFF_ARP_SHA:
case MFF_ARP_THA:
case MFF_IPV6_DST:
return ipv6_mask_is_any(&wc->ipv6_dst_mask);
+ case MFF_IP_TOS:
+ return !(wc->tos_frag_mask & IP_DSCP_MASK);
+ case MFF_IP_FRAG:
+ return !(wc->tos_frag_mask & FLOW_FRAG_MASK);
+
case MFF_ARP_SPA:
return !wc->nw_src_mask;
case MFF_ARP_TPA:
case MFF_ETH_SRC:
case MFF_ETH_TYPE:
case MFF_IP_PROTO:
- case MFF_IP_TOS:
case MFF_ARP_OP:
case MFF_ARP_SHA:
case MFF_ARP_THA:
mask->ipv6 = wc->ipv6_dst_mask;
break;
+ case MFF_IP_TOS:
+ mask->u8 = wc->tos_frag_mask & IP_DSCP_MASK;
+ break;
+ case MFF_IP_FRAG:
+ mask->u8 = wc->tos_frag_mask & FLOW_FRAG_MASK;
+ break;
+
case MFF_ARP_SPA:
mask->be32 = wc->nw_src_mask;
break;
return true;
case MFF_IP_TOS:
- return !(value->u8 & 0x03);
+ return !(value->u8 & ~IP_DSCP_MASK);
+ case MFF_IP_FRAG:
+ return !(value->u8 & ~FLOW_FRAG_MASK);
case MFF_ARP_OP:
return !(value->be16 & htons(0xff00));
break;
case MFF_IP_TOS:
- value->u8 = flow->nw_tos;
+ value->u8 = flow->tos_frag & IP_DSCP_MASK;
+ break;
+
+ case MFF_IP_FRAG:
+ value->u8 = flow->tos_frag & FLOW_FRAG_MASK;
break;
case MFF_ARP_OP:
cls_rule_set_nw_tos(rule, value->u8);
break;
+ case MFF_IP_FRAG:
+ cls_rule_set_frag(rule, value->u8);
+ break;
+
case MFF_ARP_OP:
cls_rule_set_nw_proto(rule, ntohs(value->be16));
break;
break;
case MFF_IP_TOS:
- rule->wc.wildcards |= FWW_NW_TOS;
- rule->flow.nw_tos = 0;
+        /* A 1-bit in 'tos_frag_mask' marks a significant bit, so wildcarding
+         * the DSCP bits means clearing them from the mask (and from the
+         * flow, to keep the wildcarded bits zero). */
+        rule->wc.tos_frag_mask &= ~IP_DSCP_MASK;
+        rule->flow.tos_frag &= ~IP_DSCP_MASK;
+        break;
+
+    case MFF_IP_FRAG:
+        /* Same reasoning as for the DSCP bits: clear, don't set, the
+         * fragment bits of the mask to wildcard them. */
+        rule->wc.tos_frag_mask &= ~FLOW_FRAG_MASK;
+        rule->flow.tos_frag &= ~FLOW_FRAG_MASK;
break;
case MFF_ARP_OP:
cls_rule_set_ipv6_dst_masked(rule, &value->ipv6, &mask->ipv6);
break;
+ case MFF_IP_FRAG:
+ cls_rule_set_frag_masked(rule, value->u8, mask->u8);
+ break;
+
case MFF_ARP_SPA:
cls_rule_set_nw_src_masked(rule, value->be32, mask->be32);
break;
value->u8 &= ~0x03;
break;
+ case MFF_IP_FRAG:
+ value->u8 &= FLOW_FRAG_MASK;
+ break;
+
case MFF_ARP_OP:
value->be16 &= htons(0xff);
break;
}
}
+/* One named way to match on the FLOW_FRAG_* bits: 'mask' selects which of the
+ * bits are examined and 'value' gives the bits they must equal. */
+struct frag_handling {
+ const char *name;
+ uint8_t mask;
+ uint8_t value;
+};
+
+/* Table of fragment match names.  Some names ("no", "later") appear twice
+ * with different masks: a lookup by name that walks the table in order (as
+ * mf_from_frag_string() does) finds the entry listed first, whereas a lookup
+ * by value and mask (as mf_format_frag_string() does) can reach every
+ * entry. */
+static const struct frag_handling all_frags[] = {
+#define A FLOW_FRAG_ANY
+#define L FLOW_FRAG_LATER
+ /* name mask value */
+
+ { "no", A|L, 0 },
+ { "first", A|L, A },
+ { "later", A|L, A|L },
+
+ { "no", A, 0 },
+ { "yes", A, A },
+
+ { "not_later", L, 0 },
+ { "later", L, L },
+#undef A
+#undef L
+};
+
+/* Parses 's', one of the fragment type names in 'all_frags' (compared
+ * case-insensitively), into '*valuep' and '*maskp'.  When a name is listed
+ * more than once in 'all_frags', the first entry wins.
+ *
+ * Returns NULL if successful.  Otherwise returns an error message built by
+ * xasprintf() and leaves '*valuep' and '*maskp' untouched. */
+static char *
+mf_from_frag_string(const char *s, uint8_t *valuep, uint8_t *maskp)
+{
+    const struct frag_handling *h;
+
+    for (h = all_frags; h < &all_frags[ARRAY_SIZE(all_frags)]; h++) {
+        if (!strcasecmp(s, h->name)) {
+            /* We force the upper bits of the mask on to make mf_parse_value()
+             * happy (otherwise it will never think it's an exact match.) */
+            *maskp = h->mask | ~FLOW_FRAG_MASK;
+            *valuep = h->value;
+            return NULL;
+        }
+    }
+
+    /* Keep this list in sync with the names in 'all_frags'. */
+    return xasprintf("%s: unknown fragment type (valid types are \"no\", "
+                     "\"yes\", \"first\", \"later\", \"not_later\")", s);
+}
+
/* Parses 's', a string value for field 'mf', into 'value' and 'mask'. Returns
* NULL if successful, otherwise a malloc()'d string describing the error. */
char *
case MFS_OFP_PORT:
return mf_from_ofp_port_string(mf, s, &value->be16, &mask->be16);
+
+ case MFS_FRAG:
+ return mf_from_frag_string(s, &value->u8, &mask->u8);
}
NOT_REACHED();
}
}
}
+/* Appends to 's' the name of the 'all_frags' entry whose value and mask equal
+ * '*valuep' and '*maskp', after dropping value bits outside the mask and mask
+ * bits outside FLOW_FRAG_MASK.  Appends "<error>" if no entry matches.
+ *
+ * 'maskp' may be NULL, in which case every bit is treated as significant
+ * (an exact match).  This matters because the caller derives 'maskp' from a
+ * mask that is itself allowed to be NULL for exact matches. */
+static void
+mf_format_frag_string(const uint8_t *valuep, const uint8_t *maskp,
+                      struct ds *s)
+{
+    const struct frag_handling *h;
+    uint8_t value = *valuep;
+    uint8_t mask = maskp ? *maskp : UINT8_MAX;
+
+    value &= mask;
+    mask &= FLOW_FRAG_MASK;
+
+    for (h = all_frags; h < &all_frags[ARRAY_SIZE(all_frags)]; h++) {
+        if (value == h->value && mask == h->mask) {
+            ds_put_cstr(s, h->name);
+            return;
+        }
+    }
+    ds_put_cstr(s, "<error>");
+}
+
/* Appends to 's' a string representation of field 'mf' whose value is in
* 'value' and 'mask'. 'mask' may be NULL to indicate an exact match. */
void
print_ipv6_masked(s, &value->ipv6, mask ? &mask->ipv6 : NULL);
break;
+ case MFS_FRAG:
+ mf_format_frag_string(&value->u8, &mask->u8, s);
+ break;
+
default:
NOT_REACHED();
}
MFF_IP_PROTO, /* u8 (used for IPv4 or IPv6) */
MFF_IP_TOS, /* u8 (used for IPv4 or IPv6) */
+ MFF_IP_FRAG, /* u8 (used for IPv4 or IPv6) */
MFF_ARP_OP, /* be16 */
MFF_ARP_SPA, /* be32 */
MFS_ETHERNET,
MFS_IPV4,
MFS_IPV6,
- MFS_OFP_PORT /* An OpenFlow port number or name. */
+ MFS_OFP_PORT, /* An OpenFlow port number or name. */
+ MFS_FRAG /* no, yes, first, later, not_later */
};
struct mf_field {
ofpbuf_put(b, &value, sizeof value);
}
+/* Appends a masked 8-bit NXM entry for 'header' to 'b'.  An all-zeros 'mask'
+ * emits nothing, an all-ones 'mask' is handed to nxm_put_8() unmasked, and
+ * any other 'mask' emits the wildcarded form of 'header' followed by 'value'
+ * and then 'mask'. */
+static void
+nxm_put_8m(struct ofpbuf *b, uint32_t header, uint8_t value, uint8_t mask)
+{
+    if (mask == UINT8_MAX) {
+        nxm_put_8(b, header, value);
+    } else if (mask != 0) {
+        nxm_put_header(b, NXM_MAKE_WILD_HEADER(header));
+        ofpbuf_put(b, &value, sizeof value);
+        ofpbuf_put(b, &mask, sizeof mask);
+    }
+}
+
static void
nxm_put_16(struct ofpbuf *b, uint32_t header, ovs_be16 value)
{
}
}
+/* Appends to 'b' the NXM entries for the 'tos_frag' member of 'cr': an
+ * NXM_OF_IP_TOS entry if any DSCP bit is significant, and an NXM_NX_IP_FRAG
+ * entry (masked only when needed) if any fragment bit is significant. */
+static void
+nxm_put_tos_frag(struct ofpbuf *b, const struct cls_rule *cr)
+{
+    uint8_t frag_value = cr->flow.tos_frag & FLOW_FRAG_MASK;
+    uint8_t frag_mask = cr->wc.tos_frag_mask & FLOW_FRAG_MASK;
+
+    if (cr->wc.tos_frag_mask & IP_DSCP_MASK) {
+        nxm_put_8(b, NXM_OF_IP_TOS, cr->flow.tos_frag & IP_DSCP_MASK);
+    }
+
+    if (frag_mask == FLOW_FRAG_MASK) {
+        /* Both fragment bits matter: emit an exact match even though only
+         * the low 2 bits of the byte carry meaning. */
+        nxm_put_8(b, NXM_NX_IP_FRAG, frag_value);
+    } else if (frag_mask) {
+        /* Only one of the two fragment bits matters. */
+        nxm_put_8m(b, NXM_NX_IP_FRAG, frag_value, frag_mask);
+    }
+}
+
/* Appends to 'b' the nx_match format that expresses 'cr' (except for
* 'cr->priority', because priority is not part of nx_match), plus enough
* zero bytes to pad the nx_match out to a multiple of 8.
int match_len;
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
/* Metadata. */
if (!(wc & FWW_IN_PORT)) {
/* L3. */
if (!(wc & FWW_DL_TYPE) && flow->dl_type == htons(ETH_TYPE_IP)) {
/* IP. */
- if (!(wc & FWW_NW_TOS)) {
- nxm_put_8(b, NXM_OF_IP_TOS, flow->nw_tos & 0xfc);
- }
+ nxm_put_tos_frag(b, cr);
nxm_put_32m(b, NXM_OF_IP_SRC, flow->nw_src, cr->wc.nw_src_mask);
nxm_put_32m(b, NXM_OF_IP_DST, flow->nw_dst, cr->wc.nw_dst_mask);
}
} else if (!(wc & FWW_DL_TYPE) && flow->dl_type == htons(ETH_TYPE_IPV6)) {
/* IPv6. */
-
- if (!(wc & FWW_NW_TOS)) {
- nxm_put_8(b, NXM_OF_IP_TOS, flow->nw_tos & 0xfc);
- }
+ nxm_put_tos_frag(b, cr);
nxm_put_ipv6(b, NXM_NX_IPV6_SRC, &flow->ipv6_src,
&cr->wc.ipv6_src_mask);
nxm_put_ipv6(b, NXM_NX_IPV6_DST, &flow->ipv6_dst,
return ntohs(flow->vlan_tci);
case NFI_NXM_OF_IP_TOS:
- return flow->nw_tos;
+ return flow->tos_frag & IP_DSCP_MASK;
+
+ case NFI_NXM_NX_IP_FRAG:
+ return flow->tos_frag & FLOW_FRAG_MASK;
case NFI_NXM_OF_IP_PROTO:
case NFI_NXM_OF_ARP_OP:
case NFI_NXM_NX_IPV6_SRC_W:
case NFI_NXM_NX_IPV6_DST:
case NFI_NXM_NX_IPV6_DST_W:
+ case NFI_NXM_NX_IP_FRAG_W:
case NFI_NXM_NX_ND_TARGET:
case N_NXM_FIELDS:
NOT_REACHED();
#endif
case NFI_NXM_OF_IP_TOS:
- flow->nw_tos = new_value & IP_DSCP_MASK;
+ flow->tos_frag &= ~IP_DSCP_MASK;
+ flow->tos_frag |= new_value & IP_DSCP_MASK;
+ break;
+
+ case NFI_NXM_NX_IP_FRAG:
+ flow->tos_frag &= ~FLOW_FRAG_MASK;
+ flow->tos_frag |= new_value & FLOW_FRAG_MASK;
break;
case NFI_NXM_OF_IP_SRC:
case NFI_NXM_NX_IPV6_SRC_W:
case NFI_NXM_NX_IPV6_DST:
case NFI_NXM_NX_IPV6_DST_W:
+ case NFI_NXM_NX_IP_FRAG_W:
case NFI_NXM_NX_ICMPV6_TYPE:
case NFI_NXM_NX_ICMPV6_CODE:
case NFI_NXM_NX_ND_TARGET:
DEFINE_FIELD (OF_IP_PROTO, MFF_IP_PROTO, false)
DEFINE_FIELD_M(OF_IP_SRC, MFF_IPV4_SRC, true)
DEFINE_FIELD_M(OF_IP_DST, MFF_IPV4_DST, true)
+DEFINE_FIELD_M(NX_IP_FRAG, MFF_IP_FRAG, false)
DEFINE_FIELD (OF_TCP_SRC, MFF_TCP_SRC, true)
DEFINE_FIELD (OF_TCP_DST, MFF_TCP_DST, true)
DEFINE_FIELD (OF_UDP_SRC, MFF_UDP_SRC, true)
}
}
+/* Returns the textual name used for 'type' in formatted datapath keys, or
+ * "<error>" for any value other than the three known fragment types. */
+static const char *
+ovs_frag_type_to_string(enum ovs_frag_type type)
+{
+    if (type == OVS_FRAG_TYPE_NONE) {
+        return "no";
+    } else if (type == OVS_FRAG_TYPE_FIRST) {
+        return "first";
+    } else if (type == OVS_FRAG_TYPE_LATER) {
+        return "later";
+    }
+    return "<error>";
+}
+
static void
format_odp_key_attr(const struct nlattr *a, struct ds *ds)
{
case OVS_KEY_ATTR_IPV4:
ipv4_key = nl_attr_get(a);
ds_put_format(ds, "ipv4(src="IP_FMT",dst="IP_FMT","
- "proto=%"PRId8",tos=%"PRIu8")",
+ "proto=%"PRId8",tos=%"PRIu8",frag=%s)",
IP_ARGS(&ipv4_key->ipv4_src),
IP_ARGS(&ipv4_key->ipv4_dst),
- ipv4_key->ipv4_proto, ipv4_key->ipv4_tos);
+ ipv4_key->ipv4_proto, ipv4_key->ipv4_tos,
+ ovs_frag_type_to_string(ipv4_key->ipv4_frag));
break;
case OVS_KEY_ATTR_IPV6: {
inet_ntop(AF_INET6, ipv6_key->ipv6_src, src_str, sizeof src_str);
inet_ntop(AF_INET6, ipv6_key->ipv6_dst, dst_str, sizeof dst_str);
- ds_put_format(ds, "ipv6(src=%s,dst=%s,proto=%"PRId8",tos=%"PRIu8")",
+ ds_put_format(ds, "ipv6(src=%s,dst=%s,proto=%"PRId8",tos=%"PRIu8","
+ "frag=%s)",
src_str, dst_str, ipv6_key->ipv6_proto,
- ipv6_key->ipv6_tos);
+ ipv6_key->ipv6_tos,
+ ovs_frag_type_to_string(ipv6_key->ipv6_frag));
break;
}
return n;
}
+/* Converts 's' ("no", "first", or "later", compared case-insensitively) into
+ * the matching OVS_FRAG_TYPE_* value.  Stores it in '*type' and returns true
+ * on success; returns false without touching '*type' for any other string. */
+static bool
+ovs_frag_type_from_string(const char *s, enum ovs_frag_type *type)
+{
+    enum ovs_frag_type parsed;
+
+    if (!strcasecmp(s, "no")) {
+        parsed = OVS_FRAG_TYPE_NONE;
+    } else if (!strcasecmp(s, "first")) {
+        parsed = OVS_FRAG_TYPE_FIRST;
+    } else if (!strcasecmp(s, "later")) {
+        parsed = OVS_FRAG_TYPE_LATER;
+    } else {
+        return false;
+    }
+
+    *type = parsed;
+    return true;
+}
+
static int
parse_odp_key_attr(const char *s, struct ofpbuf *key)
{
ovs_be32 ipv4_dst;
int ipv4_proto;
int ipv4_tos;
+ char frag[8];
+ enum ovs_frag_type ipv4_frag;
int n = -1;
if (sscanf(s, "ipv4(src="IP_SCAN_FMT",dst="IP_SCAN_FMT","
- "proto=%i,tos=%i)%n",
- IP_SCAN_ARGS(&ipv4_src),
- IP_SCAN_ARGS(&ipv4_dst), &ipv4_proto, &ipv4_tos, &n) > 0
- && n > 0) {
+ "proto=%i,tos=%i,frag=%7[a-z])%n",
+ IP_SCAN_ARGS(&ipv4_src), IP_SCAN_ARGS(&ipv4_dst),
+ &ipv4_proto, &ipv4_tos, frag, &n) > 0
+ && n > 0
+ && ovs_frag_type_from_string(frag, &ipv4_frag)) {
struct ovs_key_ipv4 ipv4_key;
memset(&ipv4_key, 0, sizeof ipv4_key);
ipv4_key.ipv4_dst = ipv4_dst;
ipv4_key.ipv4_proto = ipv4_proto;
ipv4_key.ipv4_tos = ipv4_tos;
+ ipv4_key.ipv4_frag = ipv4_frag;
nl_msg_put_unspec(key, OVS_KEY_ATTR_IPV4,
&ipv4_key, sizeof ipv4_key);
return n;
char ipv6_dst_s[IPV6_SCAN_LEN + 1];
int ipv6_proto;
int ipv6_tos;
+ char frag[8];
+ enum ovs_frag_type ipv6_frag;
int n = -1;
if (sscanf(s, "ipv6(src="IPV6_SCAN_FMT",dst="IPV6_SCAN_FMT","
- "proto=%i,tos=%i)%n",
+ "proto=%i,tos=%i,frag=%7[a-z])%n",
ipv6_src_s, ipv6_dst_s,
- &ipv6_proto, &ipv6_tos, &n) > 0 && n > 0) {
+ &ipv6_proto, &ipv6_tos, frag, &n) > 0
+ && n > 0
+ && ovs_frag_type_from_string(frag, &ipv6_frag)) {
struct ovs_key_ipv6 ipv6_key;
memset(&ipv6_key, 0, sizeof ipv6_key);
}
ipv6_key.ipv6_proto = ipv6_proto;
ipv6_key.ipv6_tos = ipv6_tos;
+ ipv6_key.ipv6_frag = ipv6_frag;
nl_msg_put_unspec(key, OVS_KEY_ATTR_IPV6,
&ipv6_key, sizeof ipv6_key);
return n;
return 0;
}
+/* Maps the FLOW_FRAG_* bits of 'tos_frag' to an OVS_FRAG_TYPE_* value.
+ * FLOW_FRAG_LATER takes precedence over FLOW_FRAG_ANY; with neither bit set
+ * the result is OVS_FRAG_TYPE_NONE. */
+static uint8_t
+tos_frag_to_odp_frag(uint8_t tos_frag)
+{
+    if (tos_frag & FLOW_FRAG_LATER) {
+        return OVS_FRAG_TYPE_LATER;
+    }
+    if (tos_frag & FLOW_FRAG_ANY) {
+        return OVS_FRAG_TYPE_FIRST;
+    }
+    return OVS_FRAG_TYPE_NONE;
+}
+
/* Appends a representation of 'flow' as OVS_KEY_ATTR_* attributes to 'buf'. */
void
odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
ipv4_key->ipv4_src = flow->nw_src;
ipv4_key->ipv4_dst = flow->nw_dst;
ipv4_key->ipv4_proto = flow->nw_proto;
- ipv4_key->ipv4_tos = flow->nw_tos;
+ ipv4_key->ipv4_tos = flow->tos_frag & IP_DSCP_MASK;
+ ipv4_key->ipv4_frag = tos_frag_to_odp_frag(flow->tos_frag);
} else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
struct ovs_key_ipv6 *ipv6_key;
memcpy(ipv6_key->ipv6_src, &flow->ipv6_src, sizeof ipv6_key->ipv6_src);
memcpy(ipv6_key->ipv6_dst, &flow->ipv6_dst, sizeof ipv6_key->ipv6_dst);
ipv6_key->ipv6_proto = flow->nw_proto;
- ipv6_key->ipv6_tos = flow->nw_tos;
+ ipv6_key->ipv6_tos = flow->tos_frag & IP_DSCP_MASK;
+ ipv6_key->ipv6_frag = tos_frag_to_odp_frag(flow->tos_frag);
} else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
struct ovs_key_arp *arp_key;
memcpy(arp_key->arp_tha, flow->arp_tha, ETH_ADDR_LEN);
}
- if (flow->dl_type == htons(ETH_TYPE_IP)
- || flow->dl_type == htons(ETH_TYPE_IPV6)) {
+ if ((flow->dl_type == htons(ETH_TYPE_IP)
+ || flow->dl_type == htons(ETH_TYPE_IPV6))
+ && !(flow->tos_frag & FLOW_FRAG_LATER)) {
if (flow->nw_proto == IPPROTO_TCP) {
struct ovs_key_tcp *tcp_key;
}
}
+/* Combines 'odp_tos' and the OVS_FRAG_TYPE_* value 'odp_frag' into
+ * 'flow->tos_frag'.
+ *
+ * Returns false, leaving 'flow' unchanged, if 'odp_tos' has bits set outside
+ * IP_DSCP_MASK or if 'odp_frag' is greater than OVS_FRAG_TYPE_LATER.
+ * Otherwise stores the combined value and returns true. */
+static bool
+odp_to_tos_frag(uint8_t odp_tos, uint8_t odp_frag, struct flow *flow)
+{
+    uint8_t frag_bits = 0;
+
+    if ((odp_tos & ~IP_DSCP_MASK) || odp_frag > OVS_FRAG_TYPE_LATER) {
+        return false;
+    }
+
+    if (odp_frag != OVS_FRAG_TYPE_NONE) {
+        /* Any fragment sets FLOW_FRAG_ANY; a later fragment additionally
+         * sets FLOW_FRAG_LATER. */
+        frag_bits = FLOW_FRAG_ANY;
+        if (odp_frag == OVS_FRAG_TYPE_LATER) {
+            frag_bits |= FLOW_FRAG_LATER;
+        }
+    }
+
+    flow->tos_frag = odp_tos | frag_bits;
+    return true;
+}
+
/* Converts the 'key_len' bytes of OVS_KEY_ATTR_* attributes in 'key' to a flow
* structure in 'flow'. Returns 0 if successful, otherwise EINVAL. */
int
flow->nw_src = ipv4_key->ipv4_src;
flow->nw_dst = ipv4_key->ipv4_dst;
flow->nw_proto = ipv4_key->ipv4_proto;
- flow->nw_tos = ipv4_key->ipv4_tos;
- if (flow->nw_tos & IP_ECN_MASK) {
+ if (!odp_to_tos_frag(ipv4_key->ipv4_tos, ipv4_key->ipv4_frag,
+ flow)) {
return EINVAL;
}
break;
memcpy(&flow->ipv6_src, ipv6_key->ipv6_src, sizeof flow->ipv6_src);
memcpy(&flow->ipv6_dst, ipv6_key->ipv6_dst, sizeof flow->ipv6_dst);
flow->nw_proto = ipv6_key->ipv6_proto;
- flow->nw_tos = ipv6_key->ipv6_tos;
- if (flow->nw_tos & IP_ECN_MASK) {
+ if (!odp_to_tos_frag(ipv6_key->ipv6_tos, ipv6_key->ipv6_frag,
+ flow)) {
return EINVAL;
}
break;
return 0;
case OVS_KEY_ATTR_IPV4:
+ if (flow->tos_frag & FLOW_FRAG_LATER) {
+ return 0;
+ }
if (flow->nw_proto == IPPROTO_TCP
|| flow->nw_proto == IPPROTO_UDP
|| flow->nw_proto == IPPROTO_ICMP) {
return 0;
case OVS_KEY_ATTR_IPV6:
+ if (flow->tos_frag & FLOW_FRAG_LATER) {
+ return 0;
+ }
if (flow->nw_proto == IPPROTO_TCP
|| flow->nw_proto == IPPROTO_UDP
|| flow->nw_proto == IPPROTO_ICMPV6) {
case OVS_KEY_ATTR_ICMPV6:
if (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT)
- || flow->tp_src == htons(ND_NEIGHBOR_ADVERT)) {
+ || flow->tp_src == htons(ND_NEIGHBOR_ADVERT)
+ || flow->tos_frag & FLOW_FRAG_LATER) {
return EINVAL;
}
return 0;
case OVS_KEY_ATTR_TCP:
case OVS_KEY_ATTR_UDP:
case OVS_KEY_ATTR_ICMP:
- case OVS_KEY_ATTR_ARP:
case OVS_KEY_ATTR_ND:
+ if (flow->tos_frag & FLOW_FRAG_LATER) {
+ return EINVAL;
+ }
+ return 0;
+
+ case OVS_KEY_ATTR_ARP:
return 0;
case __OVS_KEY_ATTR_MAX:
flags = ntohs(osc->flags);
- ds_put_cstr(string, " frags=");
- switch (flags & OFPC_FRAG_MASK) {
- case OFPC_FRAG_NORMAL:
- ds_put_cstr(string, "normal");
- flags &= ~OFPC_FRAG_MASK;
- break;
- case OFPC_FRAG_DROP:
- ds_put_cstr(string, "drop");
- flags &= ~OFPC_FRAG_MASK;
- break;
- case OFPC_FRAG_REASM:
- ds_put_cstr(string, "reassemble");
- flags &= ~OFPC_FRAG_MASK;
- break;
- }
+ ds_put_format(string, " frags=%s", ofputil_frag_handling_to_string(flags));
+ flags &= ~OFPC_FRAG_MASK;
+
if (flags) {
ds_put_format(string, " ***unknown flags 0x%04"PRIx16"***", flags);
}
void
ofputil_wildcard_from_openflow(uint32_t ofpfw, struct flow_wildcards *wc)
{
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
/* Initialize most of rule->wc. */
flow_wildcards_init_catchall(wc);
/* Wildcard fields that aren't defined by ofp_match or tun_id. */
wc->wildcards |= (FWW_ARP_SHA | FWW_ARP_THA | FWW_ND_TARGET);
- if (ofpfw & OFPFW_NW_TOS) {
- wc->wildcards |= FWW_NW_TOS;
+ if (!(ofpfw & OFPFW_NW_TOS)) {
+ wc->tos_frag_mask |= IP_DSCP_MASK;
}
+
wc->nw_src_mask = ofputil_wcbits_to_netmask(ofpfw >> OFPFW_NW_SRC_SHIFT);
wc->nw_dst_mask = ofputil_wcbits_to_netmask(ofpfw >> OFPFW_NW_DST_SHIFT);
rule->flow.tp_dst = match->tp_dst;
memcpy(rule->flow.dl_src, match->dl_src, ETH_ADDR_LEN);
memcpy(rule->flow.dl_dst, match->dl_dst, ETH_ADDR_LEN);
- rule->flow.nw_tos = match->nw_tos;
+ rule->flow.tos_frag = match->nw_tos & IP_DSCP_MASK;
rule->flow.nw_proto = match->nw_proto;
/* Translate VLANs. */
ofpfw = (OVS_FORCE uint32_t) (wc->wildcards & WC_INVARIANTS);
ofpfw |= ofputil_netmask_to_wcbits(wc->nw_src_mask) << OFPFW_NW_SRC_SHIFT;
ofpfw |= ofputil_netmask_to_wcbits(wc->nw_dst_mask) << OFPFW_NW_DST_SHIFT;
- if (wc->wildcards & FWW_NW_TOS) {
+ if (!(wc->tos_frag_mask & IP_DSCP_MASK)) {
ofpfw |= OFPFW_NW_TOS;
}
match->dl_type = ofputil_dl_type_to_openflow(rule->flow.dl_type);
match->nw_src = rule->flow.nw_src;
match->nw_dst = rule->flow.nw_dst;
- match->nw_tos = rule->flow.nw_tos;
+ match->nw_tos = rule->flow.tos_frag & IP_DSCP_MASK;
match->nw_proto = rule->flow.nw_proto;
match->tp_src = rule->flow.tp_src;
match->tp_dst = rule->flow.tp_dst;
{
const struct flow_wildcards *wc = &rule->wc;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
/* Only NXM supports separately wildcards the Ethernet multicast bit. */
if (!(wc->wildcards & FWW_DL_DST) != !(wc->wildcards & FWW_ETH_MCAST)) {
return NXFF_NXM;
}
+ /* Only NXM supports matching fragments. */
+ if (wc->tos_frag_mask & FLOW_FRAG_MASK) {
+ return NXFF_NXM;
+ }
+
/* Other formats can express this rule. */
return NXFF_OPENFLOW10;
}
return out;
}
+/* Returns the name of the fragment handling mode held in the OFPC_FRAG_MASK
+ * bits of 'flags'.  Bits of 'flags' outside OFPC_FRAG_MASK are ignored. */
+const char *
+ofputil_frag_handling_to_string(enum ofp_config_flags flags)
+{
+    int mode = flags & OFPC_FRAG_MASK;
+
+    if (mode == OFPC_FRAG_NORMAL) {
+        return "normal";
+    } else if (mode == OFPC_FRAG_DROP) {
+        return "drop";
+    } else if (mode == OFPC_FRAG_REASM) {
+        return "reassemble";
+    } else if (mode == OFPC_FRAG_NX_MATCH) {
+        return "nx-match";
+    }
+
+    NOT_REACHED();
+}
+
+/* Parses 's' ("normal", "drop", "reassemble", or "nx-match", compared
+ * case-insensitively) as a fragment handling mode.  Stores the matching
+ * OFPC_FRAG_* value in '*flags' and returns true on success; returns false
+ * without modifying '*flags' if 's' is not one of those names. */
+bool
+ofputil_frag_handling_from_string(const char *s, enum ofp_config_flags *flags)
+{
+    enum ofp_config_flags mode;
+
+    if (!strcasecmp(s, "normal")) {
+        mode = OFPC_FRAG_NORMAL;
+    } else if (!strcasecmp(s, "drop")) {
+        mode = OFPC_FRAG_DROP;
+    } else if (!strcasecmp(s, "reassemble")) {
+        mode = OFPC_FRAG_REASM;
+    } else if (!strcasecmp(s, "nx-match")) {
+        mode = OFPC_FRAG_NX_MATCH;
+    } else {
+        return false;
+    }
+
+    *flags = mode;
+    return true;
+}
+
/* Checks that 'port' is a valid output port for the OFPAT_OUTPUT action, given
* that the switch will never have more than 'max_ports' ports. Returns 0 if
* 'port' is valid, otherwise an ofp_mkerr() return code. */
MAY_NW_ADDR = 1 << 0, /* nw_src, nw_dst */
MAY_TP_ADDR = 1 << 1, /* tp_src, tp_dst */
MAY_NW_PROTO = 1 << 2, /* nw_proto */
- MAY_NW_TOS = 1 << 3, /* nw_tos */
+ MAY_TOS_FRAG = 1 << 3, /* tos_frag */
MAY_ARP_SHA = 1 << 4, /* arp_sha */
MAY_ARP_THA = 1 << 5, /* arp_tha */
MAY_IPV6_ADDR = 1 << 6, /* ipv6_src, ipv6_dst */
/* Figure out what fields may be matched. */
if (rule->flow.dl_type == htons(ETH_TYPE_IP)) {
- may_match = MAY_NW_PROTO | MAY_NW_TOS | MAY_NW_ADDR;
+ may_match = MAY_NW_PROTO | MAY_TOS_FRAG | MAY_NW_ADDR;
if (rule->flow.nw_proto == IPPROTO_TCP ||
rule->flow.nw_proto == IPPROTO_UDP ||
rule->flow.nw_proto == IPPROTO_ICMP) {
}
} else if (rule->flow.dl_type == htons(ETH_TYPE_IPV6)
&& flow_format == NXFF_NXM) {
- may_match = MAY_NW_PROTO | MAY_NW_TOS | MAY_IPV6_ADDR;
+ may_match = MAY_NW_PROTO | MAY_TOS_FRAG | MAY_IPV6_ADDR;
if (rule->flow.nw_proto == IPPROTO_TCP ||
rule->flow.nw_proto == IPPROTO_UDP) {
may_match |= MAY_TP_ADDR;
if (!(may_match & MAY_NW_PROTO)) {
wc.wildcards |= FWW_NW_PROTO;
}
- if (!(may_match & MAY_NW_TOS)) {
- wc.wildcards |= FWW_NW_TOS;
+ if (!(may_match & MAY_TOS_FRAG)) {
+ wc.tos_frag_mask = 0;
}
if (!(may_match & MAY_ARP_SHA)) {
wc.wildcards |= FWW_ARP_SHA;
uint16_t in_port, uint16_t out_port);
struct ofpbuf *make_echo_request(void);
struct ofpbuf *make_echo_reply(const struct ofp_header *rq);
+
+const char *ofputil_frag_handling_to_string(enum ofp_config_flags);
+bool ofputil_frag_handling_from_string(const char *, enum ofp_config_flags *);
\f
/* Actions. */
}
nf_rec->tcp_flags = nf_flow->tcp_flags;
nf_rec->ip_proto = expired->flow.nw_proto;
- nf_rec->ip_tos = expired->flow.nw_tos;
+ nf_rec->ip_tos = expired->flow.tos_frag & IP_DSCP_MASK;
/* NetFlow messages are limited to 30 records. */
if (ntohs(nf_hdr->count) >= 30) {
rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
uint8_t table_id)
{
+ struct cls_rule *cls_rule;
+ struct classifier *cls;
+
if (table_id >= N_TABLES) {
return NULL;
}
- return rule_dpif_cast(rule_from_cls_rule(
- classifier_lookup(&ofproto->up.tables[table_id],
- flow)));
+ cls = &ofproto->up.tables[table_id];
+ if (flow->tos_frag & FLOW_FRAG_ANY
+ && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
+ /* For OFPC_FRAG_NORMAL frag_handling, we must pretend that transport
+ * ports are unavailable. */
+ struct flow ofpc_normal_flow = *flow;
+ ofpc_normal_flow.tp_src = htons(0);
+ ofpc_normal_flow.tp_dst = htons(0);
+ cls_rule = classifier_lookup(cls, &ofpc_normal_flow);
+ } else {
+ cls_rule = classifier_lookup(cls, flow);
+ }
+ return rule_dpif_cast(rule_from_cls_rule(cls_rule));
}
static void
commit_set_nw_action(const struct flow *flow, struct flow *base,
struct ofpbuf *odp_actions)
{
+ int frag = base->tos_frag & FLOW_FRAG_MASK;
struct ovs_key_ipv4 ipv4_key;
if (base->dl_type != htons(ETH_TYPE_IP) ||
if (base->nw_src == flow->nw_src &&
base->nw_dst == flow->nw_dst &&
- base->nw_tos == flow->nw_tos) {
+ base->tos_frag == flow->tos_frag) {
return;
}
+
memset(&ipv4_key, 0, sizeof(ipv4_key));
ipv4_key.ipv4_src = base->nw_src = flow->nw_src;
ipv4_key.ipv4_dst = base->nw_dst = flow->nw_dst;
- ipv4_key.ipv4_tos = base->nw_tos = flow->nw_tos;
-
ipv4_key.ipv4_proto = base->nw_proto;
+ ipv4_key.ipv4_tos = flow->tos_frag & IP_DSCP_MASK;
+ ipv4_key.ipv4_frag = (frag == 0 ? OVS_FRAG_TYPE_NONE
+ : frag == FLOW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST
+ : OVS_FRAG_TYPE_LATER);
commit_action__(odp_actions, OVS_ACTION_ATTR_SET,
OVS_KEY_ATTR_IPV4, &ipv4_key, sizeof(ipv4_key));
break;
case OFPUTIL_OFPAT_SET_NW_TOS:
- ctx->flow.nw_tos = ia->nw_tos.nw_tos & IP_DSCP_MASK;
+ ctx->flow.tos_frag &= ~IP_DSCP_MASK;
+ ctx->flow.tos_frag |= ia->nw_tos.nw_tos & IP_DSCP_MASK;
break;
case OFPUTIL_OFPAT_SET_TP_SRC:
ctx->odp_actions = ofpbuf_new(512);
ofpbuf_reserve(ctx->odp_actions, NL_A_U32_SIZE);
+
+ if (ctx->flow.tos_frag & FLOW_FRAG_ANY) {
+ switch (ctx->ofproto->up.frag_handling) {
+ case OFPC_FRAG_NORMAL:
+ /* We must pretend that transport ports are unavailable. */
+ ctx->flow.tp_src = htons(0);
+ ctx->flow.tp_dst = htons(0);
+ break;
+
+ case OFPC_FRAG_DROP:
+ return ctx->odp_actions;
+
+ case OFPC_FRAG_REASM:
+ NOT_REACHED();
+
+ case OFPC_FRAG_NX_MATCH:
+ /* Nothing to do. */
+ break;
+ }
+ }
+
ctx->tags = 0;
ctx->may_set_up_flow = true;
ctx->has_learn = false;
}
\f
+/* Fragment handling hook listed in this file's ofproto class table.
+ *
+ * Accepts every 'frag_handling' mode other than OFPC_FRAG_REASM: in that case
+ * it marks the ofproto as needing revalidation and returns true.  For
+ * OFPC_FRAG_REASM it changes nothing and returns false. */
static bool
-get_drop_frags(struct ofproto *ofproto_)
+set_frag_handling(struct ofproto *ofproto_,
+ enum ofp_config_flags frag_handling)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
- bool drop_frags;
-
- dpif_get_drop_frags(ofproto->dpif, &drop_frags);
- return drop_frags;
-}
-static void
-set_drop_frags(struct ofproto *ofproto_, bool drop_frags)
-{
- struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
-
- dpif_set_drop_frags(ofproto->dpif, drop_frags);
+ if (frag_handling != OFPC_FRAG_REASM) {
+ /* NOTE(review): presumably forces already-translated flows to be
+ * re-evaluated under the new policy -- confirm against the
+ * need_revalidate consumers. */
+ ofproto->need_revalidate = true;
+ return true;
+ } else {
+ return false;
+ }
}
static int
rule_get_stats,
rule_execute,
rule_modify_actions,
- get_drop_frags,
- set_drop_frags,
+ set_frag_handling,
packet_out,
set_netflow,
get_netflow_ids,
char *sw_desc; /* Software version. */
char *serial_desc; /* Serial number. */
char *dp_desc; /* Datapath description. */
+ enum ofp_config_flags frag_handling; /* One of OFPC_*. */
/* Datapath. */
struct hmap ports; /* Contains "struct ofport"s. */
(CLS) < &(OFPROTO)->tables[(OFPROTO)->n_tables]; \
(CLS)++)
-
/* An OpenFlow port within a "struct ofproto".
*
* With few exceptions, ofproto implementations may look at these fields but
* rule. */
void (*rule_modify_actions)(struct rule *rule);
- /* These functions implement the OpenFlow IP fragment handling policy. By
- * default ('drop_frags' == false), an OpenFlow switch should treat IP
- * fragments the same way as other packets (although TCP and UDP port
- * numbers cannot be determined). With 'drop_frags' == true, the switch
- * should drop all IP fragments without passing them through the flow
- * table. */
- bool (*get_drop_frags)(struct ofproto *ofproto);
- void (*set_drop_frags)(struct ofproto *ofproto, bool drop_frags);
+ /* Changes the OpenFlow IP fragment handling policy to 'frag_handling',
+ * which takes one of the following values, with the corresponding
+ * meanings:
+ *
+ * - OFPC_FRAG_NORMAL: The switch should treat IP fragments the same way
+ * as other packets, omitting TCP and UDP port numbers (always setting
+ * them to 0).
+ *
+ * - OFPC_FRAG_DROP: The switch should drop all IP fragments without
+ * passing them through the flow table.
+ *
+ * - OFPC_FRAG_REASM: The switch should reassemble IP fragments before
+ * passing packets through the flow table.
+ *
+ * - OFPC_FRAG_NX_MATCH (a Nicira extension): Similar to OFPC_FRAG_NORMAL,
+ * except that TCP and UDP port numbers should be included in fragments
+ * with offset 0.
+ *
+ * Implementations are not required to support every mode.
+ * OFPC_FRAG_NORMAL is the default mode when an ofproto is created.
+ *
+ * At the time of the call to ->set_frag_handling(), the current mode is
+ * available in 'ofproto->frag_handling'. ->set_frag_handling() returns
+ * true if the requested mode was set, false if it is not supported.
+ *
+ * Upon successful return, the caller changes 'ofproto->frag_handling' to
+ * reflect the new mode.
+ */
+ bool (*set_frag_handling)(struct ofproto *ofproto,
+ enum ofp_config_flags frag_handling);
/* Implements the OpenFlow OFPT_PACKET_OUT command. The datapath should
* execute the 'n_actions' in the 'actions' array on 'packet'.
ofproto->sw_desc = xstrdup(DEFAULT_SW_DESC);
ofproto->serial_desc = xstrdup(DEFAULT_SERIAL_DESC);
ofproto->dp_desc = xstrdup(DEFAULT_DP_DESC);
+ ofproto->frag_handling = OFPC_FRAG_NORMAL;
hmap_init(&ofproto->ports);
shash_init(&ofproto->port_by_name);
ofproto->tables = NULL;
handle_get_config_request(struct ofconn *ofconn, const struct ofp_header *oh)
{
struct ofproto *ofproto = ofconn_get_ofproto(ofconn);
- struct ofpbuf *buf;
struct ofp_switch_config *osc;
- uint16_t flags;
- bool drop_frags;
-
- /* Figure out flags. */
- drop_frags = ofproto->ofproto_class->get_drop_frags(ofproto);
- flags = drop_frags ? OFPC_FRAG_DROP : OFPC_FRAG_NORMAL;
+ struct ofpbuf *buf;
/* Send reply. */
osc = make_openflow_xid(sizeof *osc, OFPT_GET_CONFIG_REPLY, oh->xid, &buf);
- osc->flags = htons(flags);
+ osc->flags = htons(ofproto->frag_handling);
osc->miss_send_len = htons(ofconn_get_miss_send_len(ofconn));
ofconn_send_reply(ofconn, buf);
struct ofproto *ofproto = ofconn_get_ofproto(ofconn);
uint16_t flags = ntohs(osc->flags);
- if (ofconn_get_type(ofconn) == OFCONN_PRIMARY
- && ofconn_get_role(ofconn) != NX_ROLE_SLAVE) {
- switch (flags & OFPC_FRAG_MASK) {
- case OFPC_FRAG_NORMAL:
- ofproto->ofproto_class->set_drop_frags(ofproto, false);
- break;
- case OFPC_FRAG_DROP:
- ofproto->ofproto_class->set_drop_frags(ofproto, true);
- break;
- default:
- VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")",
- osc->flags);
- break;
+ if (ofconn_get_type(ofconn) != OFCONN_PRIMARY
+ || ofconn_get_role(ofconn) != NX_ROLE_SLAVE) {
+ enum ofp_config_flags cur = ofproto->frag_handling;
+ enum ofp_config_flags next = flags & OFPC_FRAG_MASK;
+
+ assert((cur & OFPC_FRAG_MASK) == cur);
+ if (cur != next) {
+ if (ofproto->ofproto_class->set_frag_handling(ofproto, next)) {
+ ofproto->frag_handling = next;
+ } else {
+ VLOG_WARN_RL(&rl, "%s: unsupported fragment handling mode %s",
+ ofproto->name,
+ ofputil_frag_handling_to_string(next));
+ }
}
}
#! /usr/bin/perl
-# Copyright (c) 2009, 2010 Nicira Networks.
+# Copyright (c) 2009, 2010, 2011 Nicira Networks.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
} else {
die;
}
- if ($attrs{IP_FRAGMENT} ne 'no') {
+ if ($attrs{IP_FRAGMENT} ne 'no' && $attrs{IP_FRAGMENT} ne 'first') {
$flow{TP_SRC} = $flow{TP_DST} = 0;
}
} elsif ($attrs{DL_TYPE} eq 'non-ip') {
if ($attrs{TP_PROTO} =~ '^TCP') {
my $tcp = pack('nnNNnnnn',
- $flow{TP_SRC}, # source port
- $flow{TP_DST}, # dest port
- 87123455, # seqno
- 712378912, # ackno
+ $flow{TP_SRC}, # source port
+ $flow{TP_DST}, # dest port
+ 87123455, # seqno
+ 712378912, # ackno
(5 << 12) | 0x02 | 0x10, # hdrlen, SYN, ACK
5823, # window size
18923, # checksum
- 12893); # urgent pointer
+ 12893); # urgent pointer
if ($attrs{TP_PROTO} eq 'TCP+options') {
substr($tcp, 12, 2) = pack('n', (6 << 12) | 0x02 | 0x10);
$tcp .= pack('CCn', 2, 4, 1975); # MSS option
$ip .= $udp;
} elsif ($attrs{TP_PROTO} eq 'ICMP') {
$ip .= pack('CCnnn',
- 8, # echo request
- 0, # code
- 0, # checksum
- 736, # identifier
- 931); # sequence number
+ 8, # echo request
+ 0, # code
+ 0, # checksum
+ 736, # identifier
+ 931); # sequence number
} elsif ($attrs{TP_PROTO} eq 'other') {
$ip .= 'other header';
} else {
die;
}
-
substr($ip, 2, 2) = pack('n', length($ip));
$packet .= $ip;
}
AT_DATA([odp-base.txt], [dnl
in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15)
in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x1234)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0),tcp(src=80,dst=8080)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0),udp(src=81,dst=6632)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0),icmp(type=1,code=2)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=6,tos=0),tcp(src=80,dst=8080)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=17,tos=0),udp(src=6630,dst=22)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=1,code=2)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=135,code=0),nd(target=::3)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=135,code=0),nd(target=::3,sll=00:05:06:07:08:09)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=136,code=0),nd(target=::3,tll=00:0a:0b:0c:0d:0e)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=no)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=first)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=later)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0,frag=no),tcp(src=80,dst=8080)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0,frag=no),udp(src=81,dst=6632)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0,frag=no),icmp(type=1,code=2)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112,frag=no)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112,frag=first)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112,frag=later)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=6,tos=0,frag=no),tcp(src=80,dst=8080)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=17,tos=0,frag=no),udp(src=6630,dst=22)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=1,code=2)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=135,code=0),nd(target=::3)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=135,code=0),nd(target=::3,sll=00:05:06:07:08:09)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=136,code=0),nd(target=::3,tll=00:0a:0b:0c:0d:0e)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e)
in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0806),arp(sip=1.2.3.4,tip=5.6.7.8,op=1,sha=00:0f:10:11:12:13,tha=00:14:15:16:17:18)
])
echo
echo '# Valid forms with tun_id and VLAN headers.'
sed 's/^/tun_id(0xfedcba9876543210),/
-s/eth([[^)]]*)/&,vlan(vid=99,pcp=7)/' odp-base.txt) > odp.txt
+s/eth([[^)]]*)/&,vlan(vid=99,pcp=7)/' odp-base.txt
+
+ echo
+ echo '# Valid forms with IP first fragment.'
+sed -n 's/,frag=no),/,frag=first),/p' odp-base.txt
+
+ echo
+ echo '# Valid forms with IP later fragment.'
+sed -n 's/,frag=no),.*/,frag=later)/p' odp-base.txt) > odp.txt
AT_CAPTURE_FILE([odp.txt])
AT_CHECK_UNQUOTED([test-odp < odp.txt], [0], [`cat odp.txt`
])
table=1 in_port=3 priority=1500 icmp actions=output(14),resubmit(,2)
])
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
-AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout])
AT_CHECK([tail -1 stdout], [0],
[Datapath actions: 10,11,12,13,14,15,16,17,18,19,20,21
])
in_port=11,reg2=0xeef22dea actions=output:22
])
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
-AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout])
AT_CHECK([tail -1 stdout], [0],
[Datapath actions: 20,21,22
])
in_port=7 actions=load:0x110000ff->NXM_NX_REG0[[]],output:NXM_NX_REG0[[]]
])
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
-AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout])
AT_CHECK([tail -1 stdout], [0],
[Datapath actions: 9,55,10,55,66,11,77,88
])
in_port=5 actions=set_tunnel:5
])
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
-AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'tun_id(0x1),in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'tun_id(0x1),in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout])
AT_CHECK([tail -1 stdout], [0],
[Datapath actions: set(tun_id(0x1)),1,2,set(tun_id(0x3)),3,4
])
OVS_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - fragment handling])
+OFPROTO_START
+# Flow table under test: the priority-75 rules match TCP destination port 80
+# for each ip_frag class and output to ports 1 through 3, while the
+# priority-50 rules do not look at the TCP ports and output to ports 4
+# through 6.
+AT_DATA([flows.txt], [dnl
+priority=75 tcp ip_frag=no tp_dst=80 actions=output:1
+priority=75 tcp ip_frag=first tp_dst=80 actions=output:2
+priority=75 tcp ip_frag=later tp_dst=80 actions=output:3
+priority=50 tcp ip_frag=no actions=output:4
+priority=50 tcp ip_frag=first actions=output:5
+priority=50 tcp ip_frag=later actions=output:6
+])
+AT_CHECK([ovs-ofctl replace-flows br0 flows.txt])
+
+# Datapath flow keys to trace, one per fragment type.  The later-fragment key
+# carries no tcp attribute at all.
+base_flow="in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0"
+no_flow="$base_flow,frag=no),tcp(src=12345,dst=80)"
+first_flow="$base_flow,frag=first),tcp(src=12345,dst=80)"
+later_flow="$base_flow,frag=later)"
+
+# Each tuple names a fragment handling mode followed by the datapath actions
+# expected for the non-fragment, first-fragment and later-fragment keys.
+# In normal mode the first fragment is expected to miss the tp_dst=80 rule
+# and hit the priority-50 rule instead; in nx-match mode it is expected to
+# hit the tp_dst=80 rule.
+ # mode no first later
+for tuple in \
+ 'normal 1 5 6' \
+ 'drop 1 drop drop' \
+ 'nx-match 1 2 6'
+do
+ # Split the tuple into positional parameters.
+ set $tuple
+ mode=$1
+ no=$2
+ first=$3
+ later=$4
+
+ AT_CHECK([ovs-ofctl set-frags br0 $mode])
+ for type in no first later; do
+ # Pick the flow key and the expected actions for this fragment type by
+ # variable name.
+ eval flow=\$${type}_flow exp_output=\$$type
+ AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 "$flow"],
+ [0], [stdout])
+ AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: $exp_output
+])
+ done
+done
+OFPROTO_STOP
+AT_CLEANUP
NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3b) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_TLL(0002e30f80a4)
NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(88) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_TLL(0002e30f80a4)
+# IPv4 fragments.
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(01)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(02)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(03)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/03)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/fd)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/02)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(01/01)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(02/02)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(03/03)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(f3)
+
+# IPv6 fragments.
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(01)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(02)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(03)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/03)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/01)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/02)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(01/01)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(02/02)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(03/03)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(f3)
+
# Tunnel ID.
NXM_NX_TUN_ID(00000000abcdef01)
NXM_NX_TUN_ID_W(84200000abcdef01/84200000FFFFFFFF)
nx_pull_match() returned error 44010104 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_PREREQ)
nx_pull_match() returned error 44010104 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_PREREQ)
+# IPv4 fragments.
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(01)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(02)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(03)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(00/01)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(00/02)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(01/01)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(02/02)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(03)
+nx_pull_match() returned error 44010102 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_VALUE)
+
+# IPv6 fragments.
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(01)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(02)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(03)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(00/01)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(00/02)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(01/01)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(02/02)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(03)
+nx_pull_match() returned error 44010102 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_VALUE)
+
# Tunnel ID.
NXM_NX_TUN_ID(00000000abcdef01)
NXM_NX_TUN_ID_W(84200000abcdef01/84200000ffffffff)
CLS_FIELD(FWW_DL_SRC, dl_src, DL_SRC) \
CLS_FIELD(FWW_DL_DST | FWW_ETH_MCAST, dl_dst, DL_DST) \
CLS_FIELD(FWW_NW_PROTO, nw_proto, NW_PROTO) \
- CLS_FIELD(FWW_NW_TOS, nw_tos, NW_TOS)
+ CLS_FIELD(0, tos_frag, TOS_FRAG)
/* Field indexes.
*
& wild->wc.vlan_tci_mask);
} else if (f_idx == CLS_F_IDX_TUN_ID) {
eq = !((fixed->tun_id ^ wild->flow.tun_id) & wild->wc.tun_id_mask);
+ } else if (f_idx == CLS_F_IDX_TOS_FRAG) {
+ eq = !((fixed->tos_frag ^ wild->flow.tos_frag)
+ & wild->wc.tos_frag_mask);
} else {
NOT_REACHED();
}
static uint8_t dl_dst_values[][6] = { { 0x4a, 0x27, 0x71, 0xae, 0x64, 0xc1 },
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } };
static uint8_t nw_proto_values[] = { IPPROTO_TCP, IPPROTO_ICMP };
-static uint8_t nw_tos_values[] = { 49, 0 };
+static uint8_t tos_frag_values[] = { 48, 0 };
static void *values[CLS_N_FIELDS][2];
values[CLS_F_IDX_NW_PROTO][0] = &nw_proto_values[0];
values[CLS_F_IDX_NW_PROTO][1] = &nw_proto_values[1];
- values[CLS_F_IDX_NW_TOS][0] = &nw_tos_values[0];
- values[CLS_F_IDX_NW_TOS][1] = &nw_tos_values[1];
+ values[CLS_F_IDX_TOS_FRAG][0] = &tos_frag_values[0];
+ values[CLS_F_IDX_TOS_FRAG][1] = &tos_frag_values[1];
values[CLS_F_IDX_TP_SRC][0] = &tp_src_values[0];
values[CLS_F_IDX_TP_SRC][1] = &tp_src_values[1];
#define N_DL_SRC_VALUES ARRAY_SIZE(dl_src_values)
#define N_DL_DST_VALUES ARRAY_SIZE(dl_dst_values)
#define N_NW_PROTO_VALUES ARRAY_SIZE(nw_proto_values)
-#define N_NW_TOS_VALUES ARRAY_SIZE(nw_tos_values)
+#define N_TOS_FRAG_VALUES ARRAY_SIZE(tos_frag_values)
#define N_FLOW_VALUES (N_NW_SRC_VALUES * \
N_NW_DST_VALUES * \
N_DL_SRC_VALUES * \
N_DL_DST_VALUES * \
N_NW_PROTO_VALUES * \
- N_NW_TOS_VALUES)
+ N_TOS_FRAG_VALUES)
static unsigned int
get_value(unsigned int *x, unsigned n_values)
memcpy(flow.dl_dst, dl_dst_values[get_value(&x, N_DL_DST_VALUES)],
ETH_ADDR_LEN);
flow.nw_proto = nw_proto_values[get_value(&x, N_NW_PROTO_VALUES)];
- flow.nw_tos = nw_tos_values[get_value(&x, N_NW_TOS_VALUES)];
+ flow.tos_frag = tos_frag_values[get_value(&x, N_TOS_FRAG_VALUES)];
cr0 = classifier_lookup(cls, &flow);
cr1 = tcls_lookup(tcls, &flow);
rule->cls_rule.wc.vlan_tci_mask = htons(UINT16_MAX);
} else if (f_idx == CLS_F_IDX_TUN_ID) {
rule->cls_rule.wc.tun_id_mask = htonll(UINT64_MAX);
+ } else if (f_idx == CLS_F_IDX_TOS_FRAG) {
+ rule->cls_rule.wc.tos_frag_mask = UINT8_MAX;
} else {
NOT_REACHED();
}
printf("Packet:\n");
ofp_print_packet(stdout, packet->data, packet->size, packet->size);
ovs_hex_dump(stdout, packet->data, packet->size, 0, true);
+ cls_rule_print(&rule);
printf("Expected flow:\n%s\n", exp_s);
printf("Actually extracted flow:\n%s\n", got_s);
+ ovs_hex_dump(stdout, &expected_match, sizeof expected_match, 0, false);
+ ovs_hex_dump(stdout, &extracted_match, sizeof extracted_match, 0, false);
printf("\n");
free(exp_s);
free(got_s);
printf("%s:\n", dpif_name(dpif));
if (!dpif_get_dp_stats(dpif, &stats)) {
- printf("\tlookups: frags:%"PRIu64, stats.n_frags);
- printf(" hit:%"PRIu64, stats.n_hit);
- printf(" missed:%"PRIu64, stats.n_missed);
- printf(" lost:%"PRIu64"\n", stats.n_lost);
-
- printf("\tflows: %"PRIu64"\n", stats.n_flows);
+ printf("\tlookups: hit:%"PRIu64" missed:%"PRIu64" lost:%"PRIu64"\n"
+ "\tflows: %"PRIu64"\n",
+ stats.n_hit, stats.n_missed, stats.n_lost, stats.n_flows);
}
DPIF_PORT_FOR_EACH (&dpif_port, &dump, dpif) {
printf("\tport %u: %s", dpif_port.port_no, dpif_port.name);
.
.RE
.
+.IP "\fBget\-frags \fIswitch\fR"
+Prints \fIswitch\fR's fragment handling mode. See \fBset\-frags\fR,
+below, for a description of each fragment handling mode.
+.IP
+The \fBshow\fR command also prints the fragment handling mode among
+its other output.
+.
+.IP "\fBset\-frags \fIswitch frag_mode\fR"
+Configures \fIswitch\fR's treatment of IPv4 and IPv6 fragments. The
+choices for \fIfrag_mode\fR are:
+.RS
+.IP "\fBnormal\fR"
+Fragments pass through the flow table like non-fragmented packets.
+The TCP ports, UDP ports, and ICMP type and code fields are always set
+to 0, even for fragments where that information would otherwise be
+available (fragments with offset 0). This is the default fragment
+handling mode for an OpenFlow switch.
+.IP "\fBdrop\fR"
+Fragments are dropped without passing through the flow table.
+.IP "\fBreassemble\fR"
+The switch reassembles fragments into full IP packets before passing
+them through the flow table. Open vSwitch does not implement this
+fragment handling mode.
+.IP "\fBnx\-match\fR"
+Fragments pass through the flow table like non-fragmented packets.
+The TCP ports, UDP ports, and ICMP type and code fields are available
+for matching for fragments with offset 0, and set to 0 in fragments
+with nonzero offset. This mode is a Nicira extension.
+.RE
+.IP
+See the description of \fBip_frag\fR, below, for a way to match on
+whether a packet is a fragment and on its fragment offset.
+.
.TP
\fBdump\-flows \fIswitch \fR[\fIflows\fR]
Prints to the console all flow entries in \fIswitch\fR's
Some of these matching possibilities can also be achieved with
\fBdl_vlan\fR and \fBdl_vlan_pcp\fR.
.
+.IP \fBip_frag=\fIfrag_type\fR
+When \fBdl_type\fR specifies IP or IPv6, \fIfrag_type\fR
+specifies what kind of IP fragments or non-fragments to match. The
+following values of \fIfrag_type\fR are supported:
+.RS
+.IP "\fBno\fR"
+Matches only non-fragmented packets.
+.IP "\fByes\fR"
+Matches all fragments.
+.IP "\fBfirst\fR"
+Matches only fragments with offset 0.
+.IP "\fBlater\fR"
+Matches only fragments with nonzero offset.
+.IP "\fBnot_later\fR"
+Matches non-fragmented packets and fragments with zero offset.
+.RE
+.IP
+The \fBip_frag\fR match type is likely to be most useful in
+\fBnx\-match\fR mode. See the description of the \fBset\-frags\fR
+command, above, for more details.
+.
.IP \fBarp_sha=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR
.IQ \fBarp_tha=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR
When \fBdl_type\fR specifies ARP, \fBarp_sha\fR and \fBarp_tha\fR match
" dump-desc SWITCH print switch description\n"
" dump-tables SWITCH print table stats\n"
" mod-port SWITCH IFACE ACT modify port behavior\n"
+ " get-frags SWITCH print fragment handling behavior\n"
+ " set-frags SWITCH FRAG_MODE set fragment handling behavior\n"
" dump-ports SWITCH [PORT] print port statistics\n"
" dump-flows SWITCH print all flow entries\n"
" dump-flows SWITCH FLOW print matching FLOWs\n"
/* Sends 'request', which should be a request that only has a reply if an error
* occurs, and waits for it to succeed or fail. If an error does occur, prints
- * it and exits with an error. */
+ * it and exits with an error.
+ *
+ * Destroys all of the 'requests'. */
static void
transact_multiple_noreply(struct vconn *vconn, struct list *requests)
{
/* Sends 'request', which should be a request that only has a reply if an error
* occurs, and waits for it to succeed or fail. If an error does occur, prints
- * it and exits with an error. */
+ * it and exits with an error.
+ *
+ * Destroys 'request'. */
static void
transact_noreply(struct vconn *vconn, struct ofpbuf *request)
{
transact_multiple_noreply(vconn, &requests);
}
+/* Sends an OFPT_GET_CONFIG_REQUEST over 'vconn', waits for the reply, and
+ * copies the switch configuration message it carries into '*config_'.
+ *
+ * Exits with a fatal error if the reply is not an OFPT_GET_CONFIG_REPLY whose
+ * length field equals the size of struct ofp_switch_config.  Transaction
+ * failures are handed to run() together with the connection name. */
+static void
+fetch_switch_config(struct vconn *vconn, struct ofp_switch_config *config_)
+{
+    struct ofp_switch_config *config;
+    struct ofp_header *header;
+    struct ofpbuf *request;
+    struct ofpbuf *reply;
+
+    make_openflow(sizeof(struct ofp_header), OFPT_GET_CONFIG_REQUEST,
+                  &request);
+    run(vconn_transact(vconn, request, &reply),
+        "talking to %s", vconn_get_name(vconn));
+
+    header = reply->data;
+    if (header->type != OFPT_GET_CONFIG_REPLY ||
+        header->length != htons(sizeof *config)) {
+        ovs_fatal(0, "%s: bad reply to config request", vconn_get_name(vconn));
+    }
+
+    config = reply->data;
+    *config_ = *config;
+
+    /* The reply buffer belongs to us once vconn_transact() succeeds.  Its
+     * contents have been copied out, so release it here; otherwise every
+     * call leaks one buffer. */
+    ofpbuf_delete(reply);
+}
+
+/* Sends an OFPT_SET_CONFIG message over 'vconn' whose body is taken from
+ * '*config_'.  The header of '*config_' is ignored: the message keeps the
+ * header that make_openflow() generated for it. */
+static void
+set_switch_config(struct vconn *vconn, struct ofp_switch_config *config_)
+{
+    struct ofp_switch_config *msg;
+    struct ofp_header fresh_header;
+    struct ofpbuf *buf;
+
+    msg = make_openflow(sizeof *msg, OFPT_SET_CONFIG, &buf);
+
+    /* Copy the caller's whole structure, then put back the freshly built
+     * header that the copy just overwrote. */
+    fresh_header = msg->header;
+    *msg = *config_;
+    msg->header = fresh_header;
+
+    transact_noreply(vconn, buf);
+}
+
static void
do_show(int argc OVS_UNUSED, char *argv[])
{
open_vconn(argv[1], &vconn);
if (argc > 2) {
- int miss_send_len = atoi(argv[2]);
- struct ofp_switch_config *osc;
- struct ofpbuf *buf;
+ struct ofp_switch_config config;
- osc = make_openflow(sizeof *osc, OFPT_SET_CONFIG, &buf);
- osc->miss_send_len = htons(miss_send_len);
- transact_noreply(vconn, buf);
+ fetch_switch_config(vconn, &config);
+ config.miss_send_len = htons(atoi(argv[2]));
+ set_switch_config(vconn, &config);
}
monitor_vconn(vconn);
}
vconn_close(vconn);
}
+/* "get-frags SWITCH": fetches the configuration of the switch named by
+ * argv[1] and prints the name of its fragment handling mode on stdout. */
+static void
+do_get_frags(int argc OVS_UNUSED, char *argv[])
+{
+    struct ofp_switch_config config;
+    struct vconn *vconn;
+    const char *mode_name;
+
+    open_vconn(argv[1], &vconn);
+    fetch_switch_config(vconn, &config);
+
+    mode_name = ofputil_frag_handling_to_string(ntohs(config.flags));
+    puts(mode_name);
+
+    vconn_close(vconn);
+}
+
+/* "set-frags SWITCH FRAG_MODE": changes the fragment handling mode of the
+ * switch named by argv[1] to the mode named by argv[2].
+ *
+ * Exits with a fatal error if argv[2] is not a known mode name, or if the
+ * switch does not report the requested mode after being asked to set it. */
+static void
+do_set_frags(int argc OVS_UNUSED, char *argv[])
+{
+    struct ofp_switch_config config;
+    enum ofp_config_flags mode;
+    struct vconn *vconn;
+    ovs_be16 wanted;
+
+    if (!ofputil_frag_handling_from_string(argv[2], &mode)) {
+        ovs_fatal(0, "%s: unknown fragment handling mode", argv[2]);
+    }
+
+    open_vconn(argv[1], &vconn);
+    fetch_switch_config(vconn, &config);
+
+    /* Replace only the fragment handling bits; keep every other flag the
+     * switch reported. */
+    wanted = (config.flags & htons(~OFPC_FRAG_MASK)) | htons(mode);
+    if (wanted == config.flags) {
+        /* The switch is already in the requested mode. */
+        vconn_close(vconn);
+        return;
+    }
+
+    /* Set the configuration. */
+    config.flags = wanted;
+    set_switch_config(vconn, &config);
+
+    /* Then read the configuration back to see whether the change took
+     * effect.  OpenFlow doesn't define error reporting for bad modes, so
+     * this is all we can do. */
+    fetch_switch_config(vconn, &config);
+    if (config.flags != wanted) {
+        ovs_fatal(0, "%s: setting fragment handling mode failed (this "
+                  "switch probably doesn't support mode \"%s\")",
+                  argv[1], ofputil_frag_handling_to_string(mode));
+    }
+    vconn_close(vconn);
+}
+
static void
do_ping(int argc, char *argv[])
{
{ "diff-flows", 2, 2, do_diff_flows },
{ "dump-ports", 1, 2, do_dump_ports },
{ "mod-port", 3, 3, do_mod_port },
+ { "get-frags", 1, 1, do_get_frags },
+ { "set-frags", 2, 2, do_set_frags },
{ "probe", 1, 1, do_probe },
{ "ping", 1, 2, do_ping },
{ "benchmark", 3, 3, do_benchmark },