datapath: add ipv6 'set' action
author Ansis Atteka <aatteka@nicira.com>
Mon, 5 Nov 2012 13:53:32 +0000 (15:53 +0200)
committer Ansis Atteka <aatteka@nicira.com>
Tue, 13 Nov 2012 11:51:59 +0000 (13:51 +0200)
This patch adds ipv6 set action functionality. It allows changing the
traffic class, flow label, hop limit, and IPv6 source and destination
address fields.

Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Ansis Atteka <aatteka@nicira.com>
NEWS
datapath/actions.c
datapath/checksum.h
datapath/datapath.c
debian/changelog
lib/csum.c
lib/csum.h
lib/dpif-netdev.c
lib/packets.c
lib/packets.h

diff --git a/NEWS b/NEWS
index 646ce30b5a5aa2704c755d11c10194a240722d15..037296574b90a8f6e48ca2d929f5a837c2ba94e6 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,7 @@ v1.9.0 - xx xxx xxxx
     - The tunneling code no longer assumes input and output keys are symmetric.
       If they are not, PMTUD needs to be disabled for tunneling to work. Note
       this only applies to flow-based keys.
+    - Datapath: Support for ipv6 set action.
     - FreeBSD is now a supported platform, thanks to code contributions from
       Gaetano Catalli, Ed Maste, and Giuseppe Lettieri.
     - ovs-bugtool: New --ovs option to report only OVS related information.
index 8ec692d1d3d61c03aba0628393eff1ce8acc7efa..76c9823a5259268e8e5a62aa0748ef3f279bfc8d 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
 #include <net/ip.h>
+#include <net/ipv6.h>
 #include <net/checksum.h>
 #include <net/dsfield.h>
 
@@ -166,6 +167,54 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
        *addr = new_addr;
 }
 
+static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
+                                __be32 addr[4], const __be32 new_addr[4])
+{
+       int transport_len = skb->len - skb_transport_offset(skb);
+
+       if (l4_proto == IPPROTO_TCP) {
+               if (likely(transport_len >= sizeof(struct tcphdr)))
+                       inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
+                                                 addr, new_addr, 1);
+       } else if (l4_proto == IPPROTO_UDP) {
+               if (likely(transport_len >= sizeof(struct udphdr))) {
+                       struct udphdr *uh = udp_hdr(skb);
+
+                       if (uh->check ||
+                           get_ip_summed(skb) == OVS_CSUM_PARTIAL) {
+                               inet_proto_csum_replace16(&uh->check, skb,
+                                                         addr, new_addr, 1);
+                               if (!uh->check)
+                                       uh->check = CSUM_MANGLED_0;
+                       }
+               }
+       }
+}
+
+static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
+                         __be32 addr[4], const __be32 new_addr[4],
+                         bool recalculate_csum)
+{
+       if (recalculate_csum)
+               update_ipv6_checksum(skb, l4_proto, addr, new_addr);
+
+       skb_clear_rxhash(skb);
+       memcpy(addr, new_addr, sizeof(__be32[4]));
+}
+
+static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+{
+       nh->priority = tc >> 4;
+       nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
+}
+
+static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+{
+       nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
+       nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
+       nh->flow_lbl[2] = fl & 0x000000FF;
+}
+
 static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
 {
        csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
@@ -199,6 +248,47 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
        return 0;
 }
 
+static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
+{
+       struct ipv6hdr *nh;
+       int err;
+       __be32 *saddr;
+       __be32 *daddr;
+
+       err = make_writable(skb, skb_network_offset(skb) +
+                           sizeof(struct ipv6hdr));
+       if (unlikely(err))
+               return err;
+
+       nh = ipv6_hdr(skb);
+       saddr = (__be32 *)&nh->saddr;
+       daddr = (__be32 *)&nh->daddr;
+
+       if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
+               set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
+                             ipv6_key->ipv6_src, true);
+
+       if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+               unsigned int offset = 0;
+               int flags = OVS_IP6T_FH_F_SKIP_RH;
+               bool recalc_csum = true;
+
+               if (ipv6_ext_hdr(nh->nexthdr))
+                       recalc_csum = ipv6_find_hdr(skb, &offset,
+                                                   NEXTHDR_ROUTING, NULL,
+                                                   &flags) != NEXTHDR_ROUTING;
+
+               set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
+                             ipv6_key->ipv6_dst, recalc_csum);
+       }
+
+       set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
+       set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
+       nh->hop_limit = ipv6_key->ipv6_hlimit;
+
+       return 0;
+}
+
 /* Must follow make_writable() since that can move the skb data. */
 static void set_tp_port(struct sk_buff *skb, __be16 *port,
                         __be16 new_port, __sum16 *check)
@@ -373,6 +463,10 @@ static int execute_set_action(struct sk_buff *skb,
                err = set_ipv4(skb, nla_data(nested_attr));
                break;
 
+       case OVS_KEY_ATTR_IPV6:
+               err = set_ipv6(skb, nla_data(nested_attr));
+               break;
+
        case OVS_KEY_ATTR_TCP:
                err = set_tcp(skb, nla_data(nested_attr));
                break;
index 2f2ffeeb2e66ebaf7b30ebec0dee385bf3bb425c..a440c59985ca94a17c5dac620248b345767a5404 100644 (file)
@@ -102,6 +102,30 @@ static inline void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 }
 #endif
 
+#if defined(NEED_CSUM_NORMALIZE) || LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
+#define inet_proto_csum_replace16 rpl_inet_proto_csum_replace16
+static inline void inet_proto_csum_replace16(__sum16 *sum,
+                                            struct sk_buff *skb,
+                                            const __be32 *from,
+                                            const __be32 *to,
+                                            int pseudohdr)
+{
+       __be32 diff[] = {
+               ~from[0], ~from[1], ~from[2], ~from[3],
+               to[0], to[1], to[2], to[3],
+       };
+       if (get_ip_summed(skb) != OVS_CSUM_PARTIAL) {
+               *sum = csum_fold(csum_partial(diff, sizeof(diff),
+                                ~csum_unfold(*sum)));
+               if (get_ip_summed(skb) == OVS_CSUM_COMPLETE && pseudohdr)
+                       skb->csum = ~csum_partial(diff, sizeof(diff),
+                                                 ~skb->csum);
+       } else if (pseudohdr)
+               *sum = ~csum_fold(csum_partial(diff, sizeof(diff),
+                                 csum_unfold(*sum)));
+}
+#endif
+
 #ifdef NEED_CSUM_NORMALIZE
 static inline void update_csum_start(struct sk_buff *skb, int delta)
 {
index e88b1da240067e507bab8961093940829bf85707..e359ac034039b683985b3b80a1ec4e8fd5eadcaa 100644 (file)
@@ -588,6 +588,7 @@ static int validate_set(const struct nlattr *a,
        switch (key_type) {
        const struct ovs_key_ipv4 *ipv4_key;
        const struct ovs_key_ipv4_tunnel *tun_key;
+       const struct ovs_key_ipv6 *ipv6_key;
 
        case OVS_KEY_ATTR_PRIORITY:
        case OVS_KEY_ATTR_TUN_ID:
@@ -616,6 +617,25 @@ static int validate_set(const struct nlattr *a,
 
                break;
 
+       case OVS_KEY_ATTR_IPV6:
+               if (flow_key->eth.type != htons(ETH_P_IPV6))
+                       return -EINVAL;
+
+               if (!flow_key->ip.proto)
+                       return -EINVAL;
+
+               ipv6_key = nla_data(ovs_key);
+               if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+                       return -EINVAL;
+
+               if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+                       return -EINVAL;
+
+               if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
+                       return -EINVAL;
+
+               break;
+
        case OVS_KEY_ATTR_TCP:
                if (flow_key->ip.proto != IPPROTO_TCP)
                        return -EINVAL;
index ac5f158a6a094f93081ea90972706843f7618fde..b9518fb7b10c2dd5ddaa74c7077760fa95a5f588 100644 (file)
@@ -11,6 +11,7 @@ openvswitch (1.9.0-1) unstable; urgency=low
     - The tunneling code no longer assumes input and output keys are symmetric.
       If they are not, PMTUD needs to be disabled for tunneling to work. Note
       this only applies to flow-based keys.
+    - Datapath: Support for ipv6 set action.
     - FreeBSD is now a supported platform, thanks to code contributions from
       Gaetano Catalli, Ed Maste, and Giuseppe Lettieri.
     - ovs-bugtool: New --ovs option to report only OVS related information.
index 98a83de2eafa56e96f17cd525579dd89fa3505f7..fb32a530dfce8d9935b78cb24906dfc7df2ce2c8 100644 (file)
@@ -112,6 +112,21 @@ recalc_csum32(ovs_be16 old_csum, ovs_be32 old_u32, ovs_be32 new_u32)
                          old_u32 >> 16, new_u32 >> 16);
 }
 
+/* Returns the new checksum for a packet in which the checksum field previously
+ * contained 'old_csum' and in which a field that contained 'old_u32[4]' was
+ * changed to contain 'new_u32[4]'. */
+ovs_be16
+recalc_csum128(ovs_be16 old_csum, ovs_be32 old_u32[4],
+               const ovs_be32 new_u32[4])
+{
+    ovs_be16 new_csum = old_csum;
+    int i;
+
+    for (i = 0; i < 4; ++i) {
+        new_csum = recalc_csum32(new_csum, old_u32[i], new_u32[i]);
+    }
+    return new_csum;
+}
 #else  /* __CHECKER__ */
 /* Making sparse happy with these functions also makes them unreadable, so
  * don't bother to show it their implementations. */
index 12402d703f27bc772a95cf47ca48878b07816090..6382d298c545259d578fcd9885552b415975827a 100644 (file)
@@ -28,5 +28,7 @@ uint32_t csum_continue(uint32_t partial, const void *, size_t);
 ovs_be16 csum_finish(uint32_t partial);
 ovs_be16 recalc_csum16(ovs_be16 old_csum, ovs_be16 old_u16, ovs_be16 new_u16);
 ovs_be16 recalc_csum32(ovs_be16 old_csum, ovs_be32 old_u32, ovs_be32 new_u32);
+ovs_be16 recalc_csum128(ovs_be16 old_csum, ovs_be32 old_u32[4],
+                        const ovs_be32 new_u32[4]);
 
 #endif /* csum.h */
index a80b1b062df4ed5168b8ab089670f75c54cd66ac..4ce4147c06c483e19b606cf4cb9688413c57301d 100644 (file)
@@ -1181,13 +1181,13 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
 {
     enum ovs_key_attr type = nl_attr_type(a);
     const struct ovs_key_ipv4 *ipv4_key;
+    const struct ovs_key_ipv6 *ipv6_key;
     const struct ovs_key_tcp *tcp_key;
     const struct ovs_key_udp *udp_key;
 
     switch (type) {
     case OVS_KEY_ATTR_TUN_ID:
     case OVS_KEY_ATTR_PRIORITY:
-    case OVS_KEY_ATTR_IPV6:
     case OVS_KEY_ATTR_IPV4_TUNNEL:
         /* not implemented */
         break;
@@ -1203,6 +1203,13 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
                         ipv4_key->ipv4_tos, ipv4_key->ipv4_ttl);
         break;
 
+    case OVS_KEY_ATTR_IPV6:
+        ipv6_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv6));
+        packet_set_ipv6(packet, ipv6_key->ipv6_proto, ipv6_key->ipv6_src,
+                        ipv6_key->ipv6_dst, ipv6_key->ipv6_tclass,
+                        ipv6_key->ipv6_label, ipv6_key->ipv6_hlimit);
+        break;
+
     case OVS_KEY_ATTR_TCP:
         tcp_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_tcp));
         packet_set_tcp_port(packet, tcp_key->tcp_src, tcp_key->tcp_dst);
index 16f4fe63afe254f93e2678c4771013c9b7ac3566..fa73b50a043246a3944fbd4bd8e216636216668f 100644 (file)
@@ -20,6 +20,7 @@
 #include <arpa/inet.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
+#include <netinet/ip6.h>
 #include <stdlib.h>
 #include "byte-order.h"
 #include "csum.h"
@@ -472,6 +473,133 @@ packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr)
     *addr = new_addr;
 }
 
+/* Returns true if 'packet' contains at least one routing header where
+ * segments_left > 0.
+ *
+ * This function assumes that L3 and L4 markers are set in the packet. */
+static bool
+packet_rh_present(struct ofpbuf *packet)
+{
+    const struct ip6_hdr *nh;
+    int nexthdr;
+    size_t len;
+    size_t remaining;
+    uint8_t *data = packet->l3;
+
+    remaining = (uint8_t *)packet->l4 - (uint8_t *)packet->l3;
+
+    if (remaining < sizeof *nh) {
+        return false;
+    }
+    nh = (struct ip6_hdr *)data;
+    data += sizeof *nh;
+    remaining -= sizeof *nh;
+    nexthdr = nh->ip6_nxt;
+
+    while (1) {
+        if ((nexthdr != IPPROTO_HOPOPTS)
+                && (nexthdr != IPPROTO_ROUTING)
+                && (nexthdr != IPPROTO_DSTOPTS)
+                && (nexthdr != IPPROTO_AH)
+                && (nexthdr != IPPROTO_FRAGMENT)) {
+            /* It's either a terminal header (e.g., TCP, UDP) or one we
+             * don't understand.  In either case, there are no more
+             * extension headers to scan for a routing header. */
+            break;
+        }
+
+        /* We only verify that at least 8 bytes of the next header are
+         * available, but many of these headers are longer.  Ensure that
+         * accesses within the extension header are within those first 8
+         * bytes. All extension headers are required to be at least 8
+         * bytes. */
+        if (remaining < 8) {
+            return false;
+        }
+
+        if (nexthdr == IPPROTO_AH) {
+            /* A standard AH definition isn't available, but the fields
+             * we care about are in the same location as the generic
+             * option header--only the header length is calculated
+             * differently. */
+            const struct ip6_ext *ext_hdr = (struct ip6_ext *)data;
+
+            nexthdr = ext_hdr->ip6e_nxt;
+            len = (ext_hdr->ip6e_len + 2) * 4;
+        } else if (nexthdr == IPPROTO_FRAGMENT) {
+            const struct ip6_frag *frag_hdr = (struct ip6_frag *)data;
+
+            nexthdr = frag_hdr->ip6f_nxt;
+            len = sizeof *frag_hdr;
+        } else if (nexthdr == IPPROTO_ROUTING) {
+            const struct ip6_rthdr *rh = (struct ip6_rthdr *)data;
+
+            if (rh->ip6r_segleft > 0) {
+                return true;
+            }
+
+            nexthdr = rh->ip6r_nxt;
+            len = (rh->ip6r_len + 1) * 8;
+        } else {
+            const struct ip6_ext *ext_hdr = (struct ip6_ext *)data;
+
+            nexthdr = ext_hdr->ip6e_nxt;
+            len = (ext_hdr->ip6e_len + 1) * 8;
+        }
+
+        if (remaining < len) {
+            return false;
+        }
+        remaining -= len;
+        data += len;
+    }
+
+    return false;
+}
+
+static void
+packet_update_csum128(struct ofpbuf *packet, uint8_t proto,
+                     ovs_be32 addr[4], const ovs_be32 new_addr[4])
+{
+    if (proto == IPPROTO_TCP && packet->l7) {
+        struct tcp_header *th = packet->l4;
+
+        th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+    } else if (proto == IPPROTO_UDP && packet->l7) {
+        struct udp_header *uh = packet->l4;
+
+        if (uh->udp_csum) {
+            uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
+            if (!uh->udp_csum) {
+                uh->udp_csum = htons(0xffff);
+            }
+        }
+    }
+}
+
+static void
+packet_set_ipv6_addr(struct ofpbuf *packet, uint8_t proto,
+                     struct in6_addr *addr, const ovs_be32 new_addr[4],
+                     bool recalculate_csum)
+{
+    if (recalculate_csum) {
+        packet_update_csum128(packet, proto, (ovs_be32 *)addr, new_addr);
+    }
+    memcpy(addr, new_addr, sizeof(*addr));
+}
+
+static void
+packet_set_ipv6_flow_label(ovs_be32 *flow_label, ovs_be32 flow_key)
+{
+    *flow_label = (*flow_label & htonl(~IPV6_LABEL_MASK)) | flow_key;
+}
+
+static void
+packet_set_ipv6_tc(ovs_be32 *flow_label, uint8_t tc)
+{
+    *flow_label = (*flow_label & htonl(0xF00FFFFF)) | htonl(tc << 20);
+}
+
 /* Modifies the IPv4 header fields of 'packet' to be consistent with 'src',
  * 'dst', 'tos', and 'ttl'.  Updates 'packet''s L4 checksums as appropriate.
  * 'packet' must contain a valid IPv4 packet with correctly populated l[347]
@@ -507,6 +635,33 @@ packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst,
     }
 }
 
+/* Modifies the IPv6 header fields of 'packet' to be consistent with 'src',
+ * 'dst', 'key_tc' (traffic class), 'key_fl' (flow label), and 'key_hl' (hop
+ * limit).  Updates 'packet''s L4 checksums as appropriate.  'packet' must
+ * contain a valid IPv6 packet with correctly populated l[347] markers. */
+void
+packet_set_ipv6(struct ofpbuf *packet, uint8_t proto, const ovs_be32 src[4],
+                const ovs_be32 dst[4], uint8_t key_tc, ovs_be32 key_fl,
+                uint8_t key_hl)
+{
+    struct ip6_hdr *nh = packet->l3;
+
+    if (memcmp(&nh->ip6_src, src, sizeof(ovs_be32[4]))) {
+        packet_set_ipv6_addr(packet, proto, &nh->ip6_src, src, true);
+    }
+
+    if (memcmp(&nh->ip6_dst, dst, sizeof(ovs_be32[4]))) {
+        packet_set_ipv6_addr(packet, proto, &nh->ip6_dst, dst,
+                             !packet_rh_present(packet));
+    }
+
+    packet_set_ipv6_tc(&nh->ip6_flow, key_tc);
+
+    packet_set_ipv6_flow_label(&nh->ip6_flow, key_fl);
+
+    nh->ip6_hlim = key_hl;
+}
+
 static void
 packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum)
 {
index e550be0ba9bf34bcf698ebeddb28ff8081a63083..4ad527bafb9ceb8edc83bfbc8608ec0368f4077f 100644 (file)
@@ -490,6 +490,9 @@ void *snap_compose(struct ofpbuf *, const uint8_t eth_dst[ETH_ADDR_LEN],
                    unsigned int oui, uint16_t snap_type, size_t size);
 void packet_set_ipv4(struct ofpbuf *, ovs_be32 src, ovs_be32 dst, uint8_t tos,
                      uint8_t ttl);
+void packet_set_ipv6(struct ofpbuf *, uint8_t proto, const ovs_be32 src[4],
+                     const ovs_be32 dst[4], uint8_t tc,
+                     ovs_be32 fl /* network byte order */, uint8_t hlmit);
 void packet_set_tcp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst);
 void packet_set_udp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst);