From: Ansis Atteka Date: Mon, 5 Nov 2012 13:53:32 +0000 (+0200) Subject: datapath: add ipv6 'set' action X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bc7a5acdff087b7e7a162da42ae608a83f3cf902;p=openvswitch datapath: add ipv6 'set' action This patch adds ipv6 set action functionality. It allows changing the traffic class, flow label, hop limit, and IPv6 source and destination address fields. Acked-by: Jesse Gross Signed-off-by: Ansis Atteka --- diff --git a/NEWS b/NEWS index 646ce30b..03729657 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,7 @@ v1.9.0 - xx xxx xxxx - The tunneling code no longer assumes input and output keys are symmetric. If they are not, PMTUD needs to be disabled for tunneling to work. Note this only applies to flow-based keys. + - Datapath: Support for ipv6 set action. - FreeBSD is now a supported platform, thanks to code contributions from Gaetano Catalli, Ed Maste, and Giuseppe Lettieri. - ovs-bugtool: New --ovs option to report only OVS related information. 
diff --git a/datapath/actions.c b/datapath/actions.c index 8ec692d1..76c9823a 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -166,6 +167,54 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, *addr = new_addr; } +static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, + __be32 addr[4], const __be32 new_addr[4]) +{ + int transport_len = skb->len - skb_transport_offset(skb); + + if (l4_proto == IPPROTO_TCP) { + if (likely(transport_len >= sizeof(struct tcphdr))) + inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, + addr, new_addr, 1); + } else if (l4_proto == IPPROTO_UDP) { + if (likely(transport_len >= sizeof(struct udphdr))) { + struct udphdr *uh = udp_hdr(skb); + + if (uh->check || + get_ip_summed(skb) == OVS_CSUM_PARTIAL) { + inet_proto_csum_replace16(&uh->check, skb, + addr, new_addr, 1); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + } + } +} + +static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, + __be32 addr[4], const __be32 new_addr[4], + bool recalculate_csum) +{ + if (recalculate_csum) + update_ipv6_checksum(skb, l4_proto, addr, new_addr); + + skb_clear_rxhash(skb); + memcpy(addr, new_addr, sizeof(__be32[4])); +} + +static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) +{ + nh->priority = tc >> 4; + nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); +} + +static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) +{ + nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; + nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; + nh->flow_lbl[2] = fl & 0x000000FF; +} + static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) { csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); @@ -199,6 +248,47 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key) return 0; } +static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) +{ + struct ipv6hdr *nh; 
int err; + __be32 *saddr; + __be32 *daddr; + + err = make_writable(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr)); + if (unlikely(err)) + return err; + + nh = ipv6_hdr(skb); + saddr = (__be32 *)&nh->saddr; + daddr = (__be32 *)&nh->daddr; + + if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) + set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, + ipv6_key->ipv6_src, true); + + if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { + unsigned int offset = 0; + int flags = OVS_IP6T_FH_F_SKIP_RH; + bool recalc_csum = true; + + if (ipv6_ext_hdr(nh->nexthdr)) + recalc_csum = ipv6_find_hdr(skb, &offset, + NEXTHDR_ROUTING, NULL, + &flags) != NEXTHDR_ROUTING; + + set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, + ipv6_key->ipv6_dst, recalc_csum); + } + + set_ipv6_tc(nh, ipv6_key->ipv6_tclass); + set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); + nh->hop_limit = ipv6_key->ipv6_hlimit; + + return 0; +} + /* Must follow make_writable() since that can move the skb data. 
*/ static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) @@ -373,6 +463,10 @@ static int execute_set_action(struct sk_buff *skb, err = set_ipv4(skb, nla_data(nested_attr)); break; + case OVS_KEY_ATTR_IPV6: + err = set_ipv6(skb, nla_data(nested_attr)); + break; + case OVS_KEY_ATTR_TCP: err = set_tcp(skb, nla_data(nested_attr)); break; diff --git a/datapath/checksum.h b/datapath/checksum.h index 2f2ffeeb..a440c599 100644 --- a/datapath/checksum.h +++ b/datapath/checksum.h @@ -102,6 +102,30 @@ static inline void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, } #endif +#if defined(NEED_CSUM_NORMALIZE) || LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) +#define inet_proto_csum_replace16 rpl_inet_proto_csum_replace16 +static inline void inet_proto_csum_replace16(__sum16 *sum, + struct sk_buff *skb, + const __be32 *from, + const __be32 *to, + int pseudohdr) +{ + __be32 diff[] = { + ~from[0], ~from[1], ~from[2], ~from[3], + to[0], to[1], to[2], to[3], + }; + if (get_ip_summed(skb) != OVS_CSUM_PARTIAL) { + *sum = csum_fold(csum_partial(diff, sizeof(diff), + ~csum_unfold(*sum))); + if (get_ip_summed(skb) == OVS_CSUM_COMPLETE && pseudohdr) + skb->csum = ~csum_partial(diff, sizeof(diff), + ~skb->csum); + } else if (pseudohdr) + *sum = ~csum_fold(csum_partial(diff, sizeof(diff), + csum_unfold(*sum))); +} +#endif + #ifdef NEED_CSUM_NORMALIZE static inline void update_csum_start(struct sk_buff *skb, int delta) { diff --git a/datapath/datapath.c b/datapath/datapath.c index e88b1da2..e359ac03 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -588,6 +588,7 @@ static int validate_set(const struct nlattr *a, switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv6 *ipv6_key; case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_TUN_ID: @@ -616,6 +617,25 @@ static int validate_set(const struct nlattr *a, break; + case OVS_KEY_ATTR_IPV6: + if (flow_key->eth.type != 
htons(ETH_P_IPV6)) + return -EINVAL; + + if (!flow_key->ip.proto) + return -EINVAL; + + ipv6_key = nla_data(ovs_key); + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + + if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) + return -EINVAL; + + break; + case OVS_KEY_ATTR_TCP: if (flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; diff --git a/debian/changelog b/debian/changelog index ac5f158a..b9518fb7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -11,6 +11,7 @@ openvswitch (1.9.0-1) unstable; urgency=low - The tunneling code no longer assumes input and output keys are symmetric. If they are not, PMTUD needs to be disabled for tunneling to work. Note this only applies to flow-based keys. + - Datapath: Support for ipv6 set action. - FreeBSD is now a supported platform, thanks to code contributions from Gaetano Catalli, Ed Maste, and Giuseppe Lettieri. - ovs-bugtool: New --ovs option to report only OVS related information. diff --git a/lib/csum.c b/lib/csum.c index 98a83de2..fb32a530 100644 --- a/lib/csum.c +++ b/lib/csum.c @@ -112,6 +112,21 @@ recalc_csum32(ovs_be16 old_csum, ovs_be32 old_u32, ovs_be32 new_u32) old_u32 >> 16, new_u32 >> 16); } +/* Returns the new checksum for a packet in which the checksum field previously + * contained 'old_csum' and in which a field that contained 'old_u32[4]' was + * changed to contain 'new_u32[4]'. */ +ovs_be16 +recalc_csum128(ovs_be16 old_csum, const ovs_be32 old_u32[4], + const ovs_be32 new_u32[4]) +{ + ovs_be16 new_csum = old_csum; + int i; + + for (i = 0; i < 4; ++i) { + new_csum = recalc_csum32(new_csum, old_u32[i], new_u32[i]); + } + return new_csum; +} #else /* __CHECKER__ */ /* Making sparse happy with these functions also makes them unreadable, so * don't bother to show it their implementations. 
*/ diff --git a/lib/csum.h b/lib/csum.h index 12402d70..6382d298 100644 --- a/lib/csum.h +++ b/lib/csum.h @@ -28,5 +28,7 @@ uint32_t csum_continue(uint32_t partial, const void *, size_t); ovs_be16 csum_finish(uint32_t partial); ovs_be16 recalc_csum16(ovs_be16 old_csum, ovs_be16 old_u16, ovs_be16 new_u16); ovs_be16 recalc_csum32(ovs_be16 old_csum, ovs_be32 old_u32, ovs_be32 new_u32); +ovs_be16 recalc_csum128(ovs_be16 old_csum, const ovs_be32 old_u32[4], + const ovs_be32 new_u32[4]); #endif /* csum.h */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index a80b1b06..4ce4147c 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1181,13 +1181,13 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a) { enum ovs_key_attr type = nl_attr_type(a); const struct ovs_key_ipv4 *ipv4_key; + const struct ovs_key_ipv6 *ipv6_key; const struct ovs_key_tcp *tcp_key; const struct ovs_key_udp *udp_key; switch (type) { case OVS_KEY_ATTR_TUN_ID: case OVS_KEY_ATTR_PRIORITY: - case OVS_KEY_ATTR_IPV6: case OVS_KEY_ATTR_IPV4_TUNNEL: /* not implemented */ break; @@ -1203,6 +1203,13 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a) ipv4_key->ipv4_tos, ipv4_key->ipv4_ttl); break; + case OVS_KEY_ATTR_IPV6: + ipv6_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv6)); + packet_set_ipv6(packet, ipv6_key->ipv6_proto, ipv6_key->ipv6_src, + ipv6_key->ipv6_dst, ipv6_key->ipv6_tclass, + ipv6_key->ipv6_label, ipv6_key->ipv6_hlimit); + break; + case OVS_KEY_ATTR_TCP: tcp_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_tcp)); packet_set_tcp_port(packet, tcp_key->tcp_src, tcp_key->tcp_dst); diff --git a/lib/packets.c b/lib/packets.c index 16f4fe63..fa73b50a 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "byte-order.h" #include "csum.h" @@ -472,6 +473,133 @@ packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr) *addr = new_addr; } +/* Returns true, if packet contains at 
least one routing header where + * segments_left > 0. + * + * This function assumes that L3 and L4 markers are set in the packet. */ +static bool +packet_rh_present(struct ofpbuf *packet) +{ + const struct ip6_hdr *nh; + int nexthdr; + size_t len; + size_t remaining; + uint8_t *data = packet->l3; + + remaining = (uint8_t *)packet->l4 - (uint8_t *)packet->l3; + + if (remaining < sizeof *nh) { + return false; + } + nh = (struct ip6_hdr *)data; + data += sizeof *nh; + remaining -= sizeof *nh; + nexthdr = nh->ip6_nxt; + + while (1) { + if ((nexthdr != IPPROTO_HOPOPTS) + && (nexthdr != IPPROTO_ROUTING) + && (nexthdr != IPPROTO_DSTOPTS) + && (nexthdr != IPPROTO_AH) + && (nexthdr != IPPROTO_FRAGMENT)) { + /* It's either a terminal header (e.g., TCP, UDP) or one we + * don't understand. In either case, there are no more + * extension headers to examine, so stop scanning. */ + break; + } + + /* We only verify that at least 8 bytes of the next header are + * available, but many of these headers are longer. Ensure that + * accesses within the extension header are within those first 8 + * bytes. All extension headers are required to be at least 8 + * bytes. */ + if (remaining < 8) { + return false; + } + + if (nexthdr == IPPROTO_AH) { + /* A standard AH definition isn't available, but the fields + * we care about are in the same location as the generic + * option header--only the header length is calculated + * differently. 
*/ + const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; + + nexthdr = ext_hdr->ip6e_nxt; + len = (ext_hdr->ip6e_len + 2) * 4; + } else if (nexthdr == IPPROTO_FRAGMENT) { + const struct ip6_frag *frag_hdr = (struct ip6_frag *)data; + + nexthdr = frag_hdr->ip6f_nxt; + len = sizeof *frag_hdr; + } else if (nexthdr == IPPROTO_ROUTING) { + const struct ip6_rthdr *rh = (struct ip6_rthdr *)data; + + if (rh->ip6r_segleft > 0) { + return true; + } + + nexthdr = rh->ip6r_nxt; + len = (rh->ip6r_len + 1) * 8; + } else { + const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; + + nexthdr = ext_hdr->ip6e_nxt; + len = (ext_hdr->ip6e_len + 1) * 8; + } + + if (remaining < len) { + return false; + } + remaining -= len; + data += len; + } + + return false; +} + +static void +packet_update_csum128(struct ofpbuf *packet, uint8_t proto, + ovs_be32 addr[4], const ovs_be32 new_addr[4]) +{ + if (proto == IPPROTO_TCP && packet->l7) { + struct tcp_header *th = packet->l4; + + th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); + } else if (proto == IPPROTO_UDP && packet->l7) { + struct udp_header *uh = packet->l4; + + if (uh->udp_csum) { + uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); + if (!uh->udp_csum) { + uh->udp_csum = htons(0xffff); + } + } + } +} + +static void +packet_set_ipv6_addr(struct ofpbuf *packet, uint8_t proto, + struct in6_addr *addr, const ovs_be32 new_addr[4], + bool recalculate_csum) +{ + if (recalculate_csum) { + packet_update_csum128(packet, proto, (ovs_be32 *)addr, new_addr); + } + memcpy(addr, new_addr, sizeof(*addr)); +} + +static void +packet_set_ipv6_flow_label(ovs_be32 *flow_label, ovs_be32 flow_key) +{ + *flow_label = (*flow_label & htonl(~IPV6_LABEL_MASK)) | flow_key; +} + +static void +packet_set_ipv6_tc(ovs_be32 *flow_label, uint8_t tc) +{ + *flow_label = (*flow_label & htonl(0xF00FFFFF)) | htonl(tc << 20); +} + /* Modifies the IPv4 header fields of 'packet' to be consistent with 'src', * 'dst', 'tos', and 'ttl'. 
Updates 'packet''s L4 checksums as appropriate. * 'packet' must contain a valid IPv4 packet with correctly populated l[347] @@ -507,6 +635,33 @@ packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst, } } +/* Modifies the IPv6 header fields of 'packet' to be consistent with 'src', + * 'dst', 'key_tc' (traffic class), 'key_fl' (flow label), and 'key_hl' (hop + * limit). Updates 'packet''s L4 checksums as appropriate. 'packet' must + * contain a valid IPv6 packet with correctly populated l[347] markers. */ +void +packet_set_ipv6(struct ofpbuf *packet, uint8_t proto, const ovs_be32 src[4], + const ovs_be32 dst[4], uint8_t key_tc, ovs_be32 key_fl, + uint8_t key_hl) +{ + struct ip6_hdr *nh = packet->l3; + + if (memcmp(&nh->ip6_src, src, sizeof(ovs_be32[4]))) { + packet_set_ipv6_addr(packet, proto, &nh->ip6_src, src, true); + } + + if (memcmp(&nh->ip6_dst, dst, sizeof(ovs_be32[4]))) { + packet_set_ipv6_addr(packet, proto, &nh->ip6_dst, dst, + !packet_rh_present(packet)); + } + + packet_set_ipv6_tc(&nh->ip6_flow, key_tc); + + packet_set_ipv6_flow_label(&nh->ip6_flow, key_fl); + + nh->ip6_hlim = key_hl; +} + static void packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) { diff --git a/lib/packets.h b/lib/packets.h index e550be0b..4ad527ba 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -490,6 +490,9 @@ void *snap_compose(struct ofpbuf *, const uint8_t eth_dst[ETH_ADDR_LEN], unsigned int oui, uint16_t snap_type, size_t size); void packet_set_ipv4(struct ofpbuf *, ovs_be32 src, ovs_be32 dst, uint8_t tos, uint8_t ttl); +void packet_set_ipv6(struct ofpbuf *, uint8_t proto, const ovs_be32 src[4], + const ovs_be32 dst[4], uint8_t tc, + ovs_be32 fl, uint8_t hlmit); void packet_set_tcp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst); void packet_set_udp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst);