From b37e6334fd80a6460aabc95f05eb8ab68e5a82ed Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 24 Oct 2011 12:27:36 -0700 Subject: [PATCH] datapath: Add multicast tunnel support. Something like this, on two separate vswitches, works to try it out: route add -net 224.0.0.0 netmask 240.0.0.0 dev eth0 ovs-vsctl \ -- add-port br0 gre0 \ -- set interface gre0 type=gre options:remote_ip=224.0.0.1 Runtime tested on Linux 3.0, build tested on Linux 2.6.18, both i386. Signed-off-by: Ben Pfaff Acked-by: Jesse Gross --- datapath/tunnel.c | 117 +++++++++++++++++++++++++++++++++++-------- datapath/tunnel.h | 3 ++ datapath/vport-gre.c | 2 + lib/netdev-vport.c | 12 ++++- lib/packets.h | 5 ++ vswitchd/vswitch.xml | 16 ++++-- 6 files changed, 128 insertions(+), 27 deletions(-) diff --git a/datapath/tunnel.c b/datapath/tunnel.c index f9138493..372d90ed 100644 --- a/datapath/tunnel.c +++ b/datapath/tunnel.c @@ -10,8 +10,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -147,6 +149,21 @@ static void free_cache_rcu(struct rcu_head *rcu) free_cache(c); } +/* Frees the portion of 'mutable' that requires RTNL and thus can't happen + * within an RCU callback. Fortunately this part doesn't require waiting for + * an RCU grace period. 
+ */ +static void free_mutable_rtnl(struct tnl_mutable_config *mutable) +{ + ASSERT_RTNL(); + if (ipv4_is_multicast(mutable->key.daddr) && mutable->mlink) { + struct in_device *in_dev; + in_dev = inetdev_by_index(&init_net, mutable->mlink); + if (in_dev) + ip_mc_dec_group(in_dev, mutable->key.daddr); + } +} + static void assign_config_rcu(struct vport *vport, struct tnl_mutable_config *new_config) { @@ -155,6 +172,8 @@ static void assign_config_rcu(struct vport *vport, old_config = rtnl_dereference(tnl_vport->mutable); rcu_assign_pointer(tnl_vport->mutable, new_config); + + free_mutable_rtnl(old_config); call_rcu(&old_config->rcu, free_config_rcu); } @@ -270,6 +289,26 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be64 key, struct port_lookup_key lookup; struct vport *vport; + if (ipv4_is_multicast(saddr)) { + lookup.saddr = 0; + lookup.daddr = saddr; + if (key_remote_ports) { + lookup.tunnel_type = tunnel_type | TNL_T_KEY_EXACT; + lookup.in_key = key; + vport = port_table_lookup(&lookup, mutable); + if (vport) + return vport; + } + if (remote_ports) { + lookup.tunnel_type = tunnel_type | TNL_T_KEY_MATCH; + lookup.in_key = 0; + vport = port_table_lookup(&lookup, mutable); + if (vport) + return vport; + } + return NULL; + } + lookup.saddr = saddr; lookup.daddr = daddr; @@ -932,6 +971,31 @@ unlock: return cache; } +static struct rtable *__find_route(const struct tnl_mutable_config *mutable, + u8 ipproto, u8 tos) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) + struct flowi fl = { .nl_u = { .ip4_u = + { .daddr = mutable->key.daddr, + .saddr = mutable->key.saddr, + .tos = tos } }, + .proto = ipproto }; + struct rtable *rt; + + if (unlikely(ip_route_output_key(&init_net, &rt, &fl))) + return ERR_PTR(-EADDRNOTAVAIL); + + return rt; +#else + struct flowi4 fl = { .daddr = mutable->key.daddr, + .saddr = mutable->key.saddr, + .flowi4_tos = tos, + .flowi4_proto = ipproto }; + + return ip_route_output_key(&init_net, &fl); +#endif +} + static struct rtable 
*find_route(struct vport *vport, const struct tnl_mutable_config *mutable, u8 tos, struct tnl_cache **cache) @@ -947,25 +1011,10 @@ static struct rtable *find_route(struct vport *vport, return cur_cache->rt; } else { struct rtable *rt; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = mutable->key.daddr, - .saddr = mutable->key.saddr, - .tos = tos } }, - .proto = tnl_vport->tnl_ops->ipproto }; - - if (unlikely(ip_route_output_key(&init_net, &rt, &fl))) - return NULL; -#else - struct flowi4 fl = { .daddr = mutable->key.daddr, - .saddr = mutable->key.saddr, - .flowi4_tos = tos, - .flowi4_proto = tnl_vport->tnl_ops->ipproto }; - rt = ip_route_output_key(&init_net, &fl); + rt = __find_route(mutable, tnl_vport->tnl_ops->ipproto, tos); if (IS_ERR(rt)) return NULL; -#endif if (likely(tos == mutable->tos)) *cache = build_cache(vport, mutable, rt); @@ -1310,9 +1359,12 @@ static int tnl_set_config(struct nlattr *options, const struct tnl_ops *tnl_ops, mutable->flags = nla_get_u32(a[OVS_TUNNEL_ATTR_FLAGS]) & TNL_F_PUBLIC; - if (a[OVS_TUNNEL_ATTR_SRC_IPV4]) - mutable->key.saddr = nla_get_be32(a[OVS_TUNNEL_ATTR_SRC_IPV4]); mutable->key.daddr = nla_get_be32(a[OVS_TUNNEL_ATTR_DST_IPV4]); + if (a[OVS_TUNNEL_ATTR_SRC_IPV4]) { + if (ipv4_is_multicast(mutable->key.daddr)) + return -EINVAL; + mutable->key.saddr = nla_get_be32(a[OVS_TUNNEL_ATTR_SRC_IPV4]); + } if (a[OVS_TUNNEL_ATTR_TOS]) { mutable->tos = nla_get_u8(a[OVS_TUNNEL_ATTR_TOS]); @@ -1347,6 +1399,22 @@ static int tnl_set_config(struct nlattr *options, const struct tnl_ops *tnl_ops, if (old_vport && old_vport != cur_vport) return -EEXIST; + mutable->mlink = 0; + if (ipv4_is_multicast(mutable->key.daddr)) { + struct net_device *dev; + struct rtable *rt; + + rt = __find_route(mutable, tnl_ops->ipproto, mutable->tos); + if (IS_ERR(rt)) + return -EADDRNOTAVAIL; + dev = rt_dst(rt).dev; + ip_rt_put(rt); + if (__in_dev_get_rtnl(dev) == NULL) + return -EADDRNOTAVAIL; + 
mutable->mlink = dev->ifindex; + ip_mc_inc_group(__in_dev_get_rtnl(dev), mutable->key.daddr); + } + return 0; } @@ -1399,6 +1467,7 @@ struct vport *tnl_create(const struct vport_parms *parms, return vport; error_free_mutable: + free_mutable_rtnl(mutable); kfree(mutable); error_free_vport: vport_free(vport); @@ -1437,6 +1506,7 @@ int tnl_set_options(struct vport *vport, struct nlattr *options) return 0; error_free: + free_mutable_rtnl(mutable); kfree(mutable); error: return err; @@ -1480,23 +1550,26 @@ static void free_port_rcu(struct rcu_head *rcu) void tnl_destroy(struct vport *vport) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); - const struct tnl_mutable_config *mutable; + struct tnl_mutable_config *mutable; mutable = rtnl_dereference(tnl_vport->mutable); port_table_remove_port(vport); + free_mutable_rtnl(mutable); call_rcu(&tnl_vport->rcu, free_port_rcu); } int tnl_set_addr(struct vport *vport, const unsigned char *addr) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); - struct tnl_mutable_config *mutable; + struct tnl_mutable_config *old_mutable, *mutable; - mutable = kmemdup(rtnl_dereference(tnl_vport->mutable), - sizeof(struct tnl_mutable_config), GFP_KERNEL); + old_mutable = rtnl_dereference(tnl_vport->mutable); + mutable = kmemdup(old_mutable, sizeof(struct tnl_mutable_config), GFP_KERNEL); if (!mutable) return -ENOMEM; + old_mutable->mlink = 0; + memcpy(mutable->eth_addr, addr, ETH_ALEN); assign_config_rcu(vport, mutable); diff --git a/datapath/tunnel.h b/datapath/tunnel.h index 9211740a..f80df99b 100644 --- a/datapath/tunnel.h +++ b/datapath/tunnel.h @@ -89,6 +89,9 @@ struct tnl_mutable_config { u32 flags; u8 tos; u8 ttl; + + /* Multicast configuration. 
*/ + int mlink; }; struct tnl_ops { diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index cc64d129..95ac4bb2 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -188,6 +188,8 @@ static void gre_err(struct sk_buff *skb, u32 info) return; iph = (struct iphdr *)skb->data; + if (ipv4_is_multicast(iph->daddr)) + return; tunnel_hdr_len = parse_header(iph, &flags, &key); if (tunnel_hdr_len < 0) diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 0577bd3b..f6dbd03e 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -578,6 +578,7 @@ parse_tunnel_config(const char *name, const char *type, struct shash_node *node; bool ipsec_mech_set = false; ovs_be32 daddr = htonl(0); + ovs_be32 saddr = htonl(0); uint32_t flags; flags = TNL_F_DF_DEFAULT | TNL_F_PMTUD | TNL_F_HDR_CACHE; @@ -603,8 +604,7 @@ parse_tunnel_config(const char *name, const char *type, if (lookup_ip(node->data, &in_addr)) { VLOG_WARN("%s: bad %s 'local_ip'", name, type); } else { - nl_msg_put_be32(options, OVS_TUNNEL_ATTR_SRC_IPV4, - in_addr.s_addr); + saddr = in_addr.s_addr; } } else if (!strcmp(node->name, "tos")) { if (!strcmp(node->data, "inherit")) { @@ -707,6 +707,14 @@ parse_tunnel_config(const char *name, const char *type, } nl_msg_put_be32(options, OVS_TUNNEL_ATTR_DST_IPV4, daddr); + if (saddr) { + if (ip_is_multicast(daddr)) { + VLOG_WARN("%s: remote_ip is multicast, ignoring local_ip", name); + } else { + nl_msg_put_be32(options, OVS_TUNNEL_ATTR_SRC_IPV4, saddr); + } + } + nl_msg_put_u32(options, OVS_TUNNEL_ATTR_FLAGS, flags); return 0; diff --git a/lib/packets.h b/lib/packets.h index f5f473ce..84877541 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -291,6 +291,11 @@ ip_is_cidr(ovs_be32 netmask) uint32_t x = ~ntohl(netmask); return !(x & (x + 1)); } +static inline bool +ip_is_multicast(ovs_be32 ip) +{ + return (ip & htonl(0xf0000000)) == htonl(0xe0000000); +} int ip_count_cidr_bits(ovs_be32 netmask); void ip_format_masked(ovs_be32 ip, ovs_be32 mask, struct ds *); 
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 239a9e88..bcb6b6fd 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -1118,12 +1118,22 @@

- Required. The tunnel endpoint. +

+ Required. The tunnel endpoint. Unicast and multicast endpoints are + both supported. +

+ +

+ When a multicast endpoint is specified, a routing table lookup occurs + only when the tunnel is created. Following a routing change, delete + and then re-create the tunnel to force a new routing table lookup. +

- Optional. The destination IP that received packets must - match. Default is to match all addresses. + Optional. The destination IP that received packets must match. + Default is to match all addresses. Must be omitted when remote_ip is a multicast address. -- 2.30.2