From: Ben Pfaff
Date: Sat, 3 Jan 2009 00:13:07 +0000 (-0800)
Subject: Use separate Netlink multicast groups for different datapaths.
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=73fd5f01b19c3e7d6530f49e1897d22ca1d154f3;p=openvswitch

Use separate Netlink multicast groups for different datapaths.

Otherwise every socket that listens for OpenFlow multicast messages will
get them for every single datapath.  Not only is that a waste of memory
and time, it also allows one congested bridge to effectively DoS other
bridges that have little traffic.
---

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 8e30e878..7cfeb627 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -79,7 +79,24 @@ EXPORT_SYMBOL(dp_ioctl_hook);
 #define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
 
 static struct genl_family dp_genl_family;
-static struct genl_multicast_group mc_group;
+
+/*
+ * Datapath multicast groups.
+ *
+ * Really we want one multicast group per in-use datapath (or even more than
+ * one).  Locking issues, however, mean that we can't allocate a multicast
+ * group at the point in the code where we actually create a datapath[*], so
+ * we have to pre-allocate them.  It's massive overkill to allocate DP_MAX of
+ * them in advance, since we will hardly ever actually create DP_MAX datapaths,
+ * so instead we allocate a few multicast groups at startup and choose one for
+ * each datapath by hashing its datapath index.
+ *
+ * [*] dp_genl_add, to add a new datapath, is called under the genl_lock
+ *     mutex, and genl_register_mc_group, called to acquire a new multicast
+ *     group ID, also acquires genl_lock, thus deadlock.
+ */
+#define N_MC_GROUPS 16 /* Must be power of 2. */
+static struct genl_multicast_group mc_groups[N_MC_GROUPS];
 
 /* Datapaths.  Protected on the read side by rcu_read_lock, on the write side
  * by dp_mutex.  dp_mutex is almost completely redundant with genl_mutex
@@ -214,14 +231,22 @@ alloc_openflow_skb(struct datapath *dp, size_t openflow_len, uint8_t type,
 	return oh;
 }
 
+/* Returns the ID of the multicast group used by datapath 'dp'. */
+static u32
+dp_mc_group(const struct datapath *dp)
+{
+	return mc_groups[dp->dp_idx & (N_MC_GROUPS - 1)].id;
+}
+
 /* Sends 'skb' to 'sender' if it is nonnull, otherwise multicasts 'skb' to all
  * listeners. */
 static int
-send_openflow_skb(struct sk_buff *skb, const struct sender *sender)
+send_openflow_skb(const struct datapath *dp,
+		  struct sk_buff *skb, const struct sender *sender)
 {
 	return (sender
 		? genlmsg_unicast(skb, sender->pid)
-		: genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC));
+		: genlmsg_multicast(skb, 0, dp_mc_group(dp), GFP_ATOMIC));
 }
 
 /* Retrieves the datapath id, which is the MAC address of the "of" device. */
@@ -716,7 +741,7 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb,
 	opi->reason = reason;
 	opi->pad = 0;
 	skb_copy_bits(skb, 0, opi->data, fwd_len);
-	err = send_openflow_skb(f_skb, NULL);
+	err = send_openflow_skb(dp, f_skb, NULL);
 
 out:
 	kfree_skb(skb);
@@ -871,7 +896,7 @@ dp_send_features_reply(struct datapath *dp, const struct sender *sender)
 	/* Shrink to fit. */
 	ofr_len = sizeof(*ofr) + (sizeof(struct ofp_phy_port) * port_count);
 	resize_openflow_skb(skb, &ofr->header, ofr_len);
-	return send_openflow_skb(skb, sender);
+	return send_openflow_skb(dp, skb, sender);
 }
 
 int
@@ -888,7 +913,7 @@ dp_send_config_reply(struct datapath *dp, const struct sender *sender)
 	osc->flags = htons(dp->flags);
 	osc->miss_send_len = htons(dp->miss_send_len);
 
-	return send_openflow_skb(skb, sender);
+	return send_openflow_skb(dp, skb, sender);
 }
 
 int
@@ -910,7 +935,7 @@ dp_send_hello(struct datapath *dp, const struct sender *sender,
 		if (!reply)
 			return -ENOMEM;
 
-		return send_openflow_skb(skb, sender);
+		return send_openflow_skb(dp, skb, sender);
 	}
 }
 
@@ -1020,7 +1045,7 @@ dp_send_port_status(struct net_bridge_port *p, uint8_t status)
 	memset(ops->pad, 0, sizeof ops->pad);
 	fill_port_desc(p, &ops->desc);
 
-	return send_openflow_skb(skb, NULL);
+	return send_openflow_skb(p->dp, skb, NULL);
 }
 
 /* Convert jiffies_64 to milliseconds. */
@@ -1069,7 +1094,7 @@ dp_send_flow_end(struct datapath *dp, struct sw_flow *flow,
 	nfe->packet_count = cpu_to_be64(flow->packet_count);
 	nfe->byte_count = cpu_to_be64(flow->byte_count);
 
-	return send_openflow_skb(skb, NULL);
+	return send_openflow_skb(dp, skb, NULL);
 }
 EXPORT_SYMBOL(dp_send_flow_end);
 
@@ -1090,7 +1115,7 @@ dp_send_error_msg(struct datapath *dp, const struct sender *sender,
 	oem->code = htons(code);
 	memcpy(oem->data, data, len);
 
-	return send_openflow_skb(skb, sender);
+	return send_openflow_skb(dp, skb, sender);
 }
 
 int
@@ -1106,7 +1131,7 @@ dp_send_echo_reply(struct datapath *dp, const struct sender *sender,
 		return -ENOMEM;
 	memcpy(reply + 1, rq + 1, ntohs(rq->length) - sizeof *rq);
 
-	return send_openflow_skb(skb, sender);
+	return send_openflow_skb(dp, skb, sender);
 }
 
 /* Generic Netlink interface.
@@ -1229,15 +1254,7 @@ static struct genl_ops dp_genl_ops_del_dp = {
 
 /* Queries a datapath for related information.  Currently the only relevant
  * information is the datapath's multicast group ID, datapath ID, and
- * datapath device name.  Really we want one multicast group per datapath,
- * but because of locking issues[*] we can't easily get one.  Thus, every
- * datapath will currently return the same global multicast group ID, but
- * in the future it would be nice to fix that.
- *
- * [*] dp_genl_add, to add a new datapath, is called under the genl_lock
- *     mutex, and genl_register_mc_group, called to acquire a new multicast
- *     group ID, also acquires genl_lock, thus deadlock.
- */
+ * datapath device name. */
 static int dp_genl_query(struct sk_buff *skb, struct genl_info *info)
 {
 	struct datapath *dp;
@@ -1262,7 +1279,7 @@ static int dp_genl_query(struct sk_buff *skb, struct genl_info *info)
 		goto err;
 	NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp->dp_idx);
 	NLA_PUT_STRING(ans_skb, DP_GENL_A_DP_NAME, dp->netdev->name);
-	NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, mc_group.id);
+	NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, dp_mc_group(dp));
 	genlmsg_end(ans_skb, data);
 
 	err = genlmsg_reply(ans_skb, info);
@@ -1874,10 +1891,13 @@ static int dp_init_netlink(void)
 			goto err_unregister;
 	}
 
-	strcpy(mc_group.name, "openflow");
-	err = genl_register_mc_group(&dp_genl_family, &mc_group);
-	if (err < 0)
-		goto err_unregister;
+	for (i = 0; i < N_MC_GROUPS; i++) {
+		snprintf(mc_groups[i].name, sizeof mc_groups[i].name,
+			 "openflow%d", i);
+		err = genl_register_mc_group(&dp_genl_family, &mc_groups[i]);
+		if (err < 0)
+			goto err_unregister;
+	}
 
 	return 0;
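
Note for reviewers: the sketch below (not part of the patch) shows the subscription
side that this change enables.  It is a minimal, hypothetical example assuming
libnl-3 (nl_socket_alloc(), genl_connect(), nl_socket_add_membership(),
nl_recvmsgs_default()) and assuming group_id has already been read from the
DP_GENL_A_MC_GROUP attribute of the datapath query reply above; the real OpenFlow
userspace uses its own netlink helpers rather than libnl-3.

	#include <netlink/netlink.h>
	#include <netlink/genl/genl.h>

	/* Listen only on the multicast group belonging to one datapath.
	 * 'group_id' is assumed to come from DP_GENL_A_MC_GROUP. */
	static int listen_on_datapath_group(int group_id)
	{
		struct nl_sock *sock = nl_socket_alloc();
		int err;

		if (!sock)
			return -1;

		/* Connect to the generic netlink bus. */
		err = genl_connect(sock);
		if (err < 0)
			goto out;

		/* Join only this datapath's group ("openflow<N>"), so messages
		 * from other, possibly congested, datapaths are never
		 * delivered to this socket. */
		err = nl_socket_add_membership(sock, group_id);
		if (err < 0)
			goto out;

		/* Block waiting for multicast messages.  The default callbacks
		 * just parse and discard them; a real listener would install
		 * its own handler with nl_socket_modify_cb(). */
		while ((err = nl_recvmsgs_default(sock)) >= 0)
			;

	out:
		nl_socket_free(sock);
		return err;
	}

Because each datapath is mapped onto one of the 16 pre-registered "openflow%d"
groups by hashing its datapath index, two datapaths can still share a group; the
change makes cross-datapath delivery unlikely rather than impossible, which is the
trade-off described by the new comment in datapath.c.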