From: Ben Pfaff Date: Wed, 7 Jan 2009 23:19:09 +0000 (-0800) Subject: datapath: Fix deadlock in network device notifier. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=95ceee731ab76d8eb84669a9a285897c96217b8e;p=openvswitch datapath: Fix deadlock in network device notifier. When a network device that is part of a datapath is destroyed, the notifier handler in dp_device_event() calls dp_del_switch_port() to delete the switch port. dp_del_switch_port() then calls rtnl_lock() to drop the device's promiscuous count. But the RTNL lock has already been taken at this point, so taking it recursively deadlocks. The obvious fix is for dp_del_switch_port()'s caller to take the RTNL lock for it, if it is not already taken. This, however, causes a different deadlock, as dp_del_switch_port() needs dp_mutex and that means that we are nesting dp_mutex and the RTNL both ways. The fix used here is to always nest dp_mutex inside the RTNL lock and never vice versa. ============================================= [ INFO: possible recursive locking detected ] --------------------------------------------- tunctl/2934 is trying to acquire lock: (rtnl_mutex){--..}, at: [] mutex_lock+0x1c/0x1f but task is already holding lock: (rtnl_mutex){--..}, at: [] mutex_lock+0x1c/0x1f other info that might help us debug this: 1 lock held by tunctl/2934: #0: (rtnl_mutex){--..}, at: [] mutex_lock+0x1c/0x1f stack backtrace: [] show_trace+0xd/0x10 [] dump_stack+0x19/0x1b [] __lock_acquire+0x747/0x9a6 [] lock_acquire+0x4b/0x6b [] __mutex_lock_slowpath+0xb0/0x1f6 [] mutex_lock+0x1c/0x1f [] rtnl_lock+0xd/0xf [] dp_del_switch_port+0x2d/0xb9 [openflow_mod] [] dp_device_event+0x84/0xb0 [openflow_mod] [] notifier_call_chain+0x20/0x31 [] raw_notifier_call_chain+0x8/0xa [] unregister_netdevice+0x129/0x1ce [] tun_chr_close+0x5e/0x69 [] __fput+0xb3/0x15e [] fput+0x17/0x19 [] filp_close+0x51/0x5b [] put_files_struct+0x6d/0xa9 [] do_exit+0x206/0x790 [] sys_exit_group+0x0/0x11 [] sys_exit_group+0xf/0x11 [] syscall_call+0x7/0xb Problem identified with this script (in conjunction with the brcompat module): #! /bin/sh brctl delbr a brctl delbr b brctl delbr c set -x stress () { bridge=$1 shift while true; do brctl addbr $bridge; brctl show for iface do case $iface in tap*) tunctl -t $iface ;; esac brctl addif $bridge $iface done for iface do brctl delif $bridge $iface case $iface in tap*) tunctl -d $iface ;; esac done brctl delbr $bridge; brctl show done } stress a tap0 tap1 tap2 & stress b tap3 tap4 tap5 & trap 'killall stress-brctl' 0 SIGINT wait --- diff --git a/datapath/datapath.c b/datapath/datapath.c index 7cfeb627..474d3ee2 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1,6 +1,6 @@ /* * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008 The Board of Trustees of The Leland + * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland * Stanford Junior University */ @@ -103,6 +103,9 @@ static struct genl_multicast_group mc_groups[N_MC_GROUPS]; * maintained by the Generic Netlink code, but the timeout path needs mutual * exclusion too. * + * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL + * lock first. + * * It is safe to access the datapath and net_bridge_port structures with just * dp_mutex. */ @@ -285,6 +288,7 @@ static int new_dp(int dp_idx, const char *dp_name) struct datapath *dp; int err; + rtnl_lock(); mutex_lock(&dp_mutex); if (dp_idx == -1) dp_idx = gen_dp_idx(); @@ -334,6 +338,7 @@ static int new_dp(int dp_idx, const char *dp_name) dps[dp_idx] = dp; mutex_unlock(&dp_mutex); + rtnl_unlock(); return 0; @@ -349,6 +354,7 @@ err_put: module_put(THIS_MODULE); err_unlock: mutex_unlock(&dp_mutex); + rtnl_unlock(); return err; } @@ -362,6 +368,7 @@ static int find_portno(struct datapath *dp) return -EXFULL; } +/* Called with RTNL lock and dp_mutex. */ static struct net_bridge_port *new_nbp(struct datapath *dp, struct net_device *dev, int port_no) { @@ -374,9 +381,7 @@ static struct net_bridge_port *new_nbp(struct datapath *dp, if (p == NULL) return ERR_PTR(-ENOMEM); - rtnl_lock(); dev_set_promiscuity(dev, 1); - rtnl_unlock(); dev_hold(dev); p->dp = dp; p->dev = dev; @@ -392,6 +397,7 @@ static struct net_bridge_port *new_nbp(struct datapath *dp, return p; } +/* Called with RTNL lock and dp_mutex. */ int add_switch_port(struct datapath *dp, struct net_device *dev) { struct net_bridge_port *p; @@ -417,7 +423,8 @@ int add_switch_port(struct datapath *dp, struct net_device *dev) return 0; } -/* Delete 'p' from switch. */ +/* Delete 'p' from switch. + * Called with RTNL lock and dp_mutex. */ int dp_del_switch_port(struct net_bridge_port *p) { #ifdef SUPPORT_SNAT @@ -426,9 +433,7 @@ int dp_del_switch_port(struct net_bridge_port *p) /* First drop references to device. */ cancel_work_sync(&p->port_task); - rtnl_lock(); dev_set_promiscuity(p->dev, -1); - rtnl_unlock(); list_del_rcu(&p->node); if (p->port_no != OFPP_LOCAL) rcu_assign_pointer(p->dp->ports[p->port_no], NULL); @@ -1229,18 +1234,24 @@ lookup_dp(struct genl_info *info) static int dp_genl_del(struct sk_buff *skb, struct genl_info *info) { + struct net_device *dev = NULL; struct datapath *dp; int err; + rtnl_lock(); mutex_lock(&dp_mutex); dp = lookup_dp(info); if (IS_ERR(dp)) err = PTR_ERR(dp); else { + dev = dp->netdev; del_dp(dp); err = 0; } mutex_unlock(&dp_mutex); + rtnl_unlock(); + if (dev) + free_netdev(dev); return err; } @@ -1310,8 +1321,10 @@ static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info) VERIFY_NUL_STRING(info->attrs[DP_GENL_A_PORTNAME])) return -EINVAL; - /* Get datapath. */ + rtnl_lock(); mutex_lock(&dp_mutex); + + /* Get datapath. */ dp = lookup_dp(info); if (IS_ERR(dp)) { err = PTR_ERR(dp); @@ -1341,6 +1354,7 @@ out_put: dev_put(port); out_unlock: mutex_unlock(&dp_mutex); + rtnl_unlock(); return err; } diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c index f866fb4e..94a71748 100644 --- a/datapath/dp_dev.c +++ b/datapath/dp_dev.c @@ -181,7 +181,9 @@ do_setup(struct net_device *netdev) } /* Create a datapath device associated with 'dp'. If 'dp_name' is null, - * the device name will be of the form 'of'. */ + * the device name will be of the form 'of'. + * + * Called with RTNL lock and dp_mutex.*/ int dp_dev_setup(struct datapath *dp, const char *dp_name) { struct dp_dev *dp_dev; @@ -200,7 +202,7 @@ int dp_dev_setup(struct datapath *dp, const char *dp_name) if (!netdev) return -ENOMEM; - err = register_netdev(netdev); + err = register_netdevice(netdev); if (err) { free_netdev(netdev); return err; @@ -220,6 +222,7 @@ int dp_dev_setup(struct datapath *dp, const char *dp_name) return 0; } +/* Called with RTNL lock and dp_mutex.*/ void dp_dev_destroy(struct datapath *dp) { struct dp_dev *dp_dev = dp_dev_priv(dp->netdev); @@ -227,8 +230,7 @@ void dp_dev_destroy(struct datapath *dp) netif_tx_disable(dp->netdev); synchronize_net(); skb_queue_purge(&dp_dev->xmit_queue); - unregister_netdev(dp->netdev); - free_netdev(dp->netdev); + unregister_netdevice(dp->netdev); } int is_dp_dev(struct net_device *netdev) diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c index 9bf491df..cd3163af 100644 --- a/datapath/dp_notify.c +++ b/datapath/dp_notify.c @@ -1,6 +1,6 @@ /* * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008 The Board of Trustees of The Leland + * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland * Stanford Junior University */ @@ -46,7 +46,9 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_UNREGISTER: spin_unlock_irqrestore(&p->lock, flags); + mutex_lock(&dp_mutex); dp_del_switch_port(p); + mutex_unlock(&dp_mutex); return NOTIFY_DONE; break; }