When a network device that is part of a datapath is destroyed, the
notifier handler in dp_device_event() calls dp_del_switch_port() to
delete the switch port. dp_del_switch_port() then calls rtnl_lock()
to drop the device's promiscuous count. But the RTNL lock has already
been taken at this point, so taking it recursively deadlocks.
The obvious fix is for dp_del_switch_port()'s caller to take the RTNL
lock for it, if it is not already taken. This, however, causes a
different deadlock, as dp_del_switch_port() needs dp_mutex and that
means that we are nesting dp_mutex and the RTNL both ways.
The fix used here is to always nest dp_mutex inside the RTNL lock and
never vice versa.
=============================================
[ INFO: possible recursive locking detected ]
---------------------------------------------
tunctl/2934 is trying to acquire lock:
(rtnl_mutex){--..}, at: [<
c029667d>] mutex_lock+0x1c/0x1f
but task is already holding lock:
(rtnl_mutex){--..}, at: [<
c029667d>] mutex_lock+0x1c/0x1f
other info that might help us debug this:
1 lock held by tunctl/2934:
#0: (rtnl_mutex){--..}, at: [<
c029667d>] mutex_lock+0x1c/0x1f
stack backtrace:
[<
c01040cf>] show_trace+0xd/0x10
[<
c0104655>] dump_stack+0x19/0x1b
[<
c012f897>] __lock_acquire+0x747/0x9a6
[<
c0130058>] lock_acquire+0x4b/0x6b
[<
c029651b>] __mutex_lock_slowpath+0xb0/0x1f6
[<
c029667d>] mutex_lock+0x1c/0x1f
[<
c023f045>] rtnl_lock+0xd/0xf
[<
c89703eb>] dp_del_switch_port+0x2d/0xb9 [openflow_mod]
[<
c8972554>] dp_device_event+0x84/0xb0 [openflow_mod]
[<
c0125033>] notifier_call_chain+0x20/0x31
[<
c012506d>] raw_notifier_call_chain+0x8/0xa
[<
c0237e75>] unregister_netdevice+0x129/0x1ce
[<
c020ed97>] tun_chr_close+0x5e/0x69
[<
c01573a5>] __fput+0xb3/0x15e
[<
c0157467>] fput+0x17/0x19
[<
c0154de1>] filp_close+0x51/0x5b
[<
c011aeca>] put_files_struct+0x6d/0xa9
[<
c011becf>] do_exit+0x206/0x790
[<
c011c4d2>] sys_exit_group+0x0/0x11
[<
c011c4e1>] sys_exit_group+0xf/0x11
[<
c0102ab7>] syscall_call+0x7/0xb
Problem identified with this script (in conjunction with the brcompat
module):
#! /bin/sh
brctl delbr a
brctl delbr b
brctl delbr c
set -x
stress () {
bridge=$1
shift
while true; do
brctl addbr $bridge; brctl show
for iface
do
case $iface in
tap*) tunctl -t $iface ;;
esac
brctl addif $bridge $iface
done
for iface
do
brctl delif $bridge $iface
case $iface in
tap*) tunctl -d $iface ;;
esac
done
brctl delbr $bridge; brctl show
done
}
stress a tap0 tap1 tap2 &
stress b tap3 tap4 tap5 &
trap 'killall stress-brctl' 0 SIGINT
wait
/*
* Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2007, 2008 The Board of Trustees of The Leland
+ * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
* Stanford Junior University
*/
* maintained by the Generic Netlink code, but the timeout path needs mutual
* exclusion too.
*
+ * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL
+ * lock first.
+ *
* It is safe to access the datapath and net_bridge_port structures with just
* dp_mutex.
*/
struct datapath *dp;
int err;
+ rtnl_lock();
mutex_lock(&dp_mutex);
if (dp_idx == -1)
dp_idx = gen_dp_idx();
dps[dp_idx] = dp;
mutex_unlock(&dp_mutex);
+ rtnl_unlock();
return 0;
module_put(THIS_MODULE);
err_unlock:
mutex_unlock(&dp_mutex);
+ rtnl_unlock();
return err;
}
return -EXFULL;
}
+/* Called with RTNL lock and dp_mutex. */
static struct net_bridge_port *new_nbp(struct datapath *dp,
struct net_device *dev, int port_no)
{
if (p == NULL)
return ERR_PTR(-ENOMEM);
- rtnl_lock();
dev_set_promiscuity(dev, 1);
- rtnl_unlock();
dev_hold(dev);
p->dp = dp;
p->dev = dev;
return p;
}
+/* Called with RTNL lock and dp_mutex. */
int add_switch_port(struct datapath *dp, struct net_device *dev)
{
struct net_bridge_port *p;
return 0;
}
-/* Delete 'p' from switch. */
+/* Delete 'p' from switch.
+ * Called with RTNL lock and dp_mutex. */
int dp_del_switch_port(struct net_bridge_port *p)
{
#ifdef SUPPORT_SNAT
/* First drop references to device. */
cancel_work_sync(&p->port_task);
- rtnl_lock();
dev_set_promiscuity(p->dev, -1);
- rtnl_unlock();
list_del_rcu(&p->node);
if (p->port_no != OFPP_LOCAL)
rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
{
+ struct net_device *dev = NULL;
struct datapath *dp;
int err;
+ rtnl_lock();
mutex_lock(&dp_mutex);
dp = lookup_dp(info);
if (IS_ERR(dp))
err = PTR_ERR(dp);
else {
+ dev = dp->netdev;
del_dp(dp);
err = 0;
}
mutex_unlock(&dp_mutex);
+ rtnl_unlock();
+ if (dev)
+ free_netdev(dev);
return err;
}
VERIFY_NUL_STRING(info->attrs[DP_GENL_A_PORTNAME]))
return -EINVAL;
- /* Get datapath. */
+ rtnl_lock();
mutex_lock(&dp_mutex);
+
+ /* Get datapath. */
dp = lookup_dp(info);
if (IS_ERR(dp)) {
err = PTR_ERR(dp);
dev_put(port);
out_unlock:
mutex_unlock(&dp_mutex);
+ rtnl_unlock();
return err;
}
}
/* Create a datapath device associated with 'dp'. If 'dp_name' is null,
- * the device name will be of the form 'of<dp_idx>'. */
+ * the device name will be of the form 'of<dp_idx>'.
+ *
+ * Called with RTNL lock and dp_mutex.*/
int dp_dev_setup(struct datapath *dp, const char *dp_name)
{
struct dp_dev *dp_dev;
if (!netdev)
return -ENOMEM;
- err = register_netdev(netdev);
+ err = register_netdevice(netdev);
if (err) {
free_netdev(netdev);
return err;
return 0;
}
+/* Called with RTNL lock and dp_mutex.*/
void dp_dev_destroy(struct datapath *dp)
{
struct dp_dev *dp_dev = dp_dev_priv(dp->netdev);
netif_tx_disable(dp->netdev);
synchronize_net();
skb_queue_purge(&dp_dev->xmit_queue);
- unregister_netdev(dp->netdev);
- free_netdev(dp->netdev);
+ unregister_netdevice(dp->netdev);
}
int is_dp_dev(struct net_device *netdev)
/*
* Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2007, 2008 The Board of Trustees of The Leland
+ * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
* Stanford Junior University
*/
case NETDEV_UNREGISTER:
spin_unlock_irqrestore(&p->lock, flags);
+ mutex_lock(&dp_mutex);
dp_del_switch_port(p);
+ mutex_unlock(&dp_mutex);
return NOTIFY_DONE;
break;
}