Send of0 packets from a workqueue, to avoid recursive locking of ofN device.
author     Ben Pfaff <blp@nicira.com>
           Tue, 9 Sep 2008 19:53:47 +0000 (12:53 -0700)
committer  Ben Pfaff <blp@nicira.com>
           Tue, 9 Sep 2008 19:53:47 +0000 (12:53 -0700)
lockdep reported the following warning, showing that dev_queue_xmit()
for of0 and for another ethernet device (eth0?) were taking locks of
the same class.  It was not a problem in this particular case, because
of0 != eth0, but if a flow were to send a packet back to of0 then we
would have a deadlock.

Solution: use a workqueue to send packets, so that forwarding happens outside
the caller's transmit lock and the recursive locking is avoided.  We will
still waste a lot of CPU time if a packet loops back to of0; a solution for
that is still needed.
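
In outline, the change turns dp_dev's hard_start_xmit handler into a
queue-and-schedule step and moves the actual forwarding into a work item
that runs in process context, with no transmit lock held.  A condensed
sketch of the pattern, distilled from the diff below (the queue-length
check, stats accounting, netif_stop_queue()/netif_wake_queue() flow
control, and the skb_queue_head_init()/INIT_WORK() setup are omitted):

    /* Condensed from the patch; relies on the module's datapath.h and
     * forward.h, as in the diff. */
    #include <linux/netdevice.h>
    #include <linux/skbuff.h>
    #include <linux/workqueue.h>

    #include "datapath.h"
    #include "forward.h"

    struct dp_dev {
            struct net_device_stats stats;
            struct datapath *dp;
            struct sk_buff_head xmit_queue;   /* packets awaiting forwarding */
            struct work_struct xmit_work;     /* drains xmit_queue */
    };

    /* hard_start_xmit runs with the caller's _xmit_ETHER lock held (see the
     * lockdep trace), so it must not call back into dev_queue_xmit().  It
     * only queues the skb and schedules the work item. */
    static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
    {
            struct dp_dev *dp_dev = netdev_priv(netdev);

            skb_queue_tail(&dp_dev->xmit_queue, skb);
            schedule_work(&dp_dev->xmit_work);
            return 0;
    }

    /* Work item: forwards queued packets in process context with no transmit
     * lock held, so a packet sent back out through of0 (or any other device)
     * no longer nests locks of the _xmit_ETHER class. */
    static void dp_dev_do_xmit(struct work_struct *work)
    {
            struct dp_dev *dp_dev = container_of(work, struct dp_dev, xmit_work);
            struct sk_buff *skb;

            while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) {
                    skb_reset_mac_header(skb);
                    rcu_read_lock();
                    fwd_port_input(dp_dev->dp->chain, skb, OFPP_LOCAL);
                    rcu_read_unlock();
            }
    }

The full patch additionally bounds xmit_queue at tx_queue_len (raised from 0
to 100), counts overruns as tx_fifo_errors, and stops/wakes the device's
transmit queue around the drain.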

=============================================
[ INFO: possible recursive locking detected ]
2.6.26.5 #1
---------------------------------------------
memcheck/1258 is trying to acquire lock:
 (_xmit_ETHER){-+..}, at: [<c025b126>] __qdisc_run+0xa0/0x18a

but task is already holding lock:
 (_xmit_ETHER){-+..}, at: [<c024f3cd>] dev_queue_xmit+0x23c/0x334

other info that might help us debug this:
9 locks held by memcheck/1258:
 #0:  (genl_mutex){--..}, at: [<c0262037>] genl_rcv+0x12/0x2b
 #1:  (dp_mutex){--..}, at: [<c88e9a13>] dp_genl_openflow+0x58/0x91 [openflow_mod]
 #2:  (rcu_read_lock){..--}, at: [<c024e749>] netif_receive_skb+0x9e/0x328
 #3:  (rcu_read_lock){..--}, at: [<c0267f77>] ip_local_deliver_finish+0x2a/0x1d7
 #4:  (slock-AF_INET/1){-+..}, at: [<c027f1b8>] tcp_v4_rcv+0x29f/0x5ef
 #5:  (rcu_read_lock){..--}, at: [<c024f308>] dev_queue_xmit+0x177/0x334
 #6:  (_xmit_ETHER){-+..}, at: [<c024f3cd>] dev_queue_xmit+0x23c/0x334
 #7:  (rcu_read_lock){..--}, at: [<c88ea214>] dp_dev_xmit+0x0/0x6c [openflow_mod]
 #8:  (rcu_read_lock){..--}, at: [<c024f308>] dev_queue_xmit+0x177/0x334

stack backtrace:
Pid: 1258, comm: memcheck Not tainted 2.6.26.5 #1
 [<c02c11cc>] ? printk+0xf/0x13
 [<c01368c5>] __lock_acquire+0x8c2/0xbe4
 [<c0134d35>] ? add_lock_to_list+0x64/0x8a
 [<c0107a78>] ? native_sched_clock+0x82/0x94
 [<c0136c3e>] lock_acquire+0x57/0x73
 [<c025b126>] ? __qdisc_run+0xa0/0x18a
 [<c02c3919>] _spin_lock+0x1c/0x49
 [<c025b126>] ? __qdisc_run+0xa0/0x18a
 [<c025b126>] __qdisc_run+0xa0/0x18a
 [<c024f382>] dev_queue_xmit+0x1f1/0x334
 [<c88e9cde>] xmit_skb+0x5b/0x65 [openflow_mod]
 [<c88e9e60>] dp_output_port+0x178/0x1ae [openflow_mod]
 [<c88eaf91>] do_output+0x2a/0x4c [openflow_mod]
 [<c88eb11f>] execute_actions+0x16c/0x198 [openflow_mod]
 [<c88eb622>] run_flow_through_tables+0xe9/0xf6 [openflow_mod]
 [<c88eb63e>] fwd_port_input+0xf/0x3d [openflow_mod]
 [<c88ea260>] dp_dev_xmit+0x4c/0x6c [openflow_mod]
 [<c88ea214>] ? dp_dev_xmit+0x0/0x6c [openflow_mod]
 [<c024f042>] dev_hard_start_xmit+0x20f/0x276
 [<c024f3e2>] dev_queue_xmit+0x251/0x334
 [<c026bfcb>] ip_finish_output+0x1ea/0x222
 [<c026c081>] ip_output+0x7e/0x83
 [<c026b37a>] ip_local_out+0x18/0x1b
 [<c026baa2>] ip_queue_xmit+0x288/0x2c9
 [<c0136a0c>] ? __lock_acquire+0xa09/0xbe4
 [<c0111264>] ? kernel_map_pages+0xfc/0x113
 [<c027ddad>] ? tcp_v4_send_check+0x80/0xba
 [<c027a281>] tcp_transmit_skb+0x695/0x6cd
 [<c016060a>] ? __kmalloc_track_caller+0xee/0x12a
 [<c024aacc>] ? __alloc_skb+0x51/0xff
 [<c027a43c>] tcp_send_ack+0xdf/0xe7
 [<c02781e4>] tcp_rcv_state_process+0x389/0xc33
 [<c027eebd>] tcp_v4_do_rcv+0x3bd/0x419
 [<c027f2c7>] tcp_v4_rcv+0x3ae/0x5ef
 [<c026805f>] ip_local_deliver_finish+0x112/0x1d7
 [<c0268185>] ip_local_deliver+0x61/0x6a
 [<c0267d14>] ip_rcv_finish+0x2a4/0x2c3
 [<c0267f23>] ip_rcv+0x1f0/0x21a
 [<c024e990>] netif_receive_skb+0x2e5/0x328
 [<c024ea52>] process_backlog+0x7f/0xca
 [<c024d867>] net_rx_action+0x72/0x127
 [<c0120ea1>] __do_softirq+0x7b/0xf2
 [<c0105a5d>] do_softirq+0x66/0xb3
 [<c024ec69>] netif_rx_ni+0x29/0x2e
 [<c88ea2cd>] dp_dev_recv+0x4d/0x6c [openflow_mod]
 [<c88e9e06>] dp_output_port+0x11e/0x1ae [openflow_mod]
 [<c88eaf91>] do_output+0x2a/0x4c [openflow_mod]
 [<c88eb11f>] execute_actions+0x16c/0x198 [openflow_mod]
 [<c88eb312>] recv_flow+0x1c7/0x2a5 [openflow_mod]
 [<c88eb14b>] ? recv_flow+0x0/0x2a5 [openflow_mod]
 [<c88eab37>] fwd_control_input+0x53/0x60 [openflow_mod]
 [<c88e9a27>] dp_genl_openflow+0x6c/0x91 [openflow_mod]
 [<c02621ce>] genl_rcv_msg+0x17e/0x198
 [<c0262050>] ? genl_rcv_msg+0x0/0x198
 [<c02614c6>] netlink_rcv_skb+0x30/0x76
 [<c0262043>] genl_rcv+0x1e/0x2b
 [<c0261053>] netlink_unicast+0x1a9/0x20f
 [<c02612dc>] netlink_sendmsg+0x223/0x230
 [<c0245384>] sock_sendmsg+0xca/0xe1
 [<c012c501>] ? autoremove_wake_function+0x0/0x33
 [<c015f6ce>] ? cache_free_debugcheck+0x2a3/0x2be
 [<c0107a78>] ? native_sched_clock+0x82/0x94
 [<c0134382>] ? lock_release_holdtime+0x1a/0x115
 [<c01f52b7>] ? copy_from_user+0x34/0x11b
 [<c024b6a5>] ? verify_iovec+0x40/0x6f
 [<c02454da>] sys_sendmsg+0x13f/0x192
 [<c0168d51>] ? pipe_write+0x434/0x43f
 [<c0107a78>] ? native_sched_clock+0x82/0x94
 [<c0107a78>] ? native_sched_clock+0x82/0x94
 [<c0134382>] ? lock_release_holdtime+0x1a/0x115
 [<c0107a78>] ? native_sched_clock+0x82/0x94
 [<c0134382>] ? lock_release_holdtime+0x1a/0x115
 [<c0246189>] sys_socketcall+0x14e/0x169
 [<c0102d43>] ? restore_nocheck+0x12/0x15
 [<c0102ce2>] syscall_call+0x7/0xb
 =======================

datapath/dp_dev.c

index 4601852d82ad8a95b0f4ebc7f2cf75430c619889..7817d0d6d76a46a4b5b07bd40f82b4deb32c693f 100644
@@ -3,6 +3,7 @@
 #include <linux/etherdevice.h>
 #include <linux/rcupdate.h>
 #include <linux/skbuff.h>
+#include <linux/workqueue.h>
 
 #include "datapath.h"
 #include "forward.h"
 struct dp_dev {
        struct net_device_stats stats;
        struct datapath *dp;
+       struct sk_buff_head xmit_queue;
+       struct work_struct xmit_work;
 };
 
+
 static struct dp_dev *dp_dev_priv(struct net_device *netdev) 
 {
        return netdev_priv(netdev);
@@ -60,14 +64,37 @@ static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
        dp_dev->stats.tx_packets++;
        dp_dev->stats.tx_bytes += skb->len;
 
-       skb_reset_mac_header(skb);
-       rcu_read_lock();
-       fwd_port_input(dp->chain, skb, OFPP_LOCAL);
-       rcu_read_unlock();
+       if (skb_queue_len(&dp_dev->xmit_queue) >= dp->netdev->tx_queue_len) {
+               /* Queue overflow.  Stop transmitter. */
+               netif_stop_queue(dp->netdev);
+
+               /* We won't see all dropped packets individually, so overrun
+                * error is appropriate. */
+               dp_dev->stats.tx_fifo_errors++;
+       }
+       skb_queue_tail(&dp_dev->xmit_queue, skb);
+       dp->netdev->trans_start = jiffies;
+
+       schedule_work(&dp_dev->xmit_work);
 
        return 0;
 }
 
+static void dp_dev_do_xmit(struct work_struct *work)
+{
+       struct dp_dev *dp_dev = container_of(work, struct dp_dev, xmit_work);
+       struct datapath *dp = dp_dev->dp;
+       struct sk_buff *skb;
+
+       while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) {
+               skb_reset_mac_header(skb);
+               rcu_read_lock();
+               fwd_port_input(dp->chain, skb, OFPP_LOCAL);
+               rcu_read_unlock();
+       }
+       netif_wake_queue(dp->netdev);
+}
+
 static int dp_dev_open(struct net_device *netdev)
 {
        netif_start_queue(netdev);
@@ -89,7 +116,7 @@ do_setup(struct net_device *netdev)
        netdev->hard_start_xmit = dp_dev_xmit;
        netdev->open = dp_dev_open;
        netdev->stop = dp_dev_stop;
-       netdev->tx_queue_len = 0;
+       netdev->tx_queue_len = 100;
        netdev->set_mac_address = dp_dev_mac_addr;
 
        netdev->flags = IFF_BROADCAST | IFF_MULTICAST;
@@ -121,14 +148,19 @@ int dp_dev_setup(struct datapath *dp)
 
        dp_dev = dp_dev_priv(netdev);
        dp_dev->dp = dp;
+       skb_queue_head_init(&dp_dev->xmit_queue);
+       INIT_WORK(&dp_dev->xmit_work, dp_dev_do_xmit);
        dp->netdev = netdev;
        return 0;
 }
 
 void dp_dev_destroy(struct datapath *dp)
 {
+       struct dp_dev *dp_dev = dp_dev_priv(dp->netdev);
+
        netif_tx_disable(dp->netdev);
        synchronize_net();
+       skb_queue_purge(&dp_dev->xmit_queue);
        unregister_netdev(dp->netdev);
        free_netdev(dp->netdev);
 }