From 71f13ed0b69cfdfbbcdbb97948e57968fc586fb3 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 9 Sep 2008 12:53:47 -0700 Subject: [PATCH] Send of0 packets from workqueue, to avoid recursive locking of ofN device. lockdep reported the following warning, showing that dev_queue_xmit() for of0 and for another ethernet device (eth0?) were taking locks of the same class. It was not a problem in this particular case, because of0 != eth0, but if a flow were to send a packet back to of0 then we would have a deadlock. Solution: use a workqueue to send packets to avoid recursive locking. We will still waste a lot of CPU time if we get a packet that loops back to of0. Solution for that still needed. ============================================= [ INFO: possible recursive locking detected ] 2.6.26.5 #1 --------------------------------------------- memcheck/1258 is trying to acquire lock: (_xmit_ETHER){-+..}, at: [] __qdisc_run+0xa0/0x18a but task is already holding lock: (_xmit_ETHER){-+..}, at: [] dev_queue_xmit+0x23c/0x334 other info that might help us debug this: 9 locks held by memcheck/1258: #0: (genl_mutex){--..}, at: [] genl_rcv+0x12/0x2b #1: (dp_mutex){--..}, at: [] dp_genl_openflow+0x58/0x91 [openflow_mo d] #2: (rcu_read_lock){..--}, at: [] netif_receive_skb+0x9e/0x328 #3: (rcu_read_lock){..--}, at: [] ip_local_deliver_finish+0x2a/0x1d7 #4: (slock-AF_INET/1){-+..}, at: [] tcp_v4_rcv+0x29f/0x5ef #5: (rcu_read_lock){..--}, at: [] dev_queue_xmit+0x177/0x334 #6: (_xmit_ETHER){-+..}, at: [] dev_queue_xmit+0x23c/0x334 #7: (rcu_read_lock){..--}, at: [] dp_dev_xmit+0x0/0x6c [openflow_mod ] #8: (rcu_read_lock){..--}, at: [] dev_queue_xmit+0x177/0x334 stack backtrace: Pid: 1258, comm: memcheck Not tainted 2.6.26.5 #1 [] ? printk+0xf/0x13 [] __lock_acquire+0x8c2/0xbe4 [] ? add_lock_to_list+0x64/0x8a [] ? native_sched_clock+0x82/0x94 [] lock_acquire+0x57/0x73 [] ? __qdisc_run+0xa0/0x18a [] _spin_lock+0x1c/0x49 [] ? __qdisc_run+0xa0/0x18a [] __qdisc_run+0xa0/0x18a [] dev_queue_xmit+0x1f1/0x334 [] xmit_skb+0x5b/0x65 [openflow_mod] [] dp_output_port+0x178/0x1ae [openflow_mod] [] do_output+0x2a/0x4c [openflow_mod] [] execute_actions+0x16c/0x198 [openflow_mod] [] run_flow_through_tables+0xe9/0xf6 [openflow_mod] [] fwd_port_input+0xf/0x3d [openflow_mod] [] dp_dev_xmit+0x4c/0x6c [openflow_mod] [] ? dp_dev_xmit+0x0/0x6c [openflow_mod] [] dev_hard_start_xmit+0x20f/0x276 [] dev_queue_xmit+0x251/0x334 [] ip_finish_output+0x1ea/0x222 [] ip_output+0x7e/0x83 [] ip_local_out+0x18/0x1b [] ip_queue_xmit+0x288/0x2c9 [] ? __lock_acquire+0xa09/0xbe4 [] ? kernel_map_pages+0xfc/0x113 [] ? tcp_v4_send_check+0x80/0xba [] tcp_transmit_skb+0x695/0x6cd [] ? __kmalloc_track_caller+0xee/0x12a [] ? __alloc_skb+0x51/0xff [] tcp_send_ack+0xdf/0xe7 [] tcp_rcv_state_process+0x389/0xc33 [] tcp_v4_do_rcv+0x3bd/0x419 [] tcp_v4_rcv+0x3ae/0x5ef [] ip_local_deliver_finish+0x112/0x1d7 [] ip_local_deliver+0x61/0x6a [] ip_rcv_finish+0x2a4/0x2c3 [] ip_rcv+0x1f0/0x21a [] netif_receive_skb+0x2e5/0x328 [] process_backlog+0x7f/0xca [] net_rx_action+0x72/0x127 [] __do_softirq+0x7b/0xf2 [] do_softirq+0x66/0xb3 [] netif_rx_ni+0x29/0x2e [] dp_dev_recv+0x4d/0x6c [openflow_mod] [] dp_output_port+0x11e/0x1ae [openflow_mod] [] do_output+0x2a/0x4c [openflow_mod] [] execute_actions+0x16c/0x198 [openflow_mod] [] recv_flow+0x1c7/0x2a5 [openflow_mod] [] ? recv_flow+0x0/0x2a5 [openflow_mod] [] fwd_control_input+0x53/0x60 [openflow_mod] [] dp_genl_openflow+0x6c/0x91 [openflow_mod] [] genl_rcv_msg+0x17e/0x198 [] ? genl_rcv_msg+0x0/0x198 [] netlink_rcv_skb+0x30/0x76 [] genl_rcv+0x1e/0x2b [] netlink_unicast+0x1a9/0x20f [] netlink_sendmsg+0x223/0x230 [] sock_sendmsg+0xca/0xe1 [] ? autoremove_wake_function+0x0/0x33 [] ? cache_free_debugcheck+0x2a3/0x2be [] ? native_sched_clock+0x82/0x94 [] ? lock_release_holdtime+0x1a/0x115 [] ? copy_from_user+0x34/0x11b [] ? verify_iovec+0x40/0x6f [] sys_sendmsg+0x13f/0x192 [] ? pipe_write+0x434/0x43f [] ? native_sched_clock+0x82/0x94 [] ? native_sched_clock+0x82/0x94 [] ? lock_release_holdtime+0x1a/0x115 [] ? native_sched_clock+0x82/0x94 [] ? lock_release_holdtime+0x1a/0x115 [] sys_socketcall+0x14e/0x169 [] ? restore_nocheck+0x12/0x15 [] syscall_call+0x7/0xb ======================= --- datapath/dp_dev.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c index 4601852d..7817d0d6 100644 --- a/datapath/dp_dev.c +++ b/datapath/dp_dev.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "datapath.h" #include "forward.h" @@ -10,8 +11,11 @@ struct dp_dev { struct net_device_stats stats; struct datapath *dp; + struct sk_buff_head xmit_queue; + struct work_struct xmit_work; }; + static struct dp_dev *dp_dev_priv(struct net_device *netdev) { return netdev_priv(netdev); @@ -60,14 +64,37 @@ static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev) dp_dev->stats.tx_packets++; dp_dev->stats.tx_bytes += skb->len; - skb_reset_mac_header(skb); - rcu_read_lock(); - fwd_port_input(dp->chain, skb, OFPP_LOCAL); - rcu_read_unlock(); + if (skb_queue_len(&dp_dev->xmit_queue) >= dp->netdev->tx_queue_len) { + /* Queue overflow. Stop transmitter. */ + netif_stop_queue(dp->netdev); + + /* We won't see all dropped packets individually, so overrun + * error is appropriate. */ + dp_dev->stats.tx_fifo_errors++; + } + skb_queue_tail(&dp_dev->xmit_queue, skb); + dp->netdev->trans_start = jiffies; + + schedule_work(&dp_dev->xmit_work); return 0; } +static void dp_dev_do_xmit(struct work_struct *work) +{ + struct dp_dev *dp_dev = container_of(work, struct dp_dev, xmit_work); + struct datapath *dp = dp_dev->dp; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) { + skb_reset_mac_header(skb); + rcu_read_lock(); + fwd_port_input(dp->chain, skb, OFPP_LOCAL); + rcu_read_unlock(); + } + netif_wake_queue(dp->netdev); +} + static int dp_dev_open(struct net_device *netdev) { netif_start_queue(netdev); @@ -89,7 +116,7 @@ do_setup(struct net_device *netdev) netdev->hard_start_xmit = dp_dev_xmit; netdev->open = dp_dev_open; netdev->stop = dp_dev_stop; - netdev->tx_queue_len = 0; + netdev->tx_queue_len = 100; netdev->set_mac_address = dp_dev_mac_addr; netdev->flags = IFF_BROADCAST | IFF_MULTICAST; @@ -121,14 +148,19 @@ int dp_dev_setup(struct datapath *dp) dp_dev = dp_dev_priv(netdev); dp_dev->dp = dp; + skb_queue_head_init(&dp_dev->xmit_queue); + INIT_WORK(&dp_dev->xmit_work, dp_dev_do_xmit); dp->netdev = netdev; return 0; } void dp_dev_destroy(struct datapath *dp) { + struct dp_dev *dp_dev = dp_dev_priv(dp->netdev); + netif_tx_disable(dp->netdev); synchronize_net(); + skb_queue_purge(&dp_dev->xmit_queue); unregister_netdev(dp->netdev); free_netdev(dp->netdev); } -- 2.30.2