From ee5f04b7c29897d96b06ebb119ee131ea84183ec Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 23 May 2012 17:25:41 -0700 Subject: [PATCH] datapath: Avoid system freeze due to ovs-flow-rehash softlockup. OVS datapath does a periodic flow table rehash which takes genl_lock in workq context. In some cases, like port add or delete, genl_lock can cause a softlockup as vswitchd would take and succeed with genl_lock and the rehash workq would block on the lock. Eventually rehash will proceed; flow rehash is a low-priority task, so this is not a problem for rehashing. But it blocks the workq thread; work items from other kernel subsystems would be blocked, which can cause a system freeze. To avoid workq blocking and system freeze, we can use the OVS compat workq. It runs in a separate kernel thread and thus does not block any non-ovs deferred workq work item. We will fix it by making genetlink lockless and having fine-grained locking in OVS. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross --- datapath/linux/compat/include/linux/workqueue.h | 12 ++++-------- datapath/linux/compat/workqueue.c | 3 --- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/datapath/linux/compat/include/linux/workqueue.h b/datapath/linux/compat/include/linux/workqueue.h index 919afe35..79158f96 100644 --- a/datapath/linux/compat/include/linux/workqueue.h +++ b/datapath/linux/compat/include/linux/workqueue.h @@ -1,23 +1,19 @@ #ifndef __LINUX_WORKQUEUE_WRAPPER_H #define __LINUX_WORKQUEUE_WRAPPER_H 1 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) -#include_next -static inline int __init ovs_workqueues_init(void) { return 0; } -static inline void ovs_workqueues_exit(void) {} - -#else #include int __init ovs_workqueues_init(void); void ovs_workqueues_exit(void); - /* Older kernels have an implementation of work queues with some very bad * characteristics when trying to cancel work (potential deadlocks, use after * free, etc. 
Therefore we implement simple ovs specific work queue using * single worker thread. work-queue API are kept similar for compatibility. + * It seems it is useful even on newer kernel. As it can avoid system wide + * freeze in event of softlockup due to workq blocked on genl_lock. */ + struct work_struct; typedef void (*work_func_t)(struct work_struct *work); @@ -68,6 +64,6 @@ int cancel_delayed_work_sync(struct delayed_work *dwork); (_work)->func = (_func); \ } while (0) -#endif /* kernel version < 2.6.23 */ +extern void flush_scheduled_work(void); #endif diff --git a/datapath/linux/compat/workqueue.c b/datapath/linux/compat/workqueue.c index 883665bf..9934f1a3 100644 --- a/datapath/linux/compat/workqueue.c +++ b/datapath/linux/compat/workqueue.c @@ -23,8 +23,6 @@ #include #include -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - static spinlock_t wq_lock; static struct list_head workq; static wait_queue_head_t more_work; @@ -210,4 +208,3 @@ void ovs_workqueues_exit(void) BUG_ON(!list_empty(&workq)); kthread_stop(workq_thread); } -#endif -- 2.30.2