datapath: Fix OVS compat workqueue support.
authorPravin B Shelar <pshelar@nicira.com>
Thu, 5 Jan 2012 01:22:07 +0000 (17:22 -0800)
committerPravin B Shelar <pshelar@nicira.com>
Thu, 5 Jan 2012 01:22:07 +0000 (17:22 -0800)
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
datapath/datapath.c
datapath/linux/Modules.mk
datapath/linux/compat/include/linux/workqueue.h
datapath/linux/compat/workqueue.c [new file with mode: 0644]

index 281e86bf34cf1754d8f6b9aeb74c6c563a55e377..754cb32ffc677fd4c54d99f5041d913a845a653e 100644 (file)
@@ -2054,10 +2054,14 @@ static int __init dp_init(void)
        if (err)
                goto error;
 
-       err = ovs_tnl_init();
+       err = ovs_workqueues_init();
        if (err)
                goto error_genl_exec;
 
+       err = ovs_tnl_init();
+       if (err)
+               goto error_wq;
+
        err = ovs_flow_init();
        if (err)
                goto error_tnl_exit;
@@ -2084,6 +2088,8 @@ error_flow_exit:
        ovs_flow_exit();
 error_tnl_exit:
        ovs_tnl_exit();
+error_wq:
+       ovs_workqueues_exit();
 error_genl_exec:
        genl_exec_exit();
 error:
@@ -2098,6 +2104,7 @@ static void dp_cleanup(void)
        ovs_vport_exit();
        ovs_flow_exit();
        ovs_tnl_exit();
+       ovs_workqueues_exit();
        genl_exec_exit();
 }
 
index f6cb88edddd368a73b70047dfe30250edda4e88f..40c3927acf52cf15212d56bb2087087553077859 100644 (file)
@@ -9,7 +9,8 @@ openvswitch_sources += \
        linux/compat/netdevice.c \
        linux/compat/reciprocal_div.c \
        linux/compat/skbuff-openvswitch.c \
-       linux/compat/time.c
+       linux/compat/time.c     \
+       linux/compat/workqueue.c
 openvswitch_headers += \
        linux/compat/include/linux/compiler.h \
        linux/compat/include/linux/compiler-gcc.h \
index 01c6345e9370d3e57986e33dfd9781c1c218d590..919afe350b1036657f5221e563014bd36bf0d1f8 100644 (file)
@@ -1,41 +1,73 @@
 #ifndef __LINUX_WORKQUEUE_WRAPPER_H
 #define __LINUX_WORKQUEUE_WRAPPER_H 1
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
 #include_next <linux/workqueue.h>
+static inline int __init ovs_workqueues_init(void) { return 0; }
+static inline void  ovs_workqueues_exit(void) {}
+
+#else
+#include <linux/timer.h>
+
+int __init ovs_workqueues_init(void);
+void ovs_workqueues_exit(void);
 
-#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
 
 /* Older kernels have an implementation of work queues with some very bad
  * characteristics when trying to cancel work (potential deadlocks, use after
- * free, etc.  Here we directly use timers instead for delayed work.  It's not
- * optimal but it is better than the alternative.  Note that work queues
- * normally run in process context but this will cause them to operate in
- * softirq context.
+ * free, etc.  Therefore we implement simple ovs specific work queue using
+ * single worker thread. work-queue API are kept similar for compatibility.
  */
+struct work_struct;
 
-#include <linux/timer.h>
+typedef void (*work_func_t)(struct work_struct *work);
 
-#undef DECLARE_DELAYED_WORK
-#define DECLARE_DELAYED_WORK(n, f) \
-       struct timer_list n = TIMER_INITIALIZER((void (*)(unsigned long))f, 0, 0)
+#define work_data_bits(work) ((unsigned long *)(&(work)->data))
 
-#define schedule_delayed_work rpl_schedule_delayed_work
-static inline int schedule_delayed_work(struct timer_list *timer, unsigned long delay)
-{
-       if (timer_pending(timer))
-               return 0;
+struct work_struct {
+#define WORK_STRUCT_PENDING 0           /* T if work item pending execution */
+       atomic_long_t data;
+       struct list_head entry;
+       work_func_t func;
+};
+
+#define WORK_DATA_INIT()        ATOMIC_LONG_INIT(0)
+
+#define work_clear_pending(work)                               \
+       clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
+
+struct delayed_work {
+       struct work_struct work;
+       struct timer_list timer;
+};
 
-       mod_timer(timer, jiffies + delay);
-       return 1;
+#define __WORK_INITIALIZER(n, f) {                             \
+       .data = WORK_DATA_INIT(),                               \
+       .entry  = { &(n).entry, &(n).entry },                   \
+       .func = (f),                                            \
 }
 
-#define cancel_delayed_work_sync rpl_cancel_delayed_work_sync
-static inline int cancel_delayed_work_sync(struct timer_list *timer)
-{
-       return del_timer_sync(timer);
+#define __DELAYED_WORK_INITIALIZER(n, f) {                     \
+       .work = __WORK_INITIALIZER((n).work, (f)),              \
+       .timer = TIMER_INITIALIZER(NULL, 0, 0),                 \
 }
 
+#define DECLARE_DELAYED_WORK(n, f)                             \
+       struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
+
+#define schedule_delayed_work rpl_schedule_delayed_work
+int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
+
+#define cancel_delayed_work_sync rpl_cancel_delayed_work_sync
+int cancel_delayed_work_sync(struct delayed_work *dwork);
+
+#define INIT_WORK(_work, _func)                                        \
+       do {                                                    \
+               (_work)->data = (atomic_long_t) WORK_DATA_INIT();       \
+               INIT_LIST_HEAD(&(_work)->entry);                \
+               (_work)->func = (_func);                        \
+       } while (0)
+
 #endif /* kernel version < 2.6.23 */
 
 #endif
diff --git a/datapath/linux/compat/workqueue.c b/datapath/linux/compat/workqueue.c
new file mode 100644 (file)
index 0000000..883665b
--- /dev/null
@@ -0,0 +1,213 @@
+/*
+ * Derived from the kernel/workqueue.c
+ *
+ * This is the generic async execution mechanism.  Work items as are
+ * executed in process context.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/completion.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/hardirq.h>
+#include <linux/mempolicy.h>
+#include <linux/kallsyms.h>
+#include <linux/debug_locks.h>
+#include <linux/lockdep.h>
+#include <linux/idr.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+static spinlock_t wq_lock;
+static struct list_head workq;
+static wait_queue_head_t more_work;
+static struct task_struct *workq_thread;
+static struct work_struct *current_work;
+
+static void queue_work(struct work_struct *work)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&wq_lock, flags);
+       list_add_tail(&work->entry, &workq);
+       wake_up(&more_work);
+       spin_unlock_irqrestore(&wq_lock, flags);
+}
+
+static void _delayed_work_timer_fn(unsigned long __data)
+{
+       struct delayed_work *dwork = (struct delayed_work *)__data;
+       queue_work(&dwork->work);
+}
+
+static void __queue_delayed_work(struct delayed_work *dwork,
+               unsigned long delay)
+{
+       struct timer_list *timer = &dwork->timer;
+       struct work_struct *work = &dwork->work;
+
+       BUG_ON(timer_pending(timer));
+       BUG_ON(!list_empty(&work->entry));
+
+       timer->expires = jiffies + delay;
+       timer->data = (unsigned long)dwork;
+       timer->function = _delayed_work_timer_fn;
+
+       add_timer(timer);
+}
+
+int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
+{
+       if (test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(&dwork->work)))
+               return 0;
+
+       if (delay == 0)
+               queue_work(&dwork->work);
+       else
+               __queue_delayed_work(dwork, delay);
+
+       return 1;
+}
+
+struct wq_barrier {
+       struct work_struct      work;
+       struct completion       done;
+};
+
+static void wq_barrier_func(struct work_struct *work)
+{
+       struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
+       complete(&barr->done);
+}
+
+static void workqueue_barrier(struct work_struct *work)
+{
+       bool need_barrier;
+       struct wq_barrier barr;
+
+       spin_lock_irq(&wq_lock);
+       if (current_work != work)
+               need_barrier = false;
+       else {
+               INIT_WORK(&barr.work, wq_barrier_func);
+               init_completion(&barr.done);
+               list_add(&barr.work.entry, &workq);
+               wake_up(&more_work);
+               need_barrier = true;
+       }
+       spin_unlock_irq(&wq_lock);
+
+       if (need_barrier)
+               wait_for_completion(&barr.done);
+}
+
+static int try_to_grab_pending(struct work_struct *work)
+{
+       int ret;
+
+       BUG_ON(in_interrupt());
+
+       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
+               return 0;
+
+       spin_lock_irq(&wq_lock);
+       if (!list_empty(&work->entry)) {
+               list_del_init(&work->entry);
+               ret = 0;
+       } else
+               /* Already executed, retry. */
+               ret = -1;
+       spin_unlock_irq(&wq_lock);
+
+       return ret;
+}
+
+static int __cancel_work_timer(struct work_struct *work,
+                              struct timer_list *timer)
+{
+       int ret;
+
+       for (;;) {
+               ret = (timer && likely(del_timer(timer)));
+               if (ret) /* Was active timer, return true. */
+                       break;
+
+               /* Inactive timer case */
+               ret = try_to_grab_pending(work);
+               if (!ret)
+                       break;
+       }
+       workqueue_barrier(work);
+       work_clear_pending(work);
+       return ret;
+}
+
+int cancel_delayed_work_sync(struct delayed_work *dwork)
+{
+       return __cancel_work_timer(&dwork->work, &dwork->timer);
+}
+
+static void run_workqueue(void)
+{
+       spin_lock_irq(&wq_lock);
+       while (!list_empty(&workq)) {
+               struct work_struct *work = list_entry(workq.next,
+                               struct work_struct, entry);
+
+               work_func_t f = work->func;
+               list_del_init(workq.next);
+               current_work = work;
+               spin_unlock_irq(&wq_lock);
+
+               work_clear_pending(work);
+               f(work);
+
+               BUG_ON(in_interrupt());
+               spin_lock_irq(&wq_lock);
+               current_work = NULL;
+       }
+       spin_unlock_irq(&wq_lock);
+}
+
+static int worker_thread(void *dummy)
+{
+       for (;;) {
+               wait_event_interruptible(more_work,
+                               (kthread_should_stop() || !list_empty(&workq)));
+
+               if (kthread_should_stop())
+                       break;
+
+               run_workqueue();
+       }
+
+       return 0;
+}
+
+int __init ovs_workqueues_init(void)
+{
+       spin_lock_init(&wq_lock);
+       INIT_LIST_HEAD(&workq);
+       init_waitqueue_head(&more_work);
+
+       workq_thread = kthread_create(worker_thread, NULL, "ovs_workq");
+       if (IS_ERR(workq_thread))
+               return PTR_ERR(workq_thread);
+
+       wake_up_process(workq_thread);
+       return 0;
+}
+
+void  ovs_workqueues_exit(void)
+{
+       BUG_ON(!list_empty(&workq));
+       kthread_stop(workq_thread);
+}
+#endif