From: Pravin B Shelar Date: Fri, 7 Oct 2011 04:52:39 +0000 (-0700) Subject: datapath: Remove RT kernel support. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e9141eec249f97e547c6e47205b61ee98d9b52e9;p=openvswitch datapath: Remove RT kernel support. Following patch removes RT kernel support. This allows us to cleanup the loop detection. Along with this BH is now disabled while running execute_actions() for packet from user-space. As a result we can simplify the stats code as entire send and receive path runs in BH context on all supported platforms. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Bug #7621 --- diff --git a/datapath/Modules.mk b/datapath/Modules.mk index 3fb10a06..087cf44b 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -17,7 +17,6 @@ openvswitch_sources = \ dp_sysfs_dp.c \ dp_sysfs_if.c \ flow.c \ - loop_counter.c \ tunnel.c \ vlan.c \ vport.c \ @@ -35,7 +34,6 @@ openvswitch_headers = \ datapath.h \ dp_sysfs.h \ flow.h \ - loop_counter.h \ tunnel.h \ vlan.h \ vport.h \ diff --git a/datapath/actions.c b/datapath/actions.c index 36437a4b..945c461c 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -8,6 +8,8 @@ /* Functions for executing flow actions. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -23,7 +25,6 @@ #include "actions.h" #include "checksum.h" #include "datapath.h" -#include "loop_counter.h" #include "openvswitch/datapath-protocol.h" #include "vlan.h" #include "vport.h" @@ -380,6 +381,26 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return 0; } +/* We limit the number of times that we pass into execute_actions() + * to avoid blowing out the stack in the event that we have a loop. */ +#define MAX_LOOPS 5 + +struct loop_counter { + u8 count; /* Count. */ + bool looping; /* Loop detected? */ +}; + +static DEFINE_PER_CPU(struct loop_counter, loop_counters); + +static int loop_suppress(struct datapath *dp, struct sw_flow_actions *actions) +{ + if (net_ratelimit()) + pr_warn("%s: flow looped %d times, dropping\n", + dp_name(dp), MAX_LOOPS); + actions->actions_len = 0; + return -ELOOP; +} + /* Execute a list of actions against 'skb'. */ int execute_actions(struct datapath *dp, struct sk_buff *skb) { @@ -388,7 +409,7 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb) int error; /* Check whether we've looped too much. */ - loop = loop_get_counter(); + loop = &__get_cpu_var(loop_counters); if (unlikely(++loop->count > MAX_LOOPS)) loop->looping = true; if (unlikely(loop->looping)) { @@ -409,7 +430,6 @@ out_loop: /* Decrement loop counter. */ if (!--loop->count) loop->looping = false; - loop_put_counter(); return error; } diff --git a/datapath/datapath.c b/datapath/datapath.c index a925f6f2..07188ad5 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -280,9 +280,10 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb) struct datapath *dp = p->dp; struct sw_flow *flow; struct dp_stats_percpu *stats; - int stats_counter_off; + u64 *stats_counter; int error; + stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); OVS_CB(skb)->vport = p; if (!OVS_CB(skb)->flow) { @@ -299,7 +300,7 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb) if (is_frag && dp->drop_frags) { consume_skb(skb); - stats_counter_off = offsetof(struct dp_stats_percpu, n_frags); + stats_counter = &stats->n_frags; goto out; } @@ -312,27 +313,23 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.key = &key; dp_upcall(dp, skb, &upcall); kfree_skb(skb); - stats_counter_off = offsetof(struct dp_stats_percpu, n_missed); + stats_counter = &stats->n_missed; goto out; } OVS_CB(skb)->flow = flow; } - stats_counter_off = offsetof(struct dp_stats_percpu, n_hit); + stats_counter = &stats->n_hit; flow_used(OVS_CB(skb)->flow, skb); execute_actions(dp, skb); out: /* Update datapath statistics. */ - local_bh_disable(); - stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); write_seqcount_begin(&stats->seqlock); - (*(u64 *)((u8 *)stats + stats_counter_off))++; + (*stats_counter)++; write_seqcount_end(&stats->seqlock); - - local_bh_enable(); } static void copy_and_csum_skb(struct sk_buff *skb, void *to) @@ -406,15 +403,12 @@ int dp_upcall(struct datapath *dp, struct sk_buff *skb, return 0; err: - local_bh_disable(); stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); write_seqcount_begin(&stats->seqlock); stats->n_lost++; write_seqcount_end(&stats->seqlock); - local_bh_enable(); - return err; } @@ -706,7 +700,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) OVS_CB(packet)->vport = get_vport_protected(dp, flow->key.eth.in_port); + local_bh_disable(); err = execute_actions(dp, packet); + local_bh_enable(); rcu_read_unlock(); flow_put(flow); diff --git a/datapath/flow.c b/datapath/flow.c index 2d08c9d7..73222954 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -173,12 +173,12 @@ void flow_used(struct sw_flow *flow, struct sk_buff *skb) tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; } - spin_lock_bh(&flow->lock); + spin_lock(&flow->lock); flow->used = jiffies; flow->packet_count++; flow->byte_count += skb->len; flow->tcp_flags |= tcp_flags; - spin_unlock_bh(&flow->lock); + spin_unlock(&flow->lock); } struct sw_flow_actions *flow_actions_alloc(const struct nlattr *actions) diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk index cb680100..1e4fef6d 100644 --- a/datapath/linux/Modules.mk +++ b/datapath/linux/Modules.mk @@ -11,7 +11,6 @@ openvswitch_sources += \ linux/compat/time.c openvswitch_headers += \ linux/compat/include/asm-generic/bug.h \ - linux/compat/include/linux/bottom_half.h \ linux/compat/include/linux/compiler.h \ linux/compat/include/linux/compiler-gcc.h \ linux/compat/include/linux/cpumask.h \ diff --git a/datapath/linux/compat/include/linux/bottom_half.h b/datapath/linux/compat/include/linux/bottom_half.h deleted file mode 100644 index 59eb4bca..00000000 --- a/datapath/linux/compat/include/linux/bottom_half.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef __LINUX_BH_WRAPPER_H -#define __LINUX_BH_WRAPPER_H 1 - -#include_next - -/* This is not, strictly speaking, compatibility code in the sense that it is - * not needed by older kernels. However, it is used on kernels with the - * realtime patchset applied to create an environment more similar to what we - * would see on normal kernels. - */ - -#ifdef CONFIG_PREEMPT_HARDIRQS -#undef local_bh_disable -#define local_bh_disable preempt_disable -#undef local_bh_enable -#define local_bh_enable preempt_enable -#endif - -#endif diff --git a/datapath/loop_counter.c b/datapath/loop_counter.c deleted file mode 100644 index 3e1d890a..00000000 --- a/datapath/loop_counter.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2010, 2011 Nicira Networks. - * - * Significant portions of this file may be copied from parts of the Linux - * kernel, by Linus Torvalds and others. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include - -#include "loop_counter.h" - -int loop_suppress(struct datapath *dp, struct sw_flow_actions *actions) -{ - if (net_ratelimit()) - pr_warn("%s: flow looped %d times, dropping\n", - dp_name(dp), MAX_LOOPS); - actions->actions_len = 0; - return -ELOOP; -} - -#ifndef CONFIG_PREEMPT_RT - -/* We use a separate counter for each CPU for both interrupt and non-interrupt - * context in order to keep the limit deterministic for a given packet. - */ -struct percpu_loop_counters { - struct loop_counter counters[2]; -}; - -static DEFINE_PER_CPU(struct percpu_loop_counters, loop_counters); - -struct loop_counter *loop_get_counter(void) -{ - return &get_cpu_var(loop_counters).counters[!!in_interrupt()]; -} - -void loop_put_counter(void) -{ - put_cpu_var(loop_counters); -} - -#else /* !CONFIG_PREEMPT_RT */ - -struct loop_counter *loop_get_counter(void) -{ - WARN_ON(in_interrupt()); - - /* Only two bits of the extra_flags field in struct task_struct are - * used and it's an unsigned int. We hijack the most significant bits - * to be our counter structure. On RT kernels softirqs always run in - * process context so we are guaranteed to have a valid task_struct. - */ - -#ifdef __LITTLE_ENDIAN - return (void *)(¤t->extra_flags + 1) - - sizeof(struct loop_counter); -#elif __BIG_ENDIAN - return (struct loop_counter *)¤t->extra_flags; -#else -#error "Please fix ." -#endif -} - -void loop_put_counter(void) { } - -#endif /* CONFIG_PREEMPT_RT */ diff --git a/datapath/loop_counter.h b/datapath/loop_counter.h deleted file mode 100644 index 0d1fdf96..00000000 --- a/datapath/loop_counter.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2010, 2011 Nicira Networks. - * Distributed under the terms of the GNU GPL version 2. - * - * Significant portions of this file may be copied from parts of the Linux - * kernel, by Linus Torvalds and others. - */ - -#ifndef LOOP_COUNTER_H -#define LOOP_COUNTER_H 1 - -#include "datapath.h" -#include "flow.h" - -/* We limit the number of times that we pass into dp_process_received_packet() - * to avoid blowing out the stack in the event that we have a loop. */ -#define MAX_LOOPS 5 - -struct loop_counter { - u8 count; /* Count. */ - bool looping; /* Loop detected? */ -}; - -struct loop_counter *loop_get_counter(void); -void loop_put_counter(void); -int loop_suppress(struct datapath *, struct sw_flow_actions *); - -#endif /* loop_counter.h */ diff --git a/datapath/tunnel.c b/datapath/tunnel.c index 3576df84..3bf58ddf 100644 --- a/datapath/tunnel.c +++ b/datapath/tunnel.c @@ -813,9 +813,9 @@ static inline void create_eth_hdr(struct tnl_cache *cache, cache->hh_seq = hh_seq; #else - read_lock_bh(&rt_dst(rt).hh->hh_lock); + read_lock(&rt_dst(rt).hh->hh_lock); memcpy(cache_data, (void *)rt_dst(rt).hh->hh_data + hh_off, hh_len); - read_unlock_bh(&rt_dst(rt).hh->hh_lock); + read_unlock(&rt_dst(rt).hh->hh_lock); #endif } @@ -842,7 +842,7 @@ static struct tnl_cache *build_cache(struct vport *vport, * If lock is contended fall back to directly building the header. * We're not going to help performance by sitting here spinning. */ - if (!spin_trylock_bh(&tnl_vport->cache_lock)) + if (!spin_trylock(&tnl_vport->cache_lock)) return NULL; cache = cache_dereference(tnl_vport); @@ -910,7 +910,7 @@ done: assign_cache_rcu(vport, cache); unlock: - spin_unlock_bh(&tnl_vport->cache_lock); + spin_unlock(&tnl_vport->cache_lock); return cache; } diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c index 1c6897f4..b503b877 100644 --- a/datapath/vport-internal_dev.c +++ b/datapath/vport-internal_dev.c @@ -251,10 +251,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) skb->protocol = eth_type_trans(skb, netdev); forward_ip_summed(skb, false); - if (in_interrupt()) - netif_rx(skb); - else - netif_rx_ni(skb); + netif_rx(skb); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) netdev->last_rx = jiffies; diff --git a/datapath/vport.c b/datapath/vport.c index 8826e75d..ad5a10e7 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -563,7 +563,6 @@ void vport_receive(struct vport *vport, struct sk_buff *skb) { struct vport_percpu_stats *stats; - local_bh_disable(); stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); write_seqcount_begin(&stats->seqlock); @@ -571,8 +570,6 @@ void vport_receive(struct vport *vport, struct sk_buff *skb) stats->rx_bytes += skb->len; write_seqcount_end(&stats->seqlock); - local_bh_enable(); - if (!(vport->ops->flags & VPORT_F_FLOW)) OVS_CB(skb)->flow = NULL; @@ -596,7 +593,6 @@ int vport_send(struct vport *vport, struct sk_buff *skb) struct vport_percpu_stats *stats; int sent = vport->ops->send(vport, skb); - local_bh_disable(); stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); write_seqcount_begin(&stats->seqlock); @@ -604,8 +600,6 @@ int vport_send(struct vport *vport, struct sk_buff *skb) stats->tx_bytes += sent; write_seqcount_end(&stats->seqlock); - local_bh_enable(); - return sent; } @@ -620,7 +614,7 @@ int vport_send(struct vport *vport, struct sk_buff *skb) */ void vport_record_error(struct vport *vport, enum vport_err_type err_type) { - spin_lock_bh(&vport->stats_lock); + spin_lock(&vport->stats_lock); switch (err_type) { case VPORT_E_RX_DROPPED: @@ -640,5 +634,5 @@ void vport_record_error(struct vport *vport, enum vport_err_type err_type) break; }; - spin_unlock_bh(&vport->stats_lock); + spin_unlock(&vport->stats_lock); }