datapath: Use u64_stats_sync for datapath and vport stats.
author	Jesse Gross <jesse@nicira.com>
Sat, 19 Nov 2011 17:08:56 +0000 (09:08 -0800)
committer	Jesse Gross <jesse@nicira.com>
Mon, 21 Nov 2011 18:25:19 +0000 (10:25 -0800)
We currently use a seqcount to prevent reading partial 64-bit stats
on 32-bit CPUs.  u64_stats_sync uses the same logic but elides it on
64-bit and uniprocessor machines.  This improves performance (primarily
on non-x86 architectures) at the cost of no longer guaranteeing that
packet and byte counts are read together.
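
For reference, the resulting pattern looks like this (an illustrative
sketch distilled from the hunks below; on 64-bit and UP kernels the
seqcount is elided):

	/* Writer: already serialized, since the stats are per-CPU and
	 * updated in BH context. */
	u64_stats_update_begin(&stats->sync);
	stats->n_lost++;
	u64_stats_update_end(&stats->sync);

	/* Reader: retry until a consistent 64-bit value is observed. */
	unsigned int start;
	u64 n_lost;

	do {
		start = u64_stats_fetch_begin_bh(&stats->sync);
		n_lost = stats->n_lost;
	} while (u64_stats_fetch_retry_bh(&stats->sync, start));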

Suggested-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
datapath/datapath.c
datapath/datapath.h
datapath/linux/Modules.mk
datapath/linux/compat/include/linux/u64_stats_sync.h [new file with mode: 0644]
datapath/vport.c
datapath/vport.h

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 1ea5d1e9a7c2d850cf989b34c6a5f3e7b53d25ac..adfc76a49f84c47455e1045350321d65cc67e573 100644
@@ -335,10 +335,9 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 
 out:
        /* Update datapath statistics. */
-
-       write_seqcount_begin(&stats->seqlock);
+       u64_stats_update_begin(&stats->sync);
        (*stats_counter)++;
-       write_seqcount_end(&stats->seqlock);
+       u64_stats_update_end(&stats->sync);
 }
 
 static struct genl_family dp_packet_genl_family = {
@@ -381,9 +380,9 @@ int dp_upcall(struct datapath *dp, struct sk_buff *skb,
 err:
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
 
-       write_seqcount_begin(&stats->seqlock);
+       u64_stats_update_begin(&stats->sync);
        stats->n_lost++;
-       write_seqcount_end(&stats->seqlock);
+       u64_stats_update_end(&stats->sync);
 
        return err;
 }
@@ -836,14 +835,14 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
-               unsigned seqcount;
+               unsigned int start;
 
                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
 
                do {
-                       seqcount = read_seqcount_begin(&percpu_stats->seqlock);
+                       start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
                        local_stats = *percpu_stats;
-               } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
+               } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
 
                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index dc2c75c64b2666c69ac0954494a92ead5d685932..85842215ad52ae9116726c271cef391972ec3023 100644
@@ -23,8 +23,8 @@
 #include <linux/kernel.h>
 #include <linux/mutex.h>
 #include <linux/netdevice.h>
-#include <linux/seqlock.h>
 #include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
 #include <linux/version.h>
 
 #include "checksum.h"
@@ -54,7 +54,7 @@ struct dp_stats_percpu {
        u64 n_hit;
        u64 n_missed;
        u64 n_lost;
-       seqcount_t seqlock;
+       struct u64_stats_sync sync;
 };
 
 /**
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 0857735d527039aa866b073bcdf40cd8d9cc3a37..1f9973b47a3df19921d2a9e98017fc1c69e175c6 100644
@@ -48,6 +48,7 @@ openvswitch_headers += \
        linux/compat/include/linux/tcp.h \
        linux/compat/include/linux/timer.h \
        linux/compat/include/linux/types.h \
+       linux/compat/include/linux/u64_stats_sync.h \
        linux/compat/include/linux/udp.h \
        linux/compat/include/linux/workqueue.h \
        linux/compat/include/net/checksum.h \
diff --git a/datapath/linux/compat/include/linux/u64_stats_sync.h b/datapath/linux/compat/include/linux/u64_stats_sync.h
new file mode 100644
index 0000000..45b617a
--- /dev/null
+++ b/datapath/linux/compat/include/linux/u64_stats_sync.h
@@ -0,0 +1,147 @@
+#ifndef _LINUX_U64_STATS_SYNC_WRAPPER_H
+#define _LINUX_U64_STATS_SYNC_WRAPPER_H
+
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
+#include_next <linux/u64_stats_sync.h>
+#else
+
+/*
+ * To properly implement 64-bit network statistics on 32-bit and 64-bit hosts,
+ * we provide a synchronization point that is a no-op on 64-bit or UP kernels.
+ *
+ * Key points:
+ * 1) Use a seqcount on 32-bit SMP, with low overhead.
+ * 2) The whole thing is a no-op on 64-bit arches or UP kernels.
+ * 3) The write side must ensure mutual exclusion, or one seqcount update
+ *    could be lost, blocking readers forever.
+ *    If this synchronization point is not a mutex, but a spinlock,
+ *    spinlock_bh() or disable_bh():
+ * 3.1) The write side should not sleep.
+ * 3.2) The write side should not allow preemption.
+ * 3.3) If applicable, interrupts should be disabled.
+ *
+ * 4) If a reader fetches several counters, there is no guarantee the values
+ *    are consistent as a whole (remember point 1: a no-op on 64-bit anyway).
+ *
+ * 5) Readers are allowed to sleep or be preempted/interrupted: they perform
+ *    pure reads. But if they have to fetch many values, it is better not to
+ *    allow preemption/interruption, to avoid many retries.
+ *
+ * 6) If a counter might be written by an interrupt, readers should block
+ *    interrupts. (On UP there is no seqcount_t protection, so a reader that
+ *    allows interrupts could read partial values.)
+ *
+ * 7) For softirq uses, readers can use the u64_stats_fetch_begin_bh() and
+ *    u64_stats_fetch_retry_bh() helpers.
+ *
+ * Usage:
+ *
+ * A stats producer (writer) should use the following template, provided it
+ * already has exclusive access to the counters (a lock is already taken, or
+ * per-CPU data is used in a non-preemptible context):
+ *
+ *   spin_lock_bh(...) or other synchronization to get exclusive access
+ *   ...
+ *   u64_stats_update_begin(&stats->syncp);
+ *   stats->bytes64 += len; // non atomic operation
+ *   stats->packets64++;    // non atomic operation
+ *   u64_stats_update_end(&stats->syncp);
+ *
+ * A consumer (reader) should use the following template to get a consistent
+ * snapshot of each variable (but no guarantee across several of them):
+ *
+ * u64 tbytes, tpackets;
+ * unsigned int start;
+ *
+ * do {
+ *         start = u64_stats_fetch_begin(&stats->syncp);
+ *         tbytes = stats->bytes64; // non atomic operation
+ *         tpackets = stats->packets64; // non atomic operation
+ * } while (u64_stats_fetch_retry(&stats->syncp, start));
+ *
+ *
+ * For an example of use, see drivers/net/loopback.c, which uses per-CPU
+ * containers in a BH-disabled context.
+ */
+#include <linux/seqlock.h>
+
+struct u64_stats_sync {
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       seqcount_t      seq;
+#endif
+};
+
+static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       write_seqcount_begin(&syncp->seq);
+#endif
+}
+
+static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       write_seqcount_end(&syncp->seq);
+#endif
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       return read_seqcount_begin(&syncp->seq);
+#else
+#if BITS_PER_LONG==32
+       preempt_disable();
+#endif
+       return 0;
+#endif
+}
+
+static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+                                        unsigned int start)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       return read_seqcount_retry(&syncp->seq, start);
+#else
+#if BITS_PER_LONG==32
+       preempt_enable();
+#endif
+       return false;
+#endif
+}
+
+/*
+ * In case softirq handlers can update u64 counters, readers can use following helpers
+ * - SMP 32bit arches use seqcount protection, irq safe.
+ * - UP 32bit must disable BH.
+ * - 64bit have no problem atomically reading u64 values, irq safe.
+ */
+static inline unsigned int u64_stats_fetch_begin_bh(const struct u64_stats_sync *syncp)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       return read_seqcount_begin(&syncp->seq);
+#else
+#if BITS_PER_LONG==32
+       local_bh_disable();
+#endif
+       return 0;
+#endif
+}
+
+static inline bool u64_stats_fetch_retry_bh(const struct u64_stats_sync *syncp,
+                                        unsigned int start)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       return read_seqcount_retry(&syncp->seq, start);
+#else
+#if BITS_PER_LONG==32
+       local_bh_enable();
+#endif
+       return false;
+#endif
+}
+
+#endif /* Linux kernel < 2.6.36 */
+#endif /* _LINUX_U64_STATS_SYNC_WRAPPER_H */
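
As a standalone illustration of the _bh helpers above (a minimal sketch;
the pcpu_counters structure and total_packets() helper are hypothetical,
not part of this patch), a reader summing counters that softirq writers
update might look like:

	#include <linux/percpu.h>
	#include <linux/u64_stats_sync.h>

	/* Hypothetical per-CPU counter guarded by u64_stats_sync. */
	struct pcpu_counters {
		u64 packets;
		struct u64_stats_sync sync;
	};

	/* Sum per-CPU packet counts without tearing 64-bit reads on
	 * 32-bit SMP; the _bh variants also disable BH on 32-bit UP. */
	static u64 total_packets(struct pcpu_counters __percpu *counters)
	{
		u64 sum = 0;
		int cpu;

		for_each_possible_cpu(cpu) {
			const struct pcpu_counters *c = per_cpu_ptr(counters, cpu);
			unsigned int start;
			u64 packets;

			do {
				start = u64_stats_fetch_begin_bh(&c->sync);
				packets = c->packets;
			} while (u64_stats_fetch_retry_bh(&c->sync, start));

			sum += packets;
		}
		return sum;
	}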
diff --git a/datapath/vport.c b/datapath/vport.c
index 91011d2cb6f84fe232c380aacb2e489fe5cbb259..04ae50e55f4f29f561413cf5976034369a91c00c 100644
@@ -376,14 +376,14 @@ void vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
        for_each_possible_cpu(i) {
                const struct vport_percpu_stats *percpu_stats;
                struct vport_percpu_stats local_stats;
-               unsigned seqcount;
+               unsigned int start;
 
                percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
 
                do {
-                       seqcount = read_seqcount_begin(&percpu_stats->seqlock);
+                       start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
                        local_stats = *percpu_stats;
-               } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
+               } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
 
                stats->rx_bytes         += local_stats.rx_bytes;
                stats->rx_packets       += local_stats.rx_packets;
@@ -444,10 +444,10 @@ void vport_receive(struct vport *vport, struct sk_buff *skb)
 
        stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
 
-       write_seqcount_begin(&stats->seqlock);
+       u64_stats_update_begin(&stats->sync);
        stats->rx_packets++;
        stats->rx_bytes += skb->len;
-       write_seqcount_end(&stats->seqlock);
+       u64_stats_update_end(&stats->sync);
 
        if (!(vport->ops->flags & VPORT_F_FLOW))
                OVS_CB(skb)->flow = NULL;
@@ -476,10 +476,10 @@ int vport_send(struct vport *vport, struct sk_buff *skb)
 
                stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
 
-               write_seqcount_begin(&stats->seqlock);
+               u64_stats_update_begin(&stats->sync);
                stats->tx_packets++;
                stats->tx_bytes += sent;
-               write_seqcount_end(&stats->seqlock);
+               u64_stats_update_end(&stats->sync);
        }
        return sent;
 }
diff --git a/datapath/vport.h b/datapath/vport.h
index 0f5f0779696c3e348c1685e5a890d6ca57fa2cd8..8f03f2e981538e5b1e43afcea19b27f2128f5354 100644
@@ -21,9 +21,9 @@
 
 #include <linux/list.h>
 #include <linux/openvswitch.h>
-#include <linux/seqlock.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
+#include <linux/u64_stats_sync.h>
 
 #include "datapath.h"
 
@@ -56,7 +56,7 @@ struct vport_percpu_stats {
        u64 rx_packets;
        u64 tx_bytes;
        u64 tx_packets;
-       seqcount_t seqlock;
+       struct u64_stats_sync sync;
 };
 
 struct vport_err_stats {