datpath: Avoid reporting half updated statistics.
authorJesse Gross <jesse@nicira.com>
Thu, 29 Jul 2010 01:20:43 +0000 (18:20 -0700)
committerJesse Gross <jesse@nicira.com>
Sat, 21 Aug 2010 02:44:46 +0000 (19:44 -0700)
We enforce mutual exclusion when updating statistics by disabling
bottom halves and only writing to per-CPU state.  However, reading
requires looking at the statistics for foreign CPUs, which could be
in the process of updating them since there isn't a lock.  This means
we could get garbage values for 64-bit values on 32-bit machines or
byte counts that don't correspond to packet counts, etc.

This commit introduces a sequence lock for statistics values to avoid
this problem.  Getting a write lock is very cheap - it only requires
incrementing a counter plus a memory barrier (which is compiled away
on x86) to acquire or release the lock and will never block.  On
read we spin until the sequence number hasn't changed in the middle
of the operation, indicating that the we have a consistent set of
values.

Signed-off-by: Jesse Gross <jesse@nicira.com>
datapath/datapath.c
datapath/datapath.h
datapath/vport.c
datapath/vport.h

index e46819876b9caf11a8748230db675bb06f3dd6ea..32572c6f9d5a2bced7cdc005a3e4d0ec45cc1540 100644 (file)
@@ -599,7 +599,11 @@ out:
        /* Update datapath statistics. */
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+       write_seqcount_begin(&stats->seqlock);
        (*(u64 *)((u8 *)stats + stats_counter_off))++;
+       write_seqcount_end(&stats->seqlock);
+
        local_bh_enable();
 }
 
@@ -860,7 +864,11 @@ err_kfree_skb:
 err:
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+       write_seqcount_begin(&stats->seqlock);
        stats->n_lost++;
+       write_seqcount_end(&stats->seqlock);
+
        local_bh_enable();
 
        return err;
@@ -1394,12 +1402,21 @@ static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
        stats.max_groups = DP_MAX_GROUPS;
        stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0;
        for_each_possible_cpu(i) {
-               const struct dp_stats_percpu *s;
-               s = per_cpu_ptr(dp->stats_percpu, i);
-               stats.n_frags += s->n_frags;
-               stats.n_hit += s->n_hit;
-               stats.n_missed += s->n_missed;
-               stats.n_lost += s->n_lost;
+               const struct dp_stats_percpu *percpu_stats;
+               struct dp_stats_percpu local_stats;
+               unsigned seqcount;
+
+               percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
+
+               do {
+                       seqcount = read_seqcount_begin(&percpu_stats->seqlock);
+                       local_stats = *percpu_stats;
+               } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
+
+               stats.n_frags += local_stats.n_frags;
+               stats.n_hit += local_stats.n_hit;
+               stats.n_missed += local_stats.n_missed;
+               stats.n_lost += local_stats.n_lost;
        }
        stats.max_miss_queue = DP_MAX_QUEUE_LEN;
        stats.max_action_queue = DP_MAX_QUEUE_LEN;
index 8e272836e0d05964d280dd57872d784dda623212..fd81dfbad0866427401e345e1ff39ffbe4d6dece 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/workqueue.h>
+#include <linux/seqlock.h>
 #include <linux/skbuff.h>
 #include <linux/version.h>
 #include "flow.h"
@@ -53,6 +54,7 @@ struct dp_stats_percpu {
        u64 n_hit;
        u64 n_missed;
        u64 n_lost;
+       seqcount_t seqlock;
 };
 
 struct dp_port_group {
index dd1c31f3c71b34f853e31cd8b3ffda0cbee8d8f3..cdf615a47b8b135c631988ba6d52e9666391bab3 100644 (file)
@@ -1052,12 +1052,20 @@ int vport_get_stats(struct vport *vport, struct odp_vport_stats *stats)
 
                for_each_possible_cpu(i) {
                        const struct vport_percpu_stats *percpu_stats;
+                       struct vport_percpu_stats local_stats;
+                       unsigned seqcount;
 
                        percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
-                       stats->rx_bytes         += percpu_stats->rx_bytes;
-                       stats->rx_packets       += percpu_stats->rx_packets;
-                       stats->tx_bytes         += percpu_stats->tx_bytes;
-                       stats->tx_packets       += percpu_stats->tx_packets;
+
+                       do {
+                               seqcount = read_seqcount_begin(&percpu_stats->seqlock);
+                               local_stats = *percpu_stats;
+                       } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
+
+                       stats->rx_bytes         += local_stats.rx_bytes;
+                       stats->rx_packets       += local_stats.rx_packets;
+                       stats->tx_bytes         += local_stats.tx_bytes;
+                       stats->tx_packets       += local_stats.tx_packets;
                }
 
                err = 0;
@@ -1192,10 +1200,12 @@ void vport_receive(struct vport *vport, struct sk_buff *skb)
                struct vport_percpu_stats *stats;
 
                local_bh_disable();
-
                stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+
+               write_seqcount_begin(&stats->seqlock);
                stats->rx_packets++;
                stats->rx_bytes += skb->len;
+               write_seqcount_end(&stats->seqlock);
 
                local_bh_enable();
        }
@@ -1244,10 +1254,12 @@ int vport_send(struct vport *vport, struct sk_buff *skb)
                struct vport_percpu_stats *stats;
 
                local_bh_disable();
-
                stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+
+               write_seqcount_begin(&stats->seqlock);
                stats->tx_packets++;
                stats->tx_bytes += sent;
+               write_seqcount_end(&stats->seqlock);
 
                local_bh_enable();
        }
index a5c17f6e5211a1c3bae26f631e1a3bb1ca3f76bc..0a6801d97efc0c1db472a9d459fee9bb94c58ec4 100644 (file)
@@ -10,6 +10,7 @@
 #define VPORT_H 1
 
 #include <linux/list.h>
+#include <linux/seqlock.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 
@@ -83,6 +84,7 @@ struct vport_percpu_stats {
        u64 rx_packets;
        u64 tx_bytes;
        u64 tx_packets;
+       seqcount_t seqlock;
 };
 
 struct vport_err_stats {