timeval: Log additional statistics along with wall-clock time.
author    Ben Pfaff <blp@nicira.com>
Tue, 2 Jun 2009 20:56:53 +0000 (13:56 -0700)
committer Ben Pfaff <blp@nicira.com>
Tue, 2 Jun 2009 21:30:15 +0000 (14:30 -0700)
It's sometimes unclear whether a process was simply descheduled for an
extended interval during its run, or whether it actually did a lot of work
during that time.  Also logging system and user times, the number of context
switches, etc., makes it easier to tell the two apart when debugging.
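
For context, here is a minimal standalone sketch of the technique this patch
applies inside log_poll_interval(): snapshot getrusage() before and after a
blocking call and report the deltas.  This is an illustration only, not code
from the commit; the sleep() merely stands in for the poll() loop in
lib/timeval.c, and the helper mirrors the timeval_to_msec() added below.

    #include <stdio.h>
    #include <sys/resource.h>
    #include <sys/time.h>
    #include <unistd.h>

    /* Converts 'tv' to an integer count of milliseconds. */
    static long long int
    timeval_to_msec(const struct timeval *tv)
    {
        return (long long int) tv->tv_sec * 1000 + tv->tv_usec / 1000;
    }

    int
    main(void)
    {
        struct rusage before, after;

        getrusage(RUSAGE_SELF, &before);
        sleep(1);                   /* Stand-in for poll() or real work. */
        getrusage(RUSAGE_SELF, &after);

        /* The deltas show whether the elapsed wall-clock time was spent
         * doing work (user/system CPU time) or waiting/descheduled
         * (little CPU time, possibly many involuntary switches). */
        printf("user %lld ms, system %lld ms, "
               "%ld voluntary / %ld involuntary context switches\n",
               timeval_to_msec(&after.ru_utime)
                   - timeval_to_msec(&before.ru_utime),
               timeval_to_msec(&after.ru_stime)
                   - timeval_to_msec(&before.ru_stime),
               after.ru_nvcsw - before.ru_nvcsw,
               after.ru_nivcsw - before.ru_nivcsw);
        return 0;
    }

The patch takes its "before" snapshot at the end of the previous
time_poll() call, so the deltas cover everything done since the last wakeup.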

lib/timeval.c

index da72d9482066a3638b98ff61bcbb88e67e4a0ac4..37ca11f2a122557bdd32694bdfc3bfdba176f5e1 100644 (file)
@@ -39,6 +39,8 @@
 #include <signal.h>
 #include <string.h>
 #include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
 #include "coverage.h"
 #include "fatal-signal.h"
 #include "util.h"
@@ -63,7 +65,9 @@ static void refresh_if_ticked(void);
 static time_t time_add(time_t, time_t);
 static void block_sigalrm(sigset_t *);
 static void unblock_sigalrm(const sigset_t *);
-static void log_poll_interval(long long int last_wakeup);
+static void log_poll_interval(long long int last_wakeup,
+                              const struct rusage *last_rusage);
+static long long int timeval_to_msec(const struct timeval *);
 
 /* Initializes the timetracking module. */
 void
@@ -121,7 +125,7 @@ long long int
 time_msec(void)
 {
     refresh_if_ticked();
-    return (long long int) now.tv_sec * 1000 + now.tv_usec / 1000;
+    return timeval_to_msec(&now);
 }
 
 /* Stores the current time, accurate within TIME_UPDATE_INTERVAL ms, into
@@ -160,13 +164,14 @@ int
 time_poll(struct pollfd *pollfds, int n_pollfds, int timeout)
 {
     static long long int last_wakeup;
+    static struct rusage last_rusage;
     long long int start;
     sigset_t oldsigs;
     bool blocked;
     int retval;
 
     time_refresh();
-    log_poll_interval(last_wakeup);
+    log_poll_interval(last_wakeup, &last_rusage);
     coverage_clear();
     start = time_msec();
     blocked = false;
@@ -197,6 +202,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, int timeout)
         unblock_sigalrm(&oldsigs);
     }
     last_wakeup = time_msec();
+    getrusage(RUSAGE_SELF, &last_rusage);
     return retval;
 }
 
@@ -246,8 +252,20 @@ unblock_sigalrm(const sigset_t *oldsigs)
     }
 }
 
+static long long int
+timeval_to_msec(const struct timeval *tv)
+{
+    return (long long int) tv->tv_sec * 1000 + tv->tv_usec / 1000;
+}
+
+static long long int
+timeval_diff_msec(const struct timeval *a, const struct timeval *b)
+{
+    return timeval_to_msec(a) - timeval_to_msec(b);
+}
+
 static void
-log_poll_interval(long long int last_wakeup)
+log_poll_interval(long long int last_wakeup, const struct rusage *last_rusage)
 {
     static unsigned int mean_interval; /* In 16ths of a millisecond. */
     static unsigned int n_samples;
@@ -262,10 +280,35 @@ log_poll_interval(long long int last_wakeup)
 
     /* Warn if we took too much time between polls. */
     if (n_samples > 10 && interval > mean_interval * 8) {
-        VLOG_WARN("%u ms poll interval is over %u times "
-                  "the weighted mean interval %u ms (%u samples)",
-                  (interval + 8) / 16, interval / mean_interval,
+        struct rusage rusage;
+
+        getrusage(RUSAGE_SELF, &rusage);
+        VLOG_WARN("%u ms poll interval (%lld ms user, %lld ms system) "
+                  "is over %u times the weighted mean interval %u ms "
+                  "(%u samples)",
+                  (interval + 8) / 16,
+                  timeval_diff_msec(&rusage.ru_utime, &last_rusage->ru_utime),
+                  timeval_diff_msec(&rusage.ru_stime, &last_rusage->ru_stime),
+                  interval / mean_interval,
                   (mean_interval + 8) / 16, n_samples);
+        if (rusage.ru_minflt > last_rusage->ru_minflt
+            || rusage.ru_majflt > last_rusage->ru_majflt) {
+            VLOG_WARN("faults: %ld minor, %ld major",
+                      rusage.ru_minflt - last_rusage->ru_minflt,
+                      rusage.ru_majflt - last_rusage->ru_majflt);
+        }
+        if (rusage.ru_inblock > last_rusage->ru_inblock
+            || rusage.ru_oublock > last_rusage->ru_oublock) {
+            VLOG_WARN("disk: %ld reads, %ld writes",
+                      rusage.ru_inblock - last_rusage->ru_inblock,
+                      rusage.ru_oublock - last_rusage->ru_oublock);
+        }
+        if (rusage.ru_nvcsw > last_rusage->ru_nvcsw
+            || rusage.ru_nivcsw > last_rusage->ru_nivcsw) {
+            VLOG_WARN("context switches: %ld voluntary, %ld involuntary",
+                      rusage.ru_nvcsw - last_rusage->ru_nvcsw,
+                      rusage.ru_nivcsw - last_rusage->ru_nivcsw);
+        }
         coverage_log(VLL_WARN);
     }
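
With these additions, an unusually long gap between polls produces warnings
along these lines (the numbers here are invented purely for illustration; the
exact wording follows the format strings above):

    1896 ms poll interval (113 ms user, 42 ms system) is over 30 times the weighted mean interval 63 ms (58 samples)
    faults: 25 minor, 2 major
    context switches: 3 voluntary, 11 involuntary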