timeval: Rate-limit logging rusage information.
authorBen Pfaff <blp@nicira.com>
Fri, 20 Apr 2012 20:43:54 +0000 (13:43 -0700)
committerBen Pfaff <blp@nicira.com>
Thu, 26 Apr 2012 21:06:27 +0000 (14:06 -0700)
I'd always assumed that the exponentially weighted moving average code
here was sufficient rate-limiting, but I actually encountered a
pathological case some time ago that forced this rusage information to
print once a second or so, which seems too often.

Signed-off-by: Ben Pfaff <blp@nicira.com>
lib/timeval.c

index 3f12b501209f681f345e4108eb7b8fae53548925..9829100303dbad775e4dceba18e1c96418ff650a 100644 (file)
@@ -463,37 +463,42 @@ log_poll_interval(long long int last_wakeup)
     /* Warn if we took too much time between polls: at least 50 ms and at least
      * 8X the mean interval. */
     if (n_samples > 10 && interval > mean_interval * 8 && interval > 50 * 16) {
-        const struct rusage *last_rusage = get_recent_rusage();
-        struct rusage rusage;
-
-        getrusage(RUSAGE_SELF, &rusage);
-        VLOG_WARN("%lld ms poll interval (%lld ms user, %lld ms system) "
-                  "is over %u times the weighted mean interval %u ms "
-                  "(%u samples)",
-                  now - last_wakeup,
-                  timeval_diff_msec(&rusage.ru_utime, &last_rusage->ru_utime),
-                  timeval_diff_msec(&rusage.ru_stime, &last_rusage->ru_stime),
-                  interval / mean_interval,
-                  (mean_interval + 8) / 16, n_samples);
-        if (rusage.ru_minflt > last_rusage->ru_minflt
-            || rusage.ru_majflt > last_rusage->ru_majflt) {
-            VLOG_WARN("faults: %ld minor, %ld major",
-                      rusage.ru_minflt - last_rusage->ru_minflt,
-                      rusage.ru_majflt - last_rusage->ru_majflt);
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 3);
+
+        if (!VLOG_DROP_WARN(&rl)) {
+            const struct rusage *last_rusage = get_recent_rusage();
+            struct rusage rusage;
+
+            getrusage(RUSAGE_SELF, &rusage);
+            VLOG_WARN("%lld ms poll interval (%lld ms user, %lld ms system) "
+                      "is over %u times the weighted mean interval %u ms "
+                      "(%u samples)",
+                      now - last_wakeup,
+                      timeval_diff_msec(&rusage.ru_utime,
+                                        &last_rusage->ru_utime),
+                      timeval_diff_msec(&rusage.ru_stime,
+                                        &last_rusage->ru_stime),
+                      interval / mean_interval,
+                      (mean_interval + 8) / 16, n_samples);
+            if (rusage.ru_minflt > last_rusage->ru_minflt
+                || rusage.ru_majflt > last_rusage->ru_majflt) {
+                VLOG_WARN("faults: %ld minor, %ld major",
+                          rusage.ru_minflt - last_rusage->ru_minflt,
+                          rusage.ru_majflt - last_rusage->ru_majflt);
+            }
+            if (rusage.ru_inblock > last_rusage->ru_inblock
+                || rusage.ru_oublock > last_rusage->ru_oublock) {
+                VLOG_WARN("disk: %ld reads, %ld writes",
+                          rusage.ru_inblock - last_rusage->ru_inblock,
+                          rusage.ru_oublock - last_rusage->ru_oublock);
+            }
+            if (rusage.ru_nvcsw > last_rusage->ru_nvcsw
+                || rusage.ru_nivcsw > last_rusage->ru_nivcsw) {
+                VLOG_WARN("context switches: %ld voluntary, %ld involuntary",
+                          rusage.ru_nvcsw - last_rusage->ru_nvcsw,
+                          rusage.ru_nivcsw - last_rusage->ru_nivcsw);
+            }
         }
-        if (rusage.ru_inblock > last_rusage->ru_inblock
-            || rusage.ru_oublock > last_rusage->ru_oublock) {
-            VLOG_WARN("disk: %ld reads, %ld writes",
-                      rusage.ru_inblock - last_rusage->ru_inblock,
-                      rusage.ru_oublock - last_rusage->ru_oublock);
-        }
-        if (rusage.ru_nvcsw > last_rusage->ru_nvcsw
-            || rusage.ru_nivcsw > last_rusage->ru_nivcsw) {
-            VLOG_WARN("context switches: %ld voluntary, %ld involuntary",
-                      rusage.ru_nvcsw - last_rusage->ru_nvcsw,
-                      rusage.ru_nivcsw - last_rusage->ru_nivcsw);
-        }
-
         /* Care should be taken in the value chosen for logging.  Depending
          * on the configuration, syslog can write changes synchronously,
          * which can cause the coverage messages to take longer to log