/*
- * Copyright (c) 2008, 2009, 2010 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <errno.h>
#include <poll.h>
#include <signal.h>
+#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include "coverage.h"
+#include "dummy.h"
#include "fatal-signal.h"
+#include "signals.h"
+#include "unixctl.h"
#include "util.h"
-
#include "vlog.h"
-#define THIS_MODULE VLM_timeval
+
+VLOG_DEFINE_THIS_MODULE(timeval);
/* The clock to use for measuring time intervals. This is CLOCK_MONOTONIC by
* preference, but on systems that don't have a monotonic clock we fall back
* to CLOCK_REALTIME. */
static clockid_t monotonic_clock;
-/* Has a timer tick occurred?
+/* Has a timer tick occurred? Only relevant if CACHE_TIME is 1.
*
* We initialize these to true to force time_init() to get called on the first
* call to time_msec() or another function that queries the current time. */
static struct timespec wall_time;
static struct timespec monotonic_time;
+/* The monotonic time at which the time module was initialized. */
+static long long int boot_time;
+
+/* Features for use by unit tests. */
+static struct timespec warp_offset; /* Offset added to monotonic_time. */
+static bool time_stopped; /* Disables real-time updates, if true. */
+
/* Time at which to die with SIGALRM (if not TIME_MIN). */
static time_t deadline = TIME_MIN;
static time_t time_add(time_t, time_t);
static void block_sigalrm(sigset_t *);
static void unblock_sigalrm(const sigset_t *);
-static void log_poll_interval(long long int last_wakeup,
- const struct rusage *last_rusage);
+static void log_poll_interval(long long int last_wakeup);
+static struct rusage *get_recent_rusage(void);
+static void refresh_rusage(void);
+static void timespec_add(struct timespec *sum,
+ const struct timespec *a, const struct timespec *b);
-/* Initializes the timetracking module.
- *
- * It is not necessary to call this function directly, because other time
- * functions will call it automatically, but it doesn't hurt. */
-void
+/* Initializes the timetracking module, if not already initialized. */
+static void
time_init(void)
{
static bool inited;
VLOG_DBG("monotonic timer not available");
}
- set_up_signal(SA_RESTART);
- set_up_timer();
+ if (CACHE_TIME) {
+ set_up_signal(SA_RESTART);
+ set_up_timer();
+ }
+
+ boot_time = time_msec();
}
static void
sa.sa_handler = sigalrm_handler;
sigemptyset(&sa.sa_mask);
sa.sa_flags = flags;
- if (sigaction(SIGALRM, &sa, NULL)) {
- ovs_fatal(errno, "sigaction(SIGALRM) failed");
- }
+ xsigaction(SIGALRM, &sa, NULL);
}
/* Remove SA_RESTART from the flags for SIGALRM, so that any system call that
struct itimerspec itimer;
if (timer_create(monotonic_clock, NULL, &timer_id)) {
- ovs_fatal(errno, "timer_create failed");
+ VLOG_FATAL("timer_create failed (%s)", strerror(errno));
}
itimer.it_interval.tv_sec = 0;
itimer.it_value = itimer.it_interval;
if (timer_settime(timer_id, 0, &itimer, NULL)) {
- ovs_fatal(errno, "timer_settime failed");
+ VLOG_FATAL("timer_settime failed (%s)", strerror(errno));
}
}
time_postfork(void)
{
time_init();
- set_up_timer();
+
+ if (CACHE_TIME) {
+ set_up_timer();
+ } else {
+        /* If we are not caching kernel time, the only reason the timer should
+         * exist is that time_alarm() was called and 'deadline' is set. */
+ if (deadline != TIME_MIN) {
+ set_up_timer();
+ }
+ }
}
static void
{
time_init();
- if (monotonic_clock == CLOCK_MONOTONIC) {
- clock_gettime(monotonic_clock, &monotonic_time);
- } else {
- refresh_wall_if_ticked();
- monotonic_time = wall_time;
- }
+ if (!time_stopped) {
+ if (monotonic_clock == CLOCK_MONOTONIC) {
+ clock_gettime(monotonic_clock, &monotonic_time);
+ } else {
+ refresh_wall_if_ticked();
+ monotonic_time = wall_time;
+ }
+ timespec_add(&monotonic_time, &monotonic_time, &warp_offset);
- monotonic_tick = false;
+ monotonic_tick = false;
+ }
}
/* Forces a refresh of the current time from the kernel. It is not usually
* necessary to call this function, since the time will be refreshed
- * automatically at least every TIME_UPDATE_INTERVAL milliseconds. */
+ * automatically at least every TIME_UPDATE_INTERVAL milliseconds. If
+ * CACHE_TIME is 0, we will always refresh the current time, so this
+ * function has no effect. */
void
time_refresh(void)
{
sigset_t oldsigs;
time_init();
+
block_sigalrm(&oldsigs);
deadline = secs ? time_add(time_now(), secs) : TIME_MIN;
unblock_sigalrm(&oldsigs);
+
+ if (!CACHE_TIME) {
+        /* If we aren't timing the gaps between kernel time refreshes, we need
+         * to start the timer now. */
+ set_up_signal(SA_RESTART);
+ set_up_timer();
+ }
}
/* Like poll(), except:
+ *
+ * - The timeout is specified as an absolute time, as defined by
+ * time_msec(), instead of a duration.
*
* - On error, returns a negative error code (instead of setting errno).
*
* - If interrupted by a signal, retries automatically until the original
- * 'timeout' expires. (Because of this property, this function will
+ * timeout is reached. (Because of this property, this function will
* never return -EINTR.)
*
* - As a side effect, refreshes the current time (like time_refresh()).
- */
+ *
+ * Stores the number of milliseconds elapsed during poll in '*elapsed'. */
int
-time_poll(struct pollfd *pollfds, int n_pollfds, int timeout)
+time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when,
+ int *elapsed)
{
static long long int last_wakeup;
- static struct rusage last_rusage;
long long int start;
sigset_t oldsigs;
bool blocked;
int retval;
time_refresh();
- log_poll_interval(last_wakeup, &last_rusage);
+ log_poll_interval(last_wakeup);
coverage_clear();
start = time_msec();
blocked = false;
for (;;) {
+ long long int now = time_msec();
int time_left;
- if (timeout > 0) {
- long long int elapsed = time_msec() - start;
- time_left = timeout >= elapsed ? timeout - elapsed : 0;
+
+ if (now >= timeout_when) {
+ time_left = 0;
+ } else if ((unsigned long long int) timeout_when - now > INT_MAX) {
+ time_left = INT_MAX;
} else {
- time_left = timeout;
+ time_left = timeout_when - now;
}
retval = poll(pollfds, n_pollfds, time_left);
unblock_sigalrm(&oldsigs);
}
last_wakeup = time_msec();
- getrusage(RUSAGE_SELF, &last_rusage);
+ refresh_rusage();
+ *elapsed = last_wakeup - start;
return retval;
}
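/* Illustrative usage sketch (not part of this change): a caller that used to
 * pass a relative timeout, e.g. 500 ms, now computes an absolute deadline
 * from time_msec() and receives the elapsed time back through 'elapsed':
 *
 *     int elapsed;
 *     int retval = time_poll(fds, n_fds, time_msec() + 500, &elapsed);
 *     if (retval < 0) {
 *         VLOG_WARN("poll failed (%s)", strerror(-retval));
 *     }
 *
 * Here 'fds' and 'n_fds' stand for a populated pollfd array and its length;
 * a deadline at or before the current time makes the call nonblocking. */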
static void
refresh_wall_if_ticked(void)
{
- if (wall_tick) {
+ if (!CACHE_TIME || wall_tick) {
refresh_wall();
}
}
static void
refresh_monotonic_if_ticked(void)
{
- if (monotonic_tick) {
+ if (!CACHE_TIME || monotonic_tick) {
refresh_monotonic();
}
}
sigset_t sigalrm;
sigemptyset(&sigalrm);
sigaddset(&sigalrm, SIGALRM);
- if (sigprocmask(SIG_BLOCK, &sigalrm, oldsigs)) {
- ovs_fatal(errno, "sigprocmask");
- }
+ xsigprocmask(SIG_BLOCK, &sigalrm, oldsigs);
}
static void
unblock_sigalrm(const sigset_t *oldsigs)
{
- if (sigprocmask(SIG_SETMASK, oldsigs, NULL)) {
- ovs_fatal(errno, "sigprocmask");
- }
+ xsigprocmask(SIG_SETMASK, oldsigs, NULL);
}
long long int
return (long long int) tv->tv_sec * 1000 + tv->tv_usec / 1000;
}
+/* Returns the monotonic time at which the "time" module was initialized, in
+ * milliseconds. */
+long long int
+time_boot_msec(void)
+{
+ time_init();
+ return boot_time;
+}
+
+void
+xgettimeofday(struct timeval *tv)
+{
+ if (gettimeofday(tv, NULL) == -1) {
+ VLOG_FATAL("gettimeofday failed (%s)", strerror(errno));
+ }
+}
+
static long long int
timeval_diff_msec(const struct timeval *a, const struct timeval *b)
{
}
static void
-log_poll_interval(long long int last_wakeup, const struct rusage *last_rusage)
+timespec_add(struct timespec *sum,
+ const struct timespec *a,
+ const struct timespec *b)
+{
+ struct timespec tmp;
+
+ tmp.tv_sec = a->tv_sec + b->tv_sec;
+ tmp.tv_nsec = a->tv_nsec + b->tv_nsec;
+ if (tmp.tv_nsec >= 1000 * 1000 * 1000) {
+ tmp.tv_nsec -= 1000 * 1000 * 1000;
+ tmp.tv_sec++;
+ }
+
+ *sum = tmp;
+}
+
+static void
+log_poll_interval(long long int last_wakeup)
{
static unsigned int mean_interval; /* In 16ths of a millisecond. */
static unsigned int n_samples;
now = time_msec();
interval = MIN(10000, now - last_wakeup) << 4;
- /* Warn if we took too much time between polls. */
- if (n_samples > 10 && interval > mean_interval * 8) {
- struct rusage rusage;
-
- getrusage(RUSAGE_SELF, &rusage);
- VLOG_WARN("%lld ms poll interval (%lld ms user, %lld ms system) "
- "is over %u times the weighted mean interval %u ms "
- "(%u samples)",
- now - last_wakeup,
- timeval_diff_msec(&rusage.ru_utime, &last_rusage->ru_utime),
- timeval_diff_msec(&rusage.ru_stime, &last_rusage->ru_stime),
- interval / mean_interval,
- (mean_interval + 8) / 16, n_samples);
- if (rusage.ru_minflt > last_rusage->ru_minflt
- || rusage.ru_majflt > last_rusage->ru_majflt) {
- VLOG_WARN("faults: %ld minor, %ld major",
- rusage.ru_minflt - last_rusage->ru_minflt,
- rusage.ru_majflt - last_rusage->ru_majflt);
+ /* Warn if we took too much time between polls: at least 50 ms and at least
+ * 8X the mean interval. */
+ if (n_samples > 10 && interval > mean_interval * 8 && interval > 50 * 16) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 3);
+
+ if (!VLOG_DROP_WARN(&rl)) {
+ const struct rusage *last_rusage = get_recent_rusage();
+ struct rusage rusage;
+
+ getrusage(RUSAGE_SELF, &rusage);
+ VLOG_WARN("%lld ms poll interval (%lld ms user, %lld ms system) "
+ "is over %u times the weighted mean interval %u ms "
+ "(%u samples)",
+ now - last_wakeup,
+ timeval_diff_msec(&rusage.ru_utime,
+ &last_rusage->ru_utime),
+ timeval_diff_msec(&rusage.ru_stime,
+ &last_rusage->ru_stime),
+ interval / mean_interval,
+ (mean_interval + 8) / 16, n_samples);
+ if (rusage.ru_minflt > last_rusage->ru_minflt
+ || rusage.ru_majflt > last_rusage->ru_majflt) {
+ VLOG_WARN("faults: %ld minor, %ld major",
+ rusage.ru_minflt - last_rusage->ru_minflt,
+ rusage.ru_majflt - last_rusage->ru_majflt);
+ }
+ if (rusage.ru_inblock > last_rusage->ru_inblock
+ || rusage.ru_oublock > last_rusage->ru_oublock) {
+ VLOG_WARN("disk: %ld reads, %ld writes",
+ rusage.ru_inblock - last_rusage->ru_inblock,
+ rusage.ru_oublock - last_rusage->ru_oublock);
+ }
+ if (rusage.ru_nvcsw > last_rusage->ru_nvcsw
+ || rusage.ru_nivcsw > last_rusage->ru_nivcsw) {
+ VLOG_WARN("context switches: %ld voluntary, %ld involuntary",
+ rusage.ru_nvcsw - last_rusage->ru_nvcsw,
+ rusage.ru_nivcsw - last_rusage->ru_nivcsw);
+ }
}
- if (rusage.ru_inblock > last_rusage->ru_inblock
- || rusage.ru_oublock > last_rusage->ru_oublock) {
- VLOG_WARN("disk: %ld reads, %ld writes",
- rusage.ru_inblock - last_rusage->ru_inblock,
- rusage.ru_oublock - last_rusage->ru_oublock);
- }
- if (rusage.ru_nvcsw > last_rusage->ru_nvcsw
- || rusage.ru_nivcsw > last_rusage->ru_nivcsw) {
- VLOG_WARN("context switches: %ld voluntary, %ld involuntary",
- rusage.ru_nvcsw - last_rusage->ru_nvcsw,
- rusage.ru_nivcsw - last_rusage->ru_nivcsw);
- }
-
- /* Care should be taken in the value chosen for logging. Depending
- * on the configuration, syslog can write changes synchronously,
- * which can cause the coverage messages to take longer to log
- * than the processing delay that triggered it. */
- coverage_log(VLL_INFO, true);
+ coverage_log();
}
/* Update exponentially weighted moving average. With these parameters, a
mean_interval = interval;
}
}
+\f
+/* CPU usage tracking. */
+
+struct cpu_usage {
+ long long int when; /* Time that this sample was taken. */
+ unsigned long long int cpu; /* Total user+system CPU usage when sampled. */
+};
+
+static struct rusage recent_rusage;
+static struct cpu_usage older = { LLONG_MIN, 0 };
+static struct cpu_usage newer = { LLONG_MIN, 0 };
+static int cpu_usage = -1;
+
+static struct rusage *
+get_recent_rusage(void)
+{
+ return &recent_rusage;
+}
+
+static void
+refresh_rusage(void)
+{
+ long long int now;
+
+ now = time_msec();
+ getrusage(RUSAGE_SELF, &recent_rusage);
+
+ if (now >= newer.when + 3 * 1000) {
+ older = newer;
+ newer.when = now;
+ newer.cpu = (timeval_to_msec(&recent_rusage.ru_utime) +
+ timeval_to_msec(&recent_rusage.ru_stime));
+
+ if (older.when != LLONG_MIN && newer.cpu > older.cpu) {
+ unsigned int dividend = newer.cpu - older.cpu;
+ unsigned int divisor = (newer.when - older.when) / 100;
+ cpu_usage = divisor > 0 ? dividend / divisor : -1;
+ } else {
+ cpu_usage = -1;
+ }
+ }
+}
+
+/* Returns an estimate of this process's CPU usage, as a percentage, over the
+ * past few seconds of wall-clock time. Returns -1 if no estimate is available
+ * (which will happen if the process has not been running long enough to have
+ * an estimate, and can happen for other reasons as well). */
+int
+get_cpu_usage(void)
+{
+ return cpu_usage;
+}
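/* Worked example (illustrative): refresh_rusage() takes CPU samples at least
 * three seconds apart.  If the newer sample shows 600 ms more combined
 * user+system CPU time than the older one across a 3000 ms wall-clock gap:
 *
 *     dividend  = 600;               (CPU milliseconds consumed)
 *     divisor   = 3000 / 100 = 30;   (wall-clock gap in hundredths of seconds)
 *     cpu_usage = 600 / 30   = 20;   (so get_cpu_usage() reports about 20%)
 */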
+\f
+/* Unixctl interface. */
+
+/* "time/stop" stops the monotonic time returned by e.g. time_msec() from
+ * advancing, except due to later calls to "time/warp". */
+static void
+timeval_stop_cb(struct unixctl_conn *conn,
+ int argc OVS_UNUSED, const char *argv[] OVS_UNUSED,
+ void *aux OVS_UNUSED)
+{
+ time_stopped = true;
+ unixctl_command_reply(conn, NULL);
+}
+
+/* "time/warp MSECS" advances the current monotonic time by the specified
+ * number of milliseconds. Unless "time/stop" has also been executed, the
+ * monotonic clock continues to tick forward at the normal rate afterward.
+ *
+ * Does not affect wall clock readings. */
+static void
+timeval_warp_cb(struct unixctl_conn *conn,
+ int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED)
+{
+ struct timespec ts;
+ int msecs;
+
+ msecs = atoi(argv[1]);
+ if (msecs <= 0) {
+ unixctl_command_reply_error(conn, "invalid MSECS");
+ return;
+ }
+
+ ts.tv_sec = msecs / 1000;
+ ts.tv_nsec = (msecs % 1000) * 1000 * 1000;
+ timespec_add(&warp_offset, &warp_offset, &ts);
+ timespec_add(&monotonic_time, &monotonic_time, &ts);
+ unixctl_command_reply(conn, "warped");
+}
+
+void
+timeval_dummy_register(void)
+{
+ unixctl_command_register("time/stop", "", 0, 0, timeval_stop_cb, NULL);
+ unixctl_command_register("time/warp", "MSECS", 1, 1,
+ timeval_warp_cb, NULL);
+}
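/* Illustrative usage (assuming the daemon enables dummy support so that
 * timeval_dummy_register() runs): these commands can then be driven through
 * ovs-appctl, for example:
 *
 *     ovs-appctl time/stop
 *     ovs-appctl time/warp 2000
 *
 * which freezes the monotonic clock and then advances it by two seconds. */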