/*
- * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <config.h>
#include "daemon.h"
+#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
VLOG_DEFINE_THIS_MODULE(daemon);
/* --detach: Should we run in the background? */
-static bool detach;
+static bool detach; /* Was --detach specified? */
+static bool detached; /* Have we already detached? */
/* --pidfile: Name of pidfile (null if none). */
static char *pidfile;
* it dies due to an error signal? */
static bool monitor;
+/* For each of the standard file descriptors, whether to replace it by
+ * /dev/null (if false) or keep it for the daemon to use (if true). */
+static bool save_fds[3];
+
+static void check_already_running(void);
+static int lock_pidfile(FILE *, int command);
+
/* Returns the file name that would be used for a pidfile if 'name' were
* provided to set_pidfile(). The caller must free the returned string. */
char *
return chdir_;
}
-/* Normally, die_if_already_running() will terminate the program with a message
- * if a locked pidfile already exists. If this function is called,
- * die_if_already_running() will merely log a warning. */
+/* Normally, daemonize() or daemonize_start() will terminate the program with a
+ * message if a locked pidfile already exists. If this function is called, an
+ * existing pidfile will be replaced, with a warning. */
void
ignore_existing_pidfile(void)
{
monitor = true;
}
-/* If a locked pidfile exists, issue a warning message and, unless
- * ignore_existing_pidfile() has been called, terminate the program. */
+/* A daemon doesn't normally have any use for the file descriptors for stdin,
+ * stdout, and stderr after it detaches. To keep these file descriptors from
+ * e.g. holding an SSH session open, by default detaching replaces each of
+ * these file descriptors by /dev/null. But a few daemons expect the user to
+ * redirect stdout or stderr to a file, in which case it is desirable to keep
+ * these file descriptors. This function, therefore, disables replacing 'fd'
+ * by /dev/null when the daemon detaches.
+ *
+ * 'fd' must be STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO; any other value
+ * fails the assertion below. The request is recorded in save_fds[], indexed
+ * directly by 'fd'. */
void
-die_if_already_running(void)
+daemon_save_fd(int fd)
{
- pid_t pid;
- if (!pidfile) {
- return;
- }
- pid = read_pidfile_if_exists(pidfile);
- if (pid > 0) {
- if (!overwrite_pidfile) {
- VLOG_ERR("%s: %s already running as pid %ld, aborting",
- get_pidfile(), program_name, (long int) pid);
- ovs_fatal(0, "%s: already running as pid %ld",
- get_pidfile(), (long int) pid);
- } else {
- VLOG_WARN("%s: %s already running as pid %ld",
- get_pidfile(), program_name, (long int) pid);
- }
- }
+ assert(fd == STDIN_FILENO || fd == STDOUT_FILENO || fd == STDERR_FILENO);
+ save_fds[fd] = true;
}
/* If a pidfile has been configured, creates it and stores the running
static void
make_pidfile(void)
{
- if (pidfile) {
- /* Create pidfile via temporary file, so that observers never see an
- * empty pidfile or an unlocked pidfile. */
- long int pid = getpid();
- char *tmpfile;
- int fd;
+ long int pid = getpid();
+ struct stat s;
+ char *tmpfile;
+ FILE *file;
+ int error;
- tmpfile = xasprintf("%s.tmp%ld", pidfile, pid);
- fatal_signal_add_file_to_unlink(tmpfile);
- fd = open(tmpfile, O_CREAT | O_WRONLY | O_TRUNC, 0666);
- if (fd >= 0) {
- struct flock lck;
- lck.l_type = F_WRLCK;
- lck.l_whence = SEEK_SET;
- lck.l_start = 0;
- lck.l_len = 0;
- if (fcntl(fd, F_SETLK, &lck) != -1) {
- char *text = xasprintf("%ld\n", pid);
- if (write(fd, text, strlen(text)) == strlen(text)) {
- fatal_signal_add_file_to_unlink(pidfile);
- if (rename(tmpfile, pidfile) < 0) {
- VLOG_ERR("failed to rename \"%s\" to \"%s\": %s",
- tmpfile, pidfile, strerror(errno));
- fatal_signal_remove_file_to_unlink(pidfile);
- close(fd);
- } else {
- /* Keep 'fd' open to retain the lock. */
- struct stat s;
-
- if (!fstat(fd, &s)) {
- pidfile_dev = s.st_dev;
- pidfile_ino = s.st_ino;
- } else {
- VLOG_ERR("%s: fstat failed: %s",
- pidfile, strerror(errno));
- }
- }
- } else {
- VLOG_ERR("%s: write failed: %s", tmpfile, strerror(errno));
- close(fd);
- }
- free(text);
- } else {
- VLOG_ERR("%s: fcntl failed: %s", tmpfile, strerror(errno));
- close(fd);
+ /* Create a temporary pidfile. */
+ tmpfile = xasprintf("%s.tmp%ld", pidfile, pid);
+ fatal_signal_add_file_to_unlink(tmpfile);
+ file = fopen(tmpfile, "w+");
+ if (!file) {
+ VLOG_FATAL("%s: create failed (%s)", tmpfile, strerror(errno));
+ }
+
+ if (fstat(fileno(file), &s) == -1) {
+ VLOG_FATAL("%s: fstat failed (%s)", tmpfile, strerror(errno));
+ }
+
+ fprintf(file, "%ld\n", pid);
+ if (fflush(file) == EOF) {
+ VLOG_FATAL("%s: write failed (%s)", tmpfile, strerror(errno));
+ }
+
+ error = lock_pidfile(file, F_SETLK);
+ if (error) {
+ VLOG_FATAL("%s: fcntl(F_SETLK) failed (%s)", tmpfile, strerror(error));
+ }
+
+ /* Rename or link it to the correct name. */
+ if (overwrite_pidfile) {
+ if (rename(tmpfile, pidfile) < 0) {
+ VLOG_FATAL("failed to rename \"%s\" to \"%s\" (%s)",
+ tmpfile, pidfile, strerror(errno));
+ }
+ } else {
+ do {
+ error = link(tmpfile, pidfile) == -1 ? errno : 0;
+ if (error == EEXIST) {
+ check_already_running();
}
- } else {
- VLOG_ERR("%s: create failed: %s", tmpfile, strerror(errno));
+ } while (error == EINTR || error == EEXIST);
+ if (error) {
+ VLOG_FATAL("failed to link \"%s\" as \"%s\" (%s)",
+ tmpfile, pidfile, strerror(error));
+ }
+ }
+
+ /* Ensure that the pidfile will get deleted on exit. */
+ fatal_signal_add_file_to_unlink(pidfile);
+
+ /* Delete the temporary pidfile if it still exists. */
+ if (!overwrite_pidfile) {
+ error = fatal_signal_unlink_file_now(tmpfile);
+ if (error) {
+ VLOG_FATAL("%s: unlink failed (%s)", tmpfile, strerror(error));
}
- fatal_signal_remove_file_to_unlink(tmpfile);
- free(tmpfile);
}
+
+ /* Clean up.
+ *
+ * We don't close 'file' because its file descriptor must remain open to
+ * hold the lock. */
+ pidfile_dev = s.st_dev;
+ pidfile_ino = s.st_ino;
+ free(tmpfile);
free(pidfile);
pidfile = NULL;
}
daemonize_complete();
}
+/* Calls fork() and on success returns its return value. On failure, logs an
+ * error and exits unsuccessfully.
+ *
+ * Post-fork, but before returning, this function calls a few other functions
+ * that are generally useful if the child isn't planning to exec a new
+ * process:
+ *
+ * - In the parent, fatal_signal_fork().
+ *
+ * - In the child, time_postfork() and lockfile_postfork().
+ *
+ * Because the failure branch goes through VLOG_FATAL(), the caller in this
+ * file only handles a positive return (parent) and a zero return (child). */
+pid_t
+fork_and_clean_up(void)
+{
+ pid_t pid;
+
+ pid = fork();
+ if (pid > 0) {
+ /* Running in parent process. */
+ fatal_signal_fork();
+ } else if (!pid) {
+ /* Running in child process. */
+ time_postfork();
+ lockfile_postfork();
+ } else {
+ VLOG_FATAL("fork failed (%s)", strerror(errno));
+ }
+
+ return pid;
+}
+
+/* Forks, then:
+ *
+ * - In the parent, waits for the child to signal that it has completed its
+ * startup sequence. Then stores -1 in '*fdp' and returns the child's pid.
+ *
+ * - In the child, stores a fd in '*fdp' and returns 0. The caller should
+ * pass the fd to fork_notify_startup() after it finishes its startup
+ * sequence.
+ *
+ * If something goes wrong with the fork, logs a critical error and aborts the
+ * process. */
static pid_t
fork_and_wait_for_startup(int *fdp)
{
xpipe(fds);
- pid = fork();
+ pid = fork_and_clean_up();
if (pid > 0) {
/* Running in parent process. */
+ size_t bytes_read;
char c;
close(fds[1]);
- fatal_signal_fork();
- if (read(fds[0], &c, 1) != 1) {
+ if (read_fully(fds[0], &c, 1, &bytes_read) != 0) {
int retval;
int status;
retval = waitpid(pid, &status, 0);
} while (retval == -1 && errno == EINTR);
- if (retval == pid
- && WIFEXITED(status)
- && WEXITSTATUS(status)) {
- /* Child exited with an error. Convey the same error to
- * our parent process as a courtesy. */
- exit(WEXITSTATUS(status));
+ if (retval == pid) {
+ if (WIFEXITED(status) && WEXITSTATUS(status)) {
+ /* Child exited with an error. Convey the same error
+ * to our parent process as a courtesy. */
+ exit(WEXITSTATUS(status));
+ } else {
+ char *status_msg = process_status_msg(status);
+ VLOG_FATAL("fork child died before signaling startup (%s)",
+ status_msg);
+ }
+ } else if (retval < 0) {
+ VLOG_FATAL("waitpid failed (%s)", strerror(errno));
+ } else {
+ NOT_REACHED();
}
-
- VLOG_FATAL("fork child failed to signal startup (%s)",
- strerror(errno));
}
close(fds[0]);
*fdp = -1;
} else if (!pid) {
/* Running in child process. */
close(fds[0]);
- time_postfork();
- lockfile_postfork();
*fdp = fds[1];
- } else {
- VLOG_FATAL("fork failed (%s)", strerror(errno));
}
return pid;
monitor_daemon(pid_t daemon_pid)
{
/* XXX Should log daemon's stderr output at startup time. */
- const char *saved_program_name;
time_t last_restart;
char *status_msg;
int crashes;
- saved_program_name = program_name;
- program_name = xasprintf("monitor(%s)", program_name);
+ subprogram_name = "monitor";
status_msg = xstrdup("healthy");
last_restart = TIME_MIN;
crashes = 0;
int retval;
int status;
- proctitle_set("%s: monitoring pid %lu (%s)",
- saved_program_name, (unsigned long int) daemon_pid,
- status_msg);
+ proctitle_set("monitoring pid %lu (%s)",
+ (unsigned long int) daemon_pid, status_msg);
do {
retval = waitpid(daemon_pid, &status, 0);
/* Running in new daemon process. */
proctitle_restore();
- free((char *) program_name);
- program_name = saved_program_name;
+ subprogram_name = "";
}
-/* Close stdin, stdout, stderr. If we're started from e.g. an SSH session,
- * then this keeps us from holding that session open artificially. */
+/* Close standard file descriptors (except any that the client has requested we
+ * leave open by calling daemon_save_fd()). If we're started from e.g. an SSH
+ * session, then this keeps us from holding that session open artificially.
+ *
+ * Closing here means dup2()ing the descriptor from get_null_fd() over each
+ * standard descriptor. If get_null_fd() returns a negative value, no
+ * descriptor is replaced, but console logging is still turned off. */
static void
close_standard_fds(void)
{
int null_fd = get_null_fd();
if (null_fd >= 0) {
- dup2(null_fd, STDIN_FILENO);
- dup2(null_fd, STDOUT_FILENO);
- dup2(null_fd, STDERR_FILENO);
+ int fd;
+
+ for (fd = 0; fd < 3; fd++) { /* 0, 1, 2 index save_fds[] directly. */
+ if (!save_fds[fd]) {
+ dup2(null_fd, fd);
+ }
+ }
}
+
+ /* Disable logging to stderr to avoid wasting CPU time. */
+ vlog_set_levels(NULL, VLF_CONSOLE, VLL_OFF);
}
/* If daemonization is configured, then starts daemonization, by forking and
/* Running in daemon process. */
}
- make_pidfile();
+ if (pidfile) {
+ make_pidfile();
+ }
/* Make sure that the unixctl commands for vlog get registered in a
* daemon, even before the first log message. */
}
/* If daemonization is configured, then this function notifies the parent
- * process that the child process has completed startup successfully.
+ * process that the child process has completed startup successfully. It also
+ * calls daemonize_post_detach().
*
* Calling this function more than once has no additional effect. */
void
daemonize_complete(void)
{
- fork_notify_startup(daemonize_fd);
- daemonize_fd = -1;
+ if (!detached) {
+ detached = true; /* Latch first, so that repeat calls do nothing. */
+ fork_notify_startup(daemonize_fd);
+ daemonize_fd = -1;
+ daemonize_post_detach();
+ }
+}
+
+/* If daemonization is configured, then this function does traditional Unix
+ * daemonization behavior: join a new session, chdir to the root (if not
+ * disabled), and close the standard file descriptors.
+ *
+ * It only makes sense to call this function as part of an implementation of a
+ * special daemon subprocess. A normal daemon should just call
+ * daemonize_complete().
+ *
+ * This function does not modify 'detach', so calling it again repeats these
+ * steps; daemonize_complete() avoids that by checking its 'detached' flag.
+ * The return value of setsid() is not checked, and the result of chdir() is
+ * handed to ignore() rather than examined. */
+void
+daemonize_post_detach(void)
+{
if (detach) {
setsid();
if (chdir_) {
ignore(chdir("/"));
}
close_standard_fds();
- detach = false;
}
}
ovs_rundir(), program_name);
}
+static int
+lock_pidfile__(FILE *file, int command, struct flock *lck)
+{
+ int error; /* 0 on success, otherwise the errno value left by fcntl(). */
+
+ lck->l_type = F_WRLCK; /* Always describes a write lock. */
+ lck->l_whence = SEEK_SET;
+ lck->l_start = 0;
+ lck->l_len = 0; /* With l_start 0 from SEEK_SET: the whole file. */
+ lck->l_pid = 0;
+
+ do { /* Retry for as long as the call is interrupted by a signal. */
+ error = fcntl(fileno(file), command, lck) == -1 ? errno : 0;
+ } while (error == EINTR);
+ return error; /* '*lck' keeps whatever fcntl() wrote, e.g. for F_GETLK. */
+}
+
+static int
+lock_pidfile(FILE *file, int command)
+{
+ struct flock lck; /* Scratch lock description; discarded on return. */
+
+ return lock_pidfile__(file, command, &lck); /* 0 or an errno value. */
+}
+
static pid_t
-read_pidfile__(const char *pidfile, bool must_exist)
+read_pidfile__(const char *pidfile, bool delete_if_stale)
{
- char line[128];
+ struct stat s, s2;
struct flock lck;
- struct stat s;
+ char line[128];
FILE *file;
int error;
return getpid();
}
- file = fopen(pidfile, "r");
+ file = fopen(pidfile, "r+");
if (!file) {
- if (errno == ENOENT && !must_exist) {
+ if (errno == ENOENT && delete_if_stale) {
return 0;
}
error = errno;
goto error;
}
- lck.l_type = F_WRLCK;
- lck.l_whence = SEEK_SET;
- lck.l_start = 0;
- lck.l_len = 0;
- lck.l_pid = 0;
- if (fcntl(fileno(file), F_GETLK, &lck)) {
- error = errno;
+ error = lock_pidfile__(file, F_GETLK, &lck);
+ if (error) {
VLOG_WARN("%s: fcntl: %s", pidfile, strerror(error));
goto error;
}
if (lck.l_type == F_UNLCK) {
- error = ESRCH;
- VLOG_WARN("%s: pid file is not locked", pidfile);
- goto error;
+ /* pidfile exists but it isn't locked by anyone. We need to delete it
+ * so that a new pidfile can go in its place. But just calling
+ * unlink(pidfile) makes a nasty race: what if someone else unlinks it
+ * before we do and then replaces it by a valid pidfile? We'd unlink
+ * their valid pidfile. We do a little dance to avoid the race, by
+ * locking the invalid pidfile. Only one process can have the invalid
+ * pidfile locked, and only that process has the right to unlink it. */
+ if (!delete_if_stale) {
+ error = ESRCH;
+ VLOG_DBG("%s: pid file is stale", pidfile);
+ goto error;
+ }
+
+ /* Get the lock. */
+ error = lock_pidfile(file, F_SETLK);
+ if (error) {
+ /* We lost a race with someone else doing the same thing. */
+ VLOG_WARN("%s: lost race to lock pidfile", pidfile);
+ goto error;
+ }
+
+ /* Is the file we have locked still named 'pidfile'? */
+ if (stat(pidfile, &s) || fstat(fileno(file), &s2)
+ || s.st_ino != s2.st_ino || s.st_dev != s2.st_dev) {
+ /* No. We lost a race with someone else who got the lock before
+ * us, deleted the pidfile, and closed it (releasing the lock). */
+ error = EALREADY;
+ VLOG_WARN("%s: lost race to delete pidfile", pidfile);
+ goto error;
+ }
+
+ /* We won the right to delete the stale pidfile. */
+ if (unlink(pidfile)) {
+ error = errno;
+ VLOG_WARN("%s: failed to delete stale pidfile (%s)",
+ pidfile, strerror(error));
+ goto error;
+ }
+ VLOG_DBG("%s: deleted stale pidfile", pidfile);
+ fclose(file);
+ return 0;
}
if (!fgets(line, sizeof line, file)) {
}
if (lck.l_pid != strtoul(line, NULL, 10)) {
+ /* The process that has the pidfile locked is not the process that
+ * created it. It must be stale, with the process that has it locked
+ * preparing to delete it. */
error = ESRCH;
- VLOG_WARN("l_pid (%ld) != %s pid (%s)",
- (long int) lck.l_pid, pidfile, line);
+ VLOG_WARN("%s: stale pidfile for pid %s being deleted by pid %ld",
+ pidfile, line, (long int) lck.l_pid);
goto error;
}
pid_t
read_pidfile(const char *pidfile)
{
- return read_pidfile__(pidfile, true);
+ return read_pidfile__(pidfile, false); /* Never deletes a stale pidfile. */
}
-
-/* Opens and reads a PID from 'pidfile', if it exists. Returns 0 if 'pidfile'
- * doesn't exist, the positive PID if successful, otherwise a negative errno
- * value. */
-pid_t
-read_pidfile_if_exists(const char *pidfile)
+/* Checks whether a process holding the file-scope 'pidfile' is already
+ * running and, if so, aborts. If 'pidfile' is stale, deletes it.
+ *
+ * Relies on read_pidfile__() with 'delete_if_stale' set: a positive result is
+ * the running process's pid, zero lets the caller proceed, and a negative
+ * result is a negated errno value, which is also treated as fatal here. */
+static void
+check_already_running(void)
{
- return read_pidfile__(pidfile, false);
+ long int pid = read_pidfile__(pidfile, true);
+ if (pid > 0) {
+ VLOG_FATAL("%s: already running as pid %ld, aborting", pidfile, pid);
+ } else if (pid < 0) {
+ VLOG_FATAL("%s: pidfile check failed (%s), aborting",
+ pidfile, strerror(-pid));
+ }
}