1 # Copyright (c) 2010, 2011 Nicira Networks
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
25 import ovs.fatal_signal
28 import ovs.socket_util
32 # --detach: Should we run in the background?
35 # --pidfile: Name of pidfile (null if none).
38 # Our pidfile's inode and device, if we have created one.
42 # --overwrite-pidfile: Create pidfile even if one already exists and is locked?
43 _overwrite_pidfile = False
45 # --no-chdir: Should we chdir to "/"?
48 # --monitor: Should a supervisory process monitor the daemon and restart it if
49 # it dies due to an error signal?
52 # File descriptor used by daemonize_start() and daemonize_complete().
def make_pidfile_name(name):
    """Computes the pidfile path that set_pidfile() would use for 'name'.

    A null or empty 'name' selects the default: ovs.util.PROGRAM_NAME followed
    by ".pid", inside ovs.dirs.RUNDIR.  Any other 'name' is handed to
    ovs.util.abs_file_name() with ovs.dirs.RUNDIR as the base directory."""
    use_default = name is None or name == ""
    if not use_default:
        return ovs.util.abs_file_name(ovs.dirs.RUNDIR, name)
    return "%s/%s.pid" % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME)
def set_pidfile(name):
    """Sets up a following call to daemonize() to create a pidfile named
    'name'.  The path is computed by make_pidfile_name(): if 'name' begins
    with '/', then it is treated as an absolute path.  Otherwise, it is taken
    relative to ovs.dirs.RUNDIR, which is $(prefix)/var/run by default.

    If 'name' is null or empty, then ovs.util.PROGRAM_NAME followed by ".pid"
    is used."""
    # Without the 'global' declaration the assignment below would only bind a
    # local, and the configured name would be lost when this function returns.
    global _pidfile
    _pidfile = make_pidfile_name(name)
77 """Returns an absolute path to the configured pidfile, or None if no
78 pidfile is configured."""
82 """Sets that we do not chdir to "/"."""
86 def is_chdir_enabled():
87 """Will we chdir to "/" as part of daemonizing?"""
def ignore_existing_pidfile():
    """Requests that an existing pidfile be replaced rather than being fatal.

    By default, daemonize() or daemonize_start() terminates the program with
    a message when a locked pidfile already exists.  Once this function has
    been called, such a pidfile is replaced instead, with a warning."""
    global _overwrite_pidfile
    _overwrite_pidfile = True
98 """Sets up a following call to daemonize() to detach from the foreground
99 session, running this process in the background."""
104 """Will daemonize() really detach?"""
108 """Sets up a following call to daemonize() to fork a supervisory process to
109 monitor the daemon and restart it if it dies due to an error signal."""
115 sys.stderr.write("%s\n" % msg)
119 """If a pidfile has been configured, creates it and stores the running
120 process's pid in it. Ensures that the pidfile will be deleted when the
124 # Create a temporary pidfile.
125 tmpfile = "%s.tmp%d" % (_pidfile, pid)
126 ovs.fatal_signal.add_file_to_unlink(tmpfile)
128 # This is global to keep Python from garbage-collecting and
129 # therefore closing our file after this function exits. That would
130 # unlock the lock for us, and we don't want that.
133 file = open(tmpfile, "w")
135 _fatal("%s: create failed (%s)" % (tmpfile, e.strerror))
138 s = os.fstat(file.fileno())
140 _fatal("%s: fstat failed (%s)" % (tmpfile, e.strerror))
143 file.write("%s\n" % pid)
146 _fatal("%s: write failed: %s" % (tmpfile, e.strerror))
149 fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
151 _fatal("%s: fcntl failed: %s" % (tmpfile, e.strerror))
153 # Rename or link it to the correct name.
154 if _overwrite_pidfile:
156 os.rename(tmpfile, _pidfile)
158 _fatal("failed to rename \"%s\" to \"%s\" (%s)"
159 % (tmpfile, _pidfile, e.strerror))
163 os.link(tmpfile, _pidfile)
167 if error == errno.EEXIST:
168 _check_already_running()
169 elif error != errno.EINTR:
172 _fatal("failed to link \"%s\" as \"%s\" (%s)"
173 % (tmpfile, _pidfile, os.strerror(error)))
176 # Ensure that the pidfile will get deleted on exit.
177 ovs.fatal_signal.add_file_to_unlink(_pidfile)
179 # Delete the temporary pidfile if it still exists.
180 if not _overwrite_pidfile:
181 error = ovs.fatal_signal.unlink_file_now(tmpfile)
183 _fatal("%s: unlink failed (%s)" % (tmpfile, os.strerror(error)))
187 _pidfile_dev = s.st_dev
188 _pidfile_ino = s.st_ino
191 """If configured with set_pidfile() or set_detach(), creates the pid file
192 and detaches from the foreground session."""
196 def _waitpid(pid, options):
199 return os.waitpid(pid, options)
201 if e.errno == errno.EINTR:
205 def _fork_and_wait_for_startup():
209 sys.stderr.write("pipe failed: %s\n" % os.strerror(e.errno))
215 sys.stderr.write("could not fork: %s\n" % os.strerror(e.errno))
219 # Running in parent process.
221 ovs.fatal_signal.fork()
229 if error != errno.EINTR:
232 retval, status = _waitpid(pid, 0)
233 if (retval == pid and
234 os.WIFEXITED(status) and os.WEXITSTATUS(status)):
235 # Child exited with an error. Convey the same error to
236 # our parent process as a courtesy.
237 sys.exit(os.WEXITSTATUS(status))
239 sys.stderr.write("fork child failed to signal startup\n")
244 # Running in parent process.
246 ovs.timeval.postfork()
247 #ovs.lockfile.postfork()
253 def _fork_notify_startup(fd):
255 error, bytes_written = ovs.socket_util.write_fully(fd, "0")
257 sys.stderr.write("could not write to pipe\n")
def _should_restart(status):
    """Decides whether a daemon that terminated with wait status 'status'
    should be restarted.

    Returns True if the daemon exited with RESTART_EXIT_CODE, or if it was
    killed by one of the error signals listed below.  Returns False for any
    other status."""
    # RESTART_EXIT_CODE is only read here, so no 'global' declaration is
    # needed.
    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == RESTART_EXIT_CODE:
        return True

    if not os.WIFSIGNALED(status):
        return False

    # Signals are looked up by name because not every platform defines all of
    # them; a missing one yields None, which never equals a signal number.
    termsig = os.WTERMSIG(status)
    for signame in ("SIGABRT", "SIGALRM", "SIGBUS", "SIGFPE", "SIGILL",
                    "SIGPIPE", "SIGSEGV", "SIGXCPU", "SIGXFSZ"):
        if termsig == getattr(signal, signame, None):
            return True
    return False
274 def _monitor_daemon(daemon_pid):
275 # XXX should log daemon's stderr output at startup time
276 # XXX should use setproctitle module if available
279 retval, status = _waitpid(daemon_pid, 0)
281 sys.stderr.write("waitpid failed\n")
283 elif retval == daemon_pid:
284 status_msg = ("pid %d died, %s"
285 % (daemon_pid, ovs.process.status_msg(status)))
287 if _should_restart(status):
288 if os.WCOREDUMP(status):
289 # Disable further core dumps to save disk space.
291 resource.setrlimit(resource.RLIMIT_CORE, (0, 0))
292 except resource.error:
293 logging.warning("failed to disable core dumps")
295 # Throttle restarts to no more than once every 10 seconds.
296 if (last_restart is not None and
297 ovs.timeval.msec() < last_restart + 10000):
298 logging.warning("%s, waiting until 10 seconds since last "
299 "restart" % status_msg)
301 now = ovs.timeval.msec()
302 wakeup = last_restart + 10000
305 print "sleep %f" % ((wakeup - now) / 1000.0)
306 time.sleep((wakeup - now) / 1000.0)
307 last_restart = ovs.timeval.msec()
309 logging.error("%s, restarting" % status_msg)
310 daemon_pid = _fork_and_wait_for_startup()
314 logging.info("%s, exiting" % status_msg)
317 # Running in new daemon process.
319 def _close_standard_fds():
320 """Close stdin, stdout, stderr. If we're started from e.g. an SSH session,
321 then this keeps us from holding that session open artificially."""
322 null_fd = ovs.socket_util.get_null_fd()
328 def daemonize_start():
329 """If daemonization is configured, then starts daemonization, by forking
330 and returning in the child process. The parent process hangs around until
331 the child lets it know either that it completed startup successfully (by
332 calling daemonize_complete()) or that it failed to start up (by exiting with a
333 nonzero exit code)."""
336 if _fork_and_wait_for_startup() > 0:
337 # Running in parent process.
339 # Running in daemon or monitor process.
342 saved_daemonize_fd = _daemonize_fd
343 daemon_pid = _fork_and_wait_for_startup()
345 # Running in monitor process.
346 _fork_notify_startup(saved_daemonize_fd)
347 _close_standard_fds()
348 _monitor_daemon(daemon_pid)
349 # Running in daemon process
354 def daemonize_complete():
355 """If daemonization is configured, then this function notifies the parent
356 process that the child process has completed startup successfully."""
357 _fork_notify_startup(_daemonize_fd)
363 _close_standard_fds()
368 --detach run in background as daemon
369 --no-chdir do not chdir to '/'
370 --pidfile[=FILE] create pidfile (default: %s/%s.pid)
371 --overwrite-pidfile with --pidfile, start even if already running
372 """ % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME))
374 def __read_pidfile(pidfile, delete_if_stale):
375 if _pidfile_dev is not None:
378 if s.st_ino == _pidfile_ino and s.st_dev == _pidfile_dev:
379 # It's our own pidfile. We can't afford to open it,
380 # because closing *any* fd for a file that a process
381 # has locked also releases all the locks on that file.
383 # Fortunately, we know the associated pid anyhow.
389 file = open(pidfile, "r+")
391 if e.errno == errno.ENOENT and delete_if_stale:
393 logging.warning("%s: open: %s" % (pidfile, e.strerror))
396 # Python fcntl doesn't directly support F_GETLK so we have to just try
399 fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
401 # pidfile exists but wasn't locked by anyone. Now we have the lock.
402 if not delete_if_stale:
404 logging.warning("%s: pid file is stale" % pidfile)
407 # Is the file we have locked still named 'pidfile'?
411 s2 = os.fstat(file.fileno())
412 if s.st_ino != s2.st_ino or s.st_dev != s2.st_dev:
417 logging.warning("%s: lost race to delete pidfile" % pidfile)
418 return -errno.ALREADY
420 # We won the right to delete the stale pidfile.
424 logging.warning("%s: failed to delete stale pidfile"
425 % (pidfile, e.strerror))
428 logging.debug("%s: deleted stale pidfile" % pidfile)
432 if e.errno not in [errno.EACCES, errno.EAGAIN]:
433 logging.warn("%s: fcntl: %s" % (pidfile, e.strerror))
436 # Someone else has the pidfile locked.
439 return int(file.readline())
441 logging.warning("%s: read: %s" % (pidfile, e.strerror))
444 logging.warning("%s does not contain a pid" % pidfile)
def read_pidfile(pidfile):
    """Opens 'pidfile' and reads the PID stored in it, without asking for a
    stale pidfile to be deleted.

    Returns the positive PID if successful, otherwise a negative errno
    value."""
    return __read_pidfile(pidfile, delete_if_stale=False)
def _check_already_running():
    """Reports a fatal error through _fatal() if the configured pidfile shows
    that another instance is running, or if the pidfile cannot be checked.

    __read_pidfile() is asked to delete the pidfile if it is stale.  A
    positive result is the PID read from the pidfile; a negative result is a
    negated errno value.  Any other result returns normally."""
    pid = __read_pidfile(_pidfile, True)
    if pid > 0:
        _fatal("%s: already running as pid %d, aborting" % (_pidfile, pid))
    elif pid < 0:
        # 'pid' holds -errno here; os.strerror() needs the positive value.
        _fatal("%s: pidfile check failed (%s), aborting"
               % (_pidfile, os.strerror(-pid)))
# XXX Python's getopt does not support options with optional arguments, so
# --pidfile (which takes no argument) and --pidfile-name (which takes one)
# have to be separate options.  Need to write our own getopt I guess.
LONG_OPTIONS = [
    "detach",
    "no-chdir",
    "pidfile",
    "pidfile-name=",
    "overwrite-pidfile",
    "monitor",
]
471 def parse_opt(option, arg):
472 if option == '--detach':
474 elif option == '--no-chdir':
476 elif option == '--pidfile':
478 elif option == '--pidfile-name':
480 elif option == '--overwrite-pidfile':
481 ignore_existing_pidfile()
482 elif option == '--monitor':