1 # Copyright (c) 2010, 2011 Nicira Networks
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
25 import ovs.fatal_signal
28 import ovs.socket_util
32 # --detach: Should we run in the background?
35 # --pidfile: Name of pidfile (null if none).
38 # Our pidfile's inode and device, if we have created one.
42 # --overwrite-pidfile: Create pidfile even if one already exists and is locked?
43 _overwrite_pidfile = False
45 # --no-chdir: Should we chdir to "/"?
48 # --monitor: Should a supervisory process monitor the daemon and restart it if
49 # it dies due to an error signal?
52 # File descriptor used by daemonize_start() and daemonize_complete().
def make_pidfile_name(name):
    """Computes the pidfile path that set_pidfile() would use for 'name'.

    When 'name' is None or the empty string, the result is
    "<ovs.dirs.RUNDIR>/<ovs.util.PROGRAM_NAME>.pid".  Any other 'name' is
    passed to ovs.util.abs_file_name() together with ovs.dirs.RUNDIR."""
    if name is not None and name != "":
        return ovs.util.abs_file_name(ovs.dirs.RUNDIR, name)
    return "%s/%s.pid" % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME)
def set_pidfile(name):
    """Arranges for a following call to daemonize() to create a pidfile.

    The path is computed by make_pidfile_name().  If 'name' is null, then
    ovs.util.PROGRAM_NAME followed by ".pid" is used, inside ovs.dirs.RUNDIR
    ($(prefix)/var/run by default).  If 'name' begins with '/', it is treated
    as an absolute path; otherwise it is taken relative to ovs.dirs.RUNDIR."""
    global _pidfile
    _pidfile = make_pidfile_name(name)
80 """Returns an absolute path to the configured pidfile, or None if no
81 pidfile is configured."""
86 """Sets that we do not chdir to "/"."""
91 def is_chdir_enabled():
92 """Will we chdir to "/" as part of daemonizing?"""
def ignore_existing_pidfile():
    """Allows an existing, locked pidfile to be replaced.

    By default daemonize() or daemonize_start() terminates the program with a
    message when a locked pidfile already exists.  Once this function has been
    called, the existing pidfile is replaced instead, with a warning."""
    global _overwrite_pidfile
    _overwrite_pidfile = True
105 """Sets up a following call to daemonize() to detach from the foreground
106 session, running this process in the background."""
112 """Will daemonize() really detach?"""
117 """Sets up a following call to daemonize() to fork a supervisory process to
118 monitor the daemon and restart it if it dies due to an error signal."""
125 sys.stderr.write("%s\n" % msg)
130 """If a pidfile has been configured, creates it and stores the running
131 process's pid in it. Ensures that the pidfile will be deleted when the
135 # Create a temporary pidfile.
136 tmpfile = "%s.tmp%d" % (_pidfile, pid)
137 ovs.fatal_signal.add_file_to_unlink(tmpfile)
139 # This is global to keep Python from garbage-collecting and
140 # therefore closing our file after this function exits. That would
141 # unlock the lock for us, and we don't want that.
144 file_handle = open(tmpfile, "w")
146 _fatal("%s: create failed (%s)" % (tmpfile, e.strerror))
149 s = os.fstat(file_handle.fileno())
151 _fatal("%s: fstat failed (%s)" % (tmpfile, e.strerror))
154 file_handle.write("%s\n" % pid)
157 _fatal("%s: write failed: %s" % (tmpfile, e.strerror))
160 fcntl.lockf(file_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
162 _fatal("%s: fcntl failed: %s" % (tmpfile, e.strerror))
164 # Rename or link it to the correct name.
165 if _overwrite_pidfile:
167 os.rename(tmpfile, _pidfile)
169 _fatal("failed to rename \"%s\" to \"%s\" (%s)"
170 % (tmpfile, _pidfile, e.strerror))
174 os.link(tmpfile, _pidfile)
178 if error == errno.EEXIST:
179 _check_already_running()
180 elif error != errno.EINTR:
183 _fatal("failed to link \"%s\" as \"%s\" (%s)"
184 % (tmpfile, _pidfile, os.strerror(error)))
186 # Ensure that the pidfile will get deleted on exit.
187 ovs.fatal_signal.add_file_to_unlink(_pidfile)
189 # Delete the temporary pidfile if it still exists.
190 if not _overwrite_pidfile:
191 error = ovs.fatal_signal.unlink_file_now(tmpfile)
193 _fatal("%s: unlink failed (%s)" % (tmpfile, os.strerror(error)))
197 _pidfile_dev = s.st_dev
198 _pidfile_ino = s.st_ino
202 """If configured with set_pidfile() or set_detach(), creates the pid file
203 and detaches from the foreground session."""
208 def _waitpid(pid, options):
211 return os.waitpid(pid, options)
213 if e.errno == errno.EINTR:
218 def _fork_and_wait_for_startup():
222 sys.stderr.write("pipe failed: %s\n" % os.strerror(e.errno))
228 sys.stderr.write("could not fork: %s\n" % os.strerror(e.errno))
232 # Running in parent process.
234 ovs.fatal_signal.fork()
242 if error != errno.EINTR:
245 retval, status = _waitpid(pid, 0)
246 if (retval == pid and
247 os.WIFEXITED(status) and os.WEXITSTATUS(status)):
248 # Child exited with an error. Convey the same error to
249 # our parent process as a courtesy.
250 sys.exit(os.WEXITSTATUS(status))
252 sys.stderr.write("fork child failed to signal startup\n")
257 # Running in parent process.
259 ovs.timeval.postfork()
260 #ovs.lockfile.postfork()
267 def _fork_notify_startup(fd):
269 error, bytes_written = ovs.socket_util.write_fully(fd, "0")
271 sys.stderr.write("could not write to pipe\n")
276 def _should_restart(status):
277 global RESTART_EXIT_CODE
279 if os.WIFEXITED(status) and os.WEXITSTATUS(status) == RESTART_EXIT_CODE:
282 if os.WIFSIGNALED(status):
283 for signame in ("SIGABRT", "SIGALRM", "SIGBUS", "SIGFPE", "SIGILL",
284 "SIGPIPE", "SIGSEGV", "SIGXCPU", "SIGXFSZ"):
285 if os.WTERMSIG(status) == getattr(signal, signame, None):
290 def _monitor_daemon(daemon_pid):
291 # XXX should log daemon's stderr output at startup time
292 # XXX should use setproctitle module if available
295 retval, status = _waitpid(daemon_pid, 0)
297 sys.stderr.write("waitpid failed\n")
299 elif retval == daemon_pid:
300 status_msg = ("pid %d died, %s"
301 % (daemon_pid, ovs.process.status_msg(status)))
303 if _should_restart(status):
304 if os.WCOREDUMP(status):
305 # Disable further core dumps to save disk space.
307 resource.setrlimit(resource.RLIMIT_CORE, (0, 0))
308 except resource.error:
309 logging.warning("failed to disable core dumps")
311 # Throttle restarts to no more than once every 10 seconds.
312 if (last_restart is not None and
313 ovs.timeval.msec() < last_restart + 10000):
314 logging.warning("%s, waiting until 10 seconds since last "
315 "restart" % status_msg)
317 now = ovs.timeval.msec()
318 wakeup = last_restart + 10000
321 print "sleep %f" % ((wakeup - now) / 1000.0)
322 time.sleep((wakeup - now) / 1000.0)
323 last_restart = ovs.timeval.msec()
325 logging.error("%s, restarting" % status_msg)
326 daemon_pid = _fork_and_wait_for_startup()
330 logging.info("%s, exiting" % status_msg)
333 # Running in new daemon process.
336 def _close_standard_fds():
337 """Close stdin, stdout, stderr. If we're started from e.g. an SSH session,
338 then this keeps us from holding that session open artificially."""
339 null_fd = ovs.socket_util.get_null_fd()
346 def daemonize_start():
347 """If daemonization is configured, then starts daemonization, by forking
348 and returning in the child process. The parent process hangs around until
349 the child lets it know either that it completed startup successfully (by
    calling daemonize_complete()) or that it failed to start up (by exiting with a
351 nonzero exit code)."""
354 if _fork_and_wait_for_startup() > 0:
355 # Running in parent process.
357 # Running in daemon or monitor process.
360 saved_daemonize_fd = _daemonize_fd
361 daemon_pid = _fork_and_wait_for_startup()
363 # Running in monitor process.
364 _fork_notify_startup(saved_daemonize_fd)
365 _close_standard_fds()
366 _monitor_daemon(daemon_pid)
367 # Running in daemon process
373 def daemonize_complete():
374 """If daemonization is configured, then this function notifies the parent
375 process that the child process has completed startup successfully."""
376 _fork_notify_startup(_daemonize_fd)
382 _close_standard_fds()
388 --detach run in background as daemon
389 --no-chdir do not chdir to '/'
390 --pidfile[=FILE] create pidfile (default: %s/%s.pid)
391 --overwrite-pidfile with --pidfile, start even if already running
392 """ % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME))
395 def __read_pidfile(pidfile, delete_if_stale):
396 if _pidfile_dev is not None:
399 if s.st_ino == _pidfile_ino and s.st_dev == _pidfile_dev:
400 # It's our own pidfile. We can't afford to open it,
401 # because closing *any* fd for a file that a process
402 # has locked also releases all the locks on that file.
404 # Fortunately, we know the associated pid anyhow.
410 file_handle = open(pidfile, "r+")
412 if e.errno == errno.ENOENT and delete_if_stale:
414 logging.warning("%s: open: %s" % (pidfile, e.strerror))
417 # Python fcntl doesn't directly support F_GETLK so we have to just try
420 fcntl.lockf(file_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
422 # pidfile exists but wasn't locked by anyone. Now we have the lock.
423 if not delete_if_stale:
425 logging.warning("%s: pid file is stale" % pidfile)
428 # Is the file we have locked still named 'pidfile'?
432 s2 = os.fstat(file_handle.fileno())
433 if s.st_ino != s2.st_ino or s.st_dev != s2.st_dev:
438 logging.warning("%s: lost race to delete pidfile" % pidfile)
439 return -errno.EALREADY
441 # We won the right to delete the stale pidfile.
445 logging.warning("%s: failed to delete stale pidfile (%s)"
446 % (pidfile, e.strerror))
449 logging.debug("%s: deleted stale pidfile" % pidfile)
453 if e.errno not in [errno.EACCES, errno.EAGAIN]:
454 logging.warn("%s: fcntl: %s" % (pidfile, e.strerror))
457 # Someone else has the pidfile locked.
460 return int(file_handle.readline())
462 logging.warning("%s: read: %s" % (pidfile, e.strerror))
465 logging.warning("%s does not contain a pid" % pidfile)
def read_pidfile(pidfile):
    """Opens the file named 'pidfile' and reads a PID from it.

    Returns the PID as a positive integer on success, otherwise a negative
    errno value.  The work is delegated to __read_pidfile(), which is told
    not to delete the pidfile if it turns out to be stale."""
    delete_if_stale = False
    return __read_pidfile(pidfile, delete_if_stale)
def _check_already_running():
    """Reports a fatal error if the configured pidfile cannot be taken over.

    Reads the configured pidfile through __read_pidfile(), allowing it to
    delete a stale file.  A positive result is the pid recorded in a pidfile
    that is still in use; a negative result is a negated errno value
    describing why the check failed.  Both cases are reported via _fatal()."""
    pid = __read_pidfile(_pidfile, True)
    if pid > 0:
        _fatal("%s: already running as pid %d, aborting" % (_pidfile, pid))
    elif pid < 0:
        # __read_pidfile() reports failures as *negative* errno values, while
        # os.strerror() expects the positive errno; negate before lookup so
        # the message is not "Unknown error -N".
        _fatal("%s: pidfile check failed (%s), aborting"
               % (_pidfile, os.strerror(-pid)))
488 # XXX Python's getopt does not support options with optional arguments, so we
489 # have to separate --pidfile (with no argument) from --pidfile-name (with an
490 # argument). Need to write our own getopt I guess.
491 LONG_OPTIONS = ["detach", "no-chdir", "pidfile", "pidfile-name=",
492 "overwrite-pidfile", "monitor"]
495 def parse_opt(option, arg):
496 if option == '--detach':
498 elif option == '--no-chdir':
500 elif option == '--pidfile':
502 elif option == '--pidfile-name':
504 elif option == '--overwrite-pidfile':
505 ignore_existing_pidfile()
506 elif option == '--monitor':