From: Ben Pfaff Date: Tue, 2 Jun 2009 21:07:19 +0000 (-0700) Subject: cfg: Reimplement vswitchd config file locking reliably. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=50716d9286d0c5fba1aa3931d36d00f8c49f09c8;p=openvswitch cfg: Reimplement vswitchd config file locking reliably. The lock file protocol used until now for the vswitchd configuration file was intended for use with serial devices that are not opened very often and stay open for a relatively long time (typically minutes or hours). This protocol contains possible (though probably unimportant) races, e.g. between one locker reading the contents of the lock file and another writing to it; if the reader saw a write of a partial PID then it could think that an nonexistent process owned it and thereby delete it. It is probably possible to fix this race, by writing to a temporary file and link()ing the temporary file to the lock file. Also, cfg_unlock() had a race in its unlock order: the lock file should be unlinked before it is closed; otherwise another process could race in and unlink and replace the lock file, and then when the unlocking process resumes it would unlink the other process's still-active lock file. But the serial lock file protocol has other problems for our purposes, at least: * It is rather complicated, which makes it more difficult to implement in other languages (such as Python, which the interface-reconfigure script should be doing itself). * There is no way to wait for the lock to become available without polling. This commit replaces the serial lock file protocol by one that does not have any of these shortcomings. The primary difference is that it uses a single long-lived lock file, to which the kernel's blocking F_SETLKW fcntl operation can be applied, instead of a series of short-lived lock files, for which there is no way to wait. --- diff --git a/lib/automake.mk b/lib/automake.mk index 255e2fd3..98f47a08 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -45,8 +45,6 @@ lib_libopenvswitch_a_SOURCES = \ lib/learning-switch.h \ lib/list.c \ lib/list.h \ - lib/lockfile.c \ - lib/lockfile.h \ lib/mac-learning.c \ lib/mac-learning.h \ lib/netdev.c \ @@ -160,6 +158,7 @@ install-data-local: # All the source files that have coverage counters. COVERAGE_SOURCES = \ + lib/cfg.c \ lib/dpif.c \ lib/flow.c \ lib/hmap.c \ diff --git a/lib/cfg.c b/lib/cfg.c index a6595a58..d6ae3c4b 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -27,6 +27,7 @@ #include #include "cfg.h" #include +#include #include #include #include @@ -37,8 +38,8 @@ #include #include #include +#include "coverage.h" #include "dynamic-string.h" -#include "lockfile.h" #include "ofpbuf.h" #include "packets.h" #include "svec.h" @@ -102,42 +103,51 @@ static bool is_type(const char *s, enum cfg_flags); #define CC_FILE_NAME CC_ALNUM "._-" #define CC_KEY CC_ALNUM "._-@$:+" -/* Adds 'file_name' to the set of files or directories that are read by - * cfg_read(). Returns 0 on success, otherwise a positive errno value if - * 'file_name' cannot be opened. - * - * If 'file_name' names a file, then cfg_read() will read it. If 'file_name' - * names a directory, then cfg_read() will read all of the files in that - * directory whose names consist entirely of the English letters, digits, - * periods, underscores, and hyphens and do not begin with a period. - * Subdirectories are not processed recursively. +/* Sets 'file_name' as the configuration file read by cfg_read(). Returns 0 on + * success, otherwise a positive errno value if 'file_name' cannot be opened. * * This function does not actually read the named file or directory. Use * cfg_read() to (re)read all the configuration files. */ int cfg_set_file(const char *file_name) { + const char *slash; int fd; if (cfg_name) { + assert(lock_fd < 0); free(cfg_name); free(lock_name); free(tmp_name); cfg_name = lock_name = tmp_name = NULL; } - /* Make sure that we can open this file or directory for reading. */ + /* Make sure that we can open this file for reading. */ fd = open(file_name, O_RDONLY); if (fd < 0) { return errno; } close(fd); - /* Add it to the list. */ - VLOG_INFO("using \"%s\" as the configuration file", file_name); cfg_name = xstrdup(file_name); - lock_name = xasprintf("%s.~lock~", file_name); + + /* Put the temporary file in the same directory as cfg_name, so that they + * are guaranteed to be in the same file system, to guarantee that + * rename(tmp_name, cfg_name) will work. */ tmp_name = xasprintf("%s.~tmp~", file_name); + + /* Put the lock file in the same directory as cfg_name, but prefixed by + * a dot so as not to garner administrator interest. */ + slash = strrchr(file_name, '/'); + if (slash) { + lock_name = xasprintf("%.*s/.%s.~lock~", + slash - file_name, file_name, slash + 1); + } else { + lock_name = xasprintf(".%s.~lock~", file_name); + } + + VLOG_INFO("using \"%s\" as configuration file, \"%s\" as lock file", + file_name, lock_name); return 0; } @@ -282,35 +292,114 @@ void cfg_unlock(void) { if (lock_fd != -1) { + COVERAGE_INC(cfg_unlock); close(lock_fd); - unlink(lock_name); lock_fd = -1; } } -/* Config may change, so caller is responsible for notifying others if it - * did. The 'timeout' specifies the maximum number of milliseconds to - * wait for the config file to become free. Returns positive errno. */ +static int +open_lockfile(const char *name) +{ + for (;;) { + /* Try to open an existing lock file. */ + int fd = open(name, O_RDWR); + if (fd >= 0) { + return fd; + } else if (errno != ENOENT) { + VLOG_WARN("%s: failed to open lock file: %s", + name, strerror(errno)); + return -errno; + } + + /* Try to create a new lock file. */ + VLOG_INFO("%s: lock file does not exist, creating", name); + fd = open(name, O_RDWR | O_CREAT | O_EXCL, 0600); + if (fd >= 0) { + return fd; + } else if (errno != EEXIST) { + VLOG_WARN("%s: failed to create lock file: %s", + name, strerror(errno)); + return -errno; + } + + /* Someone else created the lock file. Try again. */ + } +} + +static int +try_lock(int fd, bool block) +{ + struct flock l; + memset(&l, 0, sizeof l); + l.l_type = F_WRLCK; + l.l_whence = SEEK_SET; + l.l_start = 0; + l.l_len = 0; + return fcntl(fd, block ? F_SETLKW : F_SETLK, &l) == -1 ? errno : 0; +} + +/* Locks the configuration file against modification by other processes and + * re-reads it from disk. + * + * The 'timeout' specifies the maximum number of milliseconds to wait for the + * config file to become free. Use 0 to avoid waiting or INT_MAX to wait + * forever. + * + * Returns 0 on success, otherwise a positive errno value. */ int cfg_lock(uint8_t *cookie, int timeout) { long long int start = time_msec(); + long long int elapsed = 0; int fd; uint8_t curr_cookie[CFG_COOKIE_LEN]; + assert(lock_fd < 0); + COVERAGE_INC(cfg_lock); for (;;) { - /* Put the lock file in the same directory as cfg_name, so that - * they are guaranteed to be in the same file system. */ - fd = create_lockfile(lock_name); - if (fd < 0 && (timeout <= (time_msec() - start))) { - /* Wait a millisecond before trying again. */ - usleep(1000); - } else if (fd < 0) { - /* Couldn't lock config file in time. */ + int error; + + /* Open lock file. */ + fd = open_lockfile(lock_name); + if (fd < 0) { return -fd; - } else { + } + + /* Try to lock it. This will block (if 'timeout' > 0). */ + error = try_lock(fd, timeout > 0); + time_refresh(); + elapsed = time_msec() - start; + if (!error) { + /* Success! */ break; } + + /* Lock failed. Close the lock file and reopen it on the next + * iteration, just in case someone deletes it underneath us (even + * though that should not happen). */ + close(fd); + if (error != EINTR) { + /* Hard error, give up. */ + COVERAGE_INC(cfg_lock_error); + VLOG_WARN("%s: failed to lock file " + "(after %lld ms, with %d-ms timeout): %s", + lock_name, elapsed, timeout, strerror(error)); + return error; + } + + /* Probably, the periodic timer set up by time_init() woke up us. Just + * check whether it's time to give up. */ + if (timeout != INT_MAX && elapsed >= timeout) { + COVERAGE_INC(cfg_lock_timeout); + VLOG_WARN("%s: giving up on lock file after %lld ms", + lock_name, elapsed); + return ETIMEDOUT; + } + COVERAGE_INC(cfg_lock_retry); + } + if (elapsed) { + VLOG_WARN("%s: waited %lld ms for lock file", lock_name, elapsed); } lock_fd = fd; diff --git a/lib/lockfile.c b/lib/lockfile.c deleted file mode 100644 index 370567c9..00000000 --- a/lib/lockfile.c +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright (C) 2008, 2009 Nicira Networks, Inc. All rights reserved. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "lockfile.h" - -#define THIS_MODULE VLM_lockfile -#include "vlog.h" - -static int -fcntl_lock(int fd) -{ - struct flock l; - memset(&l, 0, sizeof l); - l.l_type = F_WRLCK; - l.l_whence = SEEK_SET; - l.l_start = 0; - l.l_len = 0; - return fcntl(fd, F_SETLK, &l) == -1 ? errno : 0; -} - -/* Remove the lockfile with 'name', if it is stale. Returns 0 if successful, - * otherwise a negative errno value if the lockfile is fresh or if it otherwise - * cannot be removed. */ -int -remove_lockfile(const char *name) -{ - char buffer[BUFSIZ]; - ssize_t n; - pid_t pid; - int fd; - - /* Remove existing lockfile. */ - fd = open(name, O_RDWR); - if (fd < 0) { - if (errno == ENOENT) { - return 0; - } else { - VLOG_ERR("%s: open: %s", name, strerror(errno)); - return -errno; - } - } - - /* Read lockfile. */ - n = read(fd, buffer, sizeof buffer - 1); - if (n < 0) { - int error = errno; - VLOG_ERR("%s: read: %s", name, strerror(error)); - close(fd); - return -error; - } - buffer[n] = '\0'; - if (n == 4 && memchr(buffer, '\0', n)) { - int32_t x; - memcpy(&x, buffer, sizeof x); - pid = x; - } else if (n >= 0) { - pid = strtol(buffer, NULL, 10); - } - if (pid <= 0) { - close(fd); - VLOG_WARN("%s: format not recognized, treating as locked.", name); - return -EACCES; - } - - /* Is lockfile fresh? */ - if (strstr(buffer, "fcntl")) { - int retval = fcntl_lock(fd); - if (retval) { - int error = errno; - close(fd); - VLOG_ERR("%s: device is locked (via fcntl): %s", - name, strerror(retval)); - return -error; - } else { - VLOG_WARN("%s: removing stale lockfile (checked via fcntl)", name); - } - } else { - if (!(kill(pid, 0) < 0 && errno == ESRCH)) { - close(fd); - VLOG_ERR("%s: device is locked (without fcntl)", name); - return -EACCES; - } else { - VLOG_WARN("%s: removing stale lockfile (without fcntl)", name); - } - } - close(fd); - - /* Remove stale lockfile. */ - if (unlink(name)) { - VLOG_ERR("%s: unlink: %s", name, strerror(errno)); - return -errno; - } - return 0; -} - -/* Attempt to create the lockfile. On error, return -1 with errno set - * to an appropriate value. On success, return a file descriptor. */ -int -create_lockfile(const char *name) -{ - const char *username; - char buffer[BUFSIZ]; - struct passwd *pwd; - mode_t old_umask; - uid_t uid; - int retval; - int fd; - - /* Remove an existing lockfile if it was created by us. */ - retval = remove_lockfile(name); - if (retval) { - return retval; - } - - /* Create file. */ - old_umask = umask(022); - fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0666); - if (fd < 0) { - int error = errno; - VLOG_ERR("%s: create: %s", name, strerror(error)); - umask(old_umask); - return -error; - } - umask(old_umask); - - /* Lock file. */ - if (fcntl_lock(fd)) { - int error = errno; - close(fd); - VLOG_ERR("%s: cannot lock: %s", name, strerror(error)); - return -error; - } - - /* Write to file. */ - uid = getuid(); - pwd = getpwuid(uid); - username = pwd ? pwd->pw_name : "unknown"; - snprintf(buffer, sizeof buffer, "%10ld %s %.20s fcntl\n", - (long int) getpid(), program_name, username); - if (write(fd, buffer, strlen(buffer)) != strlen(buffer)) { - int error = errno; - VLOG_ERR("%s: write: %s", name, strerror(error)); - close(fd); - unlink(name); - return -error; - } - - return fd; -} diff --git a/lib/lockfile.h b/lib/lockfile.h deleted file mode 100644 index 8e01c8f6..00000000 --- a/lib/lockfile.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (c) 2008, 2009 The Board of Trustees of The Leland Stanford - * Junior University - * - * We are making the OpenFlow specification and associated documentation - * (Software) available for public use and benefit with the expectation - * that others will use, modify and enhance the Software and contribute - * those enhancements back to the community. However, since we would - * like to make the Software available for broadest use, with as few - * restrictions as possible permission is hereby granted, free of - * charge, to any person obtaining a copy of this Software to deal in - * the Software under the copyrights without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * The name and trademarks of copyright holder(s) may NOT be used in - * advertising or publicity pertaining to the Software or any - * derivatives without specific, written prior permission. - */ - -#ifndef LOCKFILE_H -#define LOCKFILE_H 1 - -int create_lockfile(const char *name); -int remove_lockfile(const char *name); - -#endif /* lockfile.h */ diff --git a/lib/vlog-modules.def b/lib/vlog-modules.def index cbbb42ce..833c20a4 100644 --- a/lib/vlog-modules.def +++ b/lib/vlog-modules.def @@ -23,7 +23,6 @@ VLOG_MODULE(flow) VLOG_MODULE(in_band) VLOG_MODULE(leak_checker) VLOG_MODULE(learning_switch) -VLOG_MODULE(lockfile) VLOG_MODULE(mac_learning) VLOG_MODULE(mgmt) VLOG_MODULE(netdev)