From: Ben Pfaff Date: Wed, 17 Dec 2008 01:19:09 +0000 (-0800) Subject: Initial, skeletal implementation of vswitchd. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5fd9af23216efec49203e4e3b06afe11980a0b4b;p=openvswitch Initial, skeletal implementation of vswitchd. --- diff --git a/Makefile.am b/Makefile.am index dadf8480..fd11c40a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -70,4 +70,5 @@ include tests/automake.mk include include/automake.mk include third-party/automake.mk include debian/automake.mk +include vswitchd/automake.mk include ext.mk diff --git a/lib/automake.mk b/lib/automake.mk index 8f2c09f8..238c3a15 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -42,6 +42,8 @@ lib_libopenflow_a_SOURCES = \ lib/poll-loop.h \ lib/port-array.c \ lib/port-array.h \ + lib/process.c \ + lib/process.h \ lib/queue.c \ lib/queue.h \ lib/random.c \ @@ -49,10 +51,14 @@ lib_libopenflow_a_SOURCES = \ lib/rconn.c \ lib/rconn.h \ lib/sat-math.h \ + lib/signals.c \ + lib/signals.h \ lib/socket-util.c \ lib/socket-util.h \ lib/stp.c \ lib/stp.h \ + lib/svec.c \ + lib/svec.h \ lib/timeval.c \ lib/timeval.h \ lib/type-props.h \ diff --git a/lib/process.c b/lib/process.c new file mode 100644 index 00000000..6680ef8a --- /dev/null +++ b/lib/process.c @@ -0,0 +1,384 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include +#include "process.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "dynamic-string.h" +#include "list.h" +#include "poll-loop.h" +#include "socket-util.h" +#include "util.h" + +#define THIS_MODULE VLM_process +#include "vlog.h" + +struct process { + struct list node; + char *name; + pid_t pid; + + /* Modified by signal handler. */ + volatile bool exited; + volatile int status; +}; + +/* Pipe used to signal child termination. */ +static int fds[2]; + +/* All processes. 
*/ +static struct list all_processes = LIST_INITIALIZER(&all_processes); + +static void block_sigchld(sigset_t *); +static void unblock_sigchld(const sigset_t *); +static void sigchld_handler(int signr UNUSED); +static bool is_member(int x, const int *array, size_t); +static bool find_in_path(const char *name); + +/* Initializes the process subsystem (if it is not already initialized). Calls + * exit() if initialization fails. + * + * Calling this function is optional; it will be called automatically by + * process_start() if necessary. Calling it explicitly allows the client to + * prevent the process from exiting at an unexpected time. */ +void +process_init(void) +{ + static bool inited; + struct sigaction sa; + + if (inited) { + return; + } + inited = true; + + /* Create notification pipe. */ + if (pipe(fds)) { + ofp_fatal(errno, "could not create pipe"); + } + set_nonblocking(fds[0]); + set_nonblocking(fds[1]); + + /* Set up child termination signal handler. */ + memset(&sa, 0, sizeof sa); + sa.sa_handler = sigchld_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_NOCLDSTOP | SA_RESTART; + if (sigaction(SIGCHLD, &sa, NULL)) { + ofp_fatal(errno, "sigaction(SIGCHLD) failed"); + } +} + +/* Starts a subprocess with the arguments in the null-terminated argv[] array. + * argv[0] is used as the name of the process. Searches the PATH environment + * variable to find the program to execute. + * + * All file descriptors are closed before executing the subprocess, except for + * fds 0, 1, and 2 and the 'n_keep_fds' fds listed in 'keep_fds'. + * + * Returns 0 if successful, otherwise a positive errno value indicating the + * error. If successful, '*pp' is assigned a new struct process that may be + * used to query the process's status. On failure, '*pp' is set to NULL. 
*/ +int +process_start(char **argv, const int keep_fds[], size_t n_keep_fds, + struct process **pp) +{ + sigset_t oldsigs; + pid_t pid; + + *pp = NULL; + process_init(); + + if (VLOG_IS_DBG_ENABLED()) { + struct ds ds = DS_EMPTY_INITIALIZER; + char **argp; + for (argp = argv; *argp; argp++) { + const char *arg = *argp; + const char *p; + if (argp != argv) { + ds_put_char(&ds, ' '); + } + if (arg[strcspn(arg, " \t\r\n\v\\")]) { + ds_put_char(&ds, '"'); + for (p = arg; *p; p++) { + if (*p == '\\' || *p == '\"') { + ds_put_char(&ds, '\\'); + } + ds_put_char(&ds, *p); + } + ds_put_char(&ds, '"'); + } else { + ds_put_cstr(&ds, arg); + } + } + VLOG_DBG("starting subprocess: %s", ds_cstr(&ds)); + ds_destroy(&ds); + } + + /* execvp() will search PATH too, but the error in that case is more + * obscure, since it is only reported post-fork. */ + if (!find_in_path(argv[0])) { + VLOG_ERR("%s not found in PATH", argv[0]); + return ENOENT; + } + + block_sigchld(&oldsigs); + pid = fork(); + if (pid < 0) { + unblock_sigchld(&oldsigs); + VLOG_WARN("fork failed: %s", strerror(errno)); + return errno; + } else if (pid) { + /* Running in parent process. */ + struct process *p; + const char *slash; + + p = xcalloc(1, sizeof *p); + p->pid = pid; + slash = strrchr(argv[0], '/'); + p->name = xstrdup(slash ? slash + 1 : argv[0]); + p->exited = false; + + list_push_back(&all_processes, &p->node); + unblock_sigchld(&oldsigs); + + *pp = p; + return 0; + } else { + /* Running in child process. */ + int fd_max = get_max_fds(); + int fd; + + unblock_sigchld(&oldsigs); + for (fd = 3; fd < fd_max; fd++) { + if (!is_member(fd, keep_fds, n_keep_fds)) { + close(fd); + } + } + execvp(argv[0], argv); + fprintf(stderr, "execvp(\"%s\") failed: %s\n", + argv[0], strerror(errno)); + _exit(1); + } +} + +/* Destroys process 'p'. 
*/ +void +process_destroy(struct process *p) +{ + if (p) { + sigset_t oldsigs; + + block_sigchld(&oldsigs); + list_remove(&p->node); + unblock_sigchld(&oldsigs); + + free(p->name); + free(p); + } +} + +/* Sends signal 'signr' to process 'p'. Returns 0 if successful, otherwise a + * positive errno value. */ +int +process_kill(const struct process *p, int signr) +{ + return (p->exited ? ESRCH + : !kill(p->pid, signr) ? 0 + : errno); +} + +/* Returns the pid of process 'p'. */ +pid_t +process_pid(const struct process *p) +{ + return p->pid; +} + +/* Returns the name of process 'p' (the name passed to process_start() with any + * leading directories stripped). */ +const char * +process_name(const struct process *p) +{ + return p->name; +} + +/* Returns true if process 'p' has exited, false otherwise. */ +bool +process_exited(struct process *p) +{ + if (p->exited) { + return true; + } else { + char buf[_POSIX_PIPE_BUF]; + read(fds[0], buf, sizeof buf); + return false; + } +} + +/* Returns process 'p''s exit status, as reported by waitpid(2). + * process_status(p) may be called only after process_exited(p) has returned + * true. */ +int +process_status(const struct process *p) +{ + assert(p->exited); + return p->status; +} + +/* Given 'status', which is a process status in the form reported by waitpid(2) + * and returned by process_status(), returns a string describing how the + * process terminated. The caller is responsible for freeing the string when + * it is no longer needed. 
*/ +char * +process_status_msg(int status) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + if (WIFEXITED(status)) { + ds_put_format(&ds, "exit status %d", WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + const char *name = NULL; +#ifdef HAVE_STRSIGNAL + name = strsignal(WTERMSIG(status)); +#endif + ds_put_format(&ds, "killed by signal %d", WTERMSIG(status)); + if (name) { + ds_put_format(&ds, " (%s)", name); + } + } + if (WCOREDUMP(status)) { + ds_put_cstr(&ds, ", core dumped"); + } + return ds_cstr(&ds); +} + +/* Causes the next call to poll_block() to wake up when process 'p' has + * exited. */ +void +process_wait(struct process *p) +{ + if (p->exited) { + poll_immediate_wake(); + } else { + poll_fd_wait(fds[0], POLLIN); + } +} + +static void +sigchld_handler(int signr UNUSED) +{ + for (;;) { + struct process *p; + int status; + pid_t pid; + + pid = waitpid(-1, &status, WNOHANG); + if (pid <= 0) { + break; + } + + LIST_FOR_EACH (p, struct process, node, &all_processes) { + if (p->pid == pid) { + p->exited = true; + p->status = status; + break; + } + } + } + write(fds[1], "", 1); +} + +static bool +is_member(int x, const int *array, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + if (array[i] == x) { + return true; + } + } + return false; +} + +static void +block_sigchld(sigset_t *oldsigs) +{ + sigset_t sigchld; + sigemptyset(&sigchld); + sigaddset(&sigchld, SIGCHLD); + if (sigprocmask(SIG_BLOCK, &sigchld, oldsigs)) { + ofp_fatal(errno, "sigprocmask"); + } +} + +static void +unblock_sigchld(const sigset_t *oldsigs) +{ + if (sigprocmask(SIG_SETMASK, oldsigs, NULL)) { + ofp_fatal(errno, "sigprocmask"); + } +} + +static bool +find_in_path(const char *name) +{ + char *save_ptr = NULL; + char *path, *dir; + struct stat s; + + if (strchr(name, '/') || !getenv("PATH")) { + return stat(name, &s) == 0; + } + + path = xstrdup(getenv("PATH")); + for (dir = strtok_r(path, ":", &save_ptr); dir; + dir = strtok_r(NULL, ":", &save_ptr)) { + char *file = 
xasprintf("%s/%s", dir, name); + if (stat(file, &s) == 0) { + free(file); + free(path); + return true; + } + free(file); + } + free(path); + return false; +} diff --git a/lib/process.h b/lib/process.h new file mode 100644 index 00000000..22601009 --- /dev/null +++ b/lib/process.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#ifndef PROCESS_H +#define PROCESS_H 1 + +#include +#include + +struct process; +void process_init(void); +int process_start(char **argv, const int *keep_fds, size_t n_keep_fds, + struct process **); +void process_destroy(struct process *); +int process_kill(const struct process *, int signr); + +pid_t process_pid(const struct process *); +const char *process_name(const struct process *); +bool process_exited(struct process *); +int process_status(const struct process *); +char *process_status_msg(int); + +void process_wait(struct process *); + +#endif /* process.h */ diff --git a/lib/signals.c b/lib/signals.c new file mode 100644 index 00000000..c724aba6 --- /dev/null +++ b/lib/signals.c @@ -0,0 +1,144 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include +#include "signals.h" +#include +#include +#include +#include +#include "poll-loop.h" +#include "socket-util.h" +#include "util.h" + +#if defined(_NSIG) +#define N_SIGNALS _NSIG +#elif defined(NSIG) +#define N_SIGNALS NSIG +#else +/* We could try harder to get the maximum signal number, but in practice we + * only care about SIGHUP, which is normally signal 1 anyway. */ +#define N_SIGNALS 32 +#endif + +struct signal { + int signr; +}; + +static volatile sig_atomic_t signaled[N_SIGNALS]; + +static int fds[2]; + +static void signal_handler(int signr); + +/* Initializes the signals subsystem (if it is not already initialized). Calls + * exit() if initialization fails. + * + * Calling this function is optional; it will be called automatically by + * signal_register() if necessary. Calling it explicitly allows the client to + * prevent the process from exiting at an unexpected time. */ +void +signal_init(void) +{ + static bool inited; + if (!inited) { + inited = true; + if (pipe(fds)) { + ofp_fatal(errno, "could not create pipe"); + } + set_nonblocking(fds[0]); + set_nonblocking(fds[1]); + } +} + +/* Sets up a handler for 'signr' and returns a structure that represents it. + * + * Only one handler for a given signal may be registered at a time. 
*/ +struct signal * +signal_register(int signr) +{ + struct sigaction sa; + struct signal *s; + + signal_init(); + + /* Set up signal handler. */ + assert(signr >= 1 && signr < N_SIGNALS); + memset(&sa, 0, sizeof sa); + sa.sa_handler = signal_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + if (sigaction(signr, &sa, NULL)) { + ofp_fatal(errno, "sigaction(%d) failed", signr); + } + + /* Return structure. */ + s = xmalloc(sizeof *s); + s->signr = signr; + return s; +} + +/* Returns true if signal 's' has been received since the last call to this + * function with argument 's'. */ +bool +signal_poll(struct signal *s) +{ + char buf[_POSIX_PIPE_BUF]; + read(fds[0], buf, sizeof buf); + if (signaled[s->signr]) { + signaled[s->signr] = 0; + return true; + } + return false; +} + +/* Causes the next call to poll_block() to wake up when signal_poll(s) would + * return true. */ +void +signal_wait(struct signal *s) +{ + if (signaled[s->signr]) { + poll_immediate_wake(); + } else { + poll_fd_wait(fds[0], POLLIN); + } +} + +static void +signal_handler(int signr) +{ + if (signr >= 1 && signr < N_SIGNALS) { + write(fds[1], "", 1); + signaled[signr] = true; + } +} diff --git a/lib/signals.h b/lib/signals.h new file mode 100644 index 00000000..fd91bd7e --- /dev/null +++ b/lib/signals.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. 
+ */ + +#ifndef SIGNALS_H +#define SIGNALS_H 1 + +#include + +void signal_init(void); +struct signal *signal_register(int signr); +bool signal_poll(struct signal *); +void signal_wait(struct signal *); + +#endif /* signals.h */ diff --git a/lib/svec.c b/lib/svec.c new file mode 100644 index 00000000..bb58408e --- /dev/null +++ b/lib/svec.c @@ -0,0 +1,218 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include +#include "svec.h" +#include +#include +#include +#include "util.h" + +void +svec_init(struct svec *svec) +{ + svec->names = NULL; + svec->n = 0; + svec->allocated = 0; +} + +void +svec_destroy(struct svec *svec) +{ + svec_clear(svec); + free(svec->names); +} + +void +svec_clear(struct svec *svec) +{ + size_t i; + + for (i = 0; i < svec->n; i++) { + free(svec->names[i]); + } + svec->n = 0; +} + +void +svec_add(struct svec *svec, const char *name) +{ + svec_add_nocopy(svec, xstrdup(name)); +} + +static void +svec_expand(struct svec *svec) +{ + if (svec->n >= svec->allocated) { + svec->names = x2nrealloc(svec->names, &svec->allocated, + sizeof *svec->names); + } +} + +void +svec_add_nocopy(struct svec *svec, char *name) +{ + svec_expand(svec); + svec->names[svec->n++] = name; +} + +void +svec_terminate(struct svec *svec) +{ + svec_expand(svec); + svec->names[svec->n] = NULL; +} + +static int +compare_strings(const void *a_, const void *b_) +{ + char *const *a = a_; + char *const *b = b_; + return strcmp(*a, *b); +} + +void +svec_sort(struct svec *svec) +{ + qsort(svec->names, svec->n, sizeof *svec->names, compare_strings); +} + +void +svec_unique(struct svec *svec) +{ + assert(svec_is_sorted(svec)); + if (svec->n > 1) { + /* This algorithm is lazy and sub-optimal, but it's "obviously correct" + * and asymptotically optimal . 
*/ + struct svec tmp; + size_t i; + + svec_init(&tmp); + svec_add(&tmp, svec->names[0]); + for (i = 1; i < svec->n; i++) { + if (strcmp(svec->names[i - 1], svec->names[i])) { + svec_add(&tmp, svec->names[i]); + } + } + svec_swap(&tmp, svec); + svec_destroy(&tmp); + } +} + +void +svec_diff(const struct svec *a, const struct svec *b, + struct svec *a_only, struct svec *both, struct svec *b_only) +{ + size_t i, j; + + assert(svec_is_sorted(a)); + assert(svec_is_sorted(b)); + if (a_only) { + svec_init(a_only); + } + if (both) { + svec_init(both); + } + if (b_only) { + svec_init(b_only); + } + for (i = j = 0; i < a->n && j < b->n; ) { + int cmp = strcmp(a->names[i], b->names[j]); + if (cmp > 0) { + if (a_only) { + svec_add(a_only, a->names[i]); + } + i++; + } else if (cmp < 0) { + if (b_only) { + svec_add(b_only, b->names[j]); + } + j++; + } else { + if (both) { + svec_add(both, a->names[i]); + } + i++; + j++; + } + } + if (a_only) { + for (; i < a->n; i++) { + svec_add(a_only, a->names[i]); + } + } + if (b_only) { + for (; j < b->n; j++) { + svec_add(b_only, b->names[j]); + } + } +} + +bool +svec_contains(const struct svec *svec, const char *name) +{ + return bsearch(&name, svec->names, svec->n, sizeof *svec->names, + compare_strings) != NULL; +} + +bool +svec_is_sorted(const struct svec *svec) +{ + size_t i; + + for (i = 1; i < svec->n; i++) { + if (strcmp(svec->names[i - 1], svec->names[i]) > 0) { + return false; + } + } + return true; +} + +void +svec_swap(struct svec *a, struct svec *b) +{ + struct svec tmp = *a; + *a = *b; + *b = tmp; +} + +void +svec_print(const struct svec *svec, const char *title) +{ + size_t i; + + printf("%s:\n", title); + for (i = 0; i < svec->n; i++) { + printf("\"%s\"\n", svec->names[i]); + } +} diff --git a/lib/svec.h b/lib/svec.h new file mode 100644 index 00000000..ac99a837 --- /dev/null +++ b/lib/svec.h @@ -0,0 +1,64 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the 
OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. 
+ */ + +#ifndef SVEC_H +#define SVEC_H 1 + +#include +#include + +struct svec { + char **names; + size_t n; + size_t allocated; +}; + +#define SVEC_EMPTY_INITIALIZER { NULL, 0, 0 } + +void svec_init(struct svec *); +void svec_destroy(struct svec *); +void svec_clear(struct svec *); +void svec_add(struct svec *, const char *); +void svec_add_nocopy(struct svec *, char *); +void svec_merge(struct svec *, const struct svec *); +void svec_terminate(struct svec *); +void svec_sort(struct svec *); +void svec_unique(struct svec *); +void svec_diff(const struct svec *a, const struct svec *b, + struct svec *a_only, struct svec *both, struct svec *b_only); +bool svec_contains(const struct svec *, const char *); +bool svec_is_sorted(const struct svec *); +void svec_swap(struct svec *a, struct svec *b); +void svec_print(const struct svec *svec, const char *title); + +#endif /* svec.h */ diff --git a/lib/vlog-modules.def b/lib/vlog-modules.def index 58bd7d83..2bb5919a 100644 --- a/lib/vlog-modules.def +++ b/lib/vlog-modules.def @@ -1,5 +1,7 @@ /* Modules that can emit log messages. 
*/ +VLOG_MODULE(bridge) VLOG_MODULE(chain) +VLOG_MODULE(cfg) VLOG_MODULE(controller) VLOG_MODULE(ctlpath) VLOG_MODULE(daemon) @@ -21,6 +23,7 @@ VLOG_MODULE(netlink) VLOG_MODULE(ofp_discover) VLOG_MODULE(poll_loop) VLOG_MODULE(port_watcher) +VLOG_MODULE(process) VLOG_MODULE(secchan) VLOG_MODULE(rconn) VLOG_MODULE(snat) @@ -39,6 +42,7 @@ VLOG_MODULE(vconn_unix) VLOG_MODULE(vconn) VLOG_MODULE(vlog) VLOG_MODULE(vlog_socket) +VLOG_MODULE(vswitchd) #ifdef HAVE_EXT #include "ext/vlogext-modules.def" diff --git a/vswitchd/automake.mk b/vswitchd/automake.mk new file mode 100644 index 00000000..c53ece76 --- /dev/null +++ b/vswitchd/automake.mk @@ -0,0 +1,13 @@ +bin_PROGRAMS += vswitchd/vswitchd +man_MANS += vswitchd/vswitchd.8 +DISTCLEANFILES += vswitchd/vswitchd.8 + +vswitchd_vswitchd_SOURCES = \ + vswitchd/bridge.c \ + vswitchd/bridge.h \ + vswitchd/cfg.c \ + vswitchd/cfg.h \ + vswitchd/vswitchd.c +vswitchd_vswitchd_LDADD = lib/libopenflow.a $(FAULT_LIBS) $(SSL_LIBS) + +EXTRA_DIST += vswitchd/vswitchd.8.in diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c new file mode 100644 index 00000000..775bbd7d --- /dev/null +++ b/vswitchd/bridge.c @@ -0,0 +1,925 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include +#include "bridge.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cfg.h" +#include "dpif.h" +#include "flow.h" +#include "list.h" +#include "mac-learning.h" +#include "netdev.h" +#include "ofp-print.h" +#include "ofpbuf.h" +#include "process.h" +#include "rconn.h" +#include "socket-util.h" +#include "svec.h" +#include "util.h" +#include "vconn.h" + +#define THIS_MODULE VLM_bridge +#include "vlog.h" + +struct iface { + struct port *port; /* Containing port. */ + size_t port_ifidx; /* Index within containing port. 
*/ + + char *name; /* Host network device name. */ + int dp_ifidx; /* Index within kernel datapath. */ +}; + +struct port { + struct bridge *bridge; + size_t port_idx; + int vlan; /* 0=trunk port, otherwise a 12-bit VLAN ID. */ + const char *name; + + /* An ordinary bridge port has 1 interface. + * A bridge port for bonding has at least 2 interfaces. */ + struct iface **ifaces; + size_t n_ifaces, allocated_ifaces; +}; + +#define DP_MAX_PORTS 255 +struct bridge { + struct list node; /* Node in global list of bridges. */ + char *name; /* User-specified arbitrary name. */ + struct process *secchan; /* The "secchan" subprocess. */ + struct rconn *rconn; /* Connection to secchan subprocess. */ + int txqlen; /* # of messages queued to send on 'rconn'. */ + struct mac_learning *ml; /* MAC learning table, or null not to learn. */ + int flow_idle_time; /* Idle time for flows we set up. */ + + /* Kernel datapath information. */ + int dp_idx; /* Kernel datapath index. */ + struct iface *ifaces[DP_MAX_PORTS]; /* Index by kernel datapath port no. */ + + /* Bridge ports. */ + struct port **ports; + size_t n_ports, allocated_ports; +}; + +/* List of all bridges. */ +static struct list all_bridges = LIST_INITIALIZER(&all_bridges); + +/* Each value is true if the corresponding datapath has been created, + * false otherwise.*/ +static bool in_use_dps[DP_MAX]; + +/* Used for creating and destroying kernel datapaths, etc. 
*/ +static struct dpif mgmt_dpif; + +static struct bridge *bridge_create(const char *name); +static void bridge_destroy(struct bridge *); +static struct bridge *bridge_lookup(const char *name); +static int if_up(const char *netdev_name); +static void bridge_run_one(struct bridge *); +static void bridge_reconfigure_one(struct bridge *); +static void bridge_get_all_ifaces(const struct bridge *, struct svec *ifaces); +static bool bridge_is_backlogged(const struct bridge *); +static int bridge_fetch_dp_ifaces(struct bridge *, struct svec *iface_names); + +static void bridge_process_msg(struct bridge *, struct ofpbuf *); + +static void port_create(struct bridge *, const char *name); +static void port_reconfigure(struct port *); +static void port_destroy(struct port *); + +static void iface_create(struct port *, const char *name); +static void iface_destroy(struct iface *); +static struct iface *iface_lookup(const struct bridge *, const char *name); + +/* Public functions. */ + +void +bridge_init(void) +{ + int retval; + size_t i; + + retval = dpif_open(-1, &mgmt_dpif); + if (retval) { + ofp_fatal(retval, "could not create datapath management socket"); + } + + for (i = 0; i < DP_MAX; i++) { + int retval = dpif_del_dp(&mgmt_dpif, i); + if (retval && retval != ENOENT) { + VLOG_ERR("failed to delete datapath nl:%d: %s", + i, strerror(retval)); + } + } +} + +void +bridge_reconfigure(void) +{ + struct svec old_br, new_br; + struct bridge *br, *next; + size_t i, j; + + /* Collect old and new bridges. */ + svec_init(&old_br); + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + svec_add(&old_br, br->name); + } + svec_sort(&old_br); + cfg_get_subsections(&new_br, "bridge"); + + /* Get rid of deleted bridges and add new bridges. 
*/ + LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) { + if (!svec_contains(&new_br, br->name)) { + bridge_destroy(br); + } + } + for (i = 0; i < new_br.n; i++) { + const char *name = new_br.names[i]; + if (!svec_contains(&old_br, name)) { + bridge_create(name); + } + } + + /* Reconfigure all bridges. */ + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + bridge_reconfigure_one(br); + } + + /* Add and delete ports on all datapaths. + * + * The kernel will reject any attempt to add a given port to a datapath if + * that port already belongs to a different datapath, so we must do all + * port deletions before any port additions. */ + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + struct svec cur_ifaces, want_ifaces, del_ifaces; + + bridge_fetch_dp_ifaces(br, &cur_ifaces); + bridge_get_all_ifaces(br, &want_ifaces); + svec_sort(&want_ifaces); + svec_sort(&cur_ifaces); + svec_diff(&want_ifaces, &cur_ifaces, NULL, NULL, &del_ifaces); + for (i = 0; i < del_ifaces.n; i++) { + const char *if_name = del_ifaces.names[i]; + int retval = dpif_del_port(&mgmt_dpif, br->dp_idx, if_name); + if (retval) { + VLOG_ERR("failed to remove %s interface from nl:%d: %s", + if_name, br->dp_idx, strerror(retval)); + } + } + svec_destroy(&cur_ifaces); + svec_destroy(&want_ifaces); + svec_destroy(&del_ifaces); + } + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + struct svec cur_ifaces, want_ifaces, add_ifaces; + + bridge_fetch_dp_ifaces(br, &cur_ifaces); + bridge_get_all_ifaces(br, &want_ifaces); + svec_sort(&want_ifaces); + svec_sort(&cur_ifaces); + svec_diff(&want_ifaces, &cur_ifaces, &add_ifaces, NULL, NULL); + for (i = 0; i < add_ifaces.n; i++) { + const char *if_name = add_ifaces.names[i]; + int retval; + if_up(if_name); + retval = dpif_add_port(&mgmt_dpif, br->dp_idx, if_name); + if (retval) { + VLOG_ERR("failed to add %s interface to nl:%d: %s", + if_name, br->dp_idx, strerror(retval)); + } + } + svec_destroy(&cur_ifaces); + 
svec_destroy(&want_ifaces); + svec_destroy(&add_ifaces); + } + LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) { + bridge_fetch_dp_ifaces(br, NULL); + for (i = 0; i < br->n_ports; ) { + struct port *port = br->ports[i]; + for (j = 0; j < port->n_ifaces; ) { + struct iface *iface = port->ifaces[j]; + if (iface->dp_ifidx < 0) { + VLOG_ERR("%s interface not in nl:%d, dropping", + iface->name, br->dp_idx); + iface_destroy(iface); + } else { + VLOG_DBG("datapath nl:%d has interface %s on port %d", + br->dp_idx, iface->name, iface->dp_ifidx); + j++; + } + } + if (!port->n_ifaces) { + VLOG_ERR("%s port has no interfaces, dropping", port->name); + port_destroy(port); + } else { + i++; + } + } + if (!br->n_ports) { + VLOG_ERR("%s bridge has no ports, dropping", br->name); + bridge_destroy(br); + } + } +} + +void +bridge_run(void) +{ + struct bridge *br, *next; + + LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) { + bridge_run_one(br); + } +} + +void +bridge_wait(void) +{ + struct bridge *br; + + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + rconn_run_wait(br->rconn); + if (!bridge_is_backlogged(br)) { + rconn_recv_wait(br->rconn); + } + process_wait(br->secchan); + } +} + +/* Bridge reconfiguration functions. */ + +static int allocate_dp_idx(void); +static void sanitize_opp(struct ofp_phy_port *opp); + +static struct bridge * +bridge_create(const char *name) +{ + struct bridge *br; + struct svec argv; + char *dp_name; + int sockets[2]; + int retval; + + assert(!bridge_lookup(name)); + br = xcalloc(1, sizeof *br); + list_push_back(&all_bridges, &br->node); + br->name = xstrdup(name); + br->ml = mac_learning_create(); + br->flow_idle_time = 5; + + /* Create kernel datapath. */ + for (;;) { + /* Pick a datapath index. + * + * XXX we could make a bad choice if a user created a datapath manually + * with dpctl. Ideally the kernel module should provide a way to pick + * the datapath index for us. 
*/ + br->dp_idx = allocate_dp_idx(); + if (br->dp_idx < 0) { + VLOG_EMER("out of datapath indexes; cannot create switches"); + /* XXX free memory */ + return NULL; + } + + /* Create the kernel datapath. */ + retval = dpif_add_dp(&mgmt_dpif, br->dp_idx); + if (retval) { + VLOG_ERR("failed to create datapath nl:%d: %s", + br->dp_idx, strerror(retval)); + } + break; + } + + VLOG_WARN("created bridge %s on datapath nl:%d", + br->name, br->dp_idx); + + /* Create socketpair for communicating with secchan subprocess. */ + if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sockets)) { + VLOG_ERR("failed to create socket pair: %s", strerror(errno)); + /* XXX */ + } + set_nonblocking(sockets[0]); + set_nonblocking(sockets[1]); + + dp_name = xasprintf("fd:%d", sockets[0]); + br->rconn = rconn_new(dp_name, 30, 1); + free(dp_name); + + /* Start secchan subprocess. */ + svec_init(&argv); + svec_add(&argv, "secchan"); + svec_add(&argv, "--out-of-band"); + svec_add(&argv, "--fail=closed"); + svec_add(&argv, "--max-backoff=1"); + svec_add(&argv, "--no-stp"); + svec_add_nocopy(&argv, + xasprintf("-vPATTERN:console:%s|secchan(nl:%d)|%s", + "%d{%b %d %H:%M:%S}", + br->dp_idx, + "%c|%p|%m")); + svec_add_nocopy(&argv, xasprintf("nl:%d", br->dp_idx)); + svec_add_nocopy(&argv, xasprintf("fd:%d", sockets[1])); + svec_terminate(&argv); + + retval = process_start(argv.names, &sockets[1], 1, &br->secchan); + close(sockets[1]); + if (retval) { + VLOG_ERR("failed to start secchan for datapath nl:%d: %s", + br->dp_idx, strerror(retval)); + bridge_destroy(br); + return NULL; + } + return br; +} + +static void +bridge_destroy(struct bridge *br) +{ + if (br) { + size_t i; + + while (br->n_ports > 0) { + port_destroy(br->ports[br->n_ports - 1]); + } + list_remove(&br->node); + free(br->name); + if (br->dp_idx >= 0) { + int retval = dpif_del_dp(&mgmt_dpif, br->dp_idx); + if (!retval || retval == ENOENT) { + assert(br->dp_idx < DP_MAX); + in_use_dps[br->dp_idx] = false; + } else { + VLOG_ERR("failed to delete 
datapath nl:%d: %s", + br->dp_idx, strerror(retval)); + } + } + if (br->secchan) { + process_kill(br->secchan, SIGTERM); + } + process_destroy(br->secchan); + rconn_destroy(br->rconn); + for (i = 0; i < br->n_ports; i++) { + port_destroy(br->ports[i]); + } + mac_learning_destroy(br->ml); + free(br->ports); + free(br); + } +} + +static struct bridge * +bridge_lookup(const char *name) +{ + struct bridge *br; + + LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { + if (!strcmp(br->name, name)) { + return br; + } + } + return NULL; +} + +static int +allocate_dp_idx(void) +{ + int i; + for (i = 0; i < DP_MAX; i++) { + if (!in_use_dps[i]) { + in_use_dps[i] = true; + return i; + } + } + return -1; +} + +static int +if_up(const char *netdev_name) +{ + struct netdev *netdev; + int retval; + + retval = netdev_open(netdev_name, NETDEV_ETH_TYPE_NONE, &netdev); + if (!retval) { + retval = netdev_turn_flags_on(netdev, NETDEV_UP, true); + netdev_close(netdev); + } + return retval; +} + +static void +bridge_run_one(struct bridge *br) +{ + int iteration; + + rconn_run(br->rconn); + for (iteration = 0; iteration < 50 && !bridge_is_backlogged(br); + iteration++) { + struct ofpbuf *msg = rconn_recv(br->rconn); + if (!msg) { + break; + } + + bridge_process_msg(br, msg); + ofpbuf_delete(msg); + } + if (process_exited(br->secchan)) { + int status = process_status(br->secchan); + VLOG_ERR("%s: secchan subprocess with pid %ld died unexpectedly (%s)", + br->name, (long int) process_pid(br->secchan), + process_status_msg(status)); + bridge_destroy(br); + /* XXX restart */ + } else if (!rconn_is_alive(br->rconn)) { + VLOG_ERR("%s: connection to secchan closed unexpectedly", br->name); + bridge_destroy(br); + /* XXX kill and restart secchan */ + } +} + +static void +bridge_reconfigure_one(struct bridge *br) +{ + struct svec old_ports, new_ports; + size_t i; + + /* Collect old and new ports. 
*/ + svec_init(&old_ports); + for (i = 0; i < br->n_ports; i++) { + svec_add(&old_ports, br->ports[i]->name); + } + cfg_get_all_keys(&new_ports, "bridge.%s.port", br->name); + + /* Get rid of deleted ports and add new ports. */ + for (i = 0; i < br->n_ports; ) { + struct port *port = br->ports[i]; + if (!svec_contains(&new_ports, port->name)) { + port_destroy(port); + } else { + i++; + } + } + for (i = 0; i < new_ports.n; i++) { + const char *name = new_ports.names[i]; + if (!svec_contains(&old_ports, name)) { + port_create(br, name); + } + } + + /* Reconfigure all ports. */ + for (i = 0; i < br->n_ports; i++) { + port_reconfigure(br->ports[i]); + } +} + +static void +bridge_get_all_ifaces(const struct bridge *br, struct svec *ifaces) +{ + size_t i, j; + + svec_init(ifaces); + for (i = 0; i < br->n_ports; i++) { + struct port *port = br->ports[i]; + for (j = 0; j < port->n_ifaces; j++) { + struct iface *iface = port->ifaces[j]; + svec_add(ifaces, iface->name); + } + } +} + +static bool +bridge_is_backlogged(const struct bridge *br) +{ + return br->txqlen >= 100; +} + +/* The kernel interface to add ports doesn't report what port numbers they were + * assigned (XXX), so now we have to connect to the datapath and use a feature + * request to obtain the port numbers. */ +static int +bridge_fetch_dp_ifaces(struct bridge *br, struct svec *iface_names) +{ + char *vconn_name; + struct vconn *vconn = NULL; + struct ofpbuf *request; + struct ofpbuf *reply = NULL; + struct ofp_switch_features *osf; + size_t n_ports; + size_t i, j; + int retval; + + if (iface_names) { + svec_init(iface_names); + } + + /* Reset all interface numbers. */ + for (i = 0; i < br->n_ports; i++) { + struct port *port = br->ports[i]; + for (j = 0; j < port->n_ifaces; j++) { + struct iface *iface = port->ifaces[j]; + iface->dp_ifidx = -1; + } + } + for (i = 0; i < ARRAY_SIZE(br->ifaces); i++) { + br->ifaces[i] = NULL; + } + + /* Open connection to datapath. 
*/ + vconn_name = xasprintf("nl:%d", br->dp_idx); + retval = vconn_open_block(vconn_name, OFP_VERSION, &vconn); + free(vconn_name); + if (retval) { + VLOG_ERR("could not open connection to nl:%d: %s", + br->dp_idx, strerror(retval)); + goto done; + } + + /* Send request, receive reply. */ + make_openflow(sizeof(struct ofp_header), OFPT_FEATURES_REQUEST, &request); + retval = vconn_transact(vconn, request, &reply); + if (retval) { + if (retval == EOF) { + VLOG_ERR("unexpected connection close talking to nl:%d", + br->dp_idx); + } else { + VLOG_ERR("error requesting features from nl:%d: %s", + br->dp_idx, strerror(retval)); + } + goto done; + } + + /* Parse reply. */ + osf = reply->data; + retval = check_ofp_message_array(&osf->header, OFPT_FEATURES_REPLY, + sizeof *osf, sizeof *osf->ports, + &n_ports); + if (retval) { + goto done; + } + for (i = 0; i < n_ports; i++) { + struct ofp_phy_port *opp = &osf->ports[i]; + int port_no = ntohs(opp->port_no); + struct iface *iface; + + /* Ignore special ports in general and OFPP_LOCAL in particular. 
*/ + sanitize_opp(opp); + if (port_no >= DP_MAX_PORTS) { + if (port_no < OFPP_MAX) { + VLOG_WARN("datapath nl:%d reports having port %d (%s), which " + "exceeds vswitchd maximum supported port number %d", + br->dp_idx, port_no, opp->name, + DP_MAX_PORTS - 1); + } + continue; + } + + iface = iface_lookup(br, (const char *) opp->name); + if (iface) { + if (iface->dp_ifidx >= 0) { + VLOG_WARN("datapath nl:%d reported interface %s twice", + br->dp_idx, opp->name); + } else if (br->ifaces[port_no]) { + VLOG_WARN("datapath nl:%d reported interface %d twice", + br->dp_idx, port_no); + } else { + iface->dp_ifidx = port_no; + br->ifaces[port_no] = iface; + } + } + if (iface_names) { + svec_add(iface_names, (const char *) opp->name); + } + } + retval = 0; + +done: + vconn_close(vconn); + ofpbuf_delete(reply); + return retval; +} + +static void +sanitize_opp(struct ofp_phy_port *opp) +{ + size_t i; + + for (i = 0; i < sizeof opp->name; i++) { + char c = opp->name[i]; + if (c && (c < 0x20 || c > 0x7e)) { + opp->name[i] = '.'; + } + } + opp->name[sizeof opp->name - 1] = '\0'; +} + +/* Bridge packet processing functions. 
*/ + +typedef void packet_handler_func(struct bridge *, void *); +static packet_handler_func process_echo_request; +static packet_handler_func process_packet_in; + +static void +bridge_process_msg(struct bridge *br, struct ofpbuf *msg) +{ + struct processor { + uint8_t type; + packet_handler_func *handler; + }; + static const struct processor processors[] = { + { + OFPT_ECHO_REQUEST, + process_echo_request + }, + { + OFPT_PACKET_IN, + process_packet_in + }, + { + OFPT_PORT_STATUS, + NULL + }, + { + OFPT_FLOW_EXPIRED, + NULL + }, + }; + const size_t n_processors = ARRAY_SIZE(processors); + const struct processor *p; + struct ofp_header *oh; + + oh = msg->data; + for (p = processors; p < &processors[n_processors]; p++) { + if (oh->type == p->type) { + if (p->handler) { + (p->handler)(br, msg->data); + } + return; + } + } + if (VLOG_IS_DBG_ENABLED()) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); + char *p = ofp_to_string(msg->data, msg->size, 2); + VLOG_DBG_RL(&rl, "bridge %s: OpenFlow packet ignored: %s", + br->name, p); + free(p); + } +} + +static void +queue_tx(struct bridge *br, struct ofpbuf *msg) +{ + int retval = rconn_send(br->rconn, msg, &br->txqlen); + if (retval) { + ofpbuf_delete(msg); + /* No point in logging: rconn_send() only fails due to disconnection, + * and disconnect from secchan will cause all kinds of log messages + * anyhow. */ + } +} + +static void +process_packet_in(struct bridge *br, void *opi_) +{ + struct ofp_packet_in *opi = opi_; + uint16_t in_port = ntohs(opi->in_port); + uint16_t out_port = OFPP_FLOOD; + + size_t pkt_len; + struct ofpbuf pkt; + struct flow flow; + + if (check_ofp_message_array(&opi->header, OFPT_PACKET_IN, + offsetof(struct ofp_packet_in, data), + 1, &pkt_len)) { + return; + } + + /* Extract flow data from 'opi' into 'flow'. 
*/ + pkt.data = opi->data; + pkt.size = pkt_len; + flow_extract(&pkt, in_port, &flow); + + if (br->ml) { + if (mac_learning_learn(br->ml, flow.dl_src, in_port)) { + /* The log messages here could actually be useful in debugging, so + * keep the rate limit relatively high. */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); + VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is on " + "port %"PRIu16, + br->name, ETH_ADDR_ARGS(flow.dl_src), in_port); + } + } + + if (br->ml) { + out_port = mac_learning_lookup(br->ml, flow.dl_dst); + } + + if (in_port == out_port) { + /* Don't send out packets on their input ports. */ + goto drop_it; + } else if (br->flow_idle_time >= 0 + && (!br->ml || out_port != OFPP_FLOOD)) { + /* The output port is known, or we always flood everything, so add a + * new flow. */ + queue_tx(br, make_add_simple_flow(&flow, ntohl(opi->buffer_id), + out_port, br->flow_idle_time)); + + /* If the switch didn't buffer the packet, we need to send a copy. */ + if (ntohl(opi->buffer_id) == UINT32_MAX) { + queue_tx(br, + make_unbuffered_packet_out(&pkt, in_port, out_port)); + } + } else { + /* We don't know that MAC, or we don't set up flows. Send along the + * packet without setting up a flow. */ + struct ofpbuf *b; + if (ntohl(opi->buffer_id) == UINT32_MAX) { + b = make_unbuffered_packet_out(&pkt, in_port, out_port); + } else { + b = make_buffered_packet_out(ntohl(opi->buffer_id), + in_port, out_port); + } + queue_tx(br, b); + } + return; + +drop_it: + /* Set up a flow to drop packets, or just drop the packet if we don't set + * up flows at all. */ + if (br->flow_idle_time >= 0) { + queue_tx(br, make_add_flow(&flow, ntohl(opi->buffer_id), + br->flow_idle_time, 0)); + } + return; +} + +static void +process_echo_request(struct bridge *br, void *rq_) +{ + struct ofp_header *rq = rq_; + queue_tx(br, make_echo_reply(rq)); +} + +/* Port functions. 
*/ + +static void +port_create(struct bridge *br, const char *name) +{ + struct port *port; + + port = xcalloc(1, sizeof *port); + port->bridge = br; + port->port_idx = br->n_ports; + port->vlan = 0; + port->name = xstrdup(name); + + if (br->n_ports >= br->allocated_ports) { + br->ports = x2nrealloc(br->ports, &br->allocated_ports, + sizeof *br->ports); + } + br->ports[br->n_ports++] = port; + + VLOG_WARN("created port %s on bridge %s", port->name, br->name); +} + +static void +port_reconfigure(struct port *port) +{ + struct svec old_ifaces, new_ifaces; + size_t i; + + /* Collect old and new interfaces. */ + svec_init(&old_ifaces); + for (i = 0; i < port->n_ifaces; i++) { + svec_add(&old_ifaces, port->ifaces[i]->name); + } + if (cfg_has_section("bonding.%s", port->name)) { + cfg_get_all_keys(&new_ifaces, "bonding.%s.slave", port->name); + } else { + svec_init(&new_ifaces); + svec_add(&new_ifaces, port->name); + } + + /* Get rid of deleted interfaces and add new interfaces. */ + for (i = 0; i < port->n_ifaces; i++) { + struct iface *iface = port->ifaces[i]; + if (!svec_contains(&new_ifaces, iface->name)) { + iface_destroy(iface); + } else { + i++; + } + } + for (i = 0; i < new_ifaces.n; i++) { + const char *name = new_ifaces.names[i]; + if (!svec_contains(&old_ifaces, name)) { + iface_create(port, name); + } + } +} + +static void +port_destroy(struct port *port) +{ + if (port) { + struct bridge *br = port->bridge; + struct port *del; + size_t i; + + del = br->ports[port->port_idx] = br->ports[--br->n_ports]; + del->port_idx = port->port_idx; + + for (i = 0; i < port->n_ifaces; i++) { + iface_destroy(port->ifaces[i]); + } + free(port->ifaces); + free(port); + } +} + +/* Interface functions. 
*/ + +static void +iface_create(struct port *port, const char *name) +{ + struct iface *iface = xcalloc(1, sizeof *iface); + iface->port = port; + iface->port_ifidx = port->n_ifaces; + iface->name = xstrdup(name); + iface->dp_ifidx = -1; + + if (port->n_ifaces >= port->allocated_ifaces) { + port->ifaces = x2nrealloc(port->ifaces, &port->allocated_ifaces, + sizeof *port->ifaces); + } + port->ifaces[port->n_ifaces++] = iface; + + VLOG_DBG("attached network device %s to port %s", iface->name, port->name); +} + +static void +iface_destroy(struct iface *iface) +{ + if (iface) { + struct port *port = iface->port; + port->ifaces[iface->port_ifidx] = port->ifaces[--port->n_ifaces]; + free(iface->name); + free(iface); + } +} + +static struct iface * +iface_lookup(const struct bridge *br, const char *name) +{ + size_t i, j; + + for (i = 0; i < br->n_ports; i++) { + struct port *port = br->ports[i]; + for (j = 0; j < port->n_ifaces; j++) { + struct iface *iface = port->ifaces[j]; + if (!strcmp(iface->name, name)) { + return iface; + } + } + } + return NULL; +} diff --git a/vswitchd/bridge.h b/vswitchd/bridge.h new file mode 100644 index 00000000..684537ca --- /dev/null +++ b/vswitchd/bridge.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. 
+ */ + +#ifndef VSWITCHD_BRIDGE_H +#define VSWITCHD_BRIDGE_H 1 + +#include +#include "list.h" + +void bridge_init(void); +void bridge_reconfigure(void); +void bridge_run(void); +void bridge_wait(void); + +#endif /* bridge.h */ diff --git a/vswitchd/cfg.c b/vswitchd/cfg.c new file mode 100644 index 00000000..aa33bddb --- /dev/null +++ b/vswitchd/cfg.c @@ -0,0 +1,737 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include +#include "cfg.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dynamic-string.h" +#include "packets.h" +#include "svec.h" +#include "util.h" + +#define THIS_MODULE VLM_cfg +#include "vlog.h" + +/* List of configuration files. */ +static struct svec cfg_files = SVEC_EMPTY_INITIALIZER; + +/* Current configuration. Maintained in sorted order. */ +static struct svec cfg = SVEC_EMPTY_INITIALIZER; + +static void read_directory(const char *dir_name); +static bool has_double_dot(const char *key, size_t len); +static bool is_valid_key(const char *key, size_t len, + const char *file_name, int line_number, + const char *id); +static char *parse_section(const char *file_name, int line_number, + const char *); +static void parse_setting(const char *file_name, int line_number, + const char *section, const char *); +static void read_file(const char *file_name); +static int compare_key(const char *a, const char *b); +static char **find_key_le(const char *key); +static char **find_key_ge(const char *key); +static char *find_key(const char *); +static bool parse_mac(const char *, uint8_t mac[6]); +static bool is_key(const char *); +static bool is_int(const char *); +static bool is_bool(const char *); +static const char *extract_value(const char *key); +static const char *get_nth_value(int idx, const char *key); + +/* Adds 'file_name' to the set of files or directories that are read by + * cfg_read(). Returns 0 on success, otherwise a positive errno value if + * 'file_name' cannot be opened. + * + * If 'file_name' names a file, then cfg_read() will read it. 
If 'file_name' + * names a directory, then cfg_read() will read all of the files in that + * directory whose names consist entirely of the English letters, digits, + * periods, underscores, and hyphens and do not begin with a period. + * Subdirectories are not processed recursively. + * + * This function does not actually read the named file or directory. Use + * cfg_read() to (re)read all the configuration files. */ +int +cfg_add_file(const char *file_name) +{ + int fd; + + /* Make sure that we can open this file or directory for reading. */ + fd = open(file_name, O_RDONLY); + if (fd < 0) { + return errno; + } + close(fd); + + /* Add it to the list. */ + VLOG_WARN("using \"%s\" as a configuration file", file_name); + svec_add(&cfg_files, file_name); + return 0; +} + +/* Reads all of the configuration files or directories that have been added + * with cfg_add_file(), merges their content. Any previous configuration is + * replaced. */ +void +cfg_read(void) +{ + size_t i; + + /* Clear old configuration data. */ + svec_clear(&cfg); + + /* Read new configuration. */ + VLOG_WARN("reading configuration..."); + for (i = 0; i < cfg_files.n; i++) { + const char *fn = cfg_files.names[i]; + struct stat s; + + VLOG_DBG("reading \"%s\"", fn); + if (stat(fn, &s) < 0) { + VLOG_WARN("failed to stat \"%s\": %s", fn, strerror(errno)); + } else if (S_ISDIR(s.st_mode)) { + read_directory(fn); + } else if (S_ISREG(s.st_mode)) { + read_file(fn); + } else { + VLOG_WARN("\"%s\" is not a regular file or a directory, ignoring", + fn); + } + } + + if (VLOG_IS_DBG_ENABLED()) { + size_t i; + + VLOG_DBG("new configuration:"); + for (i = 0; i < cfg.n; i++) { + VLOG_DBG("%s", cfg.names[i]); + } + } +} + +/* Formats the printf()-style format string in the parameter 'format', which + * must be the function's last parameter, into string variable 'dst'. The + * function is responsible for freeing 'dst'. 
*/ +#define FORMAT_KEY(FORMAT, DST) \ + do { \ + va_list args__; \ + va_start(args__, FORMAT); \ + (DST) = xvasprintf(FORMAT, args__); \ + va_end(args__); \ + } while (0) + +/* Returns true if the configuration includes a key named 'key'. */ +bool +cfg_has(const char *key_, ...) +{ + char *key; + bool retval; + + FORMAT_KEY(key_, key); + retval = find_key(key) != NULL; + free(key); + return retval; +} + +/* Returns true if the configuration includes at least one key whose name + * begins with 'section' followed by a dot. */ +bool +cfg_has_section(const char *section_, ...) +{ + struct ds section; + bool retval; + va_list args; + char **p; + + ds_init(§ion); + va_start(args, section_); + ds_put_format_valist(§ion, section_, args); + ds_put_char(§ion, '.'); + va_end(args); + + p = find_key_le(ds_cstr(§ion)); + retval = *p && !strncmp(section.string, *p, section.length); + + ds_destroy(§ion); + return retval; +} + +/* Returns the number of values for the given 'key'. The return value is 0 if + * no values exist for 'key'. */ +int +cfg_count(const char *key_, ...) +{ + char *key; + int retval; + + FORMAT_KEY(key_, key); + retval = find_key_ge(key) - find_key_le(key); + free(key); + return retval; +} + +/* Initializes 'svec' to all of the immediate subsections of 'section'. For + * example, if 'section' is "bridge" and keys bridge.a, bridge.b, bridge.b.c, + * and bridge.c.x.y.z exist, then 'svec' would be initialized to a, b, and + * c. */ +void +cfg_get_subsections(struct svec *svec, const char *section_, ...) 
+{ + struct ds section; + va_list args; + char **p; + + ds_init(§ion); + va_start(args, section_); + ds_put_format_valist(§ion, section_, args); + ds_put_char(§ion, '.'); + va_end(args); + + svec_init(svec); + for (p = find_key_le(ds_cstr(§ion)); + *p && !strncmp(section.string, *p, section.length); + p++) { + const char *ss = *p + section.length; + size_t ss_len = strcspn(ss, ".="); + svec_add_nocopy(svec, xmemdup0(ss, ss_len)); + } + svec_unique(svec); + ds_destroy(§ion); +} + +/* Returns the value numbered 'idx' of 'key'. Returns a null pointer if 'idx' + * is greater than or equal to cfg_count(key). The caller must not modify or + * free the returned string or retain its value beyond the next call to + * cfg_read(). */ +const char * +cfg_get_string(int idx, const char *key_, ...) +{ + const char *retval; + char *key; + + FORMAT_KEY(key_, key); + retval = get_nth_value(idx, key); + free(key); + return retval; +} + +/* Returns the value numbered 'idx' of 'key'. Returns a null pointer if 'idx' + * is greater than or equal to cfg_count(key) or if the value 'idx' of 'key' is + * not a valid key. The caller must not modify or free the returned string or + * retain its value beyond the next call to cfg_read(). */ +const char * +cfg_get_key(int idx, const char *key_, ...) +{ + const char *value, *retval; + char *key; + + FORMAT_KEY(key_, key); + value = get_nth_value(idx, key); + retval = value && is_key(value) ? value : NULL; + free(key); + return retval; +} + +/* Returns the value numbered 'idx' of 'key', converted to an integer. Returns + * 0 if 'idx' is greater than or equal to cfg_count(key) or if the value 'idx' + * of 'key' is not a valid integer. */ +int +cfg_get_int(int idx, const char *key_, ...) +{ + const char *value; + int retval; + char *key; + + FORMAT_KEY(key_, key); + value = get_nth_value(idx, key); + retval = value && is_int(value) ? 
atoi(value) : 0; + free(key); + return retval; +} + +/* Returns the value numbered 'idx' of 'key', converted to a boolean value. + * Returns 0 if 'idx' is greater than or equal to cfg_count(key) or if the + * value 'idx' of 'key' is not a valid boolean. */ +bool +cfg_get_bool(int idx, const char *key_, ...) +{ + const char *value; + bool retval; + char *key; + + FORMAT_KEY(key_, key); + value = get_nth_value(idx, key); + retval = value && is_bool(value) ? !strcmp(value, "true") : false; + free(key); + return retval; +} + +/* Returns the value numbered 'idx' of 'key', converted to an IP address in + * network byte order. Returns 0 if 'idx' is greater than or equal to + * cfg_count(key) or if the value 'idx' of 'key' is not a valid IP address (as + * determined by inet_aton()). */ +uint32_t +cfg_get_ip(int idx, const char *key_, ...) +{ + struct in_addr addr; + const char *value; + char *key; + + FORMAT_KEY(key_, key); + value = get_nth_value(idx, key); + if (!value || !inet_aton(value, &addr)) { + addr.s_addr = htonl(0); + } + free(key); + return addr.s_addr; +} + +/* Returns the value numbered 'idx' of 'key', converted to an MAC address in + * host byte order. Returns 0 if 'idx' is greater than or equal to + * cfg_count(key) or if the value 'idx' of 'key' is not a valid MAC address in + * the format "##:##:##:##:##:##". */ +uint64_t +cfg_get_mac(int idx, const char *key_, ...) +{ + uint8_t mac[ETH_ADDR_LEN]; + const char *value; + char *key; + + FORMAT_KEY(key_, key); + value = get_nth_value(idx, key); + if (!value || !parse_mac(value, mac)) { + memset(mac, 0, sizeof mac); + } + free(key); + return eth_addr_to_uint64(mac); +} + +/* Initializes 'svec' with all of the string values of 'key'. */ +void +cfg_get_all_strings(struct svec *svec, const char *key_, ...) 
+{ + char **p, **q; + char *key; + + FORMAT_KEY(key_, key); + svec_init(svec); + for (p = find_key_le(key), q = find_key_ge(key); p < q; p++) { + svec_add(svec, extract_value(*p)); + } + free(key); +} + +/* Initializes 'svec' with all of the values of 'key' that are valid keys. + * Values of 'key' that are not valid keys are omitted. */ +void +cfg_get_all_keys(struct svec *svec, const char *key_, ...) +{ + char **p, **q; + char *key; + + FORMAT_KEY(key_, key); + svec_init(svec); + for (p = find_key_le(key), q = find_key_ge(key); p < q; p++) { + const char *value = extract_value(*p); + if (is_key(value)) { + svec_add(svec, value); + } + } + free(key); +} + +#define CC_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +#define CC_DIGIT "0123456789" +#define CC_ALNUM CC_ALPHA CC_DIGIT +#define CC_SPACE " \t\r\n\v" + +#define CC_FILE_NAME CC_ALNUM "._-" +#define CC_KEY CC_ALNUM "._-@$:+" + +static void +read_directory(const char *dir_name) +{ + DIR *dir; + struct dirent *de; + + dir = opendir(dir_name); + if (!dir) { + VLOG_ERR("failed to open \"%s\" as a directory: %s", + dir_name, strerror(errno)); + return; + } + + while ((de = readdir(dir)) != NULL) { + const char *name = de->d_name; + if (name[0] != '.' && name[strspn(name, CC_FILE_NAME)] == '\0') { + char *file_name = xasprintf("%s/%s", dir_name, name); + VLOG_DBG("reading %s in %s", name, dir_name); + read_file(file_name); + free(file_name); + } else { + VLOG_DBG("ignoring %s in %s", name, dir_name); + } + } + closedir(dir); +} + +static bool +has_double_dot(const char *key, size_t len) +{ + if (len >= 2) { + size_t i; + + for (i = 0; i < len - 1; i++) { + if (key[i] == '.' 
&& key[i + 1] == '.') { + return true; + } + } + } + return false; +} + +static bool +is_valid_key(const char *key, size_t len, + const char *file_name, int line_number, const char *id) +{ + if (!len) { + VLOG_ERR("%s:%d: missing %s name", file_name, line_number, id); + return false; + } else if (key[0] == '.') { + VLOG_ERR("%s:%d: %s name \"%.*s\" begins with invalid character '.'", + file_name, line_number, id, (int) len, key); + return false; + } else if (key[len - 1] == '.') { + VLOG_ERR("%s:%d: %s name \"%.*s\" ends with invalid character '.'", + file_name, line_number, id, (int) len, key); + return false; + } else if (has_double_dot(key, len)) { + VLOG_ERR("%s:%d: %s name \"%.*s\" contains '..', which is not allowed", + file_name, line_number, id, (int) len, key); + return false; + } else { + return true; + } +} + +static char * +parse_section(const char *file_name, int line_number, const char *s) +{ + struct ds section; + size_t len; + + ds_init(§ion); + + /* Skip [ and any white space. */ + s++; + s += strspn(s, CC_SPACE); + + /* Obtain the section name. */ + len = strspn(s, CC_KEY); + if (!is_valid_key(s, len, file_name, line_number, "section")) { + goto error; + } + ds_put_buffer(§ion, s, len); + s += len; + + /* Obtain the subsection name, if any. */ + s += strspn(s, CC_SPACE); + if (*s == '"') { + s++; + len = strspn(s, CC_KEY); + if (!is_valid_key(s, len, file_name, line_number, "subsection")) { + goto error; + } + ds_put_char(§ion, '.'); + ds_put_buffer(§ion, s, len); + s += len; + if (*s != '"') { + VLOG_ERR("%s:%d: missing '\"' following subsection name", + file_name, line_number); + goto error; + } + s++; + s += strspn(s, CC_SPACE); + } + + /* Check for ]. 
*/ + if (*s != ']') { + VLOG_ERR("%s:%d: missing ']' following section name", + file_name, line_number); + goto error; + } + s++; + s += strspn(s, CC_SPACE); + if (*s != '\0') { + VLOG_ERR("%s:%d: trailing garbage following ']'", + file_name, line_number); + goto error; + } + + return ds_cstr(§ion); + +error: + ds_destroy(§ion); + return NULL; +} + +static void +parse_setting(const char *file_name, int line_number, const char *section, + const char *s) +{ + struct ds key = DS_EMPTY_INITIALIZER; + struct ds value = DS_EMPTY_INITIALIZER; + size_t len; + + if (section) { + ds_put_format(&key, "%s.", section); + } + + /* Obtain the key. */ + len = strspn(s, CC_KEY); + if (!len) { + VLOG_ERR("%s:%d: missing key name", file_name, line_number); + goto done; + } + if (!is_valid_key(s, len, file_name, line_number, "key")) { + goto done; + } + ds_put_buffer(&key, s, len); + s += len; + + /* Skip the '='. */ + s += strspn(s, CC_SPACE); + if (*s != '=') { + VLOG_ERR("%s:%d: missing '=' following key", file_name, line_number); + goto done; + } + s++; + s += strspn(s, CC_SPACE); + + /* Obtain the value. */ + ds_put_cstr(&value, s); + while (value.length > 0 && strchr(CC_SPACE, ds_last(&value))) { + value.length--; + } + + /* Add the setting. */ + svec_add_nocopy(&cfg, xasprintf("%s=%s", ds_cstr(&key), ds_cstr(&value))); + +done: + ds_destroy(&key); + ds_destroy(&value); +} + +static void +read_file(const char *file_name) +{ + struct ds ds; + FILE *file; + char *section; + int line_number; + + /* XXX should record st_dev, st_ino and make sure that we don't read the + * same file twice, otherwise all the pairs from that file will get + * doubled. 
*/ + + file = fopen(file_name, "r"); + if (!file) { + VLOG_ERR("failed to open \"%s\": %s", file_name, strerror(errno)); + return; + } + + ds_init(&ds); + section = NULL; + line_number = 0; + while (!ds_get_line(&ds, file)) { + const char *s = ds_cstr(&ds); + size_t indent = strspn(s, CC_SPACE); + + line_number++; + s += indent; + if (*s == '#' || *s == '\0') { + /* Ignore comments and lines that contain only white space. */ + } else if (*s == '[') { + if (!indent) { + free(section); + section = parse_section(file_name, line_number, s); + } else { + VLOG_ERR("%s:%d: ignoring indented section header", + file_name, line_number); + } + } else if (indent && !section) { + VLOG_ERR("%s:%d: ignoring indented line outside any section", + file_name, line_number); + } else { + if (!indent) { + free(section); + section = NULL; + } + parse_setting(file_name, line_number, section, s); + } + } + ds_destroy(&ds); + + svec_sort(&cfg); + svec_terminate(&cfg); +} + +static int +compare_key(const char *a, const char *b) +{ + for (;;) { + int ac = *a == '=' ? '\0' : *a; + int bc = *b == '=' ? '\0' : *b; + if (ac != bc) { + return ac < bc ? -1 : 1; + } else if (!ac) { + return 0; + } + a++; + b++; + } +} + +/* Returns the address of the greatest configuration string with a key less + * than or equal to 'key'. Returns the address of the null terminator if all + * configuration strings are greater than 'key'. */ +static char ** +find_key_le(const char *key) +{ + int low = 0; + int len = cfg.n; + while (len > 0) { + int half = len >> 1; + int middle = low + half; + if (compare_key(cfg.names[middle], key) < 0) { + low = middle + 1; + len -= half + 1; + } else { + len = half; + } + } + return &cfg.names[low]; +} + +/* Returns the address of the least configuration string with a key greater + * than or equal to 'key'. Returns the address of the null terminator if all + * configuration strings are less than 'key'. 
*/ +static char ** +find_key_ge(const char *key) +{ + int low = 0; + int len = cfg.n; + while (len > 0) { + int half = len >> 1; + int middle = low + half; + if (compare_key(cfg.names[middle], key) > 0) { + len = half; + } else { + low = middle + 1; + len -= half + 1; + } + } + return &cfg.names[low]; +} + +static char * +find_key(const char *key) +{ + char **p = find_key_le(key); + return p < &cfg.names[cfg.n] && !compare_key(*p, key) ? *p : NULL; +} + +static bool +parse_mac(const char *s, uint8_t mac[6]) +{ + return sscanf(s, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8, + &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) == 6; +} + +static bool +is_key(const char *s) +{ + return *s && s[strspn(s, CC_KEY)] == '\0'; +} + +static bool +is_int(const char *s) +{ + return *s && s[strspn(s, CC_DIGIT)] == '\0'; +} + +static bool +is_bool(const char *s) +{ + return !strcmp(s, "true") || !strcmp(s, "false"); +} + +static const char * +extract_value(const char *key) +{ + const char *p = strchr(key, '='); + return p ? p + 1 : NULL; +} + +static const char * +get_nth_value(int idx, const char *key) +{ + char **p = find_key_le(key); + char **q = find_key_ge(key); + return idx < q - p ? 
extract_value(key) : NULL; +} + +#if 0 +static bool +is_type(const char *s, enum cfg_flags flags) +{ + uint8_t mac[ETH_ADDR_LEN]; + struct in_addr addr; + + return (flags & CFG_STRING + || (flags & CFG_KEY && is_key(s)) + || (flags & CFG_INT && is_int(s)) + || (flags & CFG_BOOL && is_bool(s)) + || (flags & CFG_IP && inet_aton(s, &addr)) + || (flags & CFG_MAC && parse_mac(s, mac))); +} +#endif diff --git a/vswitchd/cfg.h b/vswitchd/cfg.h new file mode 100644 index 00000000..cb0953e5 --- /dev/null +++ b/vswitchd/cfg.h @@ -0,0 +1,78 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#ifndef VSWITCHD_CFG_H +#define VSWITCHD_CFG_H 1 + +#include +#include +#include "compiler.h" + +struct svec; + +int cfg_add_file(const char *file_name); +void cfg_read(void); + +void cfg_get_subsections(struct svec *, const char *, ...) PRINTF_FORMAT(2, 3); + +enum cfg_flags { + /* Types allowed. */ + CFG_STRING = 1 << 0, /* Arbitrary content. */ + CFG_KEY = 1 << 0, /* Valid key name. */ + CFG_INT = 1 << 2, /* Integer value. */ + CFG_BOOL = 1 << 3, /* Boolean. */ + CFG_IP = 1 << 4, /* IPv4 address. */ + CFG_MAC = 1 << 5, /* MAC address. */ + + /* Number allowed. */ + CFG_REQUIRED = 1 << 6, /* At least one value allowed. */ + CFG_MULTIPLE = 1 << 7 /* More than one value allowed. */ +}; +void cfg_register(const char *key_spec, enum cfg_flags); + +bool cfg_has(const char *key, ...) PRINTF_FORMAT(1, 2); +bool cfg_has_section(const char *key, ...) PRINTF_FORMAT(1, 2); +int cfg_count(const char *key, ...) PRINTF_FORMAT(1, 2); + +const char *cfg_get_string(int idx, const char *key, ...) PRINTF_FORMAT(2, 3); +const char *cfg_get_key(int idx, const char *key, ...) PRINTF_FORMAT(2, 3); +int cfg_get_int(int idx, const char *key, ...) PRINTF_FORMAT(2, 3); +bool cfg_get_bool(int idx, const char *key, ...) PRINTF_FORMAT(2, 3); +uint32_t cfg_get_ip(int idx, const char *key, ...) PRINTF_FORMAT(2, 3); +uint64_t cfg_get_mac(int idx, const char *key, ...) PRINTF_FORMAT(2, 3); + +void cfg_get_all_strings(struct svec *, const char *key, ...) 
+ PRINTF_FORMAT(2, 3); +void cfg_get_all_keys(struct svec *, const char *key, ...) PRINTF_FORMAT(2, 3); + +#endif /* vswitchd/conf.h */ diff --git a/vswitchd/vswitchd.8.in b/vswitchd/vswitchd.8.in new file mode 100644 index 00000000..e69de29b diff --git a/vswitchd/vswitchd.c b/vswitchd/vswitchd.c new file mode 100644 index 00000000..36925158 --- /dev/null +++ b/vswitchd/vswitchd.c @@ -0,0 +1,204 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "bridge.h" +#include "cfg.h" +#include "command-line.h" +#include "compiler.h" +#include "daemon.h" +#include "fault.h" +#include "poll-loop.h" +#include "process.h" +#include "signals.h" +#include "timeval.h" +#include "util.h" +#include "vconn-ssl.h" +#include "vconn.h" +#include "vlog-socket.h" + +#include "vlog.h" +#define THIS_MODULE VLM_vswitchd + +static void parse_options(int argc, char *argv[]); +static void usage(void) NO_RETURN; + +static void reconfigure(void); + +int +main(int argc, char *argv[]) +{ + struct signal *sighup; + int retval; + + set_program_name(argv[0]); + register_fault_handlers(); + time_init(); + vlog_init(); + parse_options(argc, argv); + signal(SIGPIPE, SIG_IGN); + sighup = signal_register(SIGHUP); + process_init(); + + die_if_already_running(); + daemonize(); + + retval = vlog_server_listen(NULL, NULL); + if (retval) { + ofp_fatal(retval, "could not listen for vlog connections"); + } + + bridge_init(); + reconfigure(); + + for (;;) { + if (signal_poll(sighup)) { + reconfigure(); + } + bridge_run(); + + signal_wait(sighup); + bridge_wait(); + poll_block(); + } + + return 0; +} + +static void +reconfigure(void) +{ + cfg_read(); + bridge_reconfigure(); +} + +static void +parse_options(int argc, char *argv[]) +{ + enum { + OPT_PEER_CA_CERT, + VLOG_OPTION_ENUMS + }; + static struct option long_options[] = { + {"config", required_argument, 0, 'F'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + DAEMON_LONG_OPTIONS, + VLOG_LONG_OPTIONS, +#ifdef HAVE_OPENSSL + VCONN_SSL_LONG_OPTIONS + {"peer-ca-cert", required_argument, 0, OPT_PEER_CA_CERT}, +#endif + {0, 0, 0, 0}, + }; + char *short_options = 
long_options_to_short_options(long_options); + bool configured = false; + + for (;;) { + int error; + int c; + + c = getopt_long(argc, argv, short_options, long_options, NULL); + if (c == -1) { + break; + } + + switch (c) { + case 'F': + configured = true; + error = cfg_add_file(optarg); + if (error) { + ofp_fatal(error, "failed to add configuration file or " + "directory \"%s\"", optarg); + } + break; + + case 'H': + case 'h': + usage(); + + case 'V': + printf("%s "VERSION" compiled "__DATE__" "__TIME__"\n", argv[0]); + exit(EXIT_SUCCESS); + + VLOG_OPTION_HANDLERS + DAEMON_OPTION_HANDLERS + VCONN_SSL_OPTION_HANDLERS + +#ifdef HAVE_OPENSSL + case OPT_PEER_CA_CERT: + vconn_ssl_set_peer_ca_cert_file(optarg); + break; +#endif + + case '?': + exit(EXIT_FAILURE); + + default: + abort(); + } + } + free(short_options); + + if (!configured) { + ofp_fatal(0, "at least one -F or --config option is required"); + } + if (optind < argc) { + ofp_fatal(0, "non-option arguments not accepted; use --help for help"); + } +} + +static void +usage(void) +{ + printf("%s: virtual switch daemon\n" + "usage: %s [OPTIONS]\n", + program_name, program_name); + printf("\nConfiguration options (must specify at least one):\n" + " -F, --config=FILE|DIR reads configuration from FILE or DIR\n"); + daemon_usage(); + vlog_usage(); + printf("\nOther options:\n" + " -h, --help display this help message\n" + " -V, --version display version information\n"); + exit(EXIT_SUCCESS); +}