X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=vswitchd%2Fovs-brcompatd.c;h=df9332f3aaf38bc6e73b4f293392b4eed93864c2;hb=2702241699d0539d6ff4a2d44c5bc0ab9d7f7fc8;hp=93d9469bda82836a6bcddda132de540c0fe9a984;hpb=064af42167bf4fc9aaea2702d80ce08074b889c0;p=openvswitch diff --git a/vswitchd/ovs-brcompatd.c b/vswitchd/ovs-brcompatd.c index 93d9469b..df9332f3 100644 --- a/vswitchd/ovs-brcompatd.c +++ b/vswitchd/ovs-brcompatd.c @@ -1,22 +1,21 @@ -/* Copyright (c) 2008, 2009 Nicira Networks - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ #include +#include #include #include #include @@ -29,67 +28,55 @@ #include #include #include +#include #include +#include #include #include -#include "cfg.h" #include "command-line.h" #include "coverage.h" #include "daemon.h" #include "dirs.h" -#include "dpif.h" +#include "dynamic-string.h" #include "fatal-signal.h" -#include "fault.h" +#include "json.h" #include "leak-checker.h" #include "netdev.h" #include "netlink.h" +#include "netlink-notifier.h" +#include "netlink-socket.h" #include "ofpbuf.h" #include "openvswitch/brcompat-netlink.h" +#include "packets.h" #include "poll-loop.h" #include "process.h" +#include "rtnetlink-link.h" #include "signals.h" +#include "sset.h" #include "svec.h" #include "timeval.h" #include "unixctl.h" #include "util.h" - #include "vlog.h" -#define THIS_MODULE VLM_brcompatd +VLOG_DEFINE_THIS_MODULE(brcompatd); /* xxx Just hangs if datapath is rmmod/insmod. Learn to reconnect? */ -/* Actions to modify bridge compatibility configuration. */ -enum bmc_action { - BMC_ADD_DP, - BMC_DEL_DP, - BMC_ADD_PORT, - BMC_DEL_PORT -}; - static void parse_options(int argc, char *argv[]); static void usage(void) NO_RETURN; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60); -/* Maximum number of milliseconds to wait for the config file to be - * unlocked. If set to zero, no waiting will occur. */ -static int lock_timeout = 500; - -/* Maximum number of milliseconds to wait before pruning port entries that - * no longer exist. If set to zero, ports are never pruned. */ -static int prune_timeout = 5000; +/* --appctl: Absolute path to ovs-appctl. */ +static char *appctl_program; -/* Config file shared with ovs-vswitchd (usually ovs-vswitchd.conf). */ -static char *config_file; +/* --vsctl: Absolute path to ovs-vsctl. */ +static char *vsctl_program; -/* Command to run (via system()) to reload the ovs-vswitchd configuration - * file. */ -static char *reload_command; - -/* Netlink socket to listen for interface changes. */ -static struct nl_sock *rtnl_sock; +/* Options that we should generally pass to ovs-vsctl. */ +#define VSCTL_OPTIONS "--timeout=5", "-vconsole:warn" /* Netlink socket to bridge compatibility kernel module. */ static struct nl_sock *brc_sock; @@ -101,10 +88,90 @@ static const struct nl_policy brc_multicast_policy[] = { [BRC_GENL_A_MC_GROUP] = {.type = NL_A_U32 } }; -static const struct nl_policy rtnlgrp_link_policy[] = { - [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, - [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, -}; +static char * +capture_vsctl_valist(const char *arg0, va_list args) +{ + char *stdout_log, *stderr_log; + enum vlog_level log_level; + struct svec argv; + int status; + char *msg; + + /* Compose arguments. */ + svec_init(&argv); + svec_add(&argv, arg0); + for (;;) { + const char *arg = va_arg(args, const char *); + if (!arg) { + break; + } + svec_add(&argv, arg); + } + svec_terminate(&argv); + + /* Run process. */ + if (process_run_capture(argv.names, &stdout_log, &stderr_log, SIZE_MAX, + &status)) { + svec_destroy(&argv); + return NULL; + } + + /* Log results. */ + if (WIFEXITED(status)) { + int code = WEXITSTATUS(status); + log_level = code == 0 ? VLL_DBG : code == 1 ? VLL_WARN : VLL_ERR; + } else { + log_level = VLL_ERR; + } + msg = process_status_msg(status); + VLOG(log_level, "ovs-vsctl exited (%s)", msg); + if (stdout_log && *stdout_log) { + VLOG(log_level, "ovs-vsctl wrote to stdout:\n%s\n", stdout_log); + } + if (stderr_log && *stderr_log) { + VLOG(log_level, "ovs-vsctl wrote to stderr:\n%s\n", stderr_log); + } + free(msg); + + svec_destroy(&argv); + + free(stderr_log); + if (WIFEXITED(status) && !WEXITSTATUS(status)) { + return stdout_log; + } else { + free(stdout_log); + return NULL; + } +} + +static char * SENTINEL(0) +capture_vsctl(const char *arg0, ...) +{ + char *stdout_log; + va_list args; + + va_start(args, arg0); + stdout_log = capture_vsctl_valist(arg0, args); + va_end(args); + + return stdout_log; +} + +static bool SENTINEL(0) +run_vsctl(const char *arg0, ...) +{ + char *stdout_log; + va_list args; + bool ok; + + va_start(args, arg0); + stdout_log = capture_vsctl_valist(arg0, args); + va_end(args); + + ok = stdout_log != NULL; + free(stdout_log); + return ok; +} static int lookup_brc_multicast_group(int *multicast_group) @@ -114,12 +181,12 @@ lookup_brc_multicast_group(int *multicast_group) struct nlattr *attrs[ARRAY_SIZE(brc_multicast_policy)]; int retval; - retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock); + retval = nl_sock_create(NETLINK_GENERIC, &sock); if (retval) { return retval; } ofpbuf_init(&request, 0); - nl_msg_put_genlmsghdr(&request, sock, 0, brc_family, + nl_msg_put_genlmsghdr(&request, 0, brc_family, NLM_F_REQUEST, BRC_GENL_C_QUERY_MC, 1); retval = nl_sock_transact(sock, &request, &reply); ofpbuf_uninit(&request); @@ -159,500 +226,621 @@ brc_open(struct nl_sock **sock) return retval; } - retval = nl_sock_create(NETLINK_GENERIC, multicast_group, 0, 0, sock); + retval = nl_sock_create(NETLINK_GENERIC, sock); if (retval) { return retval; } - return 0; + retval = nl_sock_join_mcgroup(*sock, multicast_group); + if (retval) { + nl_sock_destroy(*sock); + *sock = NULL; + } + return retval; } static const struct nl_policy brc_dp_policy[] = { [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, }; -static bool -bridge_exists(const char *name) -{ - return cfg_has_section("bridge.%s", name); -} - static int -rewrite_and_reload_config(void) +parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name, + const char **port_name, uint64_t *count, uint64_t *skip) { - if (cfg_is_dirty()) { - int error1 = cfg_write(); - int error2 = cfg_read(); - long long int reload_start = time_msec(); - int error3 = system(reload_command); - long long int elapsed = time_msec() - reload_start; - COVERAGE_INC(brcompatd_reload); - if (elapsed > 0) { - VLOG_INFO("reload command executed in %lld ms", elapsed); - } - if (error3 == -1) { - VLOG_ERR("failed to execute reload command: %s", strerror(errno)); - } else if (error3 != 0) { - char *msg = process_status_msg(error3); - VLOG_ERR("reload command exited with error (%s)", msg); - free(msg); - } - return error1 ? error1 : error2 ? error2 : error3 ? ECHILD : 0; + static const struct nl_policy policy[] = { + [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING, .optional = true }, + [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING, .optional = true }, + [BRC_GENL_A_FDB_COUNT] = { .type = NL_A_U64, .optional = true }, + [BRC_GENL_A_FDB_SKIP] = { .type = NL_A_U64, .optional = true }, + }; + struct nlattr *attrs[ARRAY_SIZE(policy)]; + + if (!nl_policy_parse(buffer, NLMSG_HDRLEN + GENL_HDRLEN, policy, + attrs, ARRAY_SIZE(policy)) + || (br_name && !attrs[BRC_GENL_A_DP_NAME]) + || (port_name && !attrs[BRC_GENL_A_PORT_NAME]) + || (count && !attrs[BRC_GENL_A_FDB_COUNT]) + || (skip && !attrs[BRC_GENL_A_FDB_SKIP])) { + return EINVAL; + } + + *seq = ((struct nlmsghdr *) buffer->data)->nlmsg_seq; + if (br_name) { + *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]); + } + if (port_name) { + *port_name = nl_attr_get_string(attrs[BRC_GENL_A_PORT_NAME]); + } + if (count) { + *count = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_COUNT]); + } + if (skip) { + *skip = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_SKIP]); } return 0; } -/* Go through the configuration file and remove any ports that no longer - * exist associated with a bridge. */ -static void -prune_ports(void) +/* Composes and returns a reply to a request made by the datapath with error + * code 'error'. The caller may add additional attributes to the message, then + * it may send it with send_reply(). */ +static struct ofpbuf * +compose_reply(int error) { - int i, j; - int error; - struct svec bridges, delete; + struct ofpbuf *reply = ofpbuf_new(4096); + nl_msg_put_genlmsghdr(reply, 32, brc_family, NLM_F_REQUEST, + BRC_GENL_C_DP_RESULT, 1); + nl_msg_put_u32(reply, BRC_GENL_A_ERR_CODE, error); + return reply; +} - if (cfg_lock(NULL, 0)) { - /* Couldn't lock config file. */ - return; +/* Sends 'reply' to the datapath, using sequence number 'nlmsg_seq', and frees + * it. */ +static void +send_reply(struct ofpbuf *reply, uint32_t nlmsg_seq) +{ + int retval = nl_sock_send_seq(brc_sock, reply, nlmsg_seq, false); + if (retval) { + VLOG_WARN_RL(&rl, "replying to brcompat request: %s", + strerror(retval)); } + ofpbuf_delete(reply); +} - svec_init(&bridges); - svec_init(&delete); - cfg_get_subsections(&bridges, "bridge"); - for (i=0; i, but + * the 'port_hi' member was only introduced in Linux 2.6.26 and so systems + * with old header files won't have it. */ + struct __fdb_entry { + __u8 mac_addr[6]; + __u8 port_no; + __u8 is_local; + __u32 ageing_timer_value; + __u8 port_hi; + __u8 pad0; + __u16 unused; }; - struct nlattr *attrs[ARRAY_SIZE(policy)]; - if (!nl_policy_parse(buffer, NLMSG_HDRLEN + GENL_HDRLEN, policy, - attrs, ARRAY_SIZE(policy)) - || (port_name && !attrs[BRC_GENL_A_PORT_NAME])) { - return EINVAL; + struct mac { + uint8_t addr[6]; + }; + struct mac *local_macs; + int n_local_macs; + int i; + + /* Impedance matching between the vswitchd and Linux kernel notions of what + * a bridge is. The kernel only handles a single VLAN per bridge, but + * vswitchd can deal with all the VLANs on a single bridge. We have to + * pretend that the former is the case even though the latter is the + * implementation. */ + const char *linux_name; /* Name used by brctl. */ + int br_vlan; /* VLAN tag. */ + struct sset ifaces; + + struct ofpbuf query_data; + const char *iface_name; + struct ofpbuf *reply; + uint64_t count, skip; + char *br_name; + char *output; + char *save_ptr; + uint32_t seq; + int error; + + /* Parse the command received from brcompat. */ + error = parse_command(buffer, &seq, &linux_name, NULL, &count, &skip); + if (error) { + return error; } - *seq = ((struct nlmsghdr *) buffer->data)->nlmsg_seq; - *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]); - if (port_name) { - *port_name = nl_attr_get_string(attrs[BRC_GENL_A_PORT_NAME]); + /* Figure out vswitchd bridge and VLAN. */ + br_name = linux_bridge_to_ovs_bridge(linux_name, &br_vlan); + if (!br_name) { + error = EINVAL; + send_simple_reply(seq, error); + return error; } + + /* Fetch the forwarding database using ovs-appctl. */ + output = capture_vsctl(appctl_program, "fdb/show", br_name, + (char *) NULL); + if (!output) { + error = ECHILD; + send_simple_reply(seq, error); + return error; + } + + /* Fetch the MAC address for each interface on the bridge, so that we can + * fill in the is_local field in the response. */ + sset_init(&ifaces); + get_bridge_ifaces(linux_name, &ifaces); + local_macs = xmalloc(sset_count(&ifaces) * sizeof *local_macs); + n_local_macs = 0; + SSET_FOR_EACH (iface_name, &ifaces) { + struct mac *mac = &local_macs[n_local_macs]; + struct netdev *netdev; + + error = netdev_open(iface_name, "system", &netdev); + if (!error) { + if (!netdev_get_etheraddr(netdev, mac->addr)) { + n_local_macs++; + } + netdev_close(netdev); + } + } + sset_destroy(&ifaces); + + /* Parse the response from ovs-appctl and convert it to binary format to + * pass back to the kernel. */ + ofpbuf_init(&query_data, sizeof(struct __fdb_entry) * 8); + save_ptr = NULL; + strtok_r(output, "\n", &save_ptr); /* Skip header line. */ + while (count > 0) { + struct __fdb_entry *entry; + int port, vlan, age; + uint8_t mac[ETH_ADDR_LEN]; + char *line; + bool is_local; + + line = strtok_r(NULL, "\n", &save_ptr); + if (!line) { + break; + } + + if (sscanf(line, "%d %d "ETH_ADDR_SCAN_FMT" %d", + &port, &vlan, ETH_ADDR_SCAN_ARGS(mac), &age) + != 2 + ETH_ADDR_SCAN_COUNT + 1) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_INFO_RL(&rl, "fdb/show output has invalid format: %s", line); + continue; + } + + if (vlan != br_vlan) { + continue; + } + + if (skip > 0) { + skip--; + continue; + } + + /* Is this the MAC address of an interface on the bridge? */ + is_local = false; + for (i = 0; i < n_local_macs; i++) { + if (eth_addr_equals(local_macs[i].addr, mac)) { + is_local = true; + break; + } + } + + entry = ofpbuf_put_uninit(&query_data, sizeof *entry); + memcpy(entry->mac_addr, mac, ETH_ADDR_LEN); + entry->port_no = port & 0xff; + entry->is_local = is_local; + entry->ageing_timer_value = age * HZ; + entry->port_hi = (port & 0xff00) >> 8; + entry->pad0 = 0; + entry->unused = 0; + count--; + } + free(output); + + /* Compose and send reply to datapath. */ + reply = compose_reply(0); + nl_msg_put_unspec(reply, BRC_GENL_A_FDB_DATA, + query_data.data, query_data.size); + send_reply(reply, seq); + + /* Free memory. */ + ofpbuf_uninit(&query_data); + free(local_macs); + return 0; } static void -send_reply(uint32_t seq, int error) +send_ifindex_reply(uint32_t seq, char *output) { - struct ofpbuf msg; - int retval; - - /* Compose reply. */ - ofpbuf_init(&msg, 0); - nl_msg_put_genlmsghdr(&msg, brc_sock, 32, brc_family, NLM_F_REQUEST, - BRC_GENL_C_DP_RESULT, 1); - ((struct nlmsghdr *) msg.data)->nlmsg_seq = seq; - nl_msg_put_u32(&msg, BRC_GENL_A_ERR_CODE, error); + size_t allocated_indices; + char *save_ptr = NULL; + struct ofpbuf *reply; + const char *iface; + size_t n_indices; + int *indices; + + indices = NULL; + n_indices = allocated_indices = 0; + for (iface = strtok_r(output, " \t\r\n", &save_ptr); iface; + iface = strtok_r(NULL, " \t\r\n", &save_ptr)) { + int ifindex; + + if (n_indices >= allocated_indices) { + indices = x2nrealloc(indices, &allocated_indices, sizeof *indices); + } - /* Send reply. */ - retval = nl_sock_send(brc_sock, &msg, false); - if (retval) { - VLOG_WARN_RL(&rl, "replying to brcompat request: %s", - strerror(retval)); + ifindex = if_nametoindex(iface); + if (ifindex) { + indices[n_indices++] = ifindex; + } } - ofpbuf_uninit(&msg); + + /* Compose and send reply. */ + reply = compose_reply(0); + nl_msg_put_unspec(reply, BRC_GENL_A_IFINDEXES, + indices, n_indices * sizeof *indices); + send_reply(reply, seq); + + /* Free memory. */ + free(indices); } static int -handle_bridge_cmd(struct ofpbuf *buffer, bool add) +handle_get_bridges_cmd(struct ofpbuf *buffer) { - const char *br_name; + char *output; uint32_t seq; int error; - error = parse_command(buffer, &seq, &br_name, NULL); - if (!error) { - error = add ? add_bridge(br_name) : del_bridge(br_name); - if (!error) { - error = rewrite_and_reload_config(); - } - send_reply(seq, error); + /* Parse Netlink command. + * + * The command doesn't actually have any arguments, but we need the + * sequence number to send the reply. */ + error = parse_command(buffer, &seq, NULL, NULL, NULL, NULL); + if (error) { + return error; } - return error; -} -static const struct nl_policy brc_port_policy[] = { - [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, - [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING }, -}; + output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-br", (char *) NULL); + if (!output) { + return ENODEV; + } -static void -del_port(const char *br_name, const char *port_name) -{ - cfg_del_entry("bridge.%s.port=%s", br_name, port_name); - cfg_del_match("bonding.*.slave=%s", port_name); - cfg_del_match("vlan.%s.*", port_name); + send_ifindex_reply(seq, output); + free(output); + return 0; } static int -handle_port_cmd(struct ofpbuf *buffer, bool add) +handle_get_ports_cmd(struct ofpbuf *buffer) { - const char *cmd_name = add ? "add-if" : "del-if"; - const char *br_name, *port_name; + const char *linux_name; uint32_t seq; + char *output; int error; - error = parse_command(buffer, &seq, &br_name, &port_name); - if (!error) { - if (!bridge_exists(br_name)) { - VLOG_WARN("%s %s %s: no bridge named %s", - cmd_name, br_name, port_name, br_name); - error = EINVAL; - } else if (!netdev_exists(port_name)) { - VLOG_WARN("%s %s %s: no network device named %s", - cmd_name, br_name, port_name, port_name); - error = EINVAL; - } else { - if (add) { - cfg_add_entry("bridge.%s.port=%s", br_name, port_name); - } else { - del_port(br_name, port_name); - } - VLOG_INFO("%s %s %s: success", cmd_name, br_name, port_name); - error = rewrite_and_reload_config(); - } - send_reply(seq, error); + /* Parse Netlink command. */ + error = parse_command(buffer, &seq, &linux_name, NULL, NULL, NULL); + if (error) { + return error; } - return error; + output = capture_vsctl(vsctl_program, VSCTL_OPTIONS, "list-ports", linux_name, + (char *) NULL); + if (!output) { + return ENODEV; + } + + send_ifindex_reply(seq, output); + free(output); + return 0; } -static int -brc_recv_update(void) +static bool +brc_recv_update__(struct ofpbuf *buffer) { - int retval; - struct ofpbuf *buffer; - struct genlmsghdr *genlmsghdr; + for (;;) { + int retval = nl_sock_recv(brc_sock, buffer, false); + switch (retval) { + case 0: + if (nl_msg_nlmsgerr(buffer, NULL) + || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE) { + break; + } + return true; + case ENOBUFS: + break; - buffer = NULL; - do { - ofpbuf_delete(buffer); - retval = nl_sock_recv(brc_sock, &buffer, false); - } while (retval == ENOBUFS - || (!retval - && (nl_msg_nlmsgerr(buffer, NULL) - || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE))); - if (retval) { - if (retval != EAGAIN) { + case EAGAIN: + return false; + + default: VLOG_WARN_RL(&rl, "brc_recv_update: %s", strerror(retval)); + return false; } - return retval; } +} - genlmsghdr = nl_msg_genlmsghdr(buffer); +static void +brc_recv_update(void) +{ + struct genlmsghdr *genlmsghdr; + uint64_t buffer_stub[1024 / 8]; + struct ofpbuf buffer; + + ofpbuf_use_stub(&buffer, buffer_stub, sizeof buffer_stub); + if (!brc_recv_update__(&buffer)) { + goto error; + } + + genlmsghdr = nl_msg_genlmsghdr(&buffer); if (!genlmsghdr) { VLOG_WARN_RL(&rl, "received packet too short for generic NetLink"); goto error; } - if (nl_msg_nlmsghdr(buffer)->nlmsg_type != brc_family) { + if (nl_msg_nlmsghdr(&buffer)->nlmsg_type != brc_family) { VLOG_DBG_RL(&rl, "received type (%"PRIu16") != brcompat family (%d)", - nl_msg_nlmsghdr(buffer)->nlmsg_type, brc_family); + nl_msg_nlmsghdr(&buffer)->nlmsg_type, brc_family); goto error; } - if (cfg_lock(NULL, lock_timeout)) { - /* Couldn't lock config file. */ - retval = EAGAIN; - goto error; - } + /* Service all pending network device notifications before executing the + * command. This is very important to avoid a race in a scenario like the + * following, which is what happens with XenServer Tools version 5.0.0 + * during boot of a Windows VM: + * + * 1. Create tap1.0 and vif1.0. + * 2. Delete tap1.0. + * 3. Delete vif1.0. + * 4. Re-create vif1.0. + * + * We must process the network device notification from step 3 before we + * process the brctl command from step 4. If we process them in the + * reverse order, then step 4 completes as a no-op but step 3 then deletes + * the port that was just added. + * + * (XenServer Tools 5.5.0 does not exhibit this behavior, and neither does + * a VM without Tools installed at all.) + */ + rtnetlink_link_run(); switch (genlmsghdr->cmd) { case BRC_GENL_C_DP_ADD: - retval = handle_bridge_cmd(buffer, true); + handle_bridge_cmd(&buffer, true); break; case BRC_GENL_C_DP_DEL: - retval = handle_bridge_cmd(buffer, false); + handle_bridge_cmd(&buffer, false); break; case BRC_GENL_C_PORT_ADD: - retval = handle_port_cmd(buffer, true); + handle_port_cmd(&buffer, true); break; case BRC_GENL_C_PORT_DEL: - retval = handle_port_cmd(buffer, false); + handle_port_cmd(&buffer, false); + break; + + case BRC_GENL_C_FDB_QUERY: + handle_fdb_query_cmd(&buffer); + break; + + case BRC_GENL_C_GET_BRIDGES: + handle_get_bridges_cmd(&buffer); + break; + + case BRC_GENL_C_GET_PORTS: + handle_get_ports_cmd(&buffer); break; default: - retval = EPROTO; + VLOG_WARN_RL(&rl, "received unknown brc netlink command: %d\n", + genlmsghdr->cmd); + break; } - cfg_unlock(); - error: - ofpbuf_delete(buffer); - return retval; + ofpbuf_uninit(&buffer); } -/* Check for interface configuration changes announced through RTNL. */ static void -rtnl_recv_update(void) +netdev_changed_cb(const struct rtnetlink_link_change *change, + void *aux OVS_UNUSED) { - struct ofpbuf *buf; + char br_name[IFNAMSIZ]; + const char *port_name; - int error = nl_sock_recv(rtnl_sock, &buf, false); - if (error == EAGAIN) { - /* Nothing to do. */ - } else if (error == ENOBUFS) { + if (!change) { VLOG_WARN_RL(&rl, "network monitor socket overflowed"); - } else if (error) { - VLOG_WARN_RL(&rl, "error on network monitor socket: %s", - strerror(error)); - } else { - struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; - struct nlmsghdr *nlh; - struct ifinfomsg *iim; - - nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN); - iim = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *iim); - if (!iim) { - VLOG_WARN_RL(&rl, "received bad rtnl message (no ifinfomsg)"); - ofpbuf_delete(buf); - return; - } - - if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), - rtnlgrp_link_policy, - attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { - VLOG_WARN_RL(&rl,"received bad rtnl message (policy)"); - ofpbuf_delete(buf); - return; - } - if (nlh->nlmsg_type == RTM_DELLINK && attrs[IFLA_MASTER]) { - const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]); - char br_name[IFNAMSIZ]; - uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]); - struct svec ports; - - if (!if_indextoname(br_idx, br_name)) { - ofpbuf_delete(buf); - return; - } + return; + } - if (cfg_lock(NULL, lock_timeout)) { - /* Couldn't lock config file. */ - /* xxx this should try again and print error msg. */ - ofpbuf_delete(buf); - return; - } + if (change->nlmsg_type != RTM_DELLINK || !change->master_ifindex) { + return; + } - svec_init(&ports); - cfg_get_all_keys(&ports, "bridge.%s.port", br_name); - svec_sort(&ports); - if (svec_contains(&ports, port_name)) { - del_port(br_name, port_name); - rewrite_and_reload_config(); - } - cfg_unlock(); - } - ofpbuf_delete(buf); + port_name = change->ifname; + if (!if_indextoname(change->master_ifindex, br_name)) { + return; } + + VLOG_INFO("network device %s destroyed, removing from bridge %s", + port_name, br_name); + + run_vsctl(vsctl_program, VSCTL_OPTIONS, + "--", "--if-exists", "del-port", port_name, + "--", "comment", "ovs-brcompatd:", port_name, "disappeared", + (char *) NULL); } int main(int argc, char *argv[]) { + extern struct vlog_module VLM_reconnect; + struct nln_notifier *link_notifier; struct unixctl_server *unixctl; int retval; + proctitle_init(argc, argv); set_program_name(argv[0]); - register_fault_handlers(); - time_init(); - vlog_init(); + vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); + parse_options(argc, argv); signal(SIGPIPE, SIG_IGN); process_init(); - die_if_already_running(); - daemonize(); + daemonize_start(); retval = unixctl_server_create(NULL, &unixctl); if (retval) { - ovs_fatal(retval, "could not listen for vlog connections"); + exit(EXIT_FAILURE); } if (brc_open(&brc_sock)) { - ovs_fatal(0, "could not open brcompat socket. Check " - "\"brcompat\" kernel module."); + VLOG_FATAL("could not open brcompat socket. Check " + "\"brcompat\" kernel module."); } - if (prune_timeout) { - if (nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &rtnl_sock)) { - ovs_fatal(0, "could not create rtnetlink socket"); - } - } + link_notifier = rtnetlink_link_notifier_create(netdev_changed_cb, NULL); - cfg_read(); + daemonize_complete(); for (;;) { unixctl_server_run(unixctl); + rtnetlink_link_run(); brc_recv_update(); - /* If 'prune_timeout' is non-zero, we actively prune from the - * config file any 'bridge..port' entries that are no - * longer valid. We use two methods: - * - * 1) The kernel explicitly notifies us of removed ports - * through the RTNL messages. - * - * 2) We periodically check all ports associated with bridges - * to see if they no longer exist. - */ - if (prune_timeout) { - rtnl_recv_update(); - prune_ports(); - - nl_sock_wait(rtnl_sock, POLLIN); - poll_timer_wait(prune_timeout); - } + netdev_run(); nl_sock_wait(brc_sock, POLLIN); unixctl_server_wait(unixctl); + rtnetlink_link_wait(); + netdev_wait(); poll_block(); } + rtnetlink_link_notifier_destroy(link_notifier); + return 0; } @@ -660,31 +848,26 @@ static void parse_options(int argc, char *argv[]) { enum { - OPT_LOCK_TIMEOUT = UCHAR_MAX + 1, - OPT_PRUNE_TIMEOUT, - OPT_RELOAD_COMMAND, + OPT_APPCTL, + OPT_VSCTL, VLOG_OPTION_ENUMS, - LEAK_CHECKER_OPTION_ENUMS + LEAK_CHECKER_OPTION_ENUMS, + DAEMON_OPTION_ENUMS }; static struct option long_options[] = { - {"help", no_argument, 0, 'h'}, - {"version", no_argument, 0, 'V'}, - {"lock-timeout", required_argument, 0, OPT_LOCK_TIMEOUT}, - {"prune-timeout", required_argument, 0, OPT_PRUNE_TIMEOUT}, - {"reload-command", required_argument, 0, OPT_RELOAD_COMMAND}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {"appctl", required_argument, NULL, OPT_APPCTL}, + {"vsctl", required_argument, NULL, OPT_VSCTL}, DAEMON_LONG_OPTIONS, VLOG_LONG_OPTIONS, LEAK_CHECKER_LONG_OPTIONS, - {0, 0, 0, 0}, + {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); - int error; + const char *appctl = "ovs-appctl"; + const char *vsctl = "ovs-vsctl"; - reload_command = xasprintf("%s/ovs-appctl -t " - "%s/ovs-vswitchd.`cat %s/ovs-vswitchd.pid`.ctl " - "-e vswitchd/reload 2>&1 " - "| /usr/bin/logger -t brcompatd-reload", - ovs_bindir, ovs_rundir, ovs_rundir); for (;;) { int c; @@ -694,24 +877,19 @@ parse_options(int argc, char *argv[]) } switch (c) { - case 'H': case 'h': usage(); case 'V': - OVS_PRINT_VERSION(0, 0); + ovs_print_version(0, 0); exit(EXIT_SUCCESS); - case OPT_LOCK_TIMEOUT: - lock_timeout = atoi(optarg); + case OPT_APPCTL: + appctl = optarg; break; - case OPT_PRUNE_TIMEOUT: - prune_timeout = atoi(optarg) * 1000; - break; - - case OPT_RELOAD_COMMAND: - reload_command = optarg; + case OPT_VSCTL: + vsctl = optarg; break; VLOG_OPTION_HANDLERS @@ -727,19 +905,21 @@ parse_options(int argc, char *argv[]) } free(short_options); - argc -= optind; - argv += optind; + appctl_program = process_search_path(appctl); + if (!appctl_program) { + VLOG_FATAL("%s: not found in $PATH (use --appctl to specify an " + "alternate location)", appctl); + } - if (argc != 1) { - ovs_fatal(0, "exactly one non-option argument required; " - "use --help for usage"); + vsctl_program = process_search_path(vsctl); + if (!vsctl_program) { + VLOG_FATAL("%s: not found in $PATH (use --vsctl to specify an " + "alternate location)", vsctl); } - config_file = argv[0]; - error = cfg_set_file(config_file); - if (error) { - ovs_fatal(error, "failed to add configuration file \"%s\"", - config_file); + if (argc != optind) { + VLOG_FATAL("no non-option arguments are supported; " + "use --help for usage"); } } @@ -747,13 +927,11 @@ static void usage(void) { printf("%s: bridge compatibility front-end for ovs-vswitchd\n" - "usage: %s [OPTIONS] CONFIG\n" - "CONFIG is the configuration file used by ovs-vswitchd.\n", + "usage: %s [OPTIONS]\n", program_name, program_name); printf("\nConfiguration options:\n" - " --reload-command=COMMAND shell command to reload ovs-vswitchd\n" - " --prune-timeout=SECS wait at most SECS before pruning ports\n" - " --lock-timeout=MSECS wait at most MSECS for CONFIG to unlock\n" + " --appctl=PROGRAM overrides $PATH for finding ovs-appctl\n" + " --vsctl=PROGRAM overrides $PATH for finding ovs-vsctl\n" ); daemon_usage(); vlog_usage(); @@ -761,6 +939,5 @@ usage(void) " -h, --help display this help message\n" " -V, --version display version information\n"); leak_checker_usage(); - printf("\nThe default reload command is:\n%s\n", reload_command); exit(EXIT_SUCCESS); }