X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=vswitchd%2Fovs-brcompatd.c;h=bf571d743a11465ff7a8b4719f8b3f3056e57a74;hb=bfc96d9b50ae119fcbf39a9511bd9f662e7ad390;hp=93d9469bda82836a6bcddda132de540c0fe9a984;hpb=064af42167bf4fc9aaea2702d80ce08074b889c0;p=openvswitch diff --git a/vswitchd/ovs-brcompatd.c b/vswitchd/ovs-brcompatd.c index 93d9469b..bf571d74 100644 --- a/vswitchd/ovs-brcompatd.c +++ b/vswitchd/ovs-brcompatd.c @@ -1,22 +1,21 @@ -/* Copyright (c) 2008, 2009 Nicira Networks - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. +/* Copyright (c) 2008, 2009, 2010 Nicira Networks * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ #include +#include #include #include #include @@ -30,22 +29,23 @@ #include #include #include +#include #include #include -#include "cfg.h" #include "command-line.h" #include "coverage.h" #include "daemon.h" #include "dirs.h" -#include "dpif.h" +#include "dynamic-string.h" #include "fatal-signal.h" -#include "fault.h" #include "leak-checker.h" #include "netdev.h" #include "netlink.h" #include "ofpbuf.h" #include "openvswitch/brcompat-netlink.h" +#include "ovsdb-idl.h" +#include "packets.h" #include "poll-loop.h" #include "process.h" #include "signals.h" @@ -53,6 +53,7 @@ #include "timeval.h" #include "unixctl.h" #include "util.h" +#include "vswitchd/vswitch-idl.h" #include "vlog.h" #define THIS_MODULE VLM_brcompatd @@ -68,25 +69,19 @@ enum bmc_action { BMC_DEL_PORT }; -static void parse_options(int argc, char *argv[]); +static const char *parse_options(int argc, char *argv[]); static void usage(void) NO_RETURN; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60); -/* Maximum number of milliseconds to wait for the config file to be - * unlocked. If set to zero, no waiting will occur. */ -static int lock_timeout = 500; - /* Maximum number of milliseconds to wait before pruning port entries that * no longer exist. If set to zero, ports are never pruned. */ static int prune_timeout = 5000; -/* Config file shared with ovs-vswitchd (usually ovs-vswitchd.conf). */ -static char *config_file; - -/* Command to run (via system()) to reload the ovs-vswitchd configuration - * file. */ -static char *reload_command; +/* Shell command to execute (via popen()) to send a control command to the + * running ovs-vswitchd process. The string must contain one instance of %s, + * which is replaced by the control command. */ +static char *appctl_command; /* Netlink socket to listen for interface changes. */ static struct nl_sock *rtnl_sock; @@ -171,44 +166,126 @@ static const struct nl_policy brc_dp_policy[] = { [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, }; -static bool -bridge_exists(const char *name) +static struct ovsrec_bridge * +find_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name) { - return cfg_has_section("bridge.%s", name); + size_t i; + + for (i = 0; i < ovs->n_bridges; i++) { + if (!strcmp(br_name, ovs->bridges[i]->name)) { + return ovs->bridges[i]; + } + } + + return NULL; } static int -rewrite_and_reload_config(void) -{ - if (cfg_is_dirty()) { - int error1 = cfg_write(); - int error2 = cfg_read(); - long long int reload_start = time_msec(); - int error3 = system(reload_command); - long long int elapsed = time_msec() - reload_start; - COVERAGE_INC(brcompatd_reload); - if (elapsed > 0) { - VLOG_INFO("reload command executed in %lld ms", elapsed); +execute_appctl_command(const char *unixctl_command, char **output) +{ + char *stdout_log, *stderr_log; + int error, status; + char *argv[5]; + + argv[0] = "/bin/sh"; + argv[1] = "-c"; + argv[2] = xasprintf(appctl_command, unixctl_command); + argv[3] = NULL; + + /* Run process and log status. */ + error = process_run_capture(argv, &stdout_log, &stderr_log, &status); + if (error) { + VLOG_ERR("failed to execute %s command via ovs-appctl: %s", + unixctl_command, strerror(error)); + } else if (status) { + char *msg = process_status_msg(status); + VLOG_ERR("ovs-appctl exited with error (%s)", msg); + free(msg); + error = ECHILD; + } + + /* Deal with stdout_log. */ + if (output) { + *output = stdout_log; + } else { + free(stdout_log); + } + + /* Deal with stderr_log */ + if (stderr_log && *stderr_log) { + VLOG_INFO("ovs-appctl wrote to stderr:\n%s", stderr_log); + } + free(stderr_log); + + free(argv[2]); + + return error; +} + +static void +do_get_bridge_parts(const struct ovsrec_bridge *br, struct svec *parts, + int vlan, bool break_down_bonds) +{ + struct svec ports; + size_t i, j; + + svec_init(&ports); + for (i = 0; i < br->n_ports; i++) { + const struct ovsrec_port *port = br->ports[i]; + + svec_add(&ports, port->name); + if (vlan >= 0) { + int port_vlan = port->n_tag ? *port->tag : 0; + if (vlan != port_vlan) { + continue; + } } - if (error3 == -1) { - VLOG_ERR("failed to execute reload command: %s", strerror(errno)); - } else if (error3 != 0) { - char *msg = process_status_msg(error3); - VLOG_ERR("reload command exited with error (%s)", msg); - free(msg); + if (break_down_bonds) { + for (j = 0; j < port->n_interfaces; j++) { + const struct ovsrec_interface *iface = port->interfaces[j]; + svec_add(parts, iface->name); + } + } else { + svec_add(parts, port->name); } - return error1 ? error1 : error2 ? error2 : error3 ? ECHILD : 0; } - return 0; + svec_destroy(&ports); +} + +/* Add all the interfaces for 'bridge' to 'ifaces', breaking bonded interfaces + * down into their constituent parts. + * + * If 'vlan' < 0, all interfaces on 'bridge' are reported. If 'vlan' == 0, + * then only interfaces for trunk ports or ports with implicit VLAN 0 are + * reported. If 'vlan' > 0, only interfaces with implicit VLAN 'vlan' are + * reported. */ +static void +get_bridge_ifaces(const struct ovsrec_bridge *br, struct svec *ifaces, + int vlan) +{ + do_get_bridge_parts(br, ifaces, vlan, true); } +/* Add all the ports for 'bridge' to 'ports'. Bonded ports are reported under + * the bond name, not broken down into their constituent interfaces. + * + * If 'vlan' < 0, all ports on 'bridge' are reported. If 'vlan' == 0, then + * only trunk ports or ports with implicit VLAN 0 are reported. If 'vlan' > 0, + * only port with implicit VLAN 'vlan' are reported. */ +static void +get_bridge_ports(const struct ovsrec_bridge *br, struct svec *ports, + int vlan) +{ + do_get_bridge_parts(br, ports, vlan, false); +} + +#if 0 /* Go through the configuration file and remove any ports that no longer * exist associated with a bridge. */ static void prune_ports(void) { int i, j; - int error; struct svec bridges, delete; if (cfg_lock(NULL, 0)) { @@ -221,32 +298,13 @@ prune_ports(void) cfg_get_subsections(&bridges, "bridge"); for (i=0; iheader_); +} -/* Checks whether a network device named 'name' exists and returns true if so, - * false otherwise. - * - * XXX it is possible that this doesn't entirely accomplish what we want in - * context, since ovs-vswitchd.conf may cause vswitchd to create or destroy - * network devices based on iface.*.internal settings. - * - * XXX may want to move this to lib/netdev. */ static bool -netdev_exists(const char *name) +port_is_fake_bridge(const struct ovsrec_port *port) { - struct stat s; - char *filename; - int error; - - filename = xasprintf("/sys/class/net/%s", name); - error = stat(filename, &s); - free(filename); - return !error; + return (port->fake_bridge + && port->tag + && *port->tag >= 1 && *port->tag <= 4095); } +static void +ovs_insert_bridge(const struct ovsrec_open_vswitch *ovs, + struct ovsrec_bridge *bridge) +{ + struct ovsrec_bridge **bridges; + size_t i; + + bridges = xmalloc(sizeof *ovs->bridges * (ovs->n_bridges + 1)); + for (i = 0; i < ovs->n_bridges; i++) { + bridges[i] = ovs->bridges[i]; + } + bridges[ovs->n_bridges] = bridge; + ovsrec_open_vswitch_set_bridges(ovs, bridges, ovs->n_bridges + 1); + free(bridges); +} + static int -add_bridge(const char *br_name) +add_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name) { - if (bridge_exists(br_name)) { + struct ovsrec_bridge *br; + struct ovsrec_port *port; + struct ovsrec_interface *iface; + + if (find_bridge(ovs, br_name)) { VLOG_WARN("addbr %s: bridge %s exists", br_name, br_name); return EEXIST; } else if (netdev_exists(br_name)) { - if (cfg_get_bool(0, "iface.%s.fake-bridge", br_name)) { - VLOG_WARN("addbr %s: %s exists as a fake bridge", - br_name, br_name); - return 0; - } else { - VLOG_WARN("addbr %s: cannot create bridge %s because a network " - "device named %s already exists", - br_name, br_name, br_name); - return EEXIST; + size_t i; + + for (i = 0; i < ovs->n_bridges; i++) { + size_t j; + struct ovsrec_bridge *br_cfg = ovs->bridges[i]; + + for (j = 0; j < br_cfg->n_ports; j++) { + if (port_is_fake_bridge(br_cfg->ports[j])) { + VLOG_WARN("addbr %s: %s exists as a fake bridge", + br_name, br_name); + return 0; + } + } } + + VLOG_WARN("addbr %s: cannot create bridge %s because a network " + "device named %s already exists", + br_name, br_name, br_name); + return EEXIST; } - cfg_add_entry("bridge.%s.port=%s", br_name, br_name); + iface = ovsrec_interface_insert(txn_from_openvswitch(ovs)); + ovsrec_interface_set_name(iface, br_name); + + port = ovsrec_port_insert(txn_from_openvswitch(ovs)); + ovsrec_port_set_name(port, br_name); + ovsrec_port_set_interfaces(port, &iface, 1); + + br = ovsrec_bridge_insert(txn_from_openvswitch(ovs)); + ovsrec_bridge_set_name(br, br_name); + ovsrec_bridge_set_ports(br, &port, 1); + + ovs_insert_bridge(ovs, br); + VLOG_INFO("addbr %s: success", br_name); return 0; } +static void +add_port(const struct ovsrec_open_vswitch *ovs, + const struct ovsrec_bridge *br, const char *port_name) +{ + struct ovsrec_interface *iface; + struct ovsrec_port *port; + struct ovsrec_port **ports; + size_t i; + + /* xxx Check conflicts? */ + iface = ovsrec_interface_insert(txn_from_openvswitch(ovs)); + ovsrec_interface_set_name(iface, port_name); + + port = ovsrec_port_insert(txn_from_openvswitch(ovs)); + ovsrec_port_set_name(port, port_name); + ovsrec_port_set_interfaces(port, &iface, 1); + + ports = xmalloc(sizeof *br->ports * (br->n_ports + 1)); + for (i = 0; i < br->n_ports; i++) { + ports[i] = br->ports[i]; + } + ports[br->n_ports] = port; + ovsrec_bridge_set_ports(br, ports, br->n_ports + 1); + free(ports); +} + +static void +del_port(const struct ovsrec_bridge *br, const char *port_name) +{ + size_t i, j; + struct ovsrec_port *port_rec = NULL; + + for (i = 0; i < br->n_ports; i++) { + struct ovsrec_port *port = br->ports[i]; + if (!strcmp(port_name, port->name)) { + port_rec = port; + } + for (j = 0; j < port->n_interfaces; j++) { + struct ovsrec_interface *iface = port->interfaces[j]; + if (!strcmp(port_name, iface->name)) { + ovsrec_interface_delete(iface); + } + } + } + + /* xxx Probably can move this into the "for" loop. */ + if (port_rec) { + struct ovsrec_port **ports; + size_t n; + + ports = xmalloc(sizeof *br->ports * br->n_ports); + for (i = n = 0; i < br->n_ports; i++) { + if (br->ports[i] != port_rec) { + ports[n++] = br->ports[i]; + } + } + ovsrec_bridge_set_ports(br, ports, n); + free(ports); + + ovsrec_port_delete(port_rec); + } +} + static int -del_bridge(const char *br_name) +del_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name) { - if (!bridge_exists(br_name)) { + struct ovsrec_bridge *br = find_bridge(ovs, br_name); + struct ovsrec_bridge **bridges; + size_t i, n; + + if (!br) { VLOG_WARN("delbr %s: no bridge named %s", br_name, br_name); return ENXIO; } - cfg_del_section("bridge.%s", br_name); + del_port(br, br_name); + + ovsrec_bridge_delete(br); + + bridges = xmalloc(sizeof *ovs->bridges * ovs->n_bridges); + for (i = n = 0; i < ovs->n_bridges; i++) { + if (ovs->bridges[i] != br) { + bridges[n++] = ovs->bridges[i]; + } + } + ovsrec_open_vswitch_set_bridges(ovs, bridges, n); + free(bridges); + + /* Delete the bridge itself. */ + ovsrec_bridge_delete(br); + VLOG_INFO("delbr %s: success", br_name); return 0; @@ -349,64 +519,87 @@ del_bridge(const char *br_name) static int parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name, - const char **port_name) + const char **port_name, uint64_t *count, uint64_t *skip) { static const struct nl_policy policy[] = { - [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, + [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING, .optional = true }, [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING, .optional = true }, + [BRC_GENL_A_FDB_COUNT] = { .type = NL_A_U64, .optional = true }, + [BRC_GENL_A_FDB_SKIP] = { .type = NL_A_U64, .optional = true }, }; struct nlattr *attrs[ARRAY_SIZE(policy)]; if (!nl_policy_parse(buffer, NLMSG_HDRLEN + GENL_HDRLEN, policy, attrs, ARRAY_SIZE(policy)) - || (port_name && !attrs[BRC_GENL_A_PORT_NAME])) { + || (br_name && !attrs[BRC_GENL_A_DP_NAME]) + || (port_name && !attrs[BRC_GENL_A_PORT_NAME]) + || (count && !attrs[BRC_GENL_A_FDB_COUNT]) + || (skip && !attrs[BRC_GENL_A_FDB_SKIP])) { return EINVAL; } *seq = ((struct nlmsghdr *) buffer->data)->nlmsg_seq; - *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]); + if (br_name) { + *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]); + } if (port_name) { *port_name = nl_attr_get_string(attrs[BRC_GENL_A_PORT_NAME]); } + if (count) { + *count = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_COUNT]); + } + if (skip) { + *skip = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_SKIP]); + } return 0; } -static void -send_reply(uint32_t seq, int error) +/* Composes and returns a reply to a request made by the datapath with Netlink + * sequence number 'seq' and error code 'error'. The caller may add additional + * attributes to the message, then it may send it with send_reply(). */ +static struct ofpbuf * +compose_reply(uint32_t seq, int error) { - struct ofpbuf msg; - int retval; - - /* Compose reply. */ - ofpbuf_init(&msg, 0); - nl_msg_put_genlmsghdr(&msg, brc_sock, 32, brc_family, NLM_F_REQUEST, + struct ofpbuf *reply = ofpbuf_new(4096); + nl_msg_put_genlmsghdr(reply, brc_sock, 32, brc_family, NLM_F_REQUEST, BRC_GENL_C_DP_RESULT, 1); - ((struct nlmsghdr *) msg.data)->nlmsg_seq = seq; - nl_msg_put_u32(&msg, BRC_GENL_A_ERR_CODE, error); + ((struct nlmsghdr *) reply->data)->nlmsg_seq = seq; + nl_msg_put_u32(reply, BRC_GENL_A_ERR_CODE, error); + return reply; +} - /* Send reply. */ - retval = nl_sock_send(brc_sock, &msg, false); +/* Sends 'reply' to the datapath and frees it. */ +static void +send_reply(struct ofpbuf *reply) +{ + int retval = nl_sock_send(brc_sock, reply, false); if (retval) { VLOG_WARN_RL(&rl, "replying to brcompat request: %s", strerror(retval)); } - ofpbuf_uninit(&msg); + ofpbuf_delete(reply); +} + +/* Composes and sends a reply to a request made by the datapath with Netlink + * sequence number 'seq' and error code 'error'. */ +static void +send_simple_reply(uint32_t seq, int error) +{ + send_reply(compose_reply(seq, error)); } static int -handle_bridge_cmd(struct ofpbuf *buffer, bool add) +handle_bridge_cmd(const struct ovsrec_open_vswitch *ovs, + struct ofpbuf *buffer, bool add) { const char *br_name; uint32_t seq; int error; - error = parse_command(buffer, &seq, &br_name, NULL); + error = parse_command(buffer, &seq, &br_name, NULL, NULL, NULL); if (!error) { - error = add ? add_bridge(br_name) : del_bridge(br_name); - if (!error) { - error = rewrite_and_reload_config(); - } - send_reply(seq, error); + error = add ? add_bridge(ovs, br_name) : del_bridge(ovs, br_name); + send_simple_reply(seq, error); } return error; } @@ -416,25 +609,20 @@ static const struct nl_policy brc_port_policy[] = { [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING }, }; -static void -del_port(const char *br_name, const char *port_name) -{ - cfg_del_entry("bridge.%s.port=%s", br_name, port_name); - cfg_del_match("bonding.*.slave=%s", port_name); - cfg_del_match("vlan.%s.*", port_name); -} - static int -handle_port_cmd(struct ofpbuf *buffer, bool add) +handle_port_cmd(const struct ovsrec_open_vswitch *ovs, + struct ofpbuf *buffer, bool add) { const char *cmd_name = add ? "add-if" : "del-if"; const char *br_name, *port_name; uint32_t seq; int error; - error = parse_command(buffer, &seq, &br_name, &port_name); + error = parse_command(buffer, &seq, &br_name, &port_name, NULL, NULL); if (!error) { - if (!bridge_exists(br_name)) { + struct ovsrec_bridge *br = find_bridge(ovs, br_name); + + if (!br) { VLOG_WARN("%s %s %s: no bridge named %s", cmd_name, br_name, port_name, br_name); error = EINVAL; @@ -444,21 +632,324 @@ handle_port_cmd(struct ofpbuf *buffer, bool add) error = EINVAL; } else { if (add) { - cfg_add_entry("bridge.%s.port=%s", br_name, port_name); + add_port(ovs, br, port_name); } else { - del_port(br_name, port_name); + del_port(br, port_name); } VLOG_INFO("%s %s %s: success", cmd_name, br_name, port_name); - error = rewrite_and_reload_config(); } - send_reply(seq, error); + send_simple_reply(seq, error); } return error; } +/* The caller is responsible for freeing '*ovs_name' if the call is + * successful. */ +static int +linux_bridge_to_ovs_bridge(const struct ovsrec_open_vswitch *ovs, + const char *linux_name, + const struct ovsrec_bridge **ovs_bridge, + int *br_vlan) +{ + *ovs_bridge = find_bridge(ovs, linux_name); + if (*ovs_bridge) { + /* Bridge name is the same. We are interested in VLAN 0. */ + *br_vlan = 0; + return 0; + } else { + /* No such Open vSwitch bridge 'linux_name', but there might be an + * internal port named 'linux_name' on some other bridge + * 'ovs_bridge'. If so then we are interested in the VLAN assigned to + * port 'linux_name' on the bridge named 'ovs_bridge'. */ + size_t i, j; + + for (i = 0; i < ovs->n_bridges; i++) { + const struct ovsrec_bridge *br = ovs->bridges[i]; + + for (j = 0; j < br->n_ports; j++) { + const struct ovsrec_port *port = br->ports[j]; + + if (!strcmp(port->name, linux_name)) { + *ovs_bridge = br; + *br_vlan = port->n_tag ? *port->tag : -1; + return 0; + } + } + + } + return ENODEV; + } +} + +static int +handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, + struct ofpbuf *buffer) +{ + /* This structure is copied directly from the Linux 2.6.30 header files. + * It would be more straightforward to #include , but + * the 'port_hi' member was only introduced in Linux 2.6.26 and so systems + * with old header files won't have it. */ + struct __fdb_entry { + __u8 mac_addr[6]; + __u8 port_no; + __u8 is_local; + __u32 ageing_timer_value; + __u8 port_hi; + __u8 pad0; + __u16 unused; + }; + + struct mac { + uint8_t addr[6]; + }; + struct mac *local_macs; + int n_local_macs; + int i; + + /* Impedance matching between the vswitchd and Linux kernel notions of what + * a bridge is. The kernel only handles a single VLAN per bridge, but + * vswitchd can deal with all the VLANs on a single bridge. We have to + * pretend that the former is the case even though the latter is the + * implementation. */ + const char *linux_name; /* Name used by brctl. */ + const struct ovsrec_bridge *ovs_bridge; /* Bridge used by ovs-vswitchd. */ + int br_vlan; /* VLAN tag. */ + struct svec ifaces; + + struct ofpbuf query_data; + struct ofpbuf *reply; + char *unixctl_command; + uint64_t count, skip; + char *output; + char *save_ptr; + uint32_t seq; + int error; + + /* Parse the command received from brcompat_mod. */ + error = parse_command(buffer, &seq, &linux_name, NULL, &count, &skip); + if (error) { + return error; + } + + /* Figure out vswitchd bridge and VLAN. */ + error = linux_bridge_to_ovs_bridge(ovs, linux_name, + &ovs_bridge, &br_vlan); + if (error) { + send_simple_reply(seq, error); + return error; + } + + /* Fetch the forwarding database using ovs-appctl. */ + unixctl_command = xasprintf("fdb/show %s", ovs_bridge->name); + error = execute_appctl_command(unixctl_command, &output); + free(unixctl_command); + if (error) { + send_simple_reply(seq, error); + return error; + } + + /* Fetch the MAC address for each interface on the bridge, so that we can + * fill in the is_local field in the response. */ + svec_init(&ifaces); + get_bridge_ifaces(ovs_bridge, &ifaces, br_vlan); + local_macs = xmalloc(ifaces.n * sizeof *local_macs); + n_local_macs = 0; + for (i = 0; i < ifaces.n; i++) { + const char *iface_name = ifaces.names[i]; + struct mac *mac = &local_macs[n_local_macs]; + struct netdev *netdev; + + error = netdev_open_default(iface_name, &netdev); + if (!error) { + if (!netdev_get_etheraddr(netdev, mac->addr)) { + n_local_macs++; + } + netdev_close(netdev); + } + } + svec_destroy(&ifaces); + + /* Parse the response from ovs-appctl and convert it to binary format to + * pass back to the kernel. */ + ofpbuf_init(&query_data, sizeof(struct __fdb_entry) * 8); + save_ptr = NULL; + strtok_r(output, "\n", &save_ptr); /* Skip header line. */ + while (count > 0) { + struct __fdb_entry *entry; + int port, vlan, age; + uint8_t mac[ETH_ADDR_LEN]; + char *line; + bool is_local; + + line = strtok_r(NULL, "\n", &save_ptr); + if (!line) { + break; + } + + if (sscanf(line, "%d %d "ETH_ADDR_SCAN_FMT" %d", + &port, &vlan, ETH_ADDR_SCAN_ARGS(mac), &age) + != 2 + ETH_ADDR_SCAN_COUNT + 1) { + struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_INFO_RL(&rl, "fdb/show output has invalid format: %s", line); + continue; + } + + if (vlan != br_vlan) { + continue; + } + + if (skip > 0) { + skip--; + continue; + } + + /* Is this the MAC address of an interface on the bridge? */ + is_local = false; + for (i = 0; i < n_local_macs; i++) { + if (eth_addr_equals(local_macs[i].addr, mac)) { + is_local = true; + break; + } + } + + entry = ofpbuf_put_uninit(&query_data, sizeof *entry); + memcpy(entry->mac_addr, mac, ETH_ADDR_LEN); + entry->port_no = port & 0xff; + entry->is_local = is_local; + entry->ageing_timer_value = age * HZ; + entry->port_hi = (port & 0xff00) >> 8; + entry->pad0 = 0; + entry->unused = 0; + count--; + } + free(output); + + /* Compose and send reply to datapath. */ + reply = compose_reply(seq, 0); + nl_msg_put_unspec(reply, BRC_GENL_A_FDB_DATA, + query_data.data, query_data.size); + send_reply(reply); + + /* Free memory. */ + ofpbuf_uninit(&query_data); + + return 0; +} + +static void +send_ifindex_reply(uint32_t seq, struct svec *ifaces) +{ + struct ofpbuf *reply; + const char *iface; + size_t n_indices; + int *indices; + size_t i; + + /* Make sure that any given interface only occurs once. This shouldn't + * happen, but who knows what people put into their configuration files. */ + svec_sort_unique(ifaces); + + /* Convert 'ifaces' into ifindexes. */ + n_indices = 0; + indices = xmalloc(ifaces->n * sizeof *indices); + SVEC_FOR_EACH (i, iface, ifaces) { + int ifindex = if_nametoindex(iface); + if (ifindex) { + indices[n_indices++] = ifindex; + } + } + + /* Compose and send reply. */ + reply = compose_reply(seq, 0); + nl_msg_put_unspec(reply, BRC_GENL_A_IFINDEXES, + indices, n_indices * sizeof *indices); + send_reply(reply); + + /* Free memory. */ + free(indices); +} + +static int +handle_get_bridges_cmd(const struct ovsrec_open_vswitch *ovs, + struct ofpbuf *buffer) +{ + struct svec bridges; + size_t i, j; + + uint32_t seq; + + int error; + + /* Parse Netlink command. + * + * The command doesn't actually have any arguments, but we need the + * sequence number to send the reply. */ + error = parse_command(buffer, &seq, NULL, NULL, NULL, NULL); + if (error) { + return error; + } + + /* Get all the real bridges and all the fake ones. */ + svec_init(&bridges); + for (i = 0; i < ovs->n_bridges; i++) { + const struct ovsrec_bridge *br = ovs->bridges[i]; + + svec_add(&bridges, br->name); + for (j = 0; j < br->n_ports; j++) { + const struct ovsrec_port *port = br->ports[j]; + + if (port->fake_bridge) { + svec_add(&bridges, port->name); + } + } + } + + send_ifindex_reply(seq, &bridges); + svec_destroy(&bridges); + + return 0; +} + static int -brc_recv_update(void) +handle_get_ports_cmd(const struct ovsrec_open_vswitch *ovs, + struct ofpbuf *buffer) +{ + uint32_t seq; + + const char *linux_name; + const struct ovsrec_bridge *ovs_bridge; + int br_vlan; + + struct svec ports; + + int error; + + /* Parse Netlink command. */ + error = parse_command(buffer, &seq, &linux_name, NULL, NULL, NULL); + if (error) { + return error; + } + + error = linux_bridge_to_ovs_bridge(ovs, linux_name, + &ovs_bridge, &br_vlan); + if (error) { + send_simple_reply(seq, error); + return error; + } + + svec_init(&ports); + get_bridge_ports(ovs_bridge, &ports, br_vlan); + svec_sort(&ports); + svec_del(&ports, linux_name); + send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */ + svec_destroy(&ports); + + return 0; +} + +static void +brc_recv_update(const struct ovsrec_open_vswitch *ovs) { int retval; struct ofpbuf *buffer; @@ -477,7 +968,7 @@ brc_recv_update(void) if (retval != EAGAIN) { VLOG_WARN_RL(&rl, "brc_recv_update: %s", strerror(retval)); } - return retval; + return; } genlmsghdr = nl_msg_genlmsghdr(buffer); @@ -492,43 +983,57 @@ brc_recv_update(void) goto error; } - if (cfg_lock(NULL, lock_timeout)) { - /* Couldn't lock config file. */ - retval = EAGAIN; + /* Just drop the request on the floor if a valid configuration + * doesn't exist. We don't immediately do this check, because we + * want to drain pending netlink messages. */ + if (!ovs) { + VLOG_WARN_RL(&rl, "could not find valid configuration to update"); goto error; } switch (genlmsghdr->cmd) { case BRC_GENL_C_DP_ADD: - retval = handle_bridge_cmd(buffer, true); + handle_bridge_cmd(ovs, buffer, true); break; case BRC_GENL_C_DP_DEL: - retval = handle_bridge_cmd(buffer, false); + handle_bridge_cmd(ovs, buffer, false); break; case BRC_GENL_C_PORT_ADD: - retval = handle_port_cmd(buffer, true); + handle_port_cmd(ovs, buffer, true); break; case BRC_GENL_C_PORT_DEL: - retval = handle_port_cmd(buffer, false); + handle_port_cmd(ovs, buffer, false); + break; + + case BRC_GENL_C_FDB_QUERY: + handle_fdb_query_cmd(ovs, buffer); + break; + + case BRC_GENL_C_GET_BRIDGES: + handle_get_bridges_cmd(ovs, buffer); + break; + + case BRC_GENL_C_GET_PORTS: + handle_get_ports_cmd(ovs, buffer); break; default: - retval = EPROTO; + VLOG_WARN_RL(&rl, "received unknown brc netlink command: %d\n", + genlmsghdr->cmd); + break; } - cfg_unlock(); - error: ofpbuf_delete(buffer); - return retval; + return; } /* Check for interface configuration changes announced through RTNL. */ static void -rtnl_recv_update(void) +rtnl_recv_update(const struct ovsrec_open_vswitch *ovs) { struct ofpbuf *buf; @@ -564,28 +1069,73 @@ rtnl_recv_update(void) const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]); char br_name[IFNAMSIZ]; uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]); - struct svec ports; if (!if_indextoname(br_idx, br_name)) { ofpbuf_delete(buf); return; } - if (cfg_lock(NULL, lock_timeout)) { - /* Couldn't lock config file. */ - /* xxx this should try again and print error msg. */ - ofpbuf_delete(buf); - return; - } + if (!netdev_exists(port_name)) { + /* Network device is really gone. */ + struct ovsrec_bridge *br = find_bridge(ovs, br_name); + + VLOG_INFO("network device %s destroyed, " + "removing from bridge %s", port_name, br_name); - svec_init(&ports); - cfg_get_all_keys(&ports, "bridge.%s.port", br_name); - svec_sort(&ports); - if (svec_contains(&ports, port_name)) { - del_port(br_name, port_name); - rewrite_and_reload_config(); + if (!br) { + VLOG_WARN("no bridge named %s from which to remove %s", + br_name, port_name); + ofpbuf_delete(buf); + return; + } + + del_port(br, port_name); + } else { + /* A network device by that name exists even though the kernel + * told us it had disappeared. Probably, what happened was + * this: + * + * 1. Device destroyed. + * 2. Notification sent to us. + * 3. New device created with same name as old one. + * 4. ovs-brcompatd notified, removes device from bridge. + * + * There's no a priori reason that in this situation that the + * new device with the same name should remain in the bridge; + * on the contrary, that would be unexpected. *But* there is + * one important situation where, if we do this, bad things + * happen. This is the case of XenServer Tools version 5.0.0, + * which on boot of a Windows VM cause something like this to + * happen on the Xen host: + * + * i. Create tap1.0 and vif1.0. + * ii. Delete tap1.0. + * iii. Delete vif1.0. + * iv. Re-create vif1.0. + * + * (XenServer Tools 5.5.0 does not exhibit this behavior, and + * neither does a VM without Tools installed at all.@.) + * + * Steps iii and iv happen within a few seconds of each other. + * Step iv causes /etc/xensource/scripts/vif to run, which in + * turn calls ovs-cfg-mod to add the new device to the bridge. + * If step iv happens after step 4 (in our first list of + * steps), then all is well, but if it happens between 3 and 4 + * (which can easily happen if ovs-brcompatd has to wait to + * lock the configuration file), then we will remove the new + * incarnation from the bridge instead of the old one! + * + * So, to avoid this problem, we do nothing here. This is + * strictly incorrect except for this one particular case, and + * perhaps that will bite us someday. If that happens, then we + * will have to somehow track network devices by ifindex, since + * a new device will have a new ifindex even if it has the same + * name as an old device. + */ + VLOG_INFO("kernel reported network device %s removed but " + "a device by that name exists (XS Tools 5.0.0?)", + port_name); } - cfg_unlock(); } ofpbuf_delete(buf); } @@ -595,22 +1145,28 @@ int main(int argc, char *argv[]) { struct unixctl_server *unixctl; + const char *remote; + struct ovsdb_idl *idl; int retval; + proctitle_init(argc, argv); set_program_name(argv[0]); - register_fault_handlers(); time_init(); vlog_init(); - parse_options(argc, argv); + vlog_set_levels(VLM_ANY_MODULE, VLF_CONSOLE, VLL_WARN); + vlog_set_levels(VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); + + remote = parse_options(argc, argv); signal(SIGPIPE, SIG_IGN); process_init(); + ovsrec_init(); die_if_already_running(); - daemonize(); + daemonize_start(); retval = unixctl_server_create(NULL, &unixctl); if (retval) { - ovs_fatal(retval, "could not listen for vlog connections"); + exit(EXIT_FAILURE); } if (brc_open(&brc_sock)) { @@ -624,15 +1180,33 @@ main(int argc, char *argv[]) } } - cfg_read(); + daemonize_complete(); + + idl = ovsdb_idl_create(remote, &ovsrec_idl_class); for (;;) { + const struct ovsrec_open_vswitch *ovs; + struct ovsdb_idl_txn *txn; + enum ovsdb_idl_txn_status status; + + ovsdb_idl_run(idl); + + txn = ovsdb_idl_txn_create(idl); + unixctl_server_run(unixctl); - brc_recv_update(); + ovs = ovsrec_open_vswitch_first(idl); + brc_recv_update(ovs); + + if (!ovs && ovsdb_idl_has_ever_connected(idl)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_WARN_RL(&rl, "%s: database does not contain any Open vSwitch " + "configuration", remote); + } + netdev_run(); /* If 'prune_timeout' is non-zero, we actively prune from the - * config file any 'bridge..port' entries that are no - * longer valid. We use two methods: + * configuration of port entries that are no longer valid. We + * use two methods: * * 1) The kernel explicitly notifies us of removed ports * through the RTNL messages. @@ -640,51 +1214,106 @@ main(int argc, char *argv[]) * 2) We periodically check all ports associated with bridges * to see if they no longer exist. */ - if (prune_timeout) { - rtnl_recv_update(); + if (ovs && prune_timeout) { + rtnl_recv_update(ovs); +#if 0 prune_ports(); +#endif nl_sock_wait(rtnl_sock, POLLIN); poll_timer_wait(prune_timeout); } + while ((status = ovsdb_idl_txn_commit(txn)) == TXN_INCOMPLETE) { + ovsdb_idl_run(idl); + ovsdb_idl_wait(idl); + ovsdb_idl_txn_wait(txn); + poll_block(); + } + + switch (status) { + case TXN_INCOMPLETE: + NOT_REACHED(); + + case TXN_ABORTED: + /* Should not happen--we never call ovsdb_idl_txn_abort(). */ + ovs_fatal(0, "transaction aborted"); + + case TXN_SUCCESS: + case TXN_UNCHANGED: + break; + + case TXN_TRY_AGAIN: + /* xxx Handle this better! */ + VLOG_ERR("OVSDB transaction needs retry"); + break; + + case TXN_ERROR: + /* xxx Handle this better! */ + VLOG_ERR("OVSDB transaction failed: %s", + ovsdb_idl_txn_get_error(txn)); + break; + + default: + NOT_REACHED(); + } + ovsdb_idl_txn_destroy(txn); + nl_sock_wait(brc_sock, POLLIN); + ovsdb_idl_wait(idl); unixctl_server_wait(unixctl); + netdev_wait(); poll_block(); } + ovsdb_idl_destroy(idl); + return 0; } static void +validate_appctl_command(void) +{ + const char *p; + int n; + + n = 0; + for (p = strchr(appctl_command, '%'); p; p = strchr(p + 2, '%')) { + if (p[1] == '%') { + /* Nothing to do. */ + } else if (p[1] == 's') { + n++; + } else { + ovs_fatal(0, "only '%%s' and '%%%%' allowed in --appctl-command"); + } + } + if (n != 1) { + ovs_fatal(0, "'%%s' must appear exactly once in --appctl-command"); + } +} + +static const char * parse_options(int argc, char *argv[]) { enum { - OPT_LOCK_TIMEOUT = UCHAR_MAX + 1, OPT_PRUNE_TIMEOUT, - OPT_RELOAD_COMMAND, + OPT_APPCTL_COMMAND, VLOG_OPTION_ENUMS, LEAK_CHECKER_OPTION_ENUMS }; static struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'V'}, - {"lock-timeout", required_argument, 0, OPT_LOCK_TIMEOUT}, {"prune-timeout", required_argument, 0, OPT_PRUNE_TIMEOUT}, - {"reload-command", required_argument, 0, OPT_RELOAD_COMMAND}, + {"appctl-command", required_argument, 0, OPT_APPCTL_COMMAND}, DAEMON_LONG_OPTIONS, VLOG_LONG_OPTIONS, LEAK_CHECKER_LONG_OPTIONS, {0, 0, 0, 0}, }; char *short_options = long_options_to_short_options(long_options); - int error; - reload_command = xasprintf("%s/ovs-appctl -t " - "%s/ovs-vswitchd.`cat %s/ovs-vswitchd.pid`.ctl " - "-e vswitchd/reload 2>&1 " - "| /usr/bin/logger -t brcompatd-reload", - ovs_bindir, ovs_rundir, ovs_rundir); + appctl_command = xasprintf("%s/ovs-appctl %%s", ovs_bindir); for (;;) { int c; @@ -702,16 +1331,12 @@ parse_options(int argc, char *argv[]) OVS_PRINT_VERSION(0, 0); exit(EXIT_SUCCESS); - case OPT_LOCK_TIMEOUT: - lock_timeout = atoi(optarg); - break; - case OPT_PRUNE_TIMEOUT: prune_timeout = atoi(optarg) * 1000; break; - case OPT_RELOAD_COMMAND: - reload_command = optarg; + case OPT_APPCTL_COMMAND: + appctl_command = optarg; break; VLOG_OPTION_HANDLERS @@ -727,20 +1352,17 @@ parse_options(int argc, char *argv[]) } free(short_options); + validate_appctl_command(); + argc -= optind; argv += optind; if (argc != 1) { - ovs_fatal(0, "exactly one non-option argument required; " + ovs_fatal(0, "database socket is non-option argument; " "use --help for usage"); } - config_file = argv[0]; - error = cfg_set_file(config_file); - if (error) { - ovs_fatal(error, "failed to add configuration file \"%s\"", - config_file); - } + return argv[0]; } static void @@ -751,9 +1373,8 @@ usage(void) "CONFIG is the configuration file used by ovs-vswitchd.\n", program_name, program_name); printf("\nConfiguration options:\n" - " --reload-command=COMMAND shell command to reload ovs-vswitchd\n" + " --appctl-command=COMMAND shell command to run ovs-appctl\n" " --prune-timeout=SECS wait at most SECS before pruning ports\n" - " --lock-timeout=MSECS wait at most MSECS for CONFIG to unlock\n" ); daemon_usage(); vlog_usage(); @@ -761,6 +1382,6 @@ usage(void) " -h, --help display this help message\n" " -V, --version display version information\n"); leak_checker_usage(); - printf("\nThe default reload command is:\n%s\n", reload_command); + printf("\nThe default appctl command is:\n%s\n", appctl_command); exit(EXIT_SUCCESS); }