#include "packets.h"
#include "poll-loop.h"
#include "process.h"
+#include "rtnetlink.h"
+#include "rtnetlink-link.h"
#include "signals.h"
-#include "svec.h"
+#include "sset.h"
#include "timeval.h"
#include "unixctl.h"
#include "util.h"
* which is replaced by the control command. */
static char *appctl_command;
-/* Netlink socket to listen for interface changes. */
-static struct nl_sock *rtnl_sock;
-
/* Netlink socket to bridge compatibility kernel module. */
static struct nl_sock *brc_sock;
[BRC_GENL_A_MC_GROUP] = {.type = NL_A_U32 }
};
-static const struct nl_policy rtnlgrp_link_policy[] = {
- [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false },
- [IFLA_MASTER] = { .type = NL_A_U32, .optional = true },
-};
-
static int
lookup_brc_multicast_group(int *multicast_group)
{
argv[3] = NULL;
/* Run process and log status. */
- error = process_run_capture(argv, &stdout_log, &stderr_log, &status);
+ error = process_run_capture(argv, &stdout_log, &stderr_log, 65536,
+ &status);
if (error) {
VLOG_ERR("failed to execute %s command via ovs-appctl: %s",
unixctl_command, strerror(error));
}
static void
-do_get_bridge_parts(const struct ovsrec_bridge *br, struct svec *parts,
+do_get_bridge_parts(const struct ovsrec_bridge *br, struct sset *parts,
int vlan, bool break_down_bonds)
{
- struct svec ports;
size_t i, j;
- svec_init(&ports);
for (i = 0; i < br->n_ports; i++) {
const struct ovsrec_port *port = br->ports[i];
- svec_add(&ports, port->name);
if (vlan >= 0) {
int port_vlan = port->n_tag ? *port->tag : 0;
if (vlan != port_vlan) {
if (break_down_bonds) {
for (j = 0; j < port->n_interfaces; j++) {
const struct ovsrec_interface *iface = port->interfaces[j];
- svec_add(parts, iface->name);
+ sset_add(parts, iface->name);
}
} else {
- svec_add(parts, port->name);
+ sset_add(parts, port->name);
}
}
- svec_destroy(&ports);
}
/* Add all the interfaces for 'bridge' to 'ifaces', breaking bonded interfaces
* reported. If 'vlan' > 0, only interfaces with implicit VLAN 'vlan' are
* reported. */
static void
-get_bridge_ifaces(const struct ovsrec_bridge *br, struct svec *ifaces,
+get_bridge_ifaces(const struct ovsrec_bridge *br, struct sset *ifaces,
int vlan)
{
do_get_bridge_parts(br, ifaces, vlan, true);
* only trunk ports or ports with implicit VLAN 0 are reported. If 'vlan' > 0,
* only port with implicit VLAN 'vlan' are reported. */
static void
-get_bridge_ports(const struct ovsrec_bridge *br, struct svec *ports,
+get_bridge_ports(const struct ovsrec_bridge *br, struct sset *ports,
int vlan)
{
do_get_bridge_parts(br, ports, vlan, false);
const char *linux_name; /* Name used by brctl. */
const struct ovsrec_bridge *ovs_bridge; /* Bridge used by ovs-vswitchd. */
int br_vlan; /* VLAN tag. */
- struct svec ifaces;
+ struct sset ifaces;
struct ofpbuf query_data;
+ const char *iface_name;
struct ofpbuf *reply;
char *unixctl_command;
uint64_t count, skip;
/* Fetch the MAC address for each interface on the bridge, so that we can
* fill in the is_local field in the response. */
- svec_init(&ifaces);
+ sset_init(&ifaces);
get_bridge_ifaces(ovs_bridge, &ifaces, br_vlan);
- local_macs = xmalloc(ifaces.n * sizeof *local_macs);
+ local_macs = xmalloc(sset_count(&ifaces) * sizeof *local_macs);
n_local_macs = 0;
- for (i = 0; i < ifaces.n; i++) {
- const char *iface_name = ifaces.names[i];
+ SSET_FOR_EACH (iface_name, &ifaces) {
struct mac *mac = &local_macs[n_local_macs];
struct netdev *netdev;
netdev_close(netdev);
}
}
- svec_destroy(&ifaces);
+ sset_destroy(&ifaces);
/* Parse the response from ovs-appctl and convert it to binary format to
* pass back to the kernel. */
}
static void
-send_ifindex_reply(uint32_t seq, struct svec *ifaces)
+send_ifindex_reply(uint32_t seq, struct sset *ifaces)
{
struct ofpbuf *reply;
const char *iface;
size_t n_indices;
int *indices;
- size_t i;
-
- /* Make sure that any given interface only occurs once. This shouldn't
- * happen, but who knows what people put into their configuration files. */
- svec_sort_unique(ifaces);
/* Convert 'ifaces' into ifindexes. */
n_indices = 0;
- indices = xmalloc(ifaces->n * sizeof *indices);
- SVEC_FOR_EACH (i, iface, ifaces) {
+ indices = xmalloc(sset_count(ifaces) * sizeof *indices);
+ SSET_FOR_EACH (iface, ifaces) {
int ifindex = if_nametoindex(iface);
if (ifindex) {
indices[n_indices++] = ifindex;
handle_get_bridges_cmd(const struct ovsrec_open_vswitch *ovs,
struct ofpbuf *buffer)
{
- struct svec bridges;
+ struct sset bridges;
size_t i, j;
uint32_t seq;
}
/* Get all the real bridges and all the fake ones. */
- svec_init(&bridges);
+ sset_init(&bridges);
for (i = 0; i < ovs->n_bridges; i++) {
const struct ovsrec_bridge *br = ovs->bridges[i];
- svec_add(&bridges, br->name);
+ sset_add(&bridges, br->name);
for (j = 0; j < br->n_ports; j++) {
const struct ovsrec_port *port = br->ports[j];
if (port->fake_bridge) {
- svec_add(&bridges, port->name);
+ sset_add(&bridges, port->name);
}
}
}
send_ifindex_reply(seq, &bridges);
- svec_destroy(&bridges);
+ sset_destroy(&bridges);
return 0;
}
const struct ovsrec_bridge *ovs_bridge;
int br_vlan;
- struct svec ports;
+ struct sset ports;
int error;
return error;
}
- svec_init(&ports);
+ sset_init(&ports);
get_bridge_ports(ovs_bridge, &ports, br_vlan);
- svec_sort(&ports);
- svec_del(&ports, linux_name);
+ sset_find_and_delete(&ports, linux_name);
send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */
- svec_destroy(&ports);
+ sset_destroy(&ports);
return 0;
}
return;
}
-/* Check for interface configuration changes announced through RTNL. */
static void
-rtnl_recv_update(struct ovsdb_idl *idl,
- const struct ovsrec_open_vswitch *ovs)
+netdev_changed_cb(const struct rtnetlink_link_change *change, void *idl_)
{
- struct ofpbuf *buf;
+ struct ovsdb_idl *idl = idl_;
+ const struct ovsrec_open_vswitch *ovs;
+ const struct ovsrec_interface *iface;
+ struct ovsdb_idl_txn *txn;
+ struct ovsrec_port *port;
+ struct ovsrec_bridge *br;
+ char br_name[IFNAMSIZ];
+ const char *port_name;
- int error = nl_sock_recv(rtnl_sock, &buf, false);
- if (error == EAGAIN) {
- /* Nothing to do. */
- } else if (error == ENOBUFS) {
+ if (!change) {
VLOG_WARN_RL(&rl, "network monitor socket overflowed");
- } else if (error) {
- VLOG_WARN_RL(&rl, "error on network monitor socket: %s",
- strerror(error));
- } else {
- struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
- struct nlmsghdr *nlh;
- struct ifinfomsg *iim;
-
- nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN);
- iim = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *iim);
- if (!iim) {
- VLOG_WARN_RL(&rl, "received bad rtnl message (no ifinfomsg)");
- ofpbuf_delete(buf);
- return;
- }
+ return;
+ }
- if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
- rtnlgrp_link_policy,
- attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
- VLOG_WARN_RL(&rl,"received bad rtnl message (policy)");
- ofpbuf_delete(buf);
- return;
- }
- if (nlh->nlmsg_type == RTM_DELLINK && attrs[IFLA_MASTER]) {
- const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]);
- char br_name[IFNAMSIZ];
- uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]);
-
- if (!if_indextoname(br_idx, br_name)) {
- ofpbuf_delete(buf);
- return;
- }
+ if (change->nlmsg_type != RTM_DELLINK || !change->master_ifindex) {
+ return;
+ }
- if (!netdev_exists(port_name)) {
- /* Network device is really gone. */
- struct ovsdb_idl_txn *txn;
- const struct ovsrec_interface *iface;
- struct ovsrec_port *port;
- struct ovsrec_bridge *br;
-
- VLOG_INFO("network device %s destroyed, "
- "removing from bridge %s", port_name, br_name);
-
- br = find_bridge(ovs, br_name);
- if (!br) {
- VLOG_WARN("no bridge named %s from which to remove %s",
- br_name, port_name);
- ofpbuf_delete(buf);
- return;
- }
+ ovs = ovsrec_open_vswitch_first(idl);
+ if (!ovs) {
+ return;
+ }
- txn = ovsdb_idl_txn_create(idl);
+ port_name = change->ifname;
+ if (!if_indextoname(change->master_ifindex, br_name)) {
+ return;
+ }
- iface = find_interface(br, port_name, &port);
- if (iface) {
- del_interface(br, port, iface);
- ovsdb_idl_txn_add_comment(txn,
- "ovs-brcompatd: destroy port %s",
- port_name);
- }
+ if (netdev_exists(port_name)) {
+ /* A network device by that name exists even though the kernel
+ * told us it had disappeared. Probably, what happened was
+ * this:
+ *
+ * 1. Device destroyed.
+ * 2. Notification sent to us.
+ * 3. New device created with same name as old one.
+ * 4. ovs-brcompatd notified, removes device from bridge.
+ *
+ * There's no a priori reason that, in this situation, the
+ * new device with the same name should remain in the bridge;
+ * on the contrary, that would be unexpected. *But* there is
+ * one important situation where, if we do this, bad things
+ * happen. This is the case of XenServer Tools version 5.0.0,
+ * which on boot of a Windows VM causes something like this to
+ * happen on the Xen host:
+ *
+ * i. Create tap1.0 and vif1.0.
+ * ii. Delete tap1.0.
+ * iii. Delete vif1.0.
+ * iv. Re-create vif1.0.
+ *
+ * (XenServer Tools 5.5.0 does not exhibit this behavior, and
+ * neither does a VM without Tools installed at all.)
+ *
+ * Steps iii and iv happen within a few seconds of each other.
+ * Step iv causes /etc/xensource/scripts/vif to run, which in
+ * turn calls ovs-cfg-mod to add the new device to the bridge.
+ * If step iv happens after step 4 (in our first list of
+ * steps), then all is well, but if it happens between 3 and 4
+ * (which can easily happen if ovs-brcompatd has to wait to
+ * lock the configuration file), then we will remove the new
+ * incarnation from the bridge instead of the old one!
+ *
+ * So, to avoid this problem, we do nothing here. This is
+ * strictly incorrect except for this one particular case, and
+ * perhaps that will bite us someday. If that happens, then we
+ * will have to somehow track network devices by ifindex, since
+ * a new device will have a new ifindex even if it has the same
+ * name as an old device.
+ */
+ VLOG_INFO("kernel reported network device %s removed but "
+ "a device by that name exists (XS Tools 5.0.0?)",
+ port_name);
+ return;
+ }
- commit_txn(txn, false);
- } else {
- /* A network device by that name exists even though the kernel
- * told us it had disappeared. Probably, what happened was
- * this:
- *
- * 1. Device destroyed.
- * 2. Notification sent to us.
- * 3. New device created with same name as old one.
- * 4. ovs-brcompatd notified, removes device from bridge.
- *
- * There's no a priori reason that in this situation that the
- * new device with the same name should remain in the bridge;
- * on the contrary, that would be unexpected. *But* there is
- * one important situation where, if we do this, bad things
- * happen. This is the case of XenServer Tools version 5.0.0,
- * which on boot of a Windows VM cause something like this to
- * happen on the Xen host:
- *
- * i. Create tap1.0 and vif1.0.
- * ii. Delete tap1.0.
- * iii. Delete vif1.0.
- * iv. Re-create vif1.0.
- *
- * (XenServer Tools 5.5.0 does not exhibit this behavior, and
- * neither does a VM without Tools installed at all.@.)
- *
- * Steps iii and iv happen within a few seconds of each other.
- * Step iv causes /etc/xensource/scripts/vif to run, which in
- * turn calls ovs-cfg-mod to add the new device to the bridge.
- * If step iv happens after step 4 (in our first list of
- * steps), then all is well, but if it happens between 3 and 4
- * (which can easily happen if ovs-brcompatd has to wait to
- * lock the configuration file), then we will remove the new
- * incarnation from the bridge instead of the old one!
- *
- * So, to avoid this problem, we do nothing here. This is
- * strictly incorrect except for this one particular case, and
- * perhaps that will bite us someday. If that happens, then we
- * will have to somehow track network devices by ifindex, since
- * a new device will have a new ifindex even if it has the same
- * name as an old device.
- */
- VLOG_INFO("kernel reported network device %s removed but "
- "a device by that name exists (XS Tools 5.0.0?)",
- port_name);
- }
- }
- ofpbuf_delete(buf);
+ VLOG_INFO("network device %s destroyed, removing from bridge %s",
+ port_name, br_name);
+
+ br = find_bridge(ovs, br_name);
+ if (!br) {
+ VLOG_WARN("no bridge named %s from which to remove %s",
+ br_name, port_name);
+ return;
}
+
+ iface = find_interface(br, port_name, &port);
+ if (!iface) {
+ return;
+ }
+
+ txn = ovsdb_idl_txn_create(idl);
+ del_interface(br, port, iface);
+ ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: destroy port %s",
+ port_name);
+ commit_txn(txn, false);
}
int
main(int argc, char *argv[])
{
extern struct vlog_module VLM_reconnect;
+ struct rtnetlink_notifier link_notifier;
struct unixctl_server *unixctl;
const char *remote;
struct ovsdb_idl *idl;
process_init();
ovsrec_init();
- die_if_already_running();
daemonize_start();
retval = unixctl_server_create(NULL, &unixctl);
}
if (brc_open(&brc_sock)) {
- ovs_fatal(0, "could not open brcompat socket. Check "
- "\"brcompat\" kernel module.");
+ VLOG_FATAL("could not open brcompat socket. Check "
+ "\"brcompat\" kernel module.");
}
- if (prune_timeout) {
- int error;
- error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock);
- if (error) {
- ovs_fatal(error, "could not create rtnetlink socket");
- }
-
- error = nl_sock_join_mcgroup(rtnl_sock, RTNLGRP_LINK);
- if (error) {
- ovs_fatal(error, "could not join RTNLGRP_LINK multicast group");
- }
+ if (prune_timeout) {
+ rtnetlink_link_notifier_register(&link_notifier,
+ netdev_changed_cb, NULL);
}
daemonize_complete();
ovsdb_idl_run(idl);
unixctl_server_run(unixctl);
+ rtnetlink_link_notifier_run();
brc_recv_update(idl);
ovs = ovsrec_open_vswitch_first(idl);
* to see if they no longer exist.
*/
if (ovs && prune_timeout) {
- rtnl_recv_update(idl, ovs);
- nl_sock_wait(rtnl_sock, POLLIN);
+ rtnetlink_link_notifier_run();
poll_timer_wait(prune_timeout);
}
-
nl_sock_wait(brc_sock, POLLIN);
ovsdb_idl_wait(idl);
unixctl_server_wait(unixctl);
+ rtnetlink_link_notifier_wait();
netdev_wait();
poll_block();
}
+ if (prune_timeout) {
+ rtnetlink_link_notifier_unregister(&link_notifier);
+ }
ovsdb_idl_destroy(idl);
return 0;
} else if (p[1] == 's') {
n++;
} else {
- ovs_fatal(0, "only '%%s' and '%%%%' allowed in --appctl-command");
+ VLOG_FATAL("only '%%s' and '%%%%' allowed in --appctl-command");
}
}
if (n != 1) {
- ovs_fatal(0, "'%%s' must appear exactly once in --appctl-command");
+ VLOG_FATAL("'%%s' must appear exactly once in --appctl-command");
}
}
DAEMON_OPTION_ENUMS
};
static struct option long_options[] = {
- {"help", no_argument, 0, 'h'},
- {"version", no_argument, 0, 'V'},
- {"prune-timeout", required_argument, 0, OPT_PRUNE_TIMEOUT},
- {"appctl-command", required_argument, 0, OPT_APPCTL_COMMAND},
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'V'},
+ {"prune-timeout", required_argument, NULL, OPT_PRUNE_TIMEOUT},
+ {"appctl-command", required_argument, NULL, OPT_APPCTL_COMMAND},
DAEMON_LONG_OPTIONS,
VLOG_LONG_OPTIONS,
LEAK_CHECKER_LONG_OPTIONS,
- {0, 0, 0, 0},
+ {NULL, 0, NULL, 0},
};
char *short_options = long_options_to_short_options(long_options);
argv += optind;
if (argc != 1) {
- ovs_fatal(0, "database socket is non-option argument; "
- "use --help for usage");
+ VLOG_FATAL("database socket is non-option argument; "
+ "use --help for usage");
}
return argv[0];