int local_ifindex; /* Ifindex of local port. */
struct svec changed_ports; /* Ports that have changed. */
struct linux_netdev_notifier port_notifier;
+ bool change_error;
};
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
return CONTAINER_OF(dpif, struct dpif_linux, dpif);
}
-static void
-dpif_linux_run(void)
-{
- linux_netdev_notifier_run();
-}
-
-static void
-dpif_linux_wait(void)
-{
- linux_netdev_notifier_wait();
-}
-
static int
dpif_linux_enumerate(struct svec *all_dps)
{
dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
- int error;
- error = linux_netdev_notifier_get_error(&dpif->port_notifier);
- if (!error) {
- if (!dpif->changed_ports.n) {
- return EAGAIN;
- }
+ if (dpif->change_error) {
+ dpif->change_error = false;
+ svec_clear(&dpif->changed_ports);
+ return ENOBUFS;
+ } else if (dpif->changed_ports.n) {
*devnamep = dpif->changed_ports.names[--dpif->changed_ports.n];
+ return 0;
} else {
- svec_clear(&dpif->changed_ports);
+ return EAGAIN;
}
- return error;
}
static void
dpif_linux_port_poll_wait(const struct dpif *dpif_)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
- if (dpif->changed_ports.n
- || linux_netdev_notifier_peek_error(&dpif->port_notifier)) {
+ if (dpif->changed_ports.n || dpif->change_error) {
poll_immediate_wake();
} else {
linux_netdev_notifier_wait();
const struct dpif_class dpif_linux_class = {
"", /* This is the default class. */
"linux",
- dpif_linux_run,
- dpif_linux_wait,
+ NULL,
+ NULL,
dpif_linux_enumerate,
dpif_linux_open,
dpif_linux_close,
dpif->minor = minor;
dpif->local_ifindex = 0;
svec_init(&dpif->changed_ports);
+ dpif->change_error = false;
*dpifp = &dpif->dpif;
} else {
free(dpif);
{
struct dpif_linux *dpif = dpif_;
- if (change->master_ifindex == dpif->local_ifindex
- && (change->nlmsg_type == RTM_NEWLINK
- || change->nlmsg_type == RTM_DELLINK))
- {
- /* Our datapath changed, either adding a new port or deleting an
- * existing one. */
- if (!svec_contains(&dpif->changed_ports, change->ifname)) {
- svec_add(&dpif->changed_ports, change->ifname);
- svec_sort(&dpif->changed_ports);
+ if (change) {
+ if (change->master_ifindex == dpif->local_ifindex
+ && (change->nlmsg_type == RTM_NEWLINK
+ || change->nlmsg_type == RTM_DELLINK))
+ {
+ /* Our datapath changed, either adding a new port or deleting an
+ * existing one. */
+ if (!svec_contains(&dpif->changed_ports, change->ifname)) {
+ svec_add(&dpif->changed_ports, change->ifname);
+ svec_sort(&dpif->changed_ports);
+ }
}
+ } else {
+ dpif->change_error = true;
}
}
/* Performs periodic work needed by all the various kinds of dpifs.
*
- * If your program opens any dpifs, it must call this function within its main
- * poll loop. */
+ * If your program opens any dpifs, it must call both this function and
+ * netdev_run() within its main poll loop. */
void
dp_run(void)
{
/* Arranges for poll_block() to wake up when dp_run() needs to be called.
*
- * If your program opens any dpifs, it must call this function within its main
- * poll loop. */
+ * If your program opens any dpifs, it must call both this function and
+ * netdev_wait() within its main poll loop. */
void
dp_wait(void)
{
#include "netdev-linux.h"
+#include <assert.h>
#include <errno.h>
-#include <sys/socket.h>
+#include <fcntl.h>
+#include <arpa/inet.h>
+#include <inttypes.h>
+#include <linux/if_tun.h>
+#include <linux/types.h>
+#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
+#include <linux/sockios.h>
+#include <linux/version.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <netpacket/packet.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/if_packet.h>
+#include <net/route.h>
+#include <netinet/in.h>
#include <poll.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
#include "coverage.h"
+#include "dynamic-string.h"
+#include "fatal-signal.h"
+#include "netdev-provider.h"
#include "netlink.h"
#include "ofpbuf.h"
+#include "openflow/openflow.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "socket-util.h"
+#include "shash.h"
+#include "svec.h"
#define THIS_MODULE VLM_netdev_linux
#include "vlog.h"
+\f
+/* These were introduced in Linux 2.6.14, so they might be missing if we have
+ * old headers. */
+#ifndef ADVERTISED_Pause
+#define ADVERTISED_Pause (1 << 13)
+#endif
+#ifndef ADVERTISED_Asym_Pause
+#define ADVERTISED_Asym_Pause (1 << 14)
+#endif
+
+/* Linux implementation of a network device handle.  The generic 'netdev' is
+ * embedded so that netdev_linux_cast() can recover this structure from it. */
+struct netdev_linux {
+ struct netdev netdev;
+
+ /* File descriptors. For ordinary network devices, the two fds below are
+ * the same; for tap devices, they differ.  Both are -1 when the device
+ * was opened without a packet socket or tap device. */
+ int netdev_fd; /* Network device. */
+ int tap_fd; /* TAP character device, if any, otherwise the
+ * network device. */
+
+ /* Cached device properties, looked up in 'cache_map' by device name and
+ * reference-counted across handles. */
+ struct netdev_linux_cache *cache;
+};
+
+/* Bits for the 'valid' member of struct netdev_linux_cache.  A set bit means
+ * that the corresponding cached member has been filled in; the cache
+ * callback clears all of them by storing 0 in 'valid'. */
+enum {
+ VALID_IFINDEX = 1 << 0,
+ VALID_ETHERADDR = 1 << 1,
+ VALID_IN4 = 1 << 2,
+ VALID_IN6 = 1 << 3,
+ VALID_MTU = 1 << 4,
+ VALID_CARRIER = 1 << 5
+};
+
+/* Cached network device information.
+ *
+ * One instance exists per device name in 'cache_map'; it is created by the
+ * first netdev_linux_open() of that name and freed by the
+ * netdev_linux_close() that drops 'ref_cnt' to zero. */
+struct netdev_linux_cache {
+ struct shash_node *shash_node; /* Our node in 'cache_map'. */
+ unsigned int valid; /* Bitmap of VALID_* for the members below. */
+ int ref_cnt; /* Number of open handles sharing this cache. */
+
+ int ifindex;
+ uint8_t etheraddr[ETH_ADDR_LEN];
+ struct in_addr in4;
+ struct in6_addr in6;
+ int mtu;
+ int carrier;
+};
+
+/* Maps a device name to its struct netdev_linux_cache.  The notifier below is
+ * registered by netdev_linux_open() when the map is empty and unregistered by
+ * netdev_linux_close() once the map becomes empty again. */
+static struct shash cache_map = SHASH_INITIALIZER(&cache_map);
+static struct linux_netdev_notifier netdev_linux_cache_notifier;
+
+/* Policy for RTNLGRP_LINK messages.
+ *
+ * There are *many* more fields in these messages, but currently we only care
+ * about the interface name (required) and, when present, the IFLA_MASTER and
+ * IFLA_STATS attributes. */
+static const struct nl_policy rtnlgrp_link_policy[] = {
+ [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false },
+ [IFLA_MASTER] = { .type = NL_A_U32, .optional = true },
+ [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true,
+ .min_len = sizeof(struct rtnl_link_stats) },
+};
+
+/* An AF_INET socket (used for ioctl operations).  It stays -1 until
+ * netdev_linux_init() creates it. */
+static int af_inet_sock = -1;
+
+/* Linux wrapper around a generic netdev_notifier, with a list node so that
+ * several wrappers can be chained together.
+ * NOTE(review): presumably chained per device name under
+ * 'netdev_linux_notifiers' -- confirm against the poll_add/poll_remove code,
+ * which is not visible here. */
+struct netdev_linux_notifier {
+ struct netdev_notifier notifier;
+ struct list node;
+};
+
+static struct shash netdev_linux_notifiers =
+ SHASH_INITIALIZER(&netdev_linux_notifiers);
+static struct linux_netdev_notifier netdev_linux_poll_notifier;
+
+/* Rate limiter shared by the VLOG_*_RL() calls in this file.
+ *
+ * This is set pretty low because we probably won't learn anything from the
+ * additional log messages. */
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+
+/* Forward declarations of file-local helpers that are used before their
+ * definitions. */
+static int netdev_linux_do_ethtool(struct netdev *, struct ethtool_cmd *,
+ int cmd, const char *cmd_name);
+static int netdev_linux_do_ioctl(const struct netdev *, struct ifreq *,
+ int cmd, const char *cmd_name);
+static int get_flags(const struct netdev *, int *flagsp);
+static int set_flags(struct netdev *, int flags);
+static int do_get_ifindex(const char *netdev_name);
+static int get_ifindex(const struct netdev *, int *ifindexp);
+static int do_set_addr(struct netdev *netdev,
+ int ioctl_nr, const char *ioctl_name,
+ struct in_addr addr);
+static int get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN]);
+static int set_etheraddr(const char *netdev_name, int hwaddr_family,
+ const uint8_t[ETH_ADDR_LEN]);
+static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats);
+static int get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats);
+
+/* Returns the struct netdev_linux that embeds 'netdev', after
+ * netdev_assert_class() has checked 'netdev' against netdev_linux_class. */
+static struct netdev_linux *
+netdev_linux_cast(const struct netdev *netdev)
+{
+    struct netdev_linux *dev;
+
+    netdev_assert_class(netdev, &netdev_linux_class);
+    dev = CONTAINER_OF(netdev, struct netdev_linux, netdev);
+    return dev;
+}
+
+/* Creates the shared AF_INET socket 'af_inet_sock' the first time it is
+ * called and remembers the outcome.  Returns 0 on success, otherwise the
+ * errno value from socket(); later calls return the remembered result
+ * without retrying. */
+static int
+netdev_linux_init(void)
+{
+    static int status = -1;
+
+    if (status >= 0) {
+        /* Already attempted: report the recorded outcome. */
+        return status;
+    }
+
+    af_inet_sock = socket(AF_INET, SOCK_DGRAM, 0);
+    status = af_inet_sock < 0 ? errno : 0;
+    if (status != 0) {
+        VLOG_ERR("failed to create inet socket: %s", strerror(status));
+    }
+    return status;
+}
+
+/* Periodic work for Linux network devices: delegates to
+ * linux_netdev_notifier_run().
+ * NOTE(review): presumably installed as the 'run' hook of netdev_linux_class
+ * -- the class definition is not visible here. */
+static void
+netdev_linux_run(void)
+{
+ linux_netdev_notifier_run();
+}
+
+/* Wait-side counterpart of netdev_linux_run(): delegates to
+ * linux_netdev_notifier_wait().
+ * NOTE(review): presumably installed as the 'wait' hook of netdev_linux_class
+ * -- the class definition is not visible here. */
+static void
+netdev_linux_wait(void)
+{
+ linux_netdev_notifier_wait();
+}
+
+/* Notifier callback that invalidates cached device information.
+ *
+ * With a non-null 'change', only the cache entry named by 'change->ifname'
+ * (if one exists) is invalidated.  With a null 'change', every entry in
+ * 'cache_map' is invalidated. */
+static void
+netdev_linux_cache_cb(const struct linux_netdev_change *change,
+                      void *aux UNUSED)
+{
+    if (!change) {
+        struct shash_node *node;
+
+        SHASH_FOR_EACH (node, &cache_map) {
+            struct netdev_linux_cache *entry = node->data;
+
+            entry->valid = 0;
+        }
+    } else {
+        struct netdev_linux_cache *entry;
+
+        entry = shash_find_data(&cache_map, change->ifname);
+        if (entry) {
+            entry->valid = 0;
+        }
+    }
+}
+
+/* Opens a Linux network device and stores the new handle in '*netdevp'.
+ *
+ * 'name' is only inspected for a "tap:" prefix, which requests creation of a
+ * tap device through /dev/net/tun.  'suffix' is the device name proper: it is
+ * passed to netdev_init(), used as the key into 'cache_map', and reported in
+ * the bind() error message.  'ethertype' selects the protocol of the
+ * PF_PACKET socket; no packet socket is created for NETDEV_ETH_TYPE_NONE
+ * unless a tap device was opened.
+ *
+ * Returns 0 on success.  On failure, releases the handle with netdev_close()
+ * and returns a positive errno value, leaving '*netdevp' untouched. */
+static int
+netdev_linux_open(const char *name, char *suffix, int ethertype,
+ struct netdev **netdevp)
+{
+ struct netdev_linux *netdev;
+ enum netdev_flags flags;
+ int error;
+
+ /* Allocate network device. */
+ netdev = xcalloc(1, sizeof *netdev);
+ netdev_init(&netdev->netdev, suffix, &netdev_linux_class);
+ netdev->netdev_fd = -1;
+ netdev->tap_fd = -1;
+ /* Share the per-name cache with other open handles, creating it (and
+ * registering the invalidation notifier for the first entry) if needed. */
+ netdev->cache = shash_find_data(&cache_map, suffix);
+ if (!netdev->cache) {
+ if (shash_is_empty(&cache_map)) {
+ /* This 'error' shadows the function-scope one; it is returned
+ * directly, so the outer variable is never consulted here. */
+ int error = linux_netdev_notifier_register(
+ &netdev_linux_cache_notifier, netdev_linux_cache_cb, NULL);
+ if (error) {
+ netdev_close(&netdev->netdev);
+ return error;
+ }
+ }
+ netdev->cache = xmalloc(sizeof *netdev->cache);
+ netdev->cache->shash_node = shash_add(&cache_map, suffix,
+ netdev->cache);
+ netdev->cache->valid = 0;
+ netdev->cache->ref_cnt = 0;
+ }
+ netdev->cache->ref_cnt++;
+
+ if (!strncmp(name, "tap:", 4)) {
+ static const char tap_dev[] = "/dev/net/tun";
+ struct ifreq ifr;
+
+ /* Open tap device. */
+ netdev->tap_fd = open(tap_dev, O_RDWR);
+ if (netdev->tap_fd < 0) {
+ error = errno;
+ VLOG_WARN("opening \"%s\" failed: %s", tap_dev, strerror(error));
+ goto error;
+ }
+
+ /* Create tap device.
+ * NOTE(review): only 'ifr_flags' is set here; this presumably relies on
+ * netdev_linux_do_ioctl() to fill in 'ifr_name' and to pick the fd the
+ * ioctl is issued on -- confirm that TUNSETIFF reaches 'tap_fd'. */
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+ error = netdev_linux_do_ioctl(&netdev->netdev, &ifr,
+ TUNSETIFF, "TUNSETIFF");
+ if (error) {
+ goto error;
+ }
+
+ /* Make non-blocking. */
+ error = set_nonblocking(netdev->tap_fd);
+ if (error) {
+ goto error;
+ }
+ }
+
+ /* Only ENODEV is treated as fatal here; other errors from the flags
+ * query are ignored. */
+ error = netdev_get_flags(&netdev->netdev, &flags);
+ if (error == ENODEV) {
+ goto error;
+ }
+
+ if (netdev->tap_fd >= 0 || ethertype != NETDEV_ETH_TYPE_NONE) {
+ struct sockaddr_ll sll;
+ int protocol;
+ int ifindex;
+
+ /* Create file descriptor. */
+ protocol = (ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL
+ : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2
+ : ethertype);
+ netdev->netdev_fd = socket(PF_PACKET, SOCK_RAW, htons(protocol));
+ if (netdev->netdev_fd < 0) {
+ error = errno;
+ goto error;
+ }
+ /* For a non-tap device, both fds refer to the packet socket. */
+ if (netdev->tap_fd < 0) {
+ netdev->tap_fd = netdev->netdev_fd;
+ }
+
+ /* Set non-blocking mode. */
+ error = set_nonblocking(netdev->netdev_fd);
+ if (error) {
+ goto error;
+ }
+
+ /* Get ethernet device index. */
+ error = get_ifindex(&netdev->netdev, &ifindex);
+ if (error) {
+ goto error;
+ }
+
+ /* Bind to specific ethernet device. */
+ memset(&sll, 0, sizeof sll);
+ sll.sll_family = AF_PACKET;
+ sll.sll_ifindex = ifindex;
+ if (bind(netdev->netdev_fd,
+ (struct sockaddr *) &sll, sizeof sll) < 0) {
+ error = errno;
+ VLOG_ERR("bind to %s failed: %s", suffix, strerror(error));
+ goto error;
+ }
+
+ /* Between the socket() and bind() calls above, the socket receives all
+ * packets of the requested type on all system interfaces. We do not
+ * want to receive that data, but there is no way to avoid it. So we
+ * must now drain out the receive queue. */
+ error = drain_rcvbuf(netdev->netdev_fd);
+ if (error) {
+ goto error;
+ }
+ }
+
+ *netdevp = &netdev->netdev;
+ return 0;
+
+error:
+ /* NOTE(review): netdev_close() presumably dispatches to
+ * netdev_linux_close(), which drops the cache reference and closes any
+ * fds opened above -- confirm. */
+ netdev_close(&netdev->netdev);
+ return error;
+}
+
+/* Closes and destroys 'netdev'.
+ *
+ * Drops this handle's reference on the shared cache entry, freeing the entry
+ * when no references remain and unregistering the cache notifier when
+ * 'cache_map' becomes empty.  Then closes the handle's file descriptors,
+ * taking care to close a shared descriptor only once. */
+static void
+netdev_linux_close(struct netdev *netdev_)
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+    struct netdev_linux_cache *cache = dev->cache;
+
+    if (cache) {
+        cache->ref_cnt--;
+        if (cache->ref_cnt == 0) {
+            shash_delete(&cache_map, cache->shash_node);
+            free(cache);
+
+            if (shash_is_empty(&cache_map)) {
+                linux_netdev_notifier_unregister(&netdev_linux_cache_notifier);
+            }
+        }
+    }
+
+    if (dev->netdev_fd >= 0) {
+        close(dev->netdev_fd);
+    }
+    if (dev->tap_fd >= 0 && dev->tap_fd != dev->netdev_fd) {
+        close(dev->tap_fd);
+    }
+    free(dev);
+}
+/* Appends the names of all known network devices to 'svec', which the caller
+ * must already have initialized.
+ *
+ * Returns 0 on success.  If if_nameindex() fails, logs a warning and returns
+ * the errno value it reported, leaving 'svec' unchanged. */
+static int
+netdev_linux_enumerate(struct svec *svec)
+{
+    struct if_nameindex *names;
+    size_t i;
+
+    names = if_nameindex();
+    if (!names) {
+        /* Capture errno first: the logging call below may overwrite it, and
+         * we must report the value that if_nameindex() set. */
+        int error = errno;
+
+        VLOG_WARN("could not obtain list of network device names: %s",
+                  strerror(error));
+        return error;
+    }
+
+    for (i = 0; names[i].if_name != NULL; i++) {
+        svec_add(svec, names[i].if_name);
+    }
+    if_freenameindex(names);
+    return 0;
+}
+
+/* Attempts to read one packet from 'netdev_' into the 'size' bytes at 'data'.
+ *
+ * Returns the number of bytes read on success, or an errno value on failure:
+ * EAGAIN if the device has no receive descriptor (it was opened with
+ * NETDEV_ETH_TYPE_NONE) or if read() reported EAGAIN.  Reads interrupted by
+ * EINTR are retried.
+ *
+ * NOTE(review): byte counts and errno values are both non-negative, so a
+ * caller cannot tell them apart from the return value alone -- confirm how
+ * the generic wrapper interprets this. */
+static int
+netdev_linux_recv(struct netdev *netdev_, void *data, size_t size)
+{
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+
+    if (netdev->tap_fd < 0) {
+        /* Device was opened with NETDEV_ETH_TYPE_NONE. */
+        return EAGAIN;
+    }
+
+    for (;;) {
+        ssize_t retval = read(netdev->tap_fd, data, size);
+        if (retval >= 0) {
+            return retval;
+        } else if (errno != EINTR) {
+            /* Save errno before logging so that the value we return is the
+             * one read() produced. */
+            int error = errno;
+
+            if (error != EAGAIN) {
+                /* Device name first, then the error text, matching the
+                 * order of the conversions in the format string. */
+                VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
+                             netdev_get_name(netdev_), strerror(error));
+            }
+            return error;
+        }
+    }
+}
+
+/* Arranges for the next poll_block() to wake up when 'netdev_' has a packet
+ * ready to be received with netdev_recv().  Does nothing if the device has no
+ * receive descriptor. */
+static void
+netdev_linux_recv_wait(struct netdev *netdev_)
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+
+    if (dev->tap_fd < 0) {
+        return;
+    }
+    poll_fd_wait(dev->tap_fd, POLLIN);
+}
+
+/* Discards all packets waiting to be received from 'netdev_'.
+ *
+ * With no descriptors open there is nothing to drain.  For an ordinary device
+ * (both descriptors identical) the socket receive buffer is drained.  For a
+ * tap device, the transmit queue length is queried with SIOCGIFTXQLEN and
+ * passed to drain_fd() along with the tap descriptor.
+ *
+ * Returns 0 on success, otherwise the error from the ioctl or from
+ * drain_rcvbuf(). */
+static int
+netdev_linux_drain(struct netdev *netdev_)
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+    struct ifreq ifr;
+    int error;
+
+    if (dev->tap_fd < 0 && dev->netdev_fd < 0) {
+        return 0;
+    }
+    if (dev->tap_fd == dev->netdev_fd) {
+        return drain_rcvbuf(dev->netdev_fd);
+    }
+
+    error = netdev_linux_do_ioctl(netdev_, &ifr,
+                                  SIOCGIFTXQLEN, "SIOCGIFTXQLEN");
+    if (error) {
+        return error;
+    }
+    drain_fd(dev->tap_fd, ifr.ifr_qlen);
+    return 0;
+}
+
+/* Sends the 'size' bytes at 'data' on 'netdev_'.  Returns 0 if successful,
+ * otherwise a positive errno value.  Returns EAGAIN without blocking if the
+ * packet cannot be queued immediately.  Returns EMSGSIZE if a partial packet
+ * was transmitted.  Returns EPIPE if the device has no descriptor to write
+ * to.
+ *
+ * The caller retains ownership of 'data' in all cases.
+ *
+ * The kernel maintains a packet transmission queue, so the caller is not
+ * expected to do additional queuing of packets. */
+static int
+netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
+{
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+
+    /* XXX should support sending even if 'ethertype' was NETDEV_ETH_TYPE_NONE.
+     */
+    if (netdev->tap_fd < 0) {
+        return EPIPE;
+    }
+
+    for (;;) {
+        ssize_t retval = write(netdev->tap_fd, data, size);
+        if (retval < 0) {
+            /* Save errno immediately: the logging call below may overwrite
+             * it before we return it. */
+            int error = errno;
+
+            /* The Linux AF_PACKET implementation never blocks waiting for room
+             * for packets, instead returning ENOBUFS.  Translate this into
+             * EAGAIN for the caller. */
+            if (error == ENOBUFS) {
+                return EAGAIN;
+            } else if (error == EINTR) {
+                continue;
+            } else if (error != EAGAIN) {
+                VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
+                             netdev_get_name(netdev_), strerror(error));
+            }
+            return error;
+        } else if ((size_t) retval != size) {
+            /* 'retval' is known to be non-negative here, so the conversion
+             * to size_t is safe. */
+            VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of "
+                         "%zu) on %s", retval, size, netdev_get_name(netdev_));
+            return EMSGSIZE;
+        } else {
+            return 0;
+        }
+    }
+}
+
+/* Arranges for the next poll_block() to wake up when 'netdev_' can accept a
+ * packet for transmission with netdev_send().
+ *
+ * The kernel maintains a packet transmission queue, so the client is not
+ * expected to do additional queuing of packets.  Thus, this function is
+ * unlikely to ever be used.  It is included for completeness. */
+static void
+netdev_linux_send_wait(struct netdev *netdev_)
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+
+    if (dev->tap_fd < 0 && dev->netdev_fd < 0) {
+        /* No descriptors at all: nothing to wait for. */
+        return;
+    }
+
+    if (dev->tap_fd != dev->netdev_fd) {
+        /* TAP device always accepts packets. */
+        poll_immediate_wake();
+    } else {
+        poll_fd_wait(dev->tap_fd, POLLOUT);
+    }
+}
+
+/* Attempts to change the MAC address of 'netdev_' to 'mac'.  On success, also
+ * copies 'mac' into the device's cache entry.  Returns 0 if successful,
+ * otherwise a positive errno value. */
+static int
+netdev_linux_set_etheraddr(struct netdev *netdev_,
+                           const uint8_t mac[ETH_ADDR_LEN])
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+    int error;
+
+    error = set_etheraddr(netdev_get_name(netdev_), ARPHRD_ETHER, mac);
+    if (error) {
+        return error;
+    }
+
+    memcpy(dev->cache->etheraddr, mac, ETH_ADDR_LEN);
+    return 0;
+}
+
+/* Copies the MAC address of 'netdev_' into 'mac'.
+ *
+ * The address comes from the device's cache entry; if the cached copy is not
+ * marked valid, it is first refreshed with get_etheraddr().  Returns 0 if
+ * successful, otherwise the error from get_etheraddr(), in which case 'mac'
+ * is not written. */
+static int
+netdev_linux_get_etheraddr(const struct netdev *netdev_,
+                           uint8_t mac[ETH_ADDR_LEN])
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+    struct netdev_linux_cache *cache = dev->cache;
+
+    if ((cache->valid & VALID_ETHERADDR) == 0) {
+        int error;
+
+        error = get_etheraddr(netdev_get_name(netdev_), cache->etheraddr);
+        if (error) {
+            return error;
+        }
+        cache->valid |= VALID_ETHERADDR;
+    }
+
+    memcpy(mac, cache->etheraddr, ETH_ADDR_LEN);
+    return 0;
+}
+
+/* Stores in '*mtup' the maximum size of transmitted (and received) packets
+ * on 'netdev_', in bytes, not including the hardware header; thus, this is
+ * typically 1500 bytes for Ethernet devices.
+ *
+ * The value is served from the device's cache entry and refreshed with
+ * SIOCGIFMTU when the cached copy is not valid.  Returns 0 if successful,
+ * otherwise the error from the ioctl, in which case '*mtup' is not
+ * written. */
+static int
+netdev_linux_get_mtu(const struct netdev *netdev_, int *mtup)
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+    struct netdev_linux_cache *cache = dev->cache;
+
+    if ((cache->valid & VALID_MTU) == 0) {
+        struct ifreq ifr;
+        int error;
+
+        error = netdev_linux_do_ioctl(netdev_, &ifr, SIOCGIFMTU, "SIOCGIFMTU");
+        if (error) {
+            return error;
+        }
+        cache->mtu = ifr.ifr_mtu;
+        cache->valid |= VALID_MTU;
+    }
+
+    *mtup = cache->mtu;
+    return 0;
+}
+
+/* Stores in '*carrier' whether 'netdev_' currently has carrier.
+ *
+ * A valid cached value is returned directly.  Otherwise the state is read
+ * from /sys/class/net/<name>/carrier, whose first byte must be '0' or '1',
+ * and the result is cached.
+ *
+ * Returns 0 if successful, otherwise a positive errno value (EPROTO if the
+ * file is empty or holds an unexpected value), in which case '*carrier' is
+ * not written. */
+static int
+netdev_linux_get_carrier(const struct netdev *netdev_, bool *carrier)
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+    char buf[8];
+    char *path;
+    int n_read;
+    int error;
+    int fd;
+
+    if (dev->cache->valid & VALID_CARRIER) {
+        *carrier = dev->cache->carrier;
+        return 0;
+    }
+
+    path = xasprintf("/sys/class/net/%s/carrier", netdev_get_name(netdev_));
+    fd = open(path, O_RDONLY);
+    if (fd < 0) {
+        error = errno;
+        VLOG_WARN_RL(&rl, "%s: open failed: %s", path, strerror(error));
+        free(path);
+        return error;
+    }
+
+    n_read = read(fd, buf, sizeof buf);
+    if (n_read < 0) {
+        error = errno;
+        /* EINVAL is the normal return value when we try to check carrier
+         * if the network device is not up, so it is not worth logging. */
+        if (error != EINVAL) {
+            VLOG_WARN_RL(&rl, "%s: read failed: %s", path, strerror(error));
+        }
+    } else if (n_read == 0) {
+        error = EPROTO;
+        VLOG_WARN_RL(&rl, "%s: unexpected end of file", path);
+    } else if (buf[0] != '0' && buf[0] != '1') {
+        error = EPROTO;
+        VLOG_WARN_RL(&rl, "%s: value is %c (expected 0 or 1)",
+                     path, buf[0]);
+    } else {
+        dev->cache->carrier = buf[0] != '0';
+        dev->cache->valid |= VALID_CARRIER;
+        *carrier = dev->cache->carrier;
+        error = 0;
+    }
+
+    close(fd);
+    free(path);
+    return error;
+}
+
+/* Checks whether RTM_GETLINK can be used to get network device statistics,
+ * by probing the "lo" device.  In pre-2.6.19 kernels, this was only available
+ * if wireless extensions were enabled.
+ *
+ * Returns true if netlink statistics work, false if the caller should fall
+ * back to reading statistics from proc. */
+static bool
+check_for_working_netlink_stats(void)
+{
+    struct netdev_stats stats;
+    int ifindex;
+    int error;
+
+    /* Netlink is the preferable netdev_get_stats() implementation, so use it
+     * whenever the probe succeeds. */
+    ifindex = do_get_ifindex("lo");
+    if (ifindex < 0) {
+        VLOG_WARN("failed to get ifindex for lo, "
+                  "obtaining netdev stats from proc");
+        return false;
+    }
+
+    error = get_stats_via_netlink(ifindex, &stats);
+    if (error) {
+        VLOG_INFO("RTM_GETLINK failed (%s), obtaining netdev stats "
+                  "via proc (you are probably running a pre-2.6.19 "
+                  "kernel)", strerror(error));
+        return false;
+    }
+
+    VLOG_DBG("obtaining netdev stats via rtnetlink");
+    return true;
+}
+
+/* Retrieves current device stats for 'netdev' into '*stats'.
+ *
+ * The first call decides, once for the whole process, whether statistics are
+ * fetched via netlink or via proc.  Returns 0 if successful, otherwise the
+ * error from the chosen back end (or from the ifindex lookup).
+ *
+ * XXX All of the members of struct netdev_stats are 64 bits wide, but on
+ * 32-bit architectures the Linux network stats are only 32 bits. */
+static int
+netdev_linux_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
+{
+    static int use_netlink_stats = -1;
+    int ifindex;
+    int error;
+
+    COVERAGE_INC(netdev_get_stats);
+    if (use_netlink_stats < 0) {
+        use_netlink_stats = check_for_working_netlink_stats();
+    }
+
+    if (!use_netlink_stats) {
+        return get_stats_via_proc(netdev->name, stats);
+    }
+
+    error = get_ifindex(netdev, &ifindex);
+    if (error) {
+        return error;
+    }
+    return get_stats_via_netlink(ifindex, stats);
+}
+
+/* Queries 'netdev' with ETHTOOL_GSET and stores its features into
+ * '*current', '*advertised', '*supported', and '*peer'.  Each value is a
+ * bitmap of "enum ofp_port_features" bits, in host byte order.  Returns 0 if
+ * successful, otherwise the error from the ethtool query.
+ *
+ * All four output pointers are dereferenced unconditionally, and none of
+ * them is written when the query fails.
+ * NOTE(review): callers that pass null pointers or expect zeroed outputs on
+ * failure presumably get that from a generic wrapper -- confirm. */
+static int
+netdev_linux_get_features(struct netdev *netdev,
+                          uint32_t *current, uint32_t *advertised,
+                          uint32_t *supported, uint32_t *peer)
+{
+    struct feature_map {
+        uint32_t ethtool_bit;       /* SUPPORTED_* or ADVERTISED_* bit. */
+        uint32_t ofp_bit;           /* Corresponding OFPPF_* bit. */
+    };
+    static const struct feature_map supported_map[] = {
+        { SUPPORTED_10baseT_Half, OFPPF_10MB_HD },
+        { SUPPORTED_10baseT_Full, OFPPF_10MB_FD },
+        { SUPPORTED_100baseT_Half, OFPPF_100MB_HD },
+        { SUPPORTED_100baseT_Full, OFPPF_100MB_FD },
+        { SUPPORTED_1000baseT_Half, OFPPF_1GB_HD },
+        { SUPPORTED_1000baseT_Full, OFPPF_1GB_FD },
+        { SUPPORTED_10000baseT_Full, OFPPF_10GB_FD },
+        { SUPPORTED_TP, OFPPF_COPPER },
+        { SUPPORTED_FIBRE, OFPPF_FIBER },
+        { SUPPORTED_Autoneg, OFPPF_AUTONEG },
+        { SUPPORTED_Pause, OFPPF_PAUSE },
+        { SUPPORTED_Asym_Pause, OFPPF_PAUSE_ASYM },
+    };
+    static const struct feature_map advertised_map[] = {
+        { ADVERTISED_10baseT_Half, OFPPF_10MB_HD },
+        { ADVERTISED_10baseT_Full, OFPPF_10MB_FD },
+        { ADVERTISED_100baseT_Half, OFPPF_100MB_HD },
+        { ADVERTISED_100baseT_Full, OFPPF_100MB_FD },
+        { ADVERTISED_1000baseT_Half, OFPPF_1GB_HD },
+        { ADVERTISED_1000baseT_Full, OFPPF_1GB_FD },
+        { ADVERTISED_10000baseT_Full, OFPPF_10GB_FD },
+        { ADVERTISED_TP, OFPPF_COPPER },
+        { ADVERTISED_FIBRE, OFPPF_FIBER },
+        { ADVERTISED_Autoneg, OFPPF_AUTONEG },
+        { ADVERTISED_Pause, OFPPF_PAUSE },
+        { ADVERTISED_Asym_Pause, OFPPF_PAUSE_ASYM },
+    };
+    struct ethtool_cmd ecmd;
+    size_t i;
+    int error;
+
+    memset(&ecmd, 0, sizeof ecmd);
+    error = netdev_linux_do_ethtool(netdev, &ecmd,
+                                    ETHTOOL_GSET, "ETHTOOL_GSET");
+    if (error) {
+        return error;
+    }
+
+    /* Supported features. */
+    *supported = 0;
+    for (i = 0; i < sizeof supported_map / sizeof supported_map[0]; i++) {
+        if (ecmd.supported & supported_map[i].ethtool_bit) {
+            *supported |= supported_map[i].ofp_bit;
+        }
+    }
+
+    /* Advertised features. */
+    *advertised = 0;
+    for (i = 0; i < sizeof advertised_map / sizeof advertised_map[0]; i++) {
+        if (ecmd.advertising & advertised_map[i].ethtool_bit) {
+            *advertised |= advertised_map[i].ofp_bit;
+        }
+    }
+
+    /* Current settings: link speed and duplex first... */
+    switch (ecmd.speed) {
+    case SPEED_10:
+        *current = ecmd.duplex ? OFPPF_10MB_FD : OFPPF_10MB_HD;
+        break;
+    case SPEED_100:
+        *current = ecmd.duplex ? OFPPF_100MB_FD : OFPPF_100MB_HD;
+        break;
+    case SPEED_1000:
+        *current = ecmd.duplex ? OFPPF_1GB_FD : OFPPF_1GB_HD;
+        break;
+    case SPEED_10000:
+        *current = OFPPF_10GB_FD;
+        break;
+    default:
+        *current = 0;
+        break;
+    }
+
+    /* ...then the medium... */
+    switch (ecmd.port) {
+    case PORT_TP:
+        *current |= OFPPF_COPPER;
+        break;
+    case PORT_FIBRE:
+        *current |= OFPPF_FIBER;
+        break;
+    default:
+        break;
+    }
+
+    /* ...and finally autonegotiation. */
+    if (ecmd.autoneg) {
+        *current |= OFPPF_AUTONEG;
+    }
+
+    /* Peer advertisements. */
+    *peer = 0;                  /* XXX */
+
+    return 0;
+}
+
+/* Sets the features advertised by 'netdev' to 'advertise', a bitmap of
+ * OFPPF_* bits.
+ *
+ * The current settings are fetched with ETHTOOL_GSET, the advertising mask
+ * is rebuilt from 'advertise', and the result is written back with
+ * ETHTOOL_SSET.  Returns 0 if successful, otherwise the error from either
+ * ethtool operation. */
+static int
+netdev_linux_set_advertisements(struct netdev *netdev, uint32_t advertise)
+{
+    static const struct {
+        uint32_t ofp_bit;           /* OFPPF_* bit requested by the caller. */
+        uint32_t ethtool_bit;       /* Corresponding ADVERTISED_* bit. */
+    } advertise_map[] = {
+        { OFPPF_10MB_HD, ADVERTISED_10baseT_Half },
+        { OFPPF_10MB_FD, ADVERTISED_10baseT_Full },
+        { OFPPF_100MB_HD, ADVERTISED_100baseT_Half },
+        { OFPPF_100MB_FD, ADVERTISED_100baseT_Full },
+        { OFPPF_1GB_HD, ADVERTISED_1000baseT_Half },
+        { OFPPF_1GB_FD, ADVERTISED_1000baseT_Full },
+        { OFPPF_10GB_FD, ADVERTISED_10000baseT_Full },
+        { OFPPF_COPPER, ADVERTISED_TP },
+        { OFPPF_FIBER, ADVERTISED_FIBRE },
+        { OFPPF_AUTONEG, ADVERTISED_Autoneg },
+        { OFPPF_PAUSE, ADVERTISED_Pause },
+        { OFPPF_PAUSE_ASYM, ADVERTISED_Asym_Pause },
+    };
+    struct ethtool_cmd ecmd;
+    size_t i;
+    int error;
+
+    memset(&ecmd, 0, sizeof ecmd);
+    error = netdev_linux_do_ethtool(netdev, &ecmd,
+                                    ETHTOOL_GSET, "ETHTOOL_GSET");
+    if (error) {
+        return error;
+    }
+
+    ecmd.advertising = 0;
+    for (i = 0; i < sizeof advertise_map / sizeof advertise_map[0]; i++) {
+        if (advertise & advertise_map[i].ofp_bit) {
+            ecmd.advertising |= advertise_map[i].ethtool_bit;
+        }
+    }
+    return netdev_linux_do_ethtool(netdev, &ecmd,
+                                   ETHTOOL_SSET, "ETHTOOL_SSET");
+}
+
+/* If 'netdev' is a VLAN network device (e.g. one created with vconfig(8)),
+ * sets '*vlan_vid' to the VLAN VID associated with that device and returns 0.
+ * Otherwise returns a positive errno value and sets '*vlan_vid' to -1:
+ * the errno from opening /proc/net/vlan/<name> if that fails, or EPROTO if
+ * the file cannot be read or parsed. */
+static int
+netdev_linux_get_vlan_vid(const struct netdev *netdev, int *vlan_vid)
+{
+    const char *netdev_name = netdev_get_name(netdev);
+    struct ds line = DS_EMPTY_INITIALIZER;
+    FILE *stream = NULL;
+    int error;
+    char *fn;
+
+    COVERAGE_INC(netdev_get_vlan_vid);
+    fn = xasprintf("/proc/net/vlan/%s", netdev_name);
+    stream = fopen(fn, "r");
+    if (!stream) {
+        error = errno;
+        goto done;
+    }
+
+    if (ds_get_line(&line, stream)) {
+        if (ferror(stream)) {
+            error = errno;
+            VLOG_ERR_RL(&rl, "error reading \"%s\": %s", fn, strerror(error));
+        } else {
+            error = EPROTO;
+            VLOG_ERR_RL(&rl, "unexpected end of file reading \"%s\"", fn);
+        }
+        goto done;
+    }
+
+    /* Require exactly one successful conversion.  sscanf() returns EOF (a
+     * nonzero value) when the input ends before the first conversion, so a
+     * plain "!sscanf(...)" test would accept that case and leave '*vlan_vid'
+     * unset. */
+    if (sscanf(ds_cstr(&line), "%*s VID: %d", vlan_vid) != 1) {
+        error = EPROTO;
+        VLOG_ERR_RL(&rl, "parse error reading \"%s\" line 1: \"%s\"",
+                    fn, ds_cstr(&line));
+        goto done;
+    }
+
+    error = 0;
+
+done:
+    free(fn);
+    if (stream) {
+        fclose(stream);
+    }
+    ds_destroy(&line);
+    if (error) {
+        *vlan_vid = -1;
+    }
+    return error;
+}
+
+/* Shell command templates used by netdev_linux_set_policing().  Each takes
+ * the device name as its first argument; POLICE_CONFIG_CMD also takes the
+ * rate and the burst size as uint32_t values, hence the PRIu32
+ * conversions. */
+#define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress"
+#define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %"PRIu32"kbit burst %"PRIu32"k mtu 65535 drop flowid :1"
+/* We redirect stderr to /dev/null because we often want to remove all
+ * traffic control configuration on a port so it's in a known state.  If
+ * this is done when there is no such configuration, tc complains, so we just
+ * always ignore it.
+ */
+#define POLICE_DEL_CMD "/sbin/tc qdisc del dev %s handle ffff: ingress 2>/dev/null"
+
+/* Attempts to set input rate limiting (policing) policy on 'netdev'.
+ *
+ * Any existing ingress policing is always removed first.  If 'kbits_rate' is
+ * nonzero, a new ingress qdisc and police filter are then installed with
+ * that rate and with 'kbits_burst' as the burst size (10 if 'kbits_burst' is
+ * zero).  A 'kbits_rate' of zero therefore just removes policing.
+ *
+ * Returns 0 if successful.  Returns -1 (not an errno value) if adding or
+ * configuring the policer fails; failure to remove the old configuration is
+ * only logged.
+ *
+ * NOTE(review): the device name is interpolated into a command line that is
+ * run through system(), i.e. interpreted by a shell.  Callers must not pass
+ * device names from untrusted sources. */
+static int
+netdev_linux_set_policing(struct netdev *netdev,
+                          uint32_t kbits_rate, uint32_t kbits_burst)
+{
+    const char *netdev_name = netdev_get_name(netdev);
+    char command[1024];
+
+    COVERAGE_INC(netdev_set_policing);
+    if (kbits_rate) {
+        if (!kbits_burst) {
+            /* Default to 10 kilobits if not specified. */
+            kbits_burst = 10;
+        }
+
+        /* xxx This should be more careful about only adding if it
+         * xxx actually exists, as opposed to always deleting it. */
+        snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name);
+        if (system(command) == -1) {
+            VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name);
+        }
+
+        snprintf(command, sizeof(command), POLICE_ADD_CMD, netdev_name);
+        if (system(command) != 0) {
+            VLOG_WARN_RL(&rl, "%s: problem adding policing", netdev_name);
+            return -1;
+        }
+
+        /* The rate and burst are unsigned 32-bit values; POLICE_CONFIG_CMD
+         * formats them with PRIu32 so that values of 2^31 and above are not
+         * printed as negative numbers. */
+        snprintf(command, sizeof(command), POLICE_CONFIG_CMD, netdev_name,
+                 kbits_rate, kbits_burst);
+        if (system(command) != 0) {
+            VLOG_WARN_RL(&rl, "%s: problem configuring policing",
+                         netdev_name);
+            return -1;
+        }
+    } else {
+        snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name);
+        if (system(command) == -1) {
+            VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name);
+        }
+    }
+
+    return 0;
+}
+
+/* Stores the IPv4 address assigned to 'netdev_' in '*in4'.
+ *
+ * The address is served from the device's cache entry and refreshed with
+ * SIOCGIFADDR when the cached copy is not valid.  Returns 0 if the device
+ * has an address, EADDRNOTAVAIL if the address is INADDR_ANY, or the error
+ * from the ioctl (in which case '*in4' is not written).  'in4' is always
+ * dereferenced on the non-error paths, so it must not be null. */
+static int
+netdev_linux_get_in4(const struct netdev *netdev_, struct in_addr *in4)
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+    struct netdev_linux_cache *cache = dev->cache;
+
+    if ((cache->valid & VALID_IN4) == 0) {
+        const struct sockaddr_in *sin;
+        struct ifreq ifr;
+        int error;
+
+        ifr.ifr_addr.sa_family = AF_INET;
+        error = netdev_linux_do_ioctl(netdev_, &ifr,
+                                      SIOCGIFADDR, "SIOCGIFADDR");
+        if (error) {
+            return error;
+        }
+
+        sin = (struct sockaddr_in *) &ifr.ifr_addr;
+        cache->in4 = sin->sin_addr;
+        cache->valid |= VALID_IN4;
+    }
+
+    *in4 = cache->in4;
+    if (in4->s_addr == INADDR_ANY) {
+        return EADDRNOTAVAIL;
+    }
+    return 0;
+}
+
+/* Assigns 'addr' as the IPv4 address of 'netdev_' and 'mask' as its netmask.
+ * If 'addr' is INADDR_ANY, only the address is set and 'mask' is ignored.
+ * On a successful address change the cached address is updated as well.
+ * Returns 0 if successful, otherwise a positive errno value. */
+static int
+netdev_linux_set_in4(struct netdev *netdev_, struct in_addr addr,
+                     struct in_addr mask)
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+    int error;
+
+    error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", addr);
+    if (error) {
+        return error;
+    }
+
+    dev->cache->valid |= VALID_IN4;
+    dev->cache->in4 = addr;
+    if (addr.s_addr == INADDR_ANY) {
+        return 0;
+    }
+    return do_set_addr(netdev_, SIOCSIFNETMASK, "SIOCSIFNETMASK", mask);
+}
+
+/* Parses 'line', one line in the format of /proc/net/if_inet6: 16 address
+ * bytes as 32 hex digits, four hex fields that are skipped, and an interface
+ * name of at most 16 characters.  Stores the address in '*in6' and the name
+ * in 'ifname'.  Returns true only if all 17 values were converted. */
+static bool
+parse_if_inet6_line(const char *line,
+                    struct in6_addr *in6, char ifname[16 + 1])
+{
+    uint8_t *octets = in6->s6_addr;
+    int n_converted;
+
+#define X8 "%2"SCNx8
+    n_converted = sscanf(line,
+                         " "X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8
+                         "%*x %*x %*x %*x %16s\n",
+                         &octets[0], &octets[1], &octets[2], &octets[3],
+                         &octets[4], &octets[5], &octets[6], &octets[7],
+                         &octets[8], &octets[9], &octets[10], &octets[11],
+                         &octets[12], &octets[13], &octets[14], &octets[15],
+                         ifname);
+    return n_converted == 17;
+}
+
+/* Stores the IPv6 address assigned to 'netdev_' in '*in6' and returns 0.
+ *
+ * The address is served from the device's cache entry.  When the cached copy
+ * is not valid, /proc/net/if_inet6 is scanned for the first line naming this
+ * device; if the file cannot be opened or has no such line, the cached
+ * address becomes in6addr_any.  'in6' is always dereferenced, so it must not
+ * be null. */
+static int
+netdev_linux_get_in6(const struct netdev *netdev_, struct in6_addr *in6)
+{
+    struct netdev_linux *dev = netdev_linux_cast(netdev_);
+    struct netdev_linux_cache *cache = dev->cache;
+
+    if ((cache->valid & VALID_IN6) == 0) {
+        FILE *file;
+
+        cache->in6 = in6addr_any;
+
+        file = fopen("/proc/net/if_inet6", "r");
+        if (file != NULL) {
+            const char *name = netdev_get_name(netdev_);
+            char line[128];
+
+            while (fgets(line, sizeof line, file)) {
+                struct in6_addr addr;
+                char ifname[16 + 1];
+
+                if (!parse_if_inet6_line(line, &addr, ifname)
+                    || strcmp(name, ifname)) {
+                    continue;
+                }
+                cache->in6 = addr;
+                break;
+            }
+            fclose(file);
+        }
+        cache->valid |= VALID_IN6;
+    }
+
+    *in6 = cache->in6;
+    return 0;
+}
+
+/* Fills '*sa' with an AF_INET socket address for 'addr' with port 0.  '*sa'
+ * is zeroed first and then overwritten with a zero-initialized struct
+ * sockaddr_in carrying the address. */
+static void
+make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr)
+{
+    struct sockaddr_in in4;
+
+    memset(&in4, 0, sizeof in4);
+    in4.sin_port = 0;
+    in4.sin_addr = addr;
+    in4.sin_family = AF_INET;
+
+    memset(sa, 0, sizeof *sa);
+    memcpy(sa, &in4, sizeof in4);
+}
+
+/* Issues address-setting ioctl 'ioctl_nr' (named 'ioctl_name' for the
+ * helper's use) on 'netdev' with IPv4 address 'addr'.  Returns whatever
+ * netdev_linux_do_ioctl() returns. */
+static int
+do_set_addr(struct netdev *netdev,
+            int ioctl_nr, const char *ioctl_name, struct in_addr addr)
+{
+    struct ifreq request;
+    int error;
+
+    strncpy(request.ifr_name, netdev->name, sizeof request.ifr_name);
+    make_in4_sockaddr(&request.ifr_addr, addr);
+    error = netdev_linux_do_ioctl(netdev, &request, ioctl_nr, ioctl_name);
+    return error;
+}
+
+/* Installs 'router' as a default IPv4 gateway by adding a route whose
+ * destination and netmask are both INADDR_ANY.  Returns 0 on success,
+ * otherwise the errno value from the SIOCADDRT ioctl, which is also logged.
+ * 'netdev' is not used. */
+static int
+netdev_linux_add_router(struct netdev *netdev UNUSED, struct in_addr router)
+{
+    struct in_addr any = { INADDR_ANY };
+    struct rtentry route;
+    int error = 0;
+
+    memset(&route, 0, sizeof route);
+    route.rt_flags = RTF_UP | RTF_GATEWAY;
+    make_in4_sockaddr(&route.rt_gateway, router);
+    make_in4_sockaddr(&route.rt_dst, any);
+    make_in4_sockaddr(&route.rt_genmask, any);
+
+    COVERAGE_INC(netdev_add_router);
+    if (ioctl(af_inet_sock, SIOCADDRT, &route) < 0) {
+        error = errno;
+    }
+    if (error) {
+        VLOG_WARN("ioctl(SIOCADDRT): %s", strerror(error));
+    }
+    return error;
+}
+
+/* Queries the kernel ARP table for 'ip' on 'netdev'.  On success, copies the
+ * entry's hardware address into 'mac' and returns 0.  On failure, returns the
+ * positive errno value from the SIOCGARP ioctl.  ENXIO indicates that there
+ * is no ARP table entry for 'ip' on 'netdev'; it is the only failure that is
+ * not logged. */
+static int
+netdev_linux_arp_lookup(const struct netdev *netdev,
+                        uint32_t ip, uint8_t mac[ETH_ADDR_LEN])
+{
+    struct arpreq request;
+    struct sockaddr_in *proto_addr = (struct sockaddr_in *) &request.arp_pa;
+    int error;
+
+    memset(&request, 0, sizeof request);
+    request.arp_flags = 0;
+    request.arp_ha.sa_family = ARPHRD_ETHER;
+    strncpy(request.arp_dev, netdev->name, sizeof request.arp_dev);
+    proto_addr->sin_family = AF_INET;
+    proto_addr->sin_port = 0;
+    proto_addr->sin_addr.s_addr = ip;
+
+    COVERAGE_INC(netdev_arp_lookup);
+    error = ioctl(af_inet_sock, SIOCGARP, &request) < 0 ? errno : 0;
+    if (error == 0) {
+        memcpy(mac, request.arp_ha.sa_data, ETH_ADDR_LEN);
+        return 0;
+    }
+
+    if (error != ENXIO) {
+        VLOG_WARN_RL(&rl, "%s: could not look up ARP entry for "IP_FMT": %s",
+                     netdev->name, IP_ARGS(&ip), strerror(error));
+    }
+    return error;
+}
+
+/* Translates the NETDEV_UP and NETDEV_PROMISC bits of 'nd' into the
+ * corresponding IFF_UP and IFF_PROMISC bits.  No other bits of 'nd' are
+ * examined. */
+static int
+nd_to_iff_flags(enum netdev_flags nd)
+{
+    return ((nd & NETDEV_UP ? IFF_UP : 0)
+            | (nd & NETDEV_PROMISC ? IFF_PROMISC : 0));
+}
+
+/* Translates the IFF_UP and IFF_PROMISC bits of 'iff' into the corresponding
+ * NETDEV_UP and NETDEV_PROMISC bits.  No other bits of 'iff' are examined. */
+static int
+iff_to_nd_flags(int iff)
+{
+    return ((iff & IFF_UP ? NETDEV_UP : 0)
+            | (iff & IFF_PROMISC ? NETDEV_PROMISC : 0));
+}
+
+/* Reads the current interface flags of 'netdev' and stores their
+ * netdev_flags translation in '*old_flagsp'.  Then clears the flags in 'off'
+ * and sets the flags in 'on', calling set_flags() only if that actually
+ * changes the flag word.
+ *
+ * Returns 0 on success, otherwise the error from get_flags() or set_flags().
+ * If get_flags() fails, '*old_flagsp' is left unwritten. */
+static int
+netdev_linux_update_flags(struct netdev *netdev, enum netdev_flags off,
+                          enum netdev_flags on, enum netdev_flags *old_flagsp)
+{
+    int current, wanted;
+    int error;
+
+    error = get_flags(netdev, &current);
+    if (error) {
+        return error;
+    }
+
+    *old_flagsp = iff_to_nd_flags(current);
+    wanted = (current & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on);
+    return wanted != current ? set_flags(netdev, wanted) : 0;
+}
+
+/* Invokes the callback of every notifier on 'list', passing each embedded
+ * netdev_notifier to its own callback. */
+static void
+poll_notify(struct list *list)
+{
+    struct netdev_linux_notifier *notifier;
+    LIST_FOR_EACH (notifier, struct netdev_linux_notifier, node, list) {
+        struct netdev_notifier *n = &notifier->notifier;
+        n->cb(n);
+    }
+}
+
+/* Callback passed to linux_netdev_notifier_register().
+ *
+ * A non-null 'change' names one device in 'change->ifname', so only the
+ * notifiers filed under that name, if any, are invoked.  A null 'change'
+ * names no device, so every registered notifier is invoked. */
+static void
+netdev_linux_poll_cb(const struct linux_netdev_change *change,
+                     void *aux UNUSED)
+{
+    struct shash_node *node;
+
+    if (change) {
+        struct list *watchers = shash_find_data(&netdev_linux_notifiers,
+                                                change->ifname);
+        if (watchers) {
+            poll_notify(watchers);
+        }
+        return;
+    }
+
+    SHASH_FOR_EACH (node, &netdev_linux_notifiers) {
+        poll_notify(node->data);
+    }
+}
+
+/* Creates a notifier that calls 'cb' for 'netdev', with 'aux' stored in the
+ * notifier, and files it under the device's name in
+ * 'netdev_linux_notifiers'.  On success, stores the new notifier in
+ * '*notifierp' and returns 0.
+ *
+ * If no notifiers exist yet, first registers netdev_linux_poll_cb() with the
+ * linux_netdev_notifier module.  If that registration fails, its error is
+ * returned and nothing is allocated. */
+static int
+netdev_linux_poll_add(struct netdev *netdev,
+                      void (*cb)(struct netdev_notifier *), void *aux,
+                      struct netdev_notifier **notifierp)
+{
+    const char *netdev_name = netdev_get_name(netdev);
+    struct netdev_linux_notifier *notifier;
+    struct list *list;
+
+    if (shash_is_empty(&netdev_linux_notifiers)) {
+        int error = linux_netdev_notifier_register(&netdev_linux_poll_notifier,
+                                                   netdev_linux_poll_cb, NULL);
+        if (error) {
+            return error;
+        }
+    }
+
+    /* All notifiers for one device name share a list; create the list on
+     * first use. */
+    list = shash_find_data(&netdev_linux_notifiers, netdev_name);
+    if (!list) {
+        list = xmalloc(sizeof *list);
+        list_init(list);
+        shash_add(&netdev_linux_notifiers, netdev_name, list);
+    }
+
+    notifier = xmalloc(sizeof *notifier);
+    netdev_notifier_init(&notifier->notifier, netdev, cb, aux);
+    list_push_back(list, &notifier->node);
+    *notifierp = &notifier->notifier;
+    return 0;
+}
+
+/* Destroys 'notifier_', which must have been created by
+ * netdev_linux_poll_add().  If it was the last notifier for its device name,
+ * the per-name list is removed from 'netdev_linux_notifiers' and freed.  If
+ * it was the last notifier overall, unregisters from the
+ * linux_netdev_notifier module. */
+static void
+netdev_linux_poll_remove(struct netdev_notifier *notifier_)
+{
+    struct netdev_linux_notifier *notifier =
+        CONTAINER_OF(notifier_, struct netdev_linux_notifier, notifier);
+    struct list *list;
+
+    /* Remove 'notifier' from its list. */
+    list = list_remove(&notifier->node);
+    if (list_is_empty(list)) {
+        /* The list is now empty.  Remove it from the hash and free it. */
+        const char *netdev_name = netdev_get_name(notifier->notifier.netdev);
+        shash_delete(&netdev_linux_notifiers,
+                     shash_find(&netdev_linux_notifiers, netdev_name));
+        free(list);
+    }
+    free(notifier);
+
+    /* If that was the last notifier, unregister. */
+    if (shash_is_empty(&netdev_linux_notifiers)) {
+        linux_netdev_notifier_unregister(&netdev_linux_poll_notifier);
+    }
+}
+
+/* Provider class named "linux".  Its prefix is the empty string. */
+const struct netdev_class netdev_linux_class = {
+    .prefix = "",
+    .name = "linux",
+
+    .init = netdev_linux_init,
+    .run = netdev_linux_run,
+    .wait = netdev_linux_wait,
+
+    .open = netdev_linux_open,
+    .close = netdev_linux_close,
+
+    .enumerate = netdev_linux_enumerate,
+
+    .recv = netdev_linux_recv,
+    .recv_wait = netdev_linux_recv_wait,
+    .drain = netdev_linux_drain,
+
+    .send = netdev_linux_send,
+    .send_wait = netdev_linux_send_wait,
+
+    .set_etheraddr = netdev_linux_set_etheraddr,
+    .get_etheraddr = netdev_linux_get_etheraddr,
+    .get_mtu = netdev_linux_get_mtu,
+    .get_carrier = netdev_linux_get_carrier,
+    .get_stats = netdev_linux_get_stats,
+
+    .get_features = netdev_linux_get_features,
+    .set_advertisements = netdev_linux_set_advertisements,
+    .get_vlan_vid = netdev_linux_get_vlan_vid,
+    .set_policing = netdev_linux_set_policing,
+
+    .get_in4 = netdev_linux_get_in4,
+    .set_in4 = netdev_linux_set_in4,
+    .get_in6 = netdev_linux_get_in6,
+    .add_router = netdev_linux_add_router,
+    .arp_lookup = netdev_linux_arp_lookup,
+
+    .update_flags = netdev_linux_update_flags,
+
+    .poll_add = netdev_linux_poll_add,
+    .poll_remove = netdev_linux_poll_remove,
+};
+
+/* Provider class named "tap", with prefix "tap".  It shares every
+ * implementation function with netdev_linux_class but supplies no 'run' or
+ * 'wait' function. */
+const struct netdev_class netdev_tap_class = {
+    .prefix = "tap",
+    .name = "tap",
+
+    .init = netdev_linux_init,
+    .run = NULL,
+    .wait = NULL,
+
+    .open = netdev_linux_open,
+    .close = netdev_linux_close,
+
+    .enumerate = netdev_linux_enumerate,
+
+    .recv = netdev_linux_recv,
+    .recv_wait = netdev_linux_recv_wait,
+    .drain = netdev_linux_drain,
+
+    .send = netdev_linux_send,
+    .send_wait = netdev_linux_send_wait,
+
+    .set_etheraddr = netdev_linux_set_etheraddr,
+    .get_etheraddr = netdev_linux_get_etheraddr,
+    .get_mtu = netdev_linux_get_mtu,
+    .get_carrier = netdev_linux_get_carrier,
+    .get_stats = netdev_linux_get_stats,
+
+    .get_features = netdev_linux_get_features,
+    .set_advertisements = netdev_linux_set_advertisements,
+    .get_vlan_vid = netdev_linux_get_vlan_vid,
+    .set_policing = netdev_linux_set_policing,
+
+    .get_in4 = netdev_linux_get_in4,
+    .set_in4 = netdev_linux_set_in4,
+    .get_in6 = netdev_linux_get_in6,
+    .add_router = netdev_linux_add_router,
+    .arp_lookup = netdev_linux_arp_lookup,
+
+    .update_flags = netdev_linux_update_flags,
+
+    .poll_add = netdev_linux_poll_add,
+    .poll_remove = netdev_linux_poll_remove,
+};
+\f
+/* Sends an RTM_GETLINK request for the device with index 'ifindex' and
+ * copies the statistics from the reply's IFLA_STATS attribute into 'stats'.
+ *
+ * Returns 0 on success.  Otherwise returns a positive errno value: the error
+ * from creating the socket or from the transaction, or EPROTO if the reply
+ * cannot be parsed or carries no IFLA_STATS attribute.
+ *
+ * The rtnetlink socket is created on the first call and kept in a static
+ * variable for later calls. */
+static int
+get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
+{
+    static struct nl_sock *rtnl_sock;
+    struct ofpbuf request;
+    struct ofpbuf *reply;
+    struct ifinfomsg *ifi;
+    const struct rtnl_link_stats *rtnl_stats;
+    struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
+    int error;
+
+    if (!rtnl_sock) {
+        error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &rtnl_sock);
+        if (error) {
+            VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s",
+                        strerror(error));
+            return error;
+        }
+    }
+
+    ofpbuf_init(&request, 0);
+    nl_msg_put_nlmsghdr(&request, rtnl_sock, sizeof *ifi,
+                        RTM_GETLINK, NLM_F_REQUEST);
+    ifi = ofpbuf_put_zeros(&request, sizeof *ifi);
+    ifi->ifi_family = PF_UNSPEC;
+    ifi->ifi_index = ifindex;
+    error = nl_sock_transact(rtnl_sock, &request, &reply);
+    ofpbuf_uninit(&request);
+    if (error) {
+        return error;
+    }
+
+    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
+                         rtnlgrp_link_policy,
+                         attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
+        ofpbuf_delete(reply);
+        return EPROTO;
+    }
+
+    if (!attrs[IFLA_STATS]) {
+        VLOG_WARN_RL(&rl, "RTM_GETLINK reply lacks stats");
+        ofpbuf_delete(reply);
+        return EPROTO;
+    }
+
+    /* 'rtnl_stats' points into 'reply', so 'reply' must stay alive until all
+     * of the counters have been copied out. */
+    rtnl_stats = nl_attr_get(attrs[IFLA_STATS]);
+    stats->rx_packets = rtnl_stats->rx_packets;
+    stats->tx_packets = rtnl_stats->tx_packets;
+    stats->rx_bytes = rtnl_stats->rx_bytes;
+    stats->tx_bytes = rtnl_stats->tx_bytes;
+    stats->rx_errors = rtnl_stats->rx_errors;
+    stats->tx_errors = rtnl_stats->tx_errors;
+    stats->rx_dropped = rtnl_stats->rx_dropped;
+    stats->tx_dropped = rtnl_stats->tx_dropped;
+    stats->multicast = rtnl_stats->multicast;
+    stats->collisions = rtnl_stats->collisions;
+    stats->rx_length_errors = rtnl_stats->rx_length_errors;
+    stats->rx_over_errors = rtnl_stats->rx_over_errors;
+    stats->rx_crc_errors = rtnl_stats->rx_crc_errors;
+    stats->rx_frame_errors = rtnl_stats->rx_frame_errors;
+    stats->rx_fifo_errors = rtnl_stats->rx_fifo_errors;
+    stats->rx_missed_errors = rtnl_stats->rx_missed_errors;
+    stats->tx_aborted_errors = rtnl_stats->tx_aborted_errors;
+    stats->tx_carrier_errors = rtnl_stats->tx_carrier_errors;
+    stats->tx_fifo_errors = rtnl_stats->tx_fifo_errors;
+    stats->tx_heartbeat_errors = rtnl_stats->tx_heartbeat_errors;
+    stats->tx_window_errors = rtnl_stats->tx_window_errors;
+
+    ofpbuf_delete(reply);
+    return 0;
+}
+
+/* Looks up 'netdev_name' in /proc/net/dev and copies the counters from its
+ * line into 'stats'.  The counters that are not read from that file are set
+ * to UINT64_MAX.
+ *
+ * Returns 0 on success, the errno from fopen() if the file cannot be opened,
+ * or ENODEV if no line matches 'netdev_name'.
+ *
+ * sscanf() writes straight into 'stats' for every line it parses, so 'stats'
+ * may have been modified even when this function fails. */
+static int
+get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats)
+{
+    static const char fn[] = "/proc/net/dev";
+    char line[1024];
+    FILE *stream;
+    int ln;
+
+    stream = fopen(fn, "r");
+    if (!stream) {
+        /* Capture errno before logging, which may overwrite it. */
+        int error = errno;
+        VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(error));
+        return error;
+    }
+
+    ln = 0;
+    while (fgets(line, sizeof line, stream)) {
+        /* The first two lines of the file are not parsed. */
+        if (++ln >= 3) {
+            char devname[16];
+#define X64 "%"SCNu64
+            if (sscanf(line,
+                       " %15[^:]:"
+                       X64 X64 X64 X64 X64 X64 X64 "%*u"
+                       X64 X64 X64 X64 X64 X64 X64 "%*u",
+                       devname,
+                       &stats->rx_bytes,
+                       &stats->rx_packets,
+                       &stats->rx_errors,
+                       &stats->rx_dropped,
+                       &stats->rx_fifo_errors,
+                       &stats->rx_frame_errors,
+                       &stats->multicast,
+                       &stats->tx_bytes,
+                       &stats->tx_packets,
+                       &stats->tx_errors,
+                       &stats->tx_dropped,
+                       &stats->tx_fifo_errors,
+                       &stats->collisions,
+                       &stats->tx_carrier_errors) != 15) {
+                VLOG_WARN_RL(&rl, "%s:%d: parse error", fn, ln);
+            } else if (!strcmp(devname, netdev_name)) {
+                stats->rx_length_errors = UINT64_MAX;
+                stats->rx_over_errors = UINT64_MAX;
+                stats->rx_crc_errors = UINT64_MAX;
+                stats->rx_missed_errors = UINT64_MAX;
+                stats->tx_aborted_errors = UINT64_MAX;
+                stats->tx_heartbeat_errors = UINT64_MAX;
+                stats->tx_window_errors = UINT64_MAX;
+                fclose(stream);
+                return 0;
+            }
+        }
+    }
+    VLOG_WARN_RL(&rl, "%s: no stats for %s", fn, netdev_name);
+    fclose(stream);
+    return ENODEV;
+}
+\f
+/* Retrieves the interface flags of 'netdev' with SIOCGIFFLAGS and stores
+ * them in '*flags'.  Returns 0 on success, otherwise the positive errno
+ * value from netdev_linux_do_ioctl(), in which case '*flags' is set to 0. */
+static int
+get_flags(const struct netdev *netdev, int *flags)
+{
+    struct ifreq ifr;
+    int error;
+
+    /* Zero the request so that no uninitialized field is ever read. */
+    memset(&ifr, 0, sizeof ifr);
+    error = netdev_linux_do_ioctl(netdev, &ifr, SIOCGIFFLAGS, "SIOCGIFFLAGS");
+    *flags = error ? 0 : ifr.ifr_flags;
+    return error;
+}
+
+/* Sets the interface flags of 'netdev' to 'flags' with SIOCSIFFLAGS.
+ * Returns the result of netdev_linux_do_ioctl(), which fills in the
+ * interface name of the request. */
+static int
+set_flags(struct netdev *netdev, int flags)
+{
+    struct ifreq request;
+
+    request.ifr_flags = flags;
+    return netdev_linux_do_ioctl(netdev, &request,
+                                 SIOCSIFFLAGS, "SIOCSIFFLAGS");
+}
+
+/* Asks the kernel, with SIOCGIFINDEX, for the interface index of the device
+ * named 'netdev_name'.  Returns the index on success, otherwise a negative
+ * errno value (the failure is also logged). */
+static int
+do_get_ifindex(const char *netdev_name)
+{
+    struct ifreq ifr;
+
+    strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
+    COVERAGE_INC(netdev_get_ifindex);
+    if (ioctl(af_inet_sock, SIOCGIFINDEX, &ifr) < 0) {
+        /* Capture errno before logging, which may overwrite it. */
+        int error = errno;
+        VLOG_WARN_RL(&rl, "ioctl(SIOCGIFINDEX) on %s device failed: %s",
+                     netdev_name, strerror(error));
+        return -error;
+    }
+    return ifr.ifr_ifindex;
+}
+
+/* Stores the interface index of 'netdev_' in '*ifindexp' and returns 0.  The
+ * index comes from the device's cache when VALID_IFINDEX is set; otherwise
+ * it is fetched with do_get_ifindex() and then cached.
+ *
+ * If the lookup fails, stores 0 in '*ifindexp' and returns a positive errno
+ * value. */
+static int
+get_ifindex(const struct netdev *netdev_, int *ifindexp)
+{
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+    int looked_up;
+
+    if (netdev->cache->valid & VALID_IFINDEX) {
+        *ifindexp = netdev->cache->ifindex;
+        return 0;
+    }
+
+    looked_up = do_get_ifindex(netdev_get_name(netdev_));
+    if (looked_up < 0) {
+        *ifindexp = 0;
+        return -looked_up;
+    }
+
+    netdev->cache->ifindex = looked_up;
+    netdev->cache->valid |= VALID_IFINDEX;
+    *ifindexp = netdev->cache->ifindex;
+    return 0;
+}
+
+/* Retrieves, with SIOCGIFHWADDR, the hardware address of the device named
+ * 'netdev_name' and copies its first ETH_ADDR_LEN bytes into 'ea'.  Returns
+ * 0 on success, otherwise the positive errno value from the ioctl (the
+ * failure is also logged).
+ *
+ * An address family other than AF_UNSPEC or ARPHRD_ETHER is logged as a
+ * warning, but the address is still copied and 0 is still returned. */
+static int
+get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN])
+{
+    struct ifreq ifr;
+    int hwaddr_family;
+
+    memset(&ifr, 0, sizeof ifr);
+    strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
+    COVERAGE_INC(netdev_get_hwaddr);
+    if (ioctl(af_inet_sock, SIOCGIFHWADDR, &ifr) < 0) {
+        /* Capture errno before logging, which may overwrite it. */
+        int error = errno;
+        VLOG_ERR("ioctl(SIOCGIFHWADDR) on %s device failed: %s",
+                 netdev_name, strerror(error));
+        return error;
+    }
+    hwaddr_family = ifr.ifr_hwaddr.sa_family;
+    if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) {
+        VLOG_WARN("%s device has unknown hardware address family %d",
+                  netdev_name, hwaddr_family);
+    }
+    memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN);
+    return 0;
+}
+
+/* Sets, with SIOCSIFHWADDR, the hardware address of the device named
+ * 'netdev_name' to the ETH_ADDR_LEN bytes in 'mac', using 'hwaddr_family' as
+ * the address family of the request.  Returns 0 on success, otherwise the
+ * positive errno value from the ioctl (the failure is also logged). */
+static int
+set_etheraddr(const char *netdev_name, int hwaddr_family,
+              const uint8_t mac[ETH_ADDR_LEN])
+{
+    struct ifreq ifr;
+
+    memset(&ifr, 0, sizeof ifr);
+    strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
+    ifr.ifr_hwaddr.sa_family = hwaddr_family;
+    memcpy(ifr.ifr_hwaddr.sa_data, mac, ETH_ADDR_LEN);
+    COVERAGE_INC(netdev_set_hwaddr);
+    if (ioctl(af_inet_sock, SIOCSIFHWADDR, &ifr) < 0) {
+        /* Capture errno before logging, which may overwrite it. */
+        int error = errno;
+        VLOG_ERR("ioctl(SIOCSIFHWADDR) on %s device failed: %s",
+                 netdev_name, strerror(error));
+        return error;
+    }
+    return 0;
+}
+
+/* Runs ethtool command 'cmd' (called 'cmd_name' in log messages) on 'netdev'
+ * through the SIOCETHTOOL ioctl, with 'ecmd' as the command buffer.  Stores
+ * 'cmd' in 'ecmd->cmd' before issuing the ioctl.
+ *
+ * Returns 0 on success, otherwise the positive errno value from the ioctl.
+ * Every failure except EOPNOTSUPP is logged. */
+static int
+netdev_linux_do_ethtool(struct netdev *netdev, struct ethtool_cmd *ecmd,
+                        int cmd, const char *cmd_name)
+{
+    struct ifreq ifr;
+    int error;
+
+    memset(&ifr, 0, sizeof ifr);
+    strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name);
+    ifr.ifr_data = (caddr_t) ecmd;
+
+    ecmd->cmd = cmd;
+    COVERAGE_INC(netdev_ethtool);
+    if (ioctl(af_inet_sock, SIOCETHTOOL, &ifr) == 0) {
+        return 0;
+    }
+
+    /* Capture errno before logging, which may overwrite it. */
+    error = errno;
+    if (error != EOPNOTSUPP) {
+        VLOG_WARN_RL(&rl, "ethtool command %s on network device %s "
+                     "failed: %s", cmd_name, netdev->name,
+                     strerror(error));
+    } else {
+        /* The device doesn't support this operation.  That's pretty
+         * common, so there's no point in logging anything. */
+    }
+    return error;
+}
+
+/* Copies the name of 'netdev' into 'ifr->ifr_name' and issues ioctl 'cmd'
+ * (called 'cmd_name' in log messages) with 'ifr' as its argument.  The
+ * caller fills in any other fields of 'ifr' that 'cmd' needs.
+ *
+ * Returns 0 on success, otherwise the positive errno value from the ioctl
+ * (the failure is also logged at debug level). */
+static int
+netdev_linux_do_ioctl(const struct netdev *netdev, struct ifreq *ifr,
+                      int cmd, const char *cmd_name)
+{
+    strncpy(ifr->ifr_name, netdev_get_name(netdev), sizeof ifr->ifr_name);
+    if (ioctl(af_inet_sock, cmd, ifr) == -1) {
+        /* Capture errno before logging, which may overwrite it. */
+        int error = errno;
+        VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s",
+                    netdev_get_name(netdev), cmd_name, strerror(error));
+        return error;
+    }
+    return 0;
+}
+\f
/* rtnetlink socket. */
-static struct nl_sock *rtnl_sock;
+static struct nl_sock *notify_sock;
/* All registered notifiers. */
static struct list all_notifiers = LIST_INITIALIZER(&all_notifiers);
static void linux_netdev_report_change(const struct nlmsghdr *,
const struct ifinfomsg *,
struct nlattr *attrs[]);
-static void linux_netdev_report_notify_error(int error);
+static void linux_netdev_report_notify_error(void);
int
linux_netdev_notifier_register(struct linux_netdev_notifier *notifier,
linux_netdev_notify_func *cb, void *aux)
{
- if (!rtnl_sock) {
+ if (!notify_sock) {
int error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0,
- &rtnl_sock);
+ ¬ify_sock);
if (error) {
VLOG_WARN("could not create rtnetlink socket: %s",
strerror(error));
}
list_push_back(&all_notifiers, ¬ifier->node);
- notifier->error = 0;
notifier->cb = cb;
notifier->aux = aux;
return 0;
{
list_remove(¬ifier->node);
if (list_is_empty(&all_notifiers)) {
- nl_sock_destroy(rtnl_sock);
- rtnl_sock = NULL;
+ nl_sock_destroy(notify_sock);
+ notify_sock = NULL;
}
}
-int
-linux_netdev_notifier_get_error(struct linux_netdev_notifier *notifier)
-{
- int error = notifier->error;
- notifier->error = 0;
- return error;
-}
-
-int
-linux_netdev_notifier_peek_error(const struct linux_netdev_notifier *notifier)
-{
- return notifier->error;
-}
-
-static const struct nl_policy rtnlgrp_link_policy[] = {
- [IFLA_IFNAME] = { .type = NL_A_STRING },
- [IFLA_MASTER] = { .type = NL_A_U32, .optional = true },
-};
-
void
linux_netdev_notifier_run(void)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- if (!rtnl_sock) {
+ if (!notify_sock) {
return;
}
struct ofpbuf *buf;
int error;
- error = nl_sock_recv(rtnl_sock, &buf, false);
+ error = nl_sock_recv(notify_sock, &buf, false);
if (!error) {
if (nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
rtnlgrp_link_policy,
linux_netdev_report_change(buf->data, ifinfo, attrs);
} else {
VLOG_WARN_RL(&rl, "received bad rtnl message");
- linux_netdev_report_notify_error(ENOBUFS);
+ linux_netdev_report_notify_error();
}
ofpbuf_delete(buf);
} else if (error == EAGAIN) {
VLOG_WARN_RL(&rl, "error reading rtnetlink socket: %s",
strerror(error));
}
- linux_netdev_report_notify_error(error);
+ linux_netdev_report_notify_error();
}
}
}
void
linux_netdev_notifier_wait(void)
{
- if (rtnl_sock) {
- nl_sock_wait(rtnl_sock, POLLIN);
+ if (notify_sock) {
+ nl_sock_wait(notify_sock, POLLIN);
}
}
LIST_FOR_EACH (notifier, struct linux_netdev_notifier, node,
&all_notifiers) {
- if (!notifier->error) {
- notifier->cb(&change, notifier->aux);
- }
+ notifier->cb(&change, notifier->aux);
}
}
static void
-linux_netdev_report_notify_error(int error)
+linux_netdev_report_notify_error(void)
{
struct linux_netdev_notifier *notifier;
LIST_FOR_EACH (notifier, struct linux_netdev_notifier, node,
&all_notifiers) {
- if (error != ENOBUFS || !notifier->error) {
- notifier->error = error;
- }
+ notifier->cb(NULL, notifier->aux);
}
}
struct linux_netdev_notifier {
struct list node;
- int error;
linux_netdev_notify_func *cb;
void *aux;
};
int linux_netdev_notifier_register(struct linux_netdev_notifier *,
linux_netdev_notify_func *, void *aux);
void linux_netdev_notifier_unregister(struct linux_netdev_notifier *);
-int linux_netdev_notifier_get_error(struct linux_netdev_notifier *);
-int linux_netdev_notifier_peek_error(const struct linux_netdev_notifier *);
void linux_netdev_notifier_run(void);
void linux_netdev_notifier_wait(void);
--- /dev/null
+/*
+ * Copyright (c) 2009 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NETDEV_PROVIDER_H
+#define NETDEV_PROVIDER_H 1
+
+/* Generic interface to network devices. */
+
+#include <assert.h>
+#include "netdev.h"
+#include "list.h"
+
+/* A network device (e.g. an Ethernet device).
+ *
+ * This structure should be treated as opaque by network device
+ * implementations. */
+struct netdev {
+    /* Provider class of this device; netdev_assert_class() checks it. */
+    const struct netdev_class *class;
+    char *name;                      /* e.g. "eth0" */
+    enum netdev_flags save_flags;    /* Initial device flags. */
+    enum netdev_flags changed_flags; /* Flags that we changed. */
+    struct list node;                /* Element in global list. */
+};
+
+void netdev_init(struct netdev *, const char *name,
+ const struct netdev_class *);
+/* Asserts that 'netdev' was created by the provider class 'class'. */
+static inline void
+netdev_assert_class(const struct netdev *netdev,
+                    const struct netdev_class *class)
+{
+    assert(netdev->class == class);
+}
+
+/* A network device notifier.
+ *
+ * Network device implementations should use netdev_notifier_init() to
+ * initialize this structure, but they may freely read its members after
+ * initialization. */
+struct netdev_notifier {
+    struct netdev *netdev;                /* Device being watched. */
+    void (*cb)(struct netdev_notifier *); /* Called with this notifier. */
+    void *aux;                            /* Client data stored for 'cb'. */
+};
+void netdev_notifier_init(struct netdev_notifier *, struct netdev *,
+ void (*cb)(struct netdev_notifier *), void *aux);
+
+/* Network device class structure, to be defined by each implementation of a
+ * network device.
+ *
+ * These functions return 0 if successful or a positive errno value on failure,
+ * except where otherwise noted. */
+struct netdev_class {
+    /* Prefix for names of netdevs in this class, e.g. "ndunix:".
+     *
+     * One netdev class may have the empty string "" as its prefix, in which
+     * case that netdev class is associated with netdev names that do not
+     * contain a colon. */
+    const char *prefix;
+
+    /* Class name, for use in error messages. */
+    const char *name;
+
+    /* Called only once, at program startup.  Returning an error from this
+     * function will prevent any network device, of any class, from being
+     * opened.
+     *
+     * This function may be set to null if a network device class needs no
+     * initialization at program startup. */
+    int (*init)(void);
+
+    /* Performs periodic work needed by netdevs of this class.  May be null if
+     * no periodic work is necessary. */
+    void (*run)(void);
+
+    /* Arranges for poll_block() to wake up if the "run" member function needs
+     * to be called.  May be null if nothing is needed here. */
+    void (*wait)(void);
+
+    /* Attempts to open a network device.  On success, sets '*netdevp' to the
+     * new network device.  'name' is the full network device name provided by
+     * the user.  This name is useful for error messages but must not be
+     * modified.
+     *
+     * 'suffix' is a copy of 'name' following the netdev's 'prefix'.
+     *
+     * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order
+     * to capture frames of that type received on the device.  It may also be
+     * one of the 'enum netdev_pseudo_ethertype' values to receive frames in
+     * one of those categories. */
+    int (*open)(const char *name, char *suffix, int ethertype,
+                struct netdev **netdevp);
+
+    /* Closes 'netdev'. */
+    void (*close)(struct netdev *netdev);
+
+    /* Enumerates the names of all network devices of this class.
+     *
+     * The caller has already initialized 'all_names' and might already have
+     * added some names to it.  This function should not disturb any existing
+     * names in 'all_names'.
+     *
+     * If this netdev class does not support enumeration, this may be a null
+     * pointer. */
+    int (*enumerate)(struct svec *all_names);
+
+    /* Attempts to receive a packet from 'netdev' into the 'size' bytes in
+     * 'buffer'.  If successful, returns the number of bytes in the received
+     * packet, otherwise a negative errno value.  Returns -EAGAIN immediately
+     * if no packet is ready to be received. */
+    int (*recv)(struct netdev *netdev, void *buffer, size_t size);
+
+    /* Registers with the poll loop to wake up from the next call to
+     * poll_block() when a packet is ready to be received with netdev_recv() on
+     * 'netdev'. */
+    void (*recv_wait)(struct netdev *netdev);
+
+    /* Discards all packets waiting to be received from 'netdev'. */
+    int (*drain)(struct netdev *netdev);
+
+    /* Sends the 'size'-byte packet in 'buffer' on 'netdev'.  Returns 0 if
+     * successful, otherwise a positive errno value.  Returns EAGAIN without
+     * blocking if the packet cannot be queued immediately.  Returns EMSGSIZE
+     * if a partial packet was transmitted or if the packet is too big or too
+     * small to transmit on the device.
+     *
+     * The caller retains ownership of 'buffer' in all cases.
+     *
+     * The network device is expected to maintain a packet transmission queue,
+     * so that the caller does not ordinarily have to do additional queuing of
+     * packets. */
+    int (*send)(struct netdev *netdev, const void *buffer, size_t size);
+
+    /* Registers with the poll loop to wake up from the next call to
+     * poll_block() when the packet transmission queue for 'netdev' has
+     * sufficient room to transmit a packet with netdev_send().
+     *
+     * The network device is expected to maintain a packet transmission queue,
+     * so that the caller does not ordinarily have to do additional queuing of
+     * packets.  Thus, this function is unlikely to ever be useful. */
+    void (*send_wait)(struct netdev *netdev);
+
+    /* Sets 'netdev''s Ethernet address to 'mac'. */
+    int (*set_etheraddr)(struct netdev *netdev, const uint8_t mac[6]);
+
+    /* Retrieves 'netdev''s Ethernet address into 'mac'. */
+    int (*get_etheraddr)(const struct netdev *netdev, uint8_t mac[6]);
+
+    /* Retrieves 'netdev''s MTU into '*mtup'.
+     *
+     * The MTU is the maximum size of transmitted (and received) packets, in
+     * bytes, not including the hardware header; thus, this is typically 1500
+     * bytes for Ethernet devices. */
+    int (*get_mtu)(const struct netdev *, int *mtup);
+
+    /* Sets '*carrier' to true if carrier is active (link light is on) on
+     * 'netdev'. */
+    int (*get_carrier)(const struct netdev *netdev, bool *carrier);
+
+    /* Retrieves current device stats for 'netdev' into 'stats'.
+     *
+     * A network device that supports some statistics but not others should
+     * set the values of the unsupported statistics to all-1-bits
+     * (UINT64_MAX). */
+    int (*get_stats)(const struct netdev *netdev, struct netdev_stats *stats);
+
+    /* Stores the features supported by 'netdev' into each of '*current',
+     * '*advertised', '*supported', and '*peer'.  Each value is a bitmap of
+     * "enum ofp_port_features" bits, in host byte order. */
+    int (*get_features)(struct netdev *netdev,
+                        uint32_t *current, uint32_t *advertised,
+                        uint32_t *supported, uint32_t *peer);
+
+    /* Sets the features advertised by 'netdev' to 'advertise', which is a
+     * bitmap of "enum ofp_port_features" bits, in host byte order.
+     *
+     * This function may be set to null for a network device that does not
+     * support configuring advertisements. */
+    int (*set_advertisements)(struct netdev *, uint32_t advertise);
+
+    /* If 'netdev' is a VLAN network device (e.g. one created with vconfig(8)),
+     * sets '*vlan_vid' to the VLAN VID associated with that device and returns
+     * 0.
+     *
+     * Returns ENOENT if 'netdev' is a network device that is not a VLAN
+     * device.
+     *
+     * This function should be set to null if it doesn't make any sense for
+     * your network device (it probably doesn't). */
+    int (*get_vlan_vid)(const struct netdev *netdev, int *vlan_vid);
+
+    /* Attempts to set input rate limiting (policing) policy, such that up to
+     * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative
+     * burst size of 'kbits_burst' kb.
+     *
+     * This function may be set to null if policing is not supported. */
+    int (*set_policing)(struct netdev *netdev, unsigned int kbits_rate,
+                        unsigned int kbits_burst);
+
+    /* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address.
+     *
+     * The following error values have well-defined meanings:
+     *
+     *   - EADDRNOTAVAIL: 'netdev' has no assigned IPv4 address.
+     *
+     *   - EOPNOTSUPP: No IPv4 network stack attached to 'netdev'.
+     *
+     * This function may be set to null if it would always return EOPNOTSUPP
+     * anyhow. */
+    int (*get_in4)(const struct netdev *netdev, struct in_addr *in4);
+
+    /* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask.  If
+     * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared.
+     *
+     * This function may be set to null if it would always return EOPNOTSUPP
+     * anyhow. */
+    int (*set_in4)(struct netdev *, struct in_addr addr, struct in_addr mask);
+
+    /* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address.
+     *
+     * The following error values have well-defined meanings:
+     *
+     *   - EADDRNOTAVAIL: 'netdev' has no assigned IPv6 address.
+     *
+     *   - EOPNOTSUPP: No IPv6 network stack attached to 'netdev'.
+     *
+     * This function may be set to null if it would always return EOPNOTSUPP
+     * anyhow. */
+    int (*get_in6)(const struct netdev *netdev, struct in6_addr *in6);
+
+    /* Adds 'router' as a default IP gateway for the TCP/IP stack that
+     * corresponds to 'netdev'.
+     *
+     * This function may be set to null if it would always return EOPNOTSUPP
+     * anyhow. */
+    int (*add_router)(struct netdev *netdev, struct in_addr router);
+
+    /* Looks up the ARP table entry for 'ip' on 'netdev' and stores the
+     * corresponding MAC address in 'mac'.  A return value of ENXIO, in
+     * particular, indicates that there is no ARP table entry for 'ip' on
+     * 'netdev'.
+     *
+     * This function may be set to null if it would always return EOPNOTSUPP
+     * anyhow. */
+    int (*arp_lookup)(const struct netdev *, uint32_t ip, uint8_t mac[6]);
+
+    /* Retrieves the current set of flags on 'netdev' into '*old_flags'.  Then,
+     * turns off the flags that are set to 1 in 'off' and turns on the flags
+     * that are set to 1 in 'on'.  (No bit will be set to 1 in both 'off' and
+     * 'on'; that is, off & on == 0.)
+     *
+     * This function may be invoked from a signal handler.  Therefore, it
+     * should not do anything that is not signal-safe (such as logging). */
+    int (*update_flags)(struct netdev *netdev, enum netdev_flags off,
+                        enum netdev_flags on, enum netdev_flags *old_flags);
+
+    /* Arranges for 'cb' to be called whenever one of the attributes of
+     * 'netdev' changes and sets '*notifierp' to a newly created
+     * netdev_notifier that represents this arrangement.  The created notifier
+     * will have its 'netdev', 'cb', and 'aux' members set to the values of the
+     * corresponding parameters. */
+    int (*poll_add)(struct netdev *netdev,
+                    void (*cb)(struct netdev_notifier *), void *aux,
+                    struct netdev_notifier **notifierp);
+
+    /* Cancels poll notification for 'notifier'. */
+    void (*poll_remove)(struct netdev_notifier *notifier);
+};
+
+extern const struct netdev_class netdev_linux_class;
+extern const struct netdev_class netdev_tap_class;
+
+#endif /* netdev-provider.h */
#include <assert.h>
#include <errno.h>
-#include <fcntl.h>
-#include <arpa/inet.h>
#include <inttypes.h>
-#include <linux/if_tun.h>
-#include <linux/types.h>
-#include <linux/ethtool.h>
-#include <linux/rtnetlink.h>
-#include <linux/sockios.h>
-#include <linux/version.h>
-#include <sys/types.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <netpacket/packet.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <net/if_arp.h>
-#include <net/if_packet.h>
-#include <net/route.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <string.h>
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "list.h"
-#include "netdev-linux.h"
-#include "netlink.h"
+#include "netdev-provider.h"
#include "ofpbuf.h"
-#include "openflow/openflow.h"
#include "packets.h"
#include "poll-loop.h"
#include "shash.h"
-#include "socket-util.h"
#include "svec.h"
-/* linux/if.h defines IFF_LOWER_UP, net/if.h doesn't.
- * net/if.h defines if_nameindex(), linux/if.h doesn't.
- * We can't include both headers, so define IFF_LOWER_UP ourselves. */
-#ifndef IFF_LOWER_UP
-#define IFF_LOWER_UP 0x10000
-#endif
-
-/* These were introduced in Linux 2.6.14, so they might be missing if we have
- * old headers. */
-#ifndef ADVERTISED_Pause
-#define ADVERTISED_Pause (1 << 13)
-#endif
-#ifndef ADVERTISED_Asym_Pause
-#define ADVERTISED_Asym_Pause (1 << 14)
-#endif
-
#define THIS_MODULE VLM_netdev
#include "vlog.h"
-struct netdev {
- struct list node;
- char *name;
-
- /* File descriptors. For ordinary network devices, the two fds below are
- * the same; for tap devices, they differ. */
- int netdev_fd; /* Network device. */
- int tap_fd; /* TAP character device, if any, otherwise the
- * network device. */
-
- /* Cached network device information. */
- int ifindex; /* -1 if not known. */
- uint8_t etheraddr[ETH_ADDR_LEN];
- struct in6_addr in6;
- int speed;
- int mtu;
- int txqlen;
- int hwaddr_family;
-
- int save_flags; /* Initial device flags. */
- int changed_flags; /* Flags that we changed. */
-};
-
-/* Policy for RTNLGRP_LINK messages.
- *
- * There are *many* more fields in these messages, but currently we only care
- * about interface names. */
-static const struct nl_policy rtnlgrp_link_policy[] = {
- [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false },
- [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true,
- .min_len = sizeof(struct rtnl_link_stats) },
+static const struct netdev_class *netdev_classes[] = {
+ &netdev_linux_class,
+ &netdev_tap_class,
};
+enum { N_NETDEV_CLASSES = ARRAY_SIZE(netdev_classes) };
/* All open network devices. */
static struct list netdev_list = LIST_INITIALIZER(&netdev_list);
-/* An AF_INET socket (used for ioctl operations). */
-static int af_inet_sock = -1;
-
-/* NETLINK_ROUTE socket. */
-static struct nl_sock *rtnl_sock;
-
-/* Can we use RTM_GETLINK to get network device statistics? (In pre-2.6.19
- * kernels, this was only available if wireless extensions were enabled.) */
-static bool use_netlink_stats;
-
/* This is set pretty low because we probably won't learn anything from the
* additional log messages. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
-static void init_netdev(void);
-static int do_open_netdev(const char *name, int ethertype, int tap_fd,
- struct netdev **netdev_);
+static void restore_all_flags(void *aux);
static int restore_flags(struct netdev *netdev);
-static int get_flags(const char *netdev_name, int *flagsp);
-static int set_flags(const char *netdev_name, int flags);
-static int do_get_ifindex(const char *netdev_name);
-static int get_ifindex(const struct netdev *, int *ifindexp);
-static int get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN],
- int *hwaddr_familyp);
-static int set_etheraddr(const char *netdev_name, int hwaddr_family,
- const uint8_t[ETH_ADDR_LEN]);
-
-/* Obtains the IPv6 address for 'name' into 'in6'. */
-static void
-get_ipv6_address(const char *name, struct in6_addr *in6)
+
+/* Attempts to initialize the netdev module. Returns 0 if successful,
+ * otherwise a positive errno value.
+ *
+ * Calling this function is optional. If not called explicitly, it will
+ * automatically be called upon the first attempt to open a network device. */
+int
+netdev_initialize(void)
{
- FILE *file;
- char line[128];
-
- file = fopen("/proc/net/if_inet6", "r");
- if (file == NULL) {
- /* This most likely indicates that the host doesn't have IPv6 support,
- * so it's not really a failure condition.*/
- *in6 = in6addr_any;
- return;
- }
+ static int status = -1;
+ if (status < 0) {
+ int i;
+
+ fatal_signal_add_hook(restore_all_flags, NULL, true);
- while (fgets(line, sizeof line, file)) {
- uint8_t *s6 = in6->s6_addr;
- char ifname[16 + 1];
-
-#define X8 "%2"SCNx8
- if (sscanf(line, " "X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8
- "%*x %*x %*x %*x %16s\n",
- &s6[0], &s6[1], &s6[2], &s6[3],
- &s6[4], &s6[5], &s6[6], &s6[7],
- &s6[8], &s6[9], &s6[10], &s6[11],
- &s6[12], &s6[13], &s6[14], &s6[15],
- ifname) == 17
- && !strcmp(name, ifname))
- {
- fclose(file);
- return;
+ status = 0;
+ for (i = 0; i < N_NETDEV_CLASSES; i++) {
+ const struct netdev_class *class = netdev_classes[i];
+ if (class->init) {
+ int retval = class->init();
+ if (retval) {
+ VLOG_ERR("failed to initialize %s network device "
+ "class: %s", class->name, strerror(retval));
+ if (!status) {
+ status = retval;
+ }
+ }
+ }
}
}
- *in6 = in6addr_any;
-
- fclose(file);
+ return status;
}
-static int
-do_ethtool(struct netdev *netdev, struct ethtool_cmd *ecmd,
- int cmd, const char *cmd_name)
+/* Performs periodic work needed by all the various kinds of netdevs.
+ *
+ * If your program opens any netdevs, it must call this function within its
+ * main poll loop. */
+void
+netdev_run(void)
{
- struct ifreq ifr;
-
- memset(&ifr, 0, sizeof ifr);
- strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name);
- ifr.ifr_data = (caddr_t) ecmd;
-
- ecmd->cmd = cmd;
- COVERAGE_INC(netdev_ethtool);
- if (ioctl(netdev->netdev_fd, SIOCETHTOOL, &ifr) == 0) {
- return 0;
- } else {
- if (errno != EOPNOTSUPP) {
- VLOG_WARN_RL(&rl, "ethtool command %s on network device %s "
- "failed: %s", cmd_name, netdev->name,
- strerror(errno));
- } else {
- /* The device doesn't support this operation. That's pretty
- * common, so there's no point in logging anything. */
+ int i;
+ for (i = 0; i < N_NETDEV_CLASSES; i++) {
+ const struct netdev_class *class = netdev_classes[i];
+ if (class->run) {
+ class->run();
}
- return errno;
}
}
-static int
-do_get_features(struct netdev *netdev,
- uint32_t *current, uint32_t *advertised,
- uint32_t *supported, uint32_t *peer)
+/* Arranges for poll_block() to wake up when netdev_run() needs to be called.
+ *
+ * If your program opens any netdevs, it must call this function within its
+ * main poll loop. */
+void
+netdev_wait(void)
{
- struct ethtool_cmd ecmd;
- int error;
-
- *current = 0;
- *supported = 0;
- *advertised = 0;
- *peer = 0;
-
- memset(&ecmd, 0, sizeof ecmd);
- error = do_ethtool(netdev, &ecmd, ETHTOOL_GSET, "ETHTOOL_GSET");
- if (error) {
- return error;
- }
-
- if (ecmd.supported & SUPPORTED_10baseT_Half) {
- *supported |= OFPPF_10MB_HD;
- }
- if (ecmd.supported & SUPPORTED_10baseT_Full) {
- *supported |= OFPPF_10MB_FD;
- }
- if (ecmd.supported & SUPPORTED_100baseT_Half) {
- *supported |= OFPPF_100MB_HD;
- }
- if (ecmd.supported & SUPPORTED_100baseT_Full) {
- *supported |= OFPPF_100MB_FD;
- }
- if (ecmd.supported & SUPPORTED_1000baseT_Half) {
- *supported |= OFPPF_1GB_HD;
- }
- if (ecmd.supported & SUPPORTED_1000baseT_Full) {
- *supported |= OFPPF_1GB_FD;
- }
- if (ecmd.supported & SUPPORTED_10000baseT_Full) {
- *supported |= OFPPF_10GB_FD;
- }
- if (ecmd.supported & SUPPORTED_TP) {
- *supported |= OFPPF_COPPER;
- }
- if (ecmd.supported & SUPPORTED_FIBRE) {
- *supported |= OFPPF_FIBER;
- }
- if (ecmd.supported & SUPPORTED_Autoneg) {
- *supported |= OFPPF_AUTONEG;
- }
- if (ecmd.supported & SUPPORTED_Pause) {
- *supported |= OFPPF_PAUSE;
- }
- if (ecmd.supported & SUPPORTED_Asym_Pause) {
- *supported |= OFPPF_PAUSE_ASYM;
- }
-
- /* Set the advertised features */
- if (ecmd.advertising & ADVERTISED_10baseT_Half) {
- *advertised |= OFPPF_10MB_HD;
- }
- if (ecmd.advertising & ADVERTISED_10baseT_Full) {
- *advertised |= OFPPF_10MB_FD;
- }
- if (ecmd.advertising & ADVERTISED_100baseT_Half) {
- *advertised |= OFPPF_100MB_HD;
- }
- if (ecmd.advertising & ADVERTISED_100baseT_Full) {
- *advertised |= OFPPF_100MB_FD;
- }
- if (ecmd.advertising & ADVERTISED_1000baseT_Half) {
- *advertised |= OFPPF_1GB_HD;
- }
- if (ecmd.advertising & ADVERTISED_1000baseT_Full) {
- *advertised |= OFPPF_1GB_FD;
- }
- if (ecmd.advertising & ADVERTISED_10000baseT_Full) {
- *advertised |= OFPPF_10GB_FD;
- }
- if (ecmd.advertising & ADVERTISED_TP) {
- *advertised |= OFPPF_COPPER;
- }
- if (ecmd.advertising & ADVERTISED_FIBRE) {
- *advertised |= OFPPF_FIBER;
- }
- if (ecmd.advertising & ADVERTISED_Autoneg) {
- *advertised |= OFPPF_AUTONEG;
- }
- if (ecmd.advertising & ADVERTISED_Pause) {
- *advertised |= OFPPF_PAUSE;
- }
- if (ecmd.advertising & ADVERTISED_Asym_Pause) {
- *advertised |= OFPPF_PAUSE_ASYM;
- }
-
- /* Set the current features */
- if (ecmd.speed == SPEED_10) {
- *current = (ecmd.duplex) ? OFPPF_10MB_FD : OFPPF_10MB_HD;
- }
- else if (ecmd.speed == SPEED_100) {
- *current = (ecmd.duplex) ? OFPPF_100MB_FD : OFPPF_100MB_HD;
- }
- else if (ecmd.speed == SPEED_1000) {
- *current = (ecmd.duplex) ? OFPPF_1GB_FD : OFPPF_1GB_HD;
- }
- else if (ecmd.speed == SPEED_10000) {
- *current = OFPPF_10GB_FD;
- }
-
- if (ecmd.port == PORT_TP) {
- *current |= OFPPF_COPPER;
- }
- else if (ecmd.port == PORT_FIBRE) {
- *current |= OFPPF_FIBER;
- }
-
- if (ecmd.autoneg) {
- *current |= OFPPF_AUTONEG;
+ int i;
+ for (i = 0; i < N_NETDEV_CLASSES; i++) {
+ const struct netdev_class *class = netdev_classes[i];
+ if (class->wait) {
+ class->wait();
+ }
}
- return 0;
}
/* Opens the network device named 'name' (e.g. "eth0") and returns zero if
* the 'enum netdev_pseudo_ethertype' values to receive frames in one of those
* categories. */
int
-netdev_open(const char *name, int ethertype, struct netdev **netdevp)
+netdev_open(const char *name_, int ethertype, struct netdev **netdevp)
{
- if (strncmp(name, "tap:", 4)) {
- return do_open_netdev(name, ethertype, -1, netdevp);
- } else {
- static const char tap_dev[] = "/dev/net/tun";
- struct ifreq ifr;
- int error;
- int tap_fd;
-
- tap_fd = open(tap_dev, O_RDWR);
- if (tap_fd < 0) {
- ovs_error(errno, "opening \"%s\" failed", tap_dev);
- return errno;
- }
-
- memset(&ifr, 0, sizeof ifr);
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- if (name) {
- strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
- }
- if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
- int error = errno;
- ovs_error(error, "ioctl(TUNSETIFF) on \"%s\" failed", tap_dev);
- close(tap_fd);
- return error;
- }
-
- error = set_nonblocking(tap_fd);
- if (error) {
- ovs_error(error, "set_nonblocking on \"%s\" failed", tap_dev);
- close(tap_fd);
- return error;
- }
-
- error = do_open_netdev(ifr.ifr_name, NETDEV_ETH_TYPE_NONE, tap_fd,
- netdevp);
- if (error) {
- close(tap_fd);
- }
- return error;
- }
-}
-
-
-static int
-do_open_netdev(const char *name, int ethertype, int tap_fd,
- struct netdev **netdev_)
-{
- int netdev_fd;
- struct sockaddr_ll sll;
- struct ifreq ifr;
- int ifindex = -1;
- uint8_t etheraddr[ETH_ADDR_LEN];
- struct in6_addr in6;
- int mtu;
- int txqlen;
- int hwaddr_family;
+ /* Writable copy of 'name_' so that it can be split in place at the first
+ * ':' into a class prefix and a device suffix. Freed at 'exit'. */
+ char *name = xstrdup(name_);
+ char *prefix, *suffix, *colon;
+ struct netdev *netdev = NULL;
int error;
- struct netdev *netdev;
-
- init_netdev();
- *netdev_ = NULL;
- COVERAGE_INC(netdev_open);
-
- /* Create raw socket. */
- netdev_fd = socket(PF_PACKET, SOCK_RAW,
- htons(ethertype == NETDEV_ETH_TYPE_NONE ? 0
- : ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL
- : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2
- : ethertype));
- if (netdev_fd < 0) {
- return errno;
- }
-
- if (ethertype != NETDEV_ETH_TYPE_NONE) {
- /* Set non-blocking mode. */
- error = set_nonblocking(netdev_fd);
- if (error) {
- goto error_already_set;
- }
-
- /* Get ethernet device index. */
- ifindex = do_get_ifindex(name);
- if (ifindex < 0) {
- return -ifindex;
- }
-
- /* Bind to specific ethernet device. */
- memset(&sll, 0, sizeof sll);
- sll.sll_family = AF_PACKET;
- sll.sll_ifindex = ifindex;
- if (bind(netdev_fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
- VLOG_ERR("bind to %s failed: %s", name, strerror(errno));
- goto error;
- }
-
- /* Between the socket() and bind() calls above, the socket receives all
- * packets of the requested type on all system interfaces. We do not
- * want to receive that data, but there is no way to avoid it. So we
- * must now drain out the receive queue. */
- error = drain_rcvbuf(netdev_fd);
- if (error) {
- goto error_already_set;
- }
- }
+ int i;
- /* Get MAC address. */
- error = get_etheraddr(name, etheraddr, &hwaddr_family);
+ error = netdev_initialize();
if (error) {
- goto error_already_set;
- }
-
- /* Get MTU. */
- strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
- if (ioctl(netdev_fd, SIOCGIFMTU, &ifr) < 0) {
- VLOG_ERR("ioctl(SIOCGIFMTU) on %s device failed: %s",
- name, strerror(errno));
- goto error;
+ /* Leave through 'exit' so that 'name' is freed and '*netdevp' is
+ * cleared on this path too. */
+ goto exit;
}
- mtu = ifr.ifr_mtu;
- /* Get TX queue length. */
- if (ioctl(netdev_fd, SIOCGIFTXQLEN, &ifr) < 0) {
- VLOG_ERR("ioctl(SIOCGIFTXQLEN) on %s device failed: %s",
- name, strerror(errno));
- goto error;
+ colon = strchr(name, ':');
+ if (colon) {
+ *colon = '\0';
+ prefix = name;
+ suffix = colon + 1;
+ } else {
+ prefix = "";
+ suffix = name;
}
- txqlen = ifr.ifr_qlen;
- get_ipv6_address(name, &in6);
-
- /* Allocate network device. */
- netdev = xmalloc(sizeof *netdev);
- netdev->name = xstrdup(name);
- netdev->ifindex = ifindex;
- netdev->txqlen = txqlen;
- netdev->hwaddr_family = hwaddr_family;
- netdev->netdev_fd = netdev_fd;
- netdev->tap_fd = tap_fd < 0 ? netdev_fd : tap_fd;
- memcpy(netdev->etheraddr, etheraddr, sizeof etheraddr);
- netdev->mtu = mtu;
- netdev->in6 = in6;
-
- /* Save flags to restore at close or exit. */
- error = get_flags(netdev->name, &netdev->save_flags);
- if (error) {
- goto error_already_set;
+ /* Dispatch to the first class whose prefix matches exactly. */
+ for (i = 0; i < N_NETDEV_CLASSES; i++) {
+ const struct netdev_class *class = netdev_classes[i];
+ if (!strcmp(prefix, class->prefix)) {
+ error = class->open(name_, suffix, ethertype, &netdev);
+ goto exit;
+ }
}
- netdev->changed_flags = 0;
- fatal_signal_block();
- list_push_back(&netdev_list, &netdev->node);
- fatal_signal_unblock();
+ error = EAFNOSUPPORT;
- /* Success! */
- *netdev_ = netdev;
- return 0;
-
-error:
- error = errno;
-error_already_set:
- close(netdev_fd);
- if (tap_fd >= 0) {
- close(tap_fd);
- }
+exit:
+ /* 'name' is only a scratch copy used to split the prefix from the suffix.
+ * NOTE(review): assumes providers copy 'suffix' rather than keep the
+ * pointer -- confirm against each class's open(). */
+ free(name);
+ *netdevp = error ? NULL : netdev;
return error;
}
netdev_close(struct netdev *netdev)
{
if (netdev) {
- /* Bring down interface and drop promiscuous mode, if we brought up
- * the interface or enabled promiscuous mode. */
+ char *name;
int error;
+
+ /* Restore flags that we changed, if any. */
fatal_signal_block();
error = restore_flags(netdev);
list_remove(&netdev->node);
}
/* Free. */
- free(netdev->name);
- close(netdev->netdev_fd);
- if (netdev->netdev_fd != netdev->tap_fd) {
- close(netdev->tap_fd);
- }
- free(netdev);
+ name = netdev->name;
+ netdev->class->close(netdev);
+ free(name);
}
}
-/* Checks whether a network device named 'name' exists and returns true if so,
- * false otherwise. */
+/* Returns true if a network device named 'name' exists and may be opened,
+ * otherwise false. */
bool
netdev_exists(const char *name)
{
- struct stat s;
- char *filename;
+ struct netdev *netdev;
int error;
- filename = xasprintf("/sys/class/net/%s", name);
- error = stat(filename, &s);
- free(filename);
- return !error;
+ error = netdev_open(name, NETDEV_ETH_TYPE_NONE, &netdev);
+ if (!error) {
+ netdev_close(netdev);
+ return true;
+ } else {
+ if (error != ENODEV) {
+ VLOG_WARN("failed to open network device %s: %s",
+ name, strerror(error));
+ }
+ return false;
+ }
}
-/* Pads 'buffer' out with zero-bytes to the minimum valid length of an
- * Ethernet packet, if necessary. */
-static void
-pad_to_minimum_length(struct ofpbuf *buffer)
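+/* Initializes 'svec' with a list of the names of all known network devices.
+ * Returns 0 if successful, otherwise a positive errno value; 'svec' is
+ * initialized in either case, though possibly incomplete on failure. */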
+int
+netdev_enumerate(struct svec *svec)
{
- if (buffer->size < ETH_TOTAL_MIN) {
- ofpbuf_put_zeros(buffer, ETH_TOTAL_MIN - buffer->size);
+ int error;
+ int i;
+
+ svec_init(svec);
+
+ error = netdev_initialize();
+ if (error) {
+ return error;
}
+
+ error = 0;
+ for (i = 0; i < N_NETDEV_CLASSES; i++) {
+ const struct netdev_class *class = netdev_classes[i];
+ if (class->enumerate) {
+ int retval = class->enumerate(svec);
+ if (retval) {
+ VLOG_WARN("failed to enumerate %s network devices: %s",
+ class->name, strerror(retval));
+ if (!error) {
+ error = retval;
+ }
+ }
+ }
+ }
+ return error;
}
/* Attempts to receive a packet from 'netdev' into 'buffer', which the caller
int
netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
{
- ssize_t n_bytes;
+ int retval;
assert(buffer->size == 0);
assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN);
- do {
- n_bytes = read(netdev->tap_fd,
- ofpbuf_tail(buffer), ofpbuf_tailroom(buffer));
- } while (n_bytes < 0 && errno == EINTR);
- if (n_bytes < 0) {
- if (errno != EAGAIN) {
- VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
- strerror(errno), netdev->name);
- }
- return errno;
- } else {
+
+ retval = netdev->class->recv(netdev,
+ buffer->data, ofpbuf_tailroom(buffer));
+ if (retval >= 0) {
COVERAGE_INC(netdev_received);
- buffer->size += n_bytes;
-
- /* When the kernel internally sends out an Ethernet frame on an
- * interface, it gives us a copy *before* padding the frame to the
- * minimum length. Thus, when it sends out something like an ARP
- * request, we see a too-short frame. So pad it out to the minimum
- * length. */
- pad_to_minimum_length(buffer);
+ buffer->size += retval;
+ if (buffer->size < ETH_TOTAL_MIN) {
+ ofpbuf_put_zeros(buffer, ETH_TOTAL_MIN - buffer->size);
+ }
return 0;
+ } else {
+ return -retval;
}
}
void
netdev_recv_wait(struct netdev *netdev)
{
- poll_fd_wait(netdev->tap_fd, POLLIN);
+ netdev->class->recv_wait(netdev);
}
/* Discards all packets waiting to be received from 'netdev'. */
int
netdev_drain(struct netdev *netdev)
{
- if (netdev->tap_fd != netdev->netdev_fd) {
- drain_fd(netdev->tap_fd, netdev->txqlen);
- return 0;
- } else {
- return drain_rcvbuf(netdev->netdev_fd);
- }
+ return netdev->class->drain(netdev);
}
/* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive
int
netdev_send(struct netdev *netdev, const struct ofpbuf *buffer)
{
- ssize_t n_bytes;
-
- do {
- n_bytes = write(netdev->tap_fd, buffer->data, buffer->size);
- } while (n_bytes < 0 && errno == EINTR);
-
- if (n_bytes < 0) {
- /* The Linux AF_PACKET implementation never blocks waiting for room
- * for packets, instead returning ENOBUFS. Translate this into EAGAIN
- * for the caller. */
- if (errno == ENOBUFS) {
- return EAGAIN;
- } else if (errno != EAGAIN) {
- VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
- netdev->name, strerror(errno));
- }
- return errno;
- } else if (n_bytes != buffer->size) {
- VLOG_WARN_RL(&rl,
- "send partial Ethernet packet (%d bytes of %zu) on %s",
- (int) n_bytes, buffer->size, netdev->name);
- return EMSGSIZE;
- } else {
+ int error = netdev->class->send(netdev, buffer->data, buffer->size);
+ if (!error) {
COVERAGE_INC(netdev_sent);
- return 0;
}
+ return error;
}
/* Registers with the poll loop to wake up from the next call to poll_block()
void
netdev_send_wait(struct netdev *netdev)
{
- if (netdev->tap_fd == netdev->netdev_fd) {
- poll_fd_wait(netdev->tap_fd, POLLOUT);
- } else {
- /* TAP device always accepts packets.*/
- poll_immediate_wake();
- }
+ /* Delegate to the device class. No 'return' here: this function is void,
+ * and returning an expression from a void function is not valid C. */
+ netdev->class->send_wait(netdev);
}
/* Attempts to set 'netdev''s MAC address to 'mac'. Returns 0 if successful,
int
netdev_set_etheraddr(struct netdev *netdev, const uint8_t mac[ETH_ADDR_LEN])
{
- int error = set_etheraddr(netdev->name, netdev->hwaddr_family, mac);
- if (!error) {
- memcpy(netdev->etheraddr, mac, ETH_ADDR_LEN);
- }
- return error;
+ return netdev->class->set_etheraddr(netdev, mac);
}
/* Retrieves 'netdev''s MAC address. If successful, returns 0 and copies the
int
netdev_get_etheraddr(const struct netdev *netdev, uint8_t mac[ETH_ADDR_LEN])
{
- memcpy(mac, netdev->etheraddr, ETH_ADDR_LEN);
- return 0;
+ return netdev->class->get_etheraddr(netdev, mac);
}
/* Returns the name of the network device that 'netdev' represents,
int
netdev_get_mtu(const struct netdev *netdev, int *mtup)
{
- *mtup = netdev->mtu;
- return 0;
+ int error = netdev->class->get_mtu(netdev, mtup);
+ if (error) {
+ VLOG_WARN_RL(&rl, "failed to retrieve MTU for network device %s: %s",
+ netdev_get_name(netdev), strerror(error));
+ *mtup = ETH_PAYLOAD_MAX;
+ }
+ return error;
}
/* Stores the features supported by 'netdev' into each of '*current',
uint32_t *supported, uint32_t *peer)
{
uint32_t dummy[4];
- return do_get_features(netdev,
- current ? current : &dummy[0],
- advertised ? advertised : &dummy[1],
- supported ? supported : &dummy[2],
- peer ? peer : &dummy[3]);
+ return netdev->class->get_features(netdev,
+ current ? current : &dummy[0],
+ advertised ? advertised : &dummy[1],
+ supported ? supported : &dummy[2],
+ peer ? peer : &dummy[3]);
}
-/* Set the features advertised by 'netdev' to 'advertise'. */
+/* Set the features advertised by 'netdev' to 'advertise'. Returns 0 if
+ * successful, otherwise a positive errno value. */
int
netdev_set_advertisements(struct netdev *netdev, uint32_t advertise)
{
- struct ethtool_cmd ecmd;
- int error;
-
- memset(&ecmd, 0, sizeof ecmd);
- error = do_ethtool(netdev, &ecmd, ETHTOOL_GSET, "ETHTOOL_GSET");
- if (error) {
- return error;
- }
-
- ecmd.advertising = 0;
- if (advertise & OFPPF_10MB_HD) {
- ecmd.advertising |= ADVERTISED_10baseT_Half;
- }
- if (advertise & OFPPF_10MB_FD) {
- ecmd.advertising |= ADVERTISED_10baseT_Full;
- }
- if (advertise & OFPPF_100MB_HD) {
- ecmd.advertising |= ADVERTISED_100baseT_Half;
- }
- if (advertise & OFPPF_100MB_FD) {
- ecmd.advertising |= ADVERTISED_100baseT_Full;
- }
- if (advertise & OFPPF_1GB_HD) {
- ecmd.advertising |= ADVERTISED_1000baseT_Half;
- }
- if (advertise & OFPPF_1GB_FD) {
- ecmd.advertising |= ADVERTISED_1000baseT_Full;
- }
- if (advertise & OFPPF_10GB_FD) {
- ecmd.advertising |= ADVERTISED_10000baseT_Full;
- }
- if (advertise & OFPPF_COPPER) {
- ecmd.advertising |= ADVERTISED_TP;
- }
- if (advertise & OFPPF_FIBER) {
- ecmd.advertising |= ADVERTISED_FIBRE;
- }
- if (advertise & OFPPF_AUTONEG) {
- ecmd.advertising |= ADVERTISED_Autoneg;
- }
- if (advertise & OFPPF_PAUSE) {
- ecmd.advertising |= ADVERTISED_Pause;
- }
- if (advertise & OFPPF_PAUSE_ASYM) {
- ecmd.advertising |= ADVERTISED_Asym_Pause;
- }
- return do_ethtool(netdev, &ecmd, ETHTOOL_SSET, "ETHTOOL_SSET");
+ return (netdev->class->set_advertisements
+ ? netdev->class->set_advertisements(netdev, advertise)
+ : EOPNOTSUPP);
}
-/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address (if
- * 'in4' is non-null) and returns 0. Otherwise, returns a positive errno value
- * and sets '*in4' to INADDR_ANY (0). */
+/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address and
+ * returns 0. Otherwise, returns a positive errno value and sets '*in4' to 0
+ * (INADDR_ANY).
+ *
+ * The following error values have well-defined meanings:
+ *
+ * - EADDRNOTAVAIL: 'netdev' has no assigned IPv4 address.
+ *
+ * - EOPNOTSUPP: No IPv4 network stack attached to 'netdev'.
+ *
+ * 'in4' may be null, in which case the address itself is not reported. */
int
netdev_get_in4(const struct netdev *netdev, struct in_addr *in4)
{
- const char *netdev_name = netdev_get_name(netdev);
- struct ifreq ifr;
- struct in_addr ip = { INADDR_ANY };
- int error;
-
- init_netdev();
-
- strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
- ifr.ifr_addr.sa_family = AF_INET;
- COVERAGE_INC(netdev_get_in4);
- if (ioctl(af_inet_sock, SIOCGIFADDR, &ifr) == 0) {
- struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr;
- ip = sin->sin_addr;
- error = ip.s_addr != INADDR_ANY ? 0 : EADDRNOTAVAIL;
- } else {
- VLOG_DBG_RL(&rl, "%s: ioctl(SIOCGIFADDR) failed: %s",
- netdev_name, strerror(errno));
- error = errno;
- }
- if (in4) {
- *in4 = ip;
- }
- return error;
-}
-
-static void
-make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr)
-{
- struct sockaddr_in sin;
- memset(&sin, 0, sizeof sin);
- sin.sin_family = AF_INET;
- sin.sin_addr = addr;
- sin.sin_port = 0;
-
- memset(sa, 0, sizeof *sa);
- memcpy(sa, &sin, sizeof sin);
-}
-
-static int
-do_set_addr(struct netdev *netdev, int sock,
- int ioctl_nr, const char *ioctl_name, struct in_addr addr)
-{
- struct ifreq ifr;
+ struct in_addr dummy;
int error;
- strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name);
- make_in4_sockaddr(&ifr.ifr_addr, addr);
- COVERAGE_INC(netdev_set_in4);
- error = ioctl(sock, ioctl_nr, &ifr) < 0 ? errno : 0;
- if (error) {
- VLOG_WARN("ioctl(%s): %s", ioctl_name, strerror(error));
+ error = (netdev->class->get_in4
+ ? netdev->class->get_in4(netdev, in4 ? in4 : &dummy)
+ : EOPNOTSUPP);
+ if (error && in4) {
+ in4->s_addr = 0;
}
return error;
}
int
netdev_set_in4(struct netdev *netdev, struct in_addr addr, struct in_addr mask)
{
- int error;
-
- error = do_set_addr(netdev, af_inet_sock,
- SIOCSIFADDR, "SIOCSIFADDR", addr);
- if (!error && addr.s_addr != INADDR_ANY) {
- error = do_set_addr(netdev, af_inet_sock,
- SIOCSIFNETMASK, "SIOCSIFNETMASK", mask);
- }
- return error;
+ return (netdev->class->set_in4
+ ? netdev->class->set_in4(netdev, addr, mask)
+ : EOPNOTSUPP);
}
/* Adds 'router' as a default IP gateway for the TCP/IP stack that corresponds
* to 'netdev'. */
int
-netdev_add_router(struct netdev *netdev UNUSED, struct in_addr router)
+netdev_add_router(struct netdev *netdev, struct in_addr router)
{
- struct in_addr any = { INADDR_ANY };
- struct rtentry rt;
- int error;
-
- memset(&rt, 0, sizeof rt);
- make_in4_sockaddr(&rt.rt_dst, any);
- make_in4_sockaddr(&rt.rt_gateway, router);
- make_in4_sockaddr(&rt.rt_genmask, any);
- rt.rt_flags = RTF_UP | RTF_GATEWAY;
COVERAGE_INC(netdev_add_router);
- error = ioctl(af_inet_sock, SIOCADDRT, &rt) < 0 ? errno : 0;
- if (error) {
- VLOG_WARN("ioctl(SIOCADDRT): %s", strerror(error));
- }
- return error;
+ return (netdev->class->add_router
+ ? netdev->class->add_router(netdev, router)
+ : EOPNOTSUPP);
}
-/* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address (if
- * 'in6' is non-null) and returns true. Otherwise, returns false. */
-bool
-netdev_get_in6(const struct netdev *netdev, struct in6_addr *in6)
-{
- if (in6) {
- *in6 = netdev->in6;
- }
- return memcmp(&netdev->in6, &in6addr_any, sizeof netdev->in6) != 0;
-}
-
-/* Obtains the current flags for 'netdev' and stores them into '*flagsp'.
- * Returns 0 if successful, otherwise a positive errno value. On failure,
- * stores 0 into '*flagsp'. */
+/* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address and
+ * returns 0. Otherwise, returns a positive errno value and sets '*in6' to
+ * all-zero-bits (in6addr_any).
+ *
+ * The following error values have well-defined meanings:
+ *
+ * - EADDRNOTAVAIL: 'netdev' has no assigned IPv6 address.
+ *
+ * - EOPNOTSUPP: No IPv6 network stack attached to 'netdev'.
+ *
+ * 'in6' may be null, in which case the address itself is not reported. */
int
-netdev_get_flags(const struct netdev *netdev, enum netdev_flags *flagsp)
+netdev_get_in6(const struct netdev *netdev, struct in6_addr *in6)
{
- int error, flags;
-
- init_netdev();
-
- *flagsp = 0;
- error = get_flags(netdev_get_name(netdev), &flags);
- if (error) {
- return error;
- }
+ struct in6_addr dummy;
+ int error;
- if (flags & IFF_UP) {
- *flagsp |= NETDEV_UP;
+ error = (netdev->class->get_in6
+ ? netdev->class->get_in6(netdev, in6 ? in6 : &dummy)
+ : EOPNOTSUPP);
+ if (error && in6) {
+ memset(in6, 0, sizeof *in6);
}
- if (flags & IFF_PROMISC) {
- *flagsp |= NETDEV_PROMISC;
- }
- return 0;
-}
-
-static int
-nd_to_iff_flags(enum netdev_flags nd)
-{
- int iff = 0;
- if (nd & NETDEV_UP) {
- iff |= IFF_UP;
- }
- if (nd & NETDEV_PROMISC) {
- iff |= IFF_PROMISC;
- }
- return iff;
+ return error;
}
/* On 'netdev', turns off the flags in 'off' and then turns on the flags in
* successful, otherwise a positive errno value. */
static int
do_update_flags(struct netdev *netdev, enum netdev_flags off,
- enum netdev_flags on, bool permanent)
+ enum netdev_flags on, enum netdev_flags *old_flagsp,
+ bool permanent)
{
- int old_flags, new_flags;
+ enum netdev_flags old_flags;
int error;
- error = get_flags(netdev->name, &old_flags);
+ error = netdev->class->update_flags(netdev, off & ~on, on, &old_flags);
if (error) {
- return error;
- }
-
- new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on);
- if (!permanent) {
- netdev->changed_flags |= new_flags ^ old_flags;
+ VLOG_WARN_RL(&rl, "failed to %s flags for network device %s: %s",
+ off || on ? "set" : "get", netdev_get_name(netdev),
+ strerror(error));
+ old_flags = 0;
+ } else if ((off || on) && !permanent) {
+ enum netdev_flags new_flags = (old_flags & ~off) | on;
+ enum netdev_flags changed_flags = old_flags ^ new_flags;
+ if (changed_flags) {
+ if (!netdev->changed_flags) {
+ netdev->save_flags = old_flags;
+ }
+ netdev->changed_flags |= changed_flags;
+ }
}
- if (new_flags != old_flags) {
- error = set_flags(netdev->name, new_flags);
+ if (old_flagsp) {
+ *old_flagsp = old_flags;
}
return error;
}
+/* Obtains the current flags for 'netdev' and stores them into '*flagsp'.
+ * Returns 0 if successful, otherwise a positive errno value. On failure,
+ * stores 0 into '*flagsp'. */
+int
+netdev_get_flags(const struct netdev *netdev_, enum netdev_flags *flagsp)
+{
+ struct netdev *netdev = (struct netdev *) netdev_;
+ return do_update_flags(netdev, 0, 0, flagsp, false);
+}
+
/* Sets the flags for 'netdev' to 'flags'.
* If 'permanent' is true, the changes will persist; otherwise, they
* will be reverted when 'netdev' is closed or the program exits.
netdev_set_flags(struct netdev *netdev, enum netdev_flags flags,
bool permanent)
{
- return do_update_flags(netdev, -1, flags, permanent);
+ return do_update_flags(netdev, -1, flags, NULL, permanent);
}
/* Turns on the specified 'flags' on 'netdev'.
netdev_turn_flags_on(struct netdev *netdev, enum netdev_flags flags,
bool permanent)
{
- return do_update_flags(netdev, 0, flags, permanent);
+ return do_update_flags(netdev, 0, flags, NULL, permanent);
}
/* Turns off the specified 'flags' on 'netdev'.
netdev_turn_flags_off(struct netdev *netdev, enum netdev_flags flags,
bool permanent)
{
- return do_update_flags(netdev, flags, 0, permanent);
+ return do_update_flags(netdev, flags, 0, NULL, permanent);
}
/* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be
* successfully retrieved, it stores the corresponding MAC address in 'mac' and
* returns 0. Otherwise, it returns a positive errno value; in particular,
- * ENXIO indicates that there is not ARP table entry for 'ip' on 'netdev'. */
+ * ENXIO indicates that there is no ARP table entry for 'ip' on 'netdev'. */
int
-netdev_arp_lookup(const struct netdev *netdev, uint32_t ip,
- uint8_t mac[ETH_ADDR_LEN])
+netdev_arp_lookup(const struct netdev *netdev,
+ uint32_t ip, uint8_t mac[ETH_ADDR_LEN])
{
- const char *netdev_name = netdev_get_name(netdev);
- struct arpreq r;
- struct sockaddr_in *pa;
- int retval;
-
- init_netdev();
-
- memset(&r, 0, sizeof r);
- pa = (struct sockaddr_in *) &r.arp_pa;
- pa->sin_family = AF_INET;
- pa->sin_addr.s_addr = ip;
- pa->sin_port = 0;
- r.arp_ha.sa_family = ARPHRD_ETHER;
- r.arp_flags = 0;
- strncpy(r.arp_dev, netdev_name, sizeof r.arp_dev);
- COVERAGE_INC(netdev_arp_lookup);
- retval = ioctl(af_inet_sock, SIOCGARP, &r) < 0 ? errno : 0;
- if (!retval) {
- memcpy(mac, r.arp_ha.sa_data, ETH_ADDR_LEN);
- } else if (retval != ENXIO) {
- VLOG_WARN_RL(&rl, "%s: could not look up ARP entry for "IP_FMT": %s",
- netdev_name, IP_ARGS(&ip), strerror(retval));
- }
- return retval;
-}
-
-static int
-get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
-{
- struct ofpbuf request;
- struct ofpbuf *reply;
- struct ifinfomsg *ifi;
- const struct rtnl_link_stats *rtnl_stats;
- struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
- int error;
-
- ofpbuf_init(&request, 0);
- nl_msg_put_nlmsghdr(&request, rtnl_sock, sizeof *ifi,
- RTM_GETLINK, NLM_F_REQUEST);
- ifi = ofpbuf_put_zeros(&request, sizeof *ifi);
- ifi->ifi_family = PF_UNSPEC;
- ifi->ifi_index = ifindex;
- error = nl_sock_transact(rtnl_sock, &request, &reply);
- ofpbuf_uninit(&request);
+ int error = (netdev->class->arp_lookup
+ ? netdev->class->arp_lookup(netdev, ip, mac)
+ : EOPNOTSUPP);
if (error) {
- return error;
- }
-
- if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
- rtnlgrp_link_policy,
- attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
- ofpbuf_delete(reply);
- return EPROTO;
- }
-
- if (!attrs[IFLA_STATS]) {
- VLOG_WARN_RL(&rl, "RTM_GETLINK reply lacks stats");
- return EPROTO;
+ memset(mac, 0, ETH_ADDR_LEN);
}
-
- rtnl_stats = nl_attr_get(attrs[IFLA_STATS]);
- stats->rx_packets = rtnl_stats->rx_packets;
- stats->tx_packets = rtnl_stats->tx_packets;
- stats->rx_bytes = rtnl_stats->rx_bytes;
- stats->tx_bytes = rtnl_stats->tx_bytes;
- stats->rx_errors = rtnl_stats->rx_errors;
- stats->tx_errors = rtnl_stats->tx_errors;
- stats->rx_dropped = rtnl_stats->rx_dropped;
- stats->tx_dropped = rtnl_stats->tx_dropped;
- stats->multicast = rtnl_stats->multicast;
- stats->collisions = rtnl_stats->collisions;
- stats->rx_length_errors = rtnl_stats->rx_length_errors;
- stats->rx_over_errors = rtnl_stats->rx_over_errors;
- stats->rx_crc_errors = rtnl_stats->rx_crc_errors;
- stats->rx_frame_errors = rtnl_stats->rx_frame_errors;
- stats->rx_fifo_errors = rtnl_stats->rx_fifo_errors;
- stats->rx_missed_errors = rtnl_stats->rx_missed_errors;
- stats->tx_aborted_errors = rtnl_stats->tx_aborted_errors;
- stats->tx_carrier_errors = rtnl_stats->tx_carrier_errors;
- stats->tx_fifo_errors = rtnl_stats->tx_fifo_errors;
- stats->tx_heartbeat_errors = rtnl_stats->tx_heartbeat_errors;
- stats->tx_window_errors = rtnl_stats->tx_window_errors;
-
- return 0;
-}
-
-static int
-get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats)
-{
- static const char fn[] = "/proc/net/dev";
- char line[1024];
- FILE *stream;
- int ln;
-
- stream = fopen(fn, "r");
- if (!stream) {
- VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno));
- return errno;
- }
-
- ln = 0;
- while (fgets(line, sizeof line, stream)) {
- if (++ln >= 3) {
- char devname[16];
-#define X64 "%"SCNu64
- if (sscanf(line,
- " %15[^:]:"
- X64 X64 X64 X64 X64 X64 X64 "%*u"
- X64 X64 X64 X64 X64 X64 X64 "%*u",
- devname,
- &stats->rx_bytes,
- &stats->rx_packets,
- &stats->rx_errors,
- &stats->rx_dropped,
- &stats->rx_fifo_errors,
- &stats->rx_frame_errors,
- &stats->multicast,
- &stats->tx_bytes,
- &stats->tx_packets,
- &stats->tx_errors,
- &stats->tx_dropped,
- &stats->tx_fifo_errors,
- &stats->collisions,
- &stats->tx_carrier_errors) != 15) {
- VLOG_WARN_RL(&rl, "%s:%d: parse error", fn, ln);
- } else if (!strcmp(devname, netdev_name)) {
- stats->rx_length_errors = UINT64_MAX;
- stats->rx_over_errors = UINT64_MAX;
- stats->rx_crc_errors = UINT64_MAX;
- stats->rx_missed_errors = UINT64_MAX;
- stats->tx_aborted_errors = UINT64_MAX;
- stats->tx_heartbeat_errors = UINT64_MAX;
- stats->tx_window_errors = UINT64_MAX;
- fclose(stream);
- return 0;
- }
- }
- }
- VLOG_WARN_RL(&rl, "%s: no stats for %s", fn, netdev_name);
- fclose(stream);
- return ENODEV;
+ return error;
}
-/* Sets 'carrier' to true if carrier is active (link light is on) on
+/* Sets 'carrier' to true if carrier is active (link light is on) on
* 'netdev'. */
int
netdev_get_carrier(const struct netdev *netdev, bool *carrier)
{
- char line[8];
- int retval;
- int error;
- char *fn;
- int fd;
-
- *carrier = false;
-
- fn = xasprintf("/sys/class/net/%s/carrier", netdev_get_name(netdev));
- fd = open(fn, O_RDONLY);
- if (fd < 0) {
- error = errno;
- VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(error));
- goto exit;
- }
-
- retval = read(fd, line, sizeof line);
- if (retval < 0) {
- error = errno;
- if (error == EINVAL) {
- /* This is the normal return value when we try to check carrier if
- * the network device is not up. */
- } else {
- VLOG_WARN_RL(&rl, "%s: read failed: %s", fn, strerror(error));
- }
- goto exit_close;
- } else if (retval == 0) {
- error = EPROTO;
- VLOG_WARN_RL(&rl, "%s: unexpected end of file", fn);
- goto exit_close;
- }
-
- if (line[0] != '0' && line[0] != '1') {
- error = EPROTO;
- VLOG_WARN_RL(&rl, "%s: value is %c (expected 0 or 1)", fn, line[0]);
- goto exit_close;
+ int error = (netdev->class->get_carrier
+ ? netdev->class->get_carrier(netdev, carrier)
+ : EOPNOTSUPP);
+ if (error) {
+ *carrier = false;
}
- *carrier = line[0] != '0';
- error = 0;
-
-exit_close:
- close(fd);
-exit:
- free(fn);
return error;
}
int error;
COVERAGE_INC(netdev_get_stats);
- if (use_netlink_stats) {
- int ifindex;
-
- error = get_ifindex(netdev, &ifindex);
- if (!error) {
- error = get_stats_via_netlink(ifindex, stats);
- }
- } else {
- error = get_stats_via_proc(netdev->name, stats);
- }
-
+ error = (netdev->class->get_stats
+ ? netdev->class->get_stats(netdev, stats)
+ : EOPNOTSUPP);
if (error) {
memset(stats, 0xff, sizeof *stats);
}
return error;
}
-#define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress"
-#define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst %dk mtu 65535 drop flowid :1"
-/* We redirect stderr to /dev/null because we often want to remove all
- * traffic control configuration on a port so its in a known state. If
- * this done when there is no such configuration, tc complains, so we just
- * always ignore it.
- */
-#define POLICE_DEL_CMD "/sbin/tc qdisc del dev %s handle ffff: ingress 2>/dev/null"
-
-/* Attempts to set input rate limiting (policing) policy. */
+/* Attempts to set input rate limiting (policing) policy, such that up to
+ * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative burst
+ * size of 'kbits_burst' kb. */
int
netdev_set_policing(struct netdev *netdev, uint32_t kbits_rate,
uint32_t kbits_burst)
{
- const char *netdev_name = netdev_get_name(netdev);
- char command[1024];
-
- init_netdev();
-
- COVERAGE_INC(netdev_set_policing);
- if (kbits_rate) {
- if (!kbits_burst) {
- /* Default to 10 kilobits if not specified. */
- kbits_burst = 10;
- }
-
- /* xxx This should be more careful about only adding if it
- * xxx actually exists, as opposed to always deleting it. */
- snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name);
- if (system(command) == -1) {
- VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name);
- }
-
- snprintf(command, sizeof(command), POLICE_ADD_CMD, netdev_name);
- if (system(command) != 0) {
- VLOG_WARN_RL(&rl, "%s: problem adding policing", netdev_name);
- return -1;
- }
-
- snprintf(command, sizeof(command), POLICE_CONFIG_CMD, netdev_name,
- kbits_rate, kbits_burst);
- if (system(command) != 0) {
- VLOG_WARN_RL(&rl, "%s: problem configuring policing",
- netdev_name);
- return -1;
- }
- } else {
- snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name);
- if (system(command) == -1) {
- VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name);
- }
- }
-
- return 0;
+ return (netdev->class->set_policing
+ ? netdev->class->set_policing(netdev, kbits_rate, kbits_burst)
+ : EOPNOTSUPP);
}
-/* Initializes 'svec' with a list of the names of all known network devices. */
-void
-netdev_enumerate(struct svec *svec)
+/* If 'netdev' is a VLAN network device (e.g. one created with vconfig(8)),
+ * sets '*vlan_vid' to the VLAN VID associated with that device and returns 0.
+ * Otherwise returns an errno value (specifically ENOENT if 'netdev' is a
+ * network device that is not a VLAN device) and sets '*vlan_vid' to
+ * 0. */
+int
+netdev_get_vlan_vid(const struct netdev *netdev, int *vlan_vid)
{
- struct if_nameindex *names;
-
- svec_init(svec);
- names = if_nameindex();
- if (names) {
- size_t i;
-
- for (i = 0; names[i].if_name != NULL; i++) {
- svec_add(svec, names[i].if_name);
- }
- if_freenameindex(names);
- } else {
- VLOG_WARN("could not obtain list of network device names: %s",
- strerror(errno));
+ int error = (netdev->class->get_vlan_vid
+ ? netdev->class->get_vlan_vid(netdev, vlan_vid)
+ : ENOENT);
+ if (error) {
+ *vlan_vid = 0;
}
+ return error;
}
/* Returns a network device that has 'in4' as its IP address, if one exists,
svec_destroy(&dev_list);
return netdev;
}
-
-/* If 'netdev' is a VLAN network device (e.g. one created with vconfig(8)),
- * sets '*vlan_vid' to the VLAN VID associated with that device and returns 0.
- * Otherwise returns a errno value (specifically ENOENT if 'netdev_name' is the
- * name of a network device that is not a VLAN device) and sets '*vlan_vid' to
- * -1. */
-int
-netdev_get_vlan_vid(const struct netdev *netdev, int *vlan_vid)
+\f
+/* Initializes 'netdev' as a netdev named 'name' of the specified 'class'.
+ *
+ * This function adds 'netdev' to a netdev-owned linked list, so it is very
+ * important that 'netdev' only be freed after calling netdev_close(). */
+void
+netdev_init(struct netdev *netdev, const char *name,
+ const struct netdev_class *class)
{
- struct ds line = DS_EMPTY_INITIALIZER;
- FILE *stream = NULL;
- int error;
- char *fn;
-
- COVERAGE_INC(netdev_get_vlan_vid);
- fn = xasprintf("/proc/net/vlan/%s", netdev_get_name(netdev));
- stream = fopen(fn, "r");
- if (!stream) {
- error = errno;
- goto done;
- }
-
- if (ds_get_line(&line, stream)) {
- if (ferror(stream)) {
- error = errno;
- VLOG_ERR_RL(&rl, "error reading \"%s\": %s", fn, strerror(errno));
- } else {
- error = EPROTO;
- VLOG_ERR_RL(&rl, "unexpected end of file reading \"%s\"", fn);
- }
- goto done;
- }
-
- if (!sscanf(ds_cstr(&line), "%*s VID: %d", vlan_vid)) {
- error = EPROTO;
- VLOG_ERR_RL(&rl, "parse error reading \"%s\" line 1: \"%s\"",
- fn, ds_cstr(&line));
- goto done;
- }
-
- error = 0;
+ netdev->class = class;
+ netdev->name = xstrdup(name);
+ netdev->save_flags = 0;
+ netdev->changed_flags = 0;
+ list_push_back(&netdev_list, &netdev->node);
+}
-done:
- free(fn);
- if (stream) {
- fclose(stream);
- }
- ds_destroy(&line);
- if (error) {
- *vlan_vid = -1;
- }
- return error;
+/* Initializes 'notifier' as a netdev notifier for 'netdev', for which
+ * notification will consist of calling 'cb', with auxiliary data 'aux'. */
+void
+netdev_notifier_init(struct netdev_notifier *notifier, struct netdev *netdev,
+ void (*cb)(struct netdev_notifier *), void *aux)
+{
+ notifier->netdev = netdev;
+ notifier->cb = cb;
+ notifier->aux = aux;
}
\f
+/* Tracks changes in the status of a set of network devices. */
struct netdev_monitor {
- struct linux_netdev_notifier notifier;
struct shash polled_netdevs;
struct shash changed_netdevs;
};
-static void netdev_monitor_change(const struct linux_netdev_change *change,
- void *monitor);
-
-int
-netdev_monitor_create(struct netdev_monitor **monitorp)
+/* Creates and returns a new structure for monitoring changes in the status of
+ * network devices. */
+struct netdev_monitor *
+netdev_monitor_create(void)
{
- struct netdev_monitor *monitor;
- int error;
-
- monitor = xmalloc(sizeof *monitor);
- error = linux_netdev_notifier_register(&monitor->notifier,
- netdev_monitor_change, monitor);
- if (error) {
- free(monitor);
- return error;
- }
+ struct netdev_monitor *monitor = xmalloc(sizeof *monitor);
shash_init(&monitor->polled_netdevs);
shash_init(&monitor->changed_netdevs);
- *monitorp = monitor;
- return 0;
+ return monitor;
}
+/* Destroys 'monitor'. */
void
netdev_monitor_destroy(struct netdev_monitor *monitor)
{
if (monitor) {
- linux_netdev_notifier_unregister(&monitor->notifier);
+ struct shash_node *node;
+
+ SHASH_FOR_EACH (node, &monitor->polled_netdevs) {
+ struct netdev_notifier *notifier = node->data;
+ notifier->netdev->class->poll_remove(notifier);
+ }
+
shash_destroy(&monitor->polled_netdevs);
+ shash_destroy(&monitor->changed_netdevs);
free(monitor);
}
}
-void
+static void
+netdev_monitor_cb(struct netdev_notifier *notifier)
+{
+ struct netdev_monitor *monitor = notifier->aux;
+ const char *name = netdev_get_name(notifier->netdev);
+ if (!shash_find(&monitor->changed_netdevs, name)) {
+ shash_add(&monitor->changed_netdevs, name, NULL);
+ }
+}
+
+/* Attempts to add 'netdev' as a netdev monitored by 'monitor'. Returns 0 if
+ * successful, otherwise a positive errno value.
+ *
+ * Adding a given 'netdev' to a monitor multiple times is equivalent to adding
+ * it once. */
+int
netdev_monitor_add(struct netdev_monitor *monitor, struct netdev *netdev)
{
- if (!shash_find(&monitor->polled_netdevs, netdev_get_name(netdev))) {
- shash_add(&monitor->polled_netdevs, netdev_get_name(netdev), NULL);
+ const char *netdev_name = netdev_get_name(netdev);
+ int error = 0;
+ if (!shash_find(&monitor->polled_netdevs, netdev_name)
+ && netdev->class->poll_add)
+ {
+ struct netdev_notifier *notifier;
+ error = netdev->class->poll_add(netdev, netdev_monitor_cb, monitor,
+ &notifier);
+ if (!error) {
+ assert(notifier->netdev == netdev);
+ shash_add(&monitor->polled_netdevs, netdev_name, notifier);
+ }
}
+ return error;
}
+/* Removes 'netdev' from the set of netdevs monitored by 'monitor'. (This has
+ * no effect if 'netdev' is not in the set of devices monitored by
+ * 'monitor'.) */
void
netdev_monitor_remove(struct netdev_monitor *monitor, struct netdev *netdev)
{
+ const char *netdev_name = netdev_get_name(netdev);
struct shash_node *node;
- node = shash_find(&monitor->polled_netdevs, netdev_get_name(netdev));
+ node = shash_find(&monitor->polled_netdevs, netdev_name);
if (node) {
+ /* Cancel future notifications. */
+ struct netdev_notifier *notifier = node->data;
+ netdev->class->poll_remove(notifier);
shash_delete(&monitor->polled_netdevs, node);
- node = shash_find(&monitor->changed_netdevs, netdev_get_name(netdev));
+
+ /* Drop any pending notification. */
+ node = shash_find(&monitor->changed_netdevs, netdev_name);
if (node) {
shash_delete(&monitor->changed_netdevs, node);
}
}
}
+/* Checks for changes to netdevs in the set monitored by 'monitor'. If any of
+ * the attributes (Ethernet address, carrier status, speed or peer-advertised
+ * speed, flags, etc.) of a network device monitored by 'monitor' has changed,
+ * sets '*devnamep' to the name of a device that has changed and returns 0.
+ * The caller is responsible for freeing '*devnamep' (with free()).
+ *
+ * If no devices have changed, sets '*devnamep' to NULL and returns EAGAIN.
+ */
int
netdev_monitor_poll(struct netdev_monitor *monitor, char **devnamep)
{
- int error = linux_netdev_notifier_get_error(&monitor->notifier);
- *devnamep = NULL;
- if (!error) {
- struct shash_node *node = shash_first(&monitor->changed_netdevs);
- if (!node) {
- return EAGAIN;
- }
+ struct shash_node *node = shash_first(&monitor->changed_netdevs);
+ if (!node) {
+ *devnamep = NULL;
+ return EAGAIN;
+ } else {
*devnamep = xstrdup(node->name);
shash_delete(&monitor->changed_netdevs, node);
- } else {
- shash_clear(&monitor->changed_netdevs);
+ return 0;
}
- return error;
}
+/* Registers with the poll loop to wake up from the next call to poll_block()
+ * when netdev_monitor_poll(monitor) would indicate that a device has
+ * changed. */
void
netdev_monitor_poll_wait(const struct netdev_monitor *monitor)
{
- if (!shash_is_empty(&monitor->changed_netdevs)
- || linux_netdev_notifier_peek_error(&monitor->notifier)) {
+ if (!shash_is_empty(&monitor->changed_netdevs)) {
poll_immediate_wake();
} else {
- linux_netdev_notifier_wait();
- }
-}
-
-static void
-netdev_monitor_change(const struct linux_netdev_change *change, void *monitor_)
-{
- struct netdev_monitor *monitor = monitor_;
- if (shash_find(&monitor->polled_netdevs, change->ifname)
- && !shash_find(&monitor->changed_netdevs, change->ifname)) {
- shash_add(&monitor->changed_netdevs, change->ifname, NULL);
+ /* XXX Nothing needed here for netdev_linux, but maybe other netdev
+ * classes need help. */
}
}
\f
-static void restore_all_flags(void *aux);
-
-/* Set up a signal hook to restore network device flags on program
- * termination. */
-static void
-init_netdev(void)
-{
- static bool inited;
- if (!inited) {
- int ifindex;
- int error;
-
- inited = true;
-
- fatal_signal_add_hook(restore_all_flags, NULL, true);
-
- af_inet_sock = socket(AF_INET, SOCK_DGRAM, 0);
- if (af_inet_sock < 0) {
- ovs_fatal(errno, "socket(AF_INET)");
- }
-
- error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &rtnl_sock);
- if (error) {
- ovs_fatal(error, "socket(AF_NETLINK, NETLINK_ROUTE)");
- }
-
- /* Decide on the netdev_get_stats() implementation to use. Netlink is
- * preferable, so if that works, we'll use it. */
- ifindex = do_get_ifindex("lo");
- if (ifindex < 0) {
- VLOG_WARN("failed to get ifindex for lo, "
- "obtaining netdev stats from proc");
- use_netlink_stats = false;
- } else {
- struct netdev_stats stats;
- error = get_stats_via_netlink(ifindex, &stats);
- if (!error) {
- VLOG_DBG("obtaining netdev stats via rtnetlink");
- use_netlink_stats = true;
- } else {
- VLOG_INFO("RTM_GETLINK failed (%s), obtaining netdev stats "
- "via proc (you are probably running a pre-2.6.19 "
- "kernel)", strerror(error));
- use_netlink_stats = false;
- }
- }
- }
-}
-
/* Restore the network device flags on 'netdev' to those that were active
* before we changed them. Returns 0 if successful, otherwise a positive
* errno value.
static int
restore_flags(struct netdev *netdev)
{
- struct ifreq ifr;
- int restore_flags;
-
- /* Get current flags. */
- strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name);
- COVERAGE_INC(netdev_get_flags);
- if (ioctl(netdev->netdev_fd, SIOCGIFFLAGS, &ifr) < 0) {
- return errno;
- }
-
- /* Restore flags that we might have changed, if necessary. */
- restore_flags = netdev->changed_flags & (IFF_PROMISC | IFF_UP);
- if ((ifr.ifr_flags ^ netdev->save_flags) & restore_flags) {
- ifr.ifr_flags &= ~restore_flags;
- ifr.ifr_flags |= netdev->save_flags & restore_flags;
- COVERAGE_INC(netdev_set_flags);
- if (ioctl(netdev->netdev_fd, SIOCSIFFLAGS, &ifr) < 0) {
- return errno;
- }
+ if (netdev->changed_flags) {
+ enum netdev_flags restore = netdev->save_flags & netdev->changed_flags;
+ enum netdev_flags old_flags;
+ return netdev->class->update_flags(netdev,
+ netdev->changed_flags & ~restore,
+ restore, &old_flags);
}
-
return 0;
}
restore_flags(netdev);
}
}
-
-static int
-get_flags(const char *netdev_name, int *flags)
-{
- struct ifreq ifr;
- strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
- COVERAGE_INC(netdev_get_flags);
- if (ioctl(af_inet_sock, SIOCGIFFLAGS, &ifr) < 0) {
- VLOG_ERR("ioctl(SIOCGIFFLAGS) on %s device failed: %s",
- netdev_name, strerror(errno));
- return errno;
- }
- *flags = ifr.ifr_flags;
- return 0;
-}
-
-static int
-set_flags(const char *netdev_name, int flags)
-{
- struct ifreq ifr;
- strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
- ifr.ifr_flags = flags;
- COVERAGE_INC(netdev_set_flags);
- if (ioctl(af_inet_sock, SIOCSIFFLAGS, &ifr) < 0) {
- VLOG_ERR("ioctl(SIOCSIFFLAGS) on %s device failed: %s",
- netdev_name, strerror(errno));
- return errno;
- }
- return 0;
-}
-
-static int
-do_get_ifindex(const char *netdev_name)
-{
- struct ifreq ifr;
-
- strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
- COVERAGE_INC(netdev_get_ifindex);
- if (ioctl(af_inet_sock, SIOCGIFINDEX, &ifr) < 0) {
- VLOG_WARN_RL(&rl, "ioctl(SIOCGIFINDEX) on %s device failed: %s",
- netdev_name, strerror(errno));
- return -errno;
- }
- return ifr.ifr_ifindex;
-}
-
-static int
-get_ifindex(const struct netdev *netdev, int *ifindexp)
-{
- *ifindexp = 0;
- if (netdev->ifindex < 0) {
- int ifindex = do_get_ifindex(netdev->name);
- if (ifindex < 0) {
- return -ifindex;
- }
- ((struct netdev *) netdev)->ifindex = ifindex;
- }
- *ifindexp = netdev->ifindex;
- return 0;
-}
-
-static int
-get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN],
- int *hwaddr_familyp)
-{
- struct ifreq ifr;
-
- *hwaddr_familyp = 0;
- memset(&ifr, 0, sizeof ifr);
- strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
- COVERAGE_INC(netdev_get_hwaddr);
- if (ioctl(af_inet_sock, SIOCGIFHWADDR, &ifr) < 0) {
- VLOG_ERR("ioctl(SIOCGIFHWADDR) on %s device failed: %s",
- netdev_name, strerror(errno));
- return errno;
- }
- if (hwaddr_familyp) {
- int hwaddr_family = ifr.ifr_hwaddr.sa_family;
- *hwaddr_familyp = hwaddr_family;
- if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) {
- VLOG_WARN("%s device has unknown hardware address family %d",
- netdev_name, hwaddr_family);
- }
- }
- memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN);
- return 0;
-}
-
-static int
-set_etheraddr(const char *netdev_name, int hwaddr_family,
- const uint8_t mac[ETH_ADDR_LEN])
-{
- struct ifreq ifr;
-
- memset(&ifr, 0, sizeof ifr);
- strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
- ifr.ifr_hwaddr.sa_family = hwaddr_family;
- memcpy(ifr.ifr_hwaddr.sa_data, mac, ETH_ADDR_LEN);
- COVERAGE_INC(netdev_set_hwaddr);
- if (ioctl(af_inet_sock, SIOCSIFHWADDR, &ifr) < 0) {
- VLOG_ERR("ioctl(SIOCSIFHWADDR) on %s device failed: %s",
- netdev_name, strerror(errno));
- return errno;
- }
- return 0;
-}
NETDEV_ETH_TYPE_802_2 /* Receive all IEEE 802.2 frames. */
};
+/* Network device statistics.
+ *
+ * Values of unsupported statistics are set to all-1-bits (UINT64_MAX). */
struct netdev_stats {
uint64_t rx_packets; /* Total packets received. */
uint64_t tx_packets; /* Total packets transmitted. */
struct netdev;
+int netdev_initialize(void);
+void netdev_run(void);
+void netdev_wait(void);
+
int netdev_open(const char *name, int ethertype, struct netdev **);
void netdev_close(struct netdev *);
-bool netdev_exists(const char *netdev_name);
+bool netdev_exists(const char *name);
+
+int netdev_enumerate(struct svec *);
+
+const char *netdev_get_name(const struct netdev *);
+int netdev_get_mtu(const struct netdev *, int *mtup);
int netdev_recv(struct netdev *, struct ofpbuf *);
void netdev_recv_wait(struct netdev *);
int netdev_drain(struct netdev *);
+
int netdev_send(struct netdev *, const struct ofpbuf *);
void netdev_send_wait(struct netdev *);
+
int netdev_set_etheraddr(struct netdev *, const uint8_t mac[6]);
int netdev_get_etheraddr(const struct netdev *, uint8_t mac[6]);
-const char *netdev_get_name(const struct netdev *);
-int netdev_get_mtu(const struct netdev *, int *mtup);
+
+int netdev_get_carrier(const struct netdev *, bool *carrier);
int netdev_get_features(struct netdev *,
uint32_t *current, uint32_t *advertised,
uint32_t *supported, uint32_t *peer);
int netdev_set_advertisements(struct netdev *, uint32_t advertise);
+
int netdev_get_in4(const struct netdev *, struct in_addr *);
int netdev_set_in4(struct netdev *, struct in_addr addr, struct in_addr mask);
+int netdev_get_in6(const struct netdev *, struct in6_addr *);
int netdev_add_router(struct netdev *, struct in_addr router);
-bool netdev_get_in6(const struct netdev *, struct in6_addr *);
+int netdev_arp_lookup(const struct netdev *, uint32_t ip, uint8_t mac[6]);
+
int netdev_get_flags(const struct netdev *, enum netdev_flags *);
int netdev_set_flags(struct netdev *, enum netdev_flags, bool permanent);
int netdev_turn_flags_on(struct netdev *, enum netdev_flags, bool permanent);
int netdev_turn_flags_off(struct netdev *, enum netdev_flags, bool permanent);
-int netdev_arp_lookup(const struct netdev *, uint32_t ip, uint8_t mac[6]);
-int netdev_get_carrier(const struct netdev *, bool *carrier);
+
int netdev_get_stats(const struct netdev *, struct netdev_stats *);
int netdev_set_policing(struct netdev *, uint32_t kbits_rate,
uint32_t kbits_burst);
-void netdev_enumerate(struct svec *);
-struct netdev *netdev_find_dev_by_in4(const struct in_addr *);
-
int netdev_get_vlan_vid(const struct netdev *, int *vlan_vid);
+struct netdev *netdev_find_dev_by_in4(const struct in_addr *);
-struct netdev_monitor;
-int netdev_monitor_create(struct netdev_monitor **);
+struct netdev_monitor *netdev_monitor_create(void);
void netdev_monitor_destroy(struct netdev_monitor *);
-void netdev_monitor_add(struct netdev_monitor *, struct netdev *);
+int netdev_monitor_add(struct netdev_monitor *, struct netdev *);
void netdev_monitor_remove(struct netdev_monitor *, struct netdev *);
int netdev_monitor_poll(struct netdev_monitor *, char **devnamep);
void netdev_monitor_poll_wait(const struct netdev_monitor *);
ofproto_create(const char *datapath, const struct ofhooks *ofhooks, void *aux,
struct ofproto **ofprotop)
{
- struct netdev_monitor *netdev_monitor;
struct odp_stats stats;
struct ofproto *p;
struct dpif *dpif;
dpif_flow_flush(dpif);
dpif_recv_purge(dpif);
- /* Arrange to monitor datapath ports for status changes. */
- error = netdev_monitor_create(&netdev_monitor);
- if (error) {
- VLOG_ERR("failed to starting monitoring datapath %s: %s",
- datapath, strerror(error));
- dpif_close(dpif);
- return error;
- }
-
/* Initialize settings. */
p = xcalloc(1, sizeof *p);
p->fallback_dpid = pick_fallback_dpid();
/* Initialize datapath. */
p->dpif = dpif;
- p->netdev_monitor = netdev_monitor;
+ p->netdev_monitor = netdev_monitor_create();
port_array_init(&p->ports);
shash_init(&p->port_by_name);
p->max_ports = stats.max_ports;
}
unixctl_server_run(unixctl);
dp_run();
+ netdev_run();
ofproto_wait(ofproto);
unixctl_server_wait(unixctl);
dp_wait();
+ netdev_wait();
poll_block();
}
#include "coverage.h"
#include "daemon.h"
#include "dirs.h"
-#include "dpif.h"
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "fault.h"
for (;;) {
unixctl_server_run(unixctl);
brc_recv_update();
+ netdev_run();
/* If 'prune_timeout' is non-zero, we actively prune from the
* config file any 'bridge.<br_name>.port' entries that are no
nl_sock_wait(brc_sock, POLLIN);
unixctl_server_wait(unixctl);
+ netdev_wait();
poll_block();
}
#include "fault.h"
#include "leak-checker.h"
#include "mgmt.h"
+#include "netdev.h"
#include "ovs-vswitchd.h"
#include "poll-loop.h"
#include "proc-net-compat.h"
}
unixctl_server_run(unixctl);
dp_run();
+ netdev_run();
if (need_reconfigure) {
poll_immediate_wake();
bridge_wait();
unixctl_server_wait(unixctl);
dp_wait();
+ netdev_wait();
poll_block();
}