From 8b61709d5ec6c4ef58a04fcaefde617ff63fa10d Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 30 Jul 2009 16:04:45 -0700 Subject: [PATCH] netdev: Implement an abstract interface to network devices. This new abstraction layer allows multiple implementations of network devices in a single running process. This will be useful, for example, to support network devices that are simulated entirely in the running process or that communicate with other processes over Unix domain sockets, etc. The reimplemented tap device support in this commit has not been tested. --- lib/dpif-linux.c | 58 +- lib/dpif.c | 8 +- lib/netdev-linux.c | 1620 ++++++++++++++++++++++++++++++++++++- lib/netdev-linux.h | 3 - lib/netdev-provider.h | 284 +++++++ lib/netdev.c | 1606 +++++++++--------------------------- lib/netdev.h | 37 +- ofproto/ofproto.c | 12 +- utilities/ovs-openflowd.c | 2 + vswitchd/ovs-brcompatd.c | 3 +- vswitchd/ovs-vswitchd.c | 3 + 11 files changed, 2306 insertions(+), 1330 deletions(-) create mode 100644 lib/netdev-provider.h diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 0973b5d9..e075c8b0 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -53,6 +53,7 @@ struct dpif_linux { int local_ifindex; /* Ifindex of local port. */ struct svec changed_ports; /* Ports that have changed. 
*/ struct linux_netdev_notifier port_notifier; + bool change_error; }; static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); @@ -73,18 +74,6 @@ dpif_linux_cast(const struct dpif *dpif) return CONTAINER_OF(dpif, struct dpif_linux, dpif); } -static void -dpif_linux_run(void) -{ - linux_netdev_notifier_run(); -} - -static void -dpif_linux_wait(void) -{ - linux_netdev_notifier_wait(); -} - static int dpif_linux_enumerate(struct svec *all_dps) { @@ -285,26 +274,24 @@ static int dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); - int error; - error = linux_netdev_notifier_get_error(&dpif->port_notifier); - if (!error) { - if (!dpif->changed_ports.n) { - return EAGAIN; - } + if (dpif->change_error) { + dpif->change_error = false; + svec_clear(&dpif->changed_ports); + return ENOBUFS; + } else if (dpif->changed_ports.n) { *devnamep = dpif->changed_ports.names[--dpif->changed_ports.n]; + return 0; } else { - svec_clear(&dpif->changed_ports); + return EAGAIN; } - return error; } static void dpif_linux_port_poll_wait(const struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); - if (dpif->changed_ports.n - || linux_netdev_notifier_peek_error(&dpif->port_notifier)) { + if (dpif->changed_ports.n || dpif->change_error) { poll_immediate_wake(); } else { linux_netdev_notifier_wait(); @@ -452,8 +439,8 @@ dpif_linux_recv_wait(struct dpif *dpif_) const struct dpif_class dpif_linux_class = { "", /* This is the default class. 
*/ "linux", - dpif_linux_run, - dpif_linux_wait, + NULL, + NULL, dpif_linux_enumerate, dpif_linux_open, dpif_linux_close, @@ -725,6 +712,7 @@ open_minor(int minor, struct dpif **dpifp) dpif->minor = minor; dpif->local_ifindex = 0; svec_init(&dpif->changed_ports); + dpif->change_error = false; *dpifp = &dpif->dpif; } else { free(dpif); @@ -743,15 +731,19 @@ dpif_linux_port_changed(const struct linux_netdev_change *change, void *dpif_) { struct dpif_linux *dpif = dpif_; - if (change->master_ifindex == dpif->local_ifindex - && (change->nlmsg_type == RTM_NEWLINK - || change->nlmsg_type == RTM_DELLINK)) - { - /* Our datapath changed, either adding a new port or deleting an - * existing one. */ - if (!svec_contains(&dpif->changed_ports, change->ifname)) { - svec_add(&dpif->changed_ports, change->ifname); - svec_sort(&dpif->changed_ports); + if (change) { + if (change->master_ifindex == dpif->local_ifindex + && (change->nlmsg_type == RTM_NEWLINK + || change->nlmsg_type == RTM_DELLINK)) + { + /* Our datapath changed, either adding a new port or deleting an + * existing one. */ + if (!svec_contains(&dpif->changed_ports, change->ifname)) { + svec_add(&dpif->changed_ports, change->ifname); + svec_sort(&dpif->changed_ports); + } } + } else { + dpif->change_error = true; } } diff --git a/lib/dpif.c b/lib/dpif.c index 73fa4b98..14b424e6 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -65,8 +65,8 @@ static void check_rw_odp_flow(struct odp_flow *); /* Performs periodic work needed by all the various kinds of dpifs. * - * If your program opens any dpifs, it must call this function within its main - * poll loop. */ + * If your program opens any dpifs, it must call both this function and + * netdev_run() within its main poll loop. */ void dp_run(void) { @@ -81,8 +81,8 @@ dp_run(void) /* Arranges for poll_block() to wake up when dp_run() needs to be called. * - * If your program opens any dpifs, it must call this function within its main - * poll loop. 
*/ + * If your program opens any dpifs, it must call both this function and + * netdev_wait() within its main poll loop. */ void dp_wait(void) { diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index c753e28a..6189bf79 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -18,20 +18,1584 @@ #include "netdev-linux.h" +#include #include -#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include #include "coverage.h" +#include "dynamic-string.h" +#include "fatal-signal.h" +#include "netdev-provider.h" #include "netlink.h" #include "ofpbuf.h" +#include "openflow/openflow.h" +#include "packets.h" +#include "poll-loop.h" +#include "socket-util.h" +#include "shash.h" +#include "svec.h" #define THIS_MODULE VLM_netdev_linux #include "vlog.h" + +/* These were introduced in Linux 2.6.14, so they might be missing if we have + * old headers. */ +#ifndef ADVERTISED_Pause +#define ADVERTISED_Pause (1 << 13) +#endif +#ifndef ADVERTISED_Asym_Pause +#define ADVERTISED_Asym_Pause (1 << 14) +#endif + +struct netdev_linux { + struct netdev netdev; + + /* File descriptors. For ordinary network devices, the two fds below are + * the same; for tap devices, they differ. */ + int netdev_fd; /* Network device. */ + int tap_fd; /* TAP character device, if any, otherwise the + * network device. */ + + struct netdev_linux_cache *cache; +}; + +enum { + VALID_IFINDEX = 1 << 0, + VALID_ETHERADDR = 1 << 1, + VALID_IN4 = 1 << 2, + VALID_IN6 = 1 << 3, + VALID_MTU = 1 << 4, + VALID_CARRIER = 1 << 5 +}; + +/* Cached network device information. 
*/ +struct netdev_linux_cache { + struct shash_node *shash_node; + unsigned int valid; + int ref_cnt; + + int ifindex; + uint8_t etheraddr[ETH_ADDR_LEN]; + struct in_addr in4; + struct in6_addr in6; + int mtu; + int carrier; +}; + +static struct shash cache_map = SHASH_INITIALIZER(&cache_map); +static struct linux_netdev_notifier netdev_linux_cache_notifier; + +/* Policy for RTNLGRP_LINK messages. + * + * There are *many* more fields in these messages, but currently we only care + * about interface names. */ +static const struct nl_policy rtnlgrp_link_policy[] = { + [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, + [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, + [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true, + .min_len = sizeof(struct rtnl_link_stats) }, +}; + +/* An AF_INET socket (used for ioctl operations). */ +static int af_inet_sock = -1; + +struct netdev_linux_notifier { + struct netdev_notifier notifier; + struct list node; +}; + +static struct shash netdev_linux_notifiers = + SHASH_INITIALIZER(&netdev_linux_notifiers); +static struct linux_netdev_notifier netdev_linux_poll_notifier; + +/* This is set pretty low because we probably won't learn anything from the + * additional log messages. 
*/ +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + +static int netdev_linux_do_ethtool(struct netdev *, struct ethtool_cmd *, + int cmd, const char *cmd_name); +static int netdev_linux_do_ioctl(const struct netdev *, struct ifreq *, + int cmd, const char *cmd_name); +static int get_flags(const struct netdev *, int *flagsp); +static int set_flags(struct netdev *, int flags); +static int do_get_ifindex(const char *netdev_name); +static int get_ifindex(const struct netdev *, int *ifindexp); +static int do_set_addr(struct netdev *netdev, + int ioctl_nr, const char *ioctl_name, + struct in_addr addr); +static int get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN]); +static int set_etheraddr(const char *netdev_name, int hwaddr_family, + const uint8_t[ETH_ADDR_LEN]); +static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats); +static int get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats); + +static struct netdev_linux * +netdev_linux_cast(const struct netdev *netdev) +{ + netdev_assert_class(netdev, &netdev_linux_class); + return CONTAINER_OF(netdev, struct netdev_linux, netdev); +} + +static int +netdev_linux_init(void) +{ + static int status = -1; + if (status < 0) { + af_inet_sock = socket(AF_INET, SOCK_DGRAM, 0); + status = af_inet_sock >= 0 ? 
0 : errno; + if (status) { + VLOG_ERR("failed to create inet socket: %s", strerror(status)); + } + } + return status; +} + +static void +netdev_linux_run(void) +{ + linux_netdev_notifier_run(); +} + +static void +netdev_linux_wait(void) +{ + linux_netdev_notifier_wait(); +} + +static void +netdev_linux_cache_cb(const struct linux_netdev_change *change, + void *aux UNUSED) +{ + struct netdev_linux_cache *cache; + if (change) { + cache = shash_find_data(&cache_map, change->ifname); + if (cache) { + cache->valid = 0; + } + } else { + struct shash_node *node; + SHASH_FOR_EACH (node, &cache_map) { + cache = node->data; + cache->valid = 0; + } + } +} + +static int +netdev_linux_open(const char *name, char *suffix, int ethertype, + struct netdev **netdevp) +{ + struct netdev_linux *netdev; + enum netdev_flags flags; + int error; + + /* Allocate network device. */ + netdev = xcalloc(1, sizeof *netdev); + netdev_init(&netdev->netdev, suffix, &netdev_linux_class); + netdev->netdev_fd = -1; + netdev->tap_fd = -1; + netdev->cache = shash_find_data(&cache_map, suffix); + if (!netdev->cache) { + if (shash_is_empty(&cache_map)) { + int error = linux_netdev_notifier_register( + &netdev_linux_cache_notifier, netdev_linux_cache_cb, NULL); + if (error) { + netdev_close(&netdev->netdev); + return error; + } + } + netdev->cache = xmalloc(sizeof *netdev->cache); + netdev->cache->shash_node = shash_add(&cache_map, suffix, + netdev->cache); + netdev->cache->valid = 0; + netdev->cache->ref_cnt = 0; + } + netdev->cache->ref_cnt++; + + if (!strncmp(name, "tap:", 4)) { + static const char tap_dev[] = "/dev/net/tun"; + struct ifreq ifr; + + /* Open tap device. */ + netdev->tap_fd = open(tap_dev, O_RDWR); + if (netdev->tap_fd < 0) { + error = errno; + VLOG_WARN("opening \"%s\" failed: %s", tap_dev, strerror(error)); + goto error; + } + + /* Create tap device. 
*/ + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + error = netdev_linux_do_ioctl(&netdev->netdev, &ifr, + TUNSETIFF, "TUNSETIFF"); + if (error) { + goto error; + } + + /* Make non-blocking. */ + error = set_nonblocking(netdev->tap_fd); + if (error) { + goto error; + } + } + + error = netdev_get_flags(&netdev->netdev, &flags); + if (error == ENODEV) { + goto error; + } + + if (netdev->tap_fd >= 0 || ethertype != NETDEV_ETH_TYPE_NONE) { + struct sockaddr_ll sll; + int protocol; + int ifindex; + + /* Create file descriptor. */ + protocol = (ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL + : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2 + : ethertype); + netdev->netdev_fd = socket(PF_PACKET, SOCK_RAW, htons(protocol)); + if (netdev->netdev_fd < 0) { + error = errno; + goto error; + } + if (netdev->tap_fd < 0) { + netdev->tap_fd = netdev->netdev_fd; + } + + /* Set non-blocking mode. */ + error = set_nonblocking(netdev->netdev_fd); + if (error) { + goto error; + } + + /* Get ethernet device index. */ + error = get_ifindex(&netdev->netdev, &ifindex); + if (error) { + goto error; + } + + /* Bind to specific ethernet device. */ + memset(&sll, 0, sizeof sll); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = ifindex; + if (bind(netdev->netdev_fd, + (struct sockaddr *) &sll, sizeof sll) < 0) { + error = errno; + VLOG_ERR("bind to %s failed: %s", suffix, strerror(error)); + goto error; + } + + /* Between the socket() and bind() calls above, the socket receives all + * packets of the requested type on all system interfaces. We do not + * want to receive that data, but there is no way to avoid it. So we + * must now drain out the receive queue. */ + error = drain_rcvbuf(netdev->netdev_fd); + if (error) { + goto error; + } + } + + *netdevp = &netdev->netdev; + return 0; + +error: + netdev_close(&netdev->netdev); + return error; +} + +/* Closes and destroys 'netdev'. 
*/
+static void
+netdev_linux_close(struct netdev *netdev_)
+{
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+
+    /* The cache entry is shared by every open netdev with the same name, so
+     * only the last closer removes it from 'cache_map' and frees it.  Once
+     * the map is empty the cache notifier is unregistered as well. */
+    if (netdev->cache && !--netdev->cache->ref_cnt) {
+        shash_delete(&cache_map, netdev->cache->shash_node);
+        free(netdev->cache);
+
+        if (shash_is_empty(&cache_map)) {
+            linux_netdev_notifier_unregister(&netdev_linux_cache_notifier);
+        }
+    }
+    if (netdev->netdev_fd >= 0) {
+        close(netdev->netdev_fd);
+    }
+    /* For non-tap devices 'tap_fd' aliases 'netdev_fd'; don't close twice. */
+    if (netdev->tap_fd >= 0 && netdev->netdev_fd != netdev->tap_fd) {
+        close(netdev->tap_fd);
+    }
+    free(netdev);
+}
+
+/* Adds the names of all known network devices to 'svec'.  Returns 0 if
+ * successful, otherwise the errno value left by if_nameindex(). */
+static int
+netdev_linux_enumerate(struct svec *svec)
+{
+    struct if_nameindex *names;
+
+    names = if_nameindex();
+    if (names) {
+        size_t i;
+
+        for (i = 0; names[i].if_name != NULL; i++) {
+            svec_add(svec, names[i].if_name);
+        }
+        if_freenameindex(names);
+        return 0;
+    } else {
+        /* Save errno before logging, which may clobber it. */
+        int error = errno;
+        VLOG_WARN("could not obtain list of network device names: %s",
+                  strerror(error));
+        return error;
+    }
+}
+
+/* Attempts to read one packet from 'netdev' into the 'size' bytes at 'data'.
+ * Returns the value of read() (the number of bytes received) if successful.
+ * Returns EAGAIN if 'netdev' was opened with NETDEV_ETH_TYPE_NONE, otherwise
+ * the errno value from read().  Reads interrupted by EINTR are retried.
+ *
+ * NOTE(review): byte counts and errno values are both nonnegative here, so a
+ * caller cannot tell them apart by sign -- confirm against the provider
+ * interface's contract. */
+static int
+netdev_linux_recv(struct netdev *netdev_, void *data, size_t size)
+{
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+
+    if (netdev->tap_fd < 0) {
+        /* Device was opened with NETDEV_ETH_TYPE_NONE. */
+        return EAGAIN;
+    }
+
+    for (;;) {
+        ssize_t retval = read(netdev->tap_fd, data, size);
+        if (retval >= 0) {
+            return retval;
+        } else if (errno != EINTR) {
+            /* Save errno before logging, which may clobber it. */
+            int error = errno;
+            if (error != EAGAIN) {
+                VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
+                             netdev_get_name(netdev_), strerror(error));
+            }
+            return error;
+        }
+    }
+}
+
+/* Registers with the poll loop to wake up from the next call to poll_block()
+ * when a packet is ready to be received with netdev_recv() on 'netdev'. */
+static void
+netdev_linux_recv_wait(struct netdev *netdev_)
+{
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+    if (netdev->tap_fd >= 0) {
+        poll_fd_wait(netdev->tap_fd, POLLIN);
+    }
+}
+
+/* Discards all packets waiting to be received from 'netdev'. 
*/ +static int +netdev_linux_drain(struct netdev *netdev_) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + if (netdev->tap_fd < 0 && netdev->netdev_fd < 0) { + return 0; + } else if (netdev->tap_fd != netdev->netdev_fd) { + struct ifreq ifr; + int error = netdev_linux_do_ioctl(netdev_, &ifr, + SIOCGIFTXQLEN, "SIOCGIFTXQLEN"); + if (error) { + return error; + } + drain_fd(netdev->tap_fd, ifr.ifr_qlen); + return 0; + } else { + return drain_rcvbuf(netdev->netdev_fd); + } +} + +/* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive + * errno value. Returns EAGAIN without blocking if the packet cannot be queued + * immediately. Returns EMSGSIZE if a partial packet was transmitted or if + * the packet is too big or too small to transmit on the device. + * + * The caller retains ownership of 'buffer' in all cases. + * + * The kernel maintains a packet transmission queue, so the caller is not + * expected to do additional queuing of packets. */ +static int +netdev_linux_send(struct netdev *netdev_, const void *data, size_t size) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + + /* XXX should support sending even if 'ethertype' was NETDEV_ETH_TYPE_NONE. + */ + if (netdev->tap_fd < 0) { + return EPIPE; + } + + for (;;) { + ssize_t retval = write(netdev->tap_fd, data, size); + if (retval < 0) { + /* The Linux AF_PACKET implementation never blocks waiting for room + * for packets, instead returning ENOBUFS. Translate this into + * EAGAIN for the caller. 
*/ + if (errno == ENOBUFS) { + return EAGAIN; + } else if (errno == EINTR) { + continue; + } else if (errno != EAGAIN) { + VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", + netdev_get_name(netdev_), strerror(errno)); + } + return errno; + } else if (retval != size) { + VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of " + "%zu) on %s", retval, size, netdev_get_name(netdev_)); + return EMSGSIZE; + } else { + return 0; + } + } +} + +/* Registers with the poll loop to wake up from the next call to poll_block() + * when the packet transmission queue has sufficient room to transmit a packet + * with netdev_send(). + * + * The kernel maintains a packet transmission queue, so the client is not + * expected to do additional queuing of packets. Thus, this function is + * unlikely to ever be used. It is included for completeness. */ +static void +netdev_linux_send_wait(struct netdev *netdev_) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + if (netdev->tap_fd < 0 && netdev->netdev_fd < 0) { + /* Nothing to do. */ + } else if (netdev->tap_fd == netdev->netdev_fd) { + poll_fd_wait(netdev->tap_fd, POLLOUT); + } else { + /* TAP device always accepts packets.*/ + poll_immediate_wake(); + } +} + +/* Attempts to set 'netdev''s MAC address to 'mac'. Returns 0 if successful, + * otherwise a positive errno value. */ +static int +netdev_linux_set_etheraddr(struct netdev *netdev_, + const uint8_t mac[ETH_ADDR_LEN]) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + int error = set_etheraddr(netdev_get_name(netdev_), ARPHRD_ETHER, mac); + if (!error) { + memcpy(netdev->cache->etheraddr, mac, ETH_ADDR_LEN); + } + return error; +} + +/* Copies 'netdev''s MAC address into 'mac'. Returns 0 if successful, + * otherwise a positive errno value. 
*/ +static int +netdev_linux_get_etheraddr(const struct netdev *netdev_, + uint8_t mac[ETH_ADDR_LEN]) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + if (!(netdev->cache->valid & VALID_ETHERADDR)) { + int error = get_etheraddr(netdev_get_name(netdev_), + netdev->cache->etheraddr); + if (error) { + return error; + } + netdev->cache->valid |= VALID_ETHERADDR; + } + memcpy(mac, netdev->cache->etheraddr, ETH_ADDR_LEN); + return 0; +} + +/* Returns the maximum size of transmitted (and received) packets on 'netdev', + * in bytes, not including the hardware header; thus, this is typically 1500 + * bytes for Ethernet devices. */ +static int +netdev_linux_get_mtu(const struct netdev *netdev_, int *mtup) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + if (!(netdev->cache->valid & VALID_MTU)) { + struct ifreq ifr; + int error; + + error = netdev_linux_do_ioctl(netdev_, &ifr, SIOCGIFMTU, "SIOCGIFMTU"); + if (error) { + return error; + } + netdev->cache->mtu = ifr.ifr_mtu; + netdev->cache->valid |= VALID_MTU; + } + *mtup = netdev->cache->mtu; + return 0; +} + +static int +netdev_linux_get_carrier(const struct netdev *netdev_, bool *carrier) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + int error = 0; + char *fn = NULL; + int fd = -1; + + if (!(netdev->cache->valid & VALID_CARRIER)) { + char line[8]; + int retval; + + fn = xasprintf("/sys/class/net/%s/carrier", netdev_get_name(netdev_)); + fd = open(fn, O_RDONLY); + if (fd < 0) { + error = errno; + VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(error)); + goto exit; + } + + retval = read(fd, line, sizeof line); + if (retval < 0) { + error = errno; + if (error == EINVAL) { + /* This is the normal return value when we try to check carrier + * if the network device is not up. 
*/ + } else { + VLOG_WARN_RL(&rl, "%s: read failed: %s", fn, strerror(error)); + } + goto exit; + } else if (retval == 0) { + error = EPROTO; + VLOG_WARN_RL(&rl, "%s: unexpected end of file", fn); + goto exit; + } + + if (line[0] != '0' && line[0] != '1') { + error = EPROTO; + VLOG_WARN_RL(&rl, "%s: value is %c (expected 0 or 1)", + fn, line[0]); + goto exit; + } + netdev->cache->carrier = line[0] != '0'; + netdev->cache->valid |= VALID_CARRIER; + } + *carrier = netdev->cache->carrier; + error = 0; + +exit: + if (fd >= 0) { + close(fd); + } + free(fn); + return error; +} + +/* Check whether we can we use RTM_GETLINK to get network device statistics. + * In pre-2.6.19 kernels, this was only available if wireless extensions were + * enabled. */ +static bool +check_for_working_netlink_stats(void) +{ + /* Decide on the netdev_get_stats() implementation to use. Netlink is + * preferable, so if that works, we'll use it. */ + int ifindex = do_get_ifindex("lo"); + if (ifindex < 0) { + VLOG_WARN("failed to get ifindex for lo, " + "obtaining netdev stats from proc"); + return false; + } else { + struct netdev_stats stats; + int error = get_stats_via_netlink(ifindex, &stats); + if (!error) { + VLOG_DBG("obtaining netdev stats via rtnetlink"); + return true; + } else { + VLOG_INFO("RTM_GETLINK failed (%s), obtaining netdev stats " + "via proc (you are probably running a pre-2.6.19 " + "kernel)", strerror(error)); + return false; + } + } +} + +/* Retrieves current device stats for 'netdev'. + * + * XXX All of the members of struct netdev_stats are 64 bits wide, but on + * 32-bit architectures the Linux network stats are only 32 bits. 
*/ +static int +netdev_linux_get_stats(const struct netdev *netdev, struct netdev_stats *stats) +{ + static int use_netlink_stats = -1; + int error; + + COVERAGE_INC(netdev_get_stats); + if (use_netlink_stats < 0) { + use_netlink_stats = check_for_working_netlink_stats(); + } + if (use_netlink_stats) { + int ifindex; + + error = get_ifindex(netdev, &ifindex); + if (!error) { + error = get_stats_via_netlink(ifindex, stats); + } + } else { + error = get_stats_via_proc(netdev->name, stats); + } + return error; +} + +/* Stores the features supported by 'netdev' into each of '*current', + * '*advertised', '*supported', and '*peer' that are non-null. Each value is a + * bitmap of "enum ofp_port_features" bits, in host byte order. Returns 0 if + * successful, otherwise a positive errno value. On failure, all of the + * passed-in values are set to 0. */ +static int +netdev_linux_get_features(struct netdev *netdev, + uint32_t *current, uint32_t *advertised, + uint32_t *supported, uint32_t *peer) +{ + struct ethtool_cmd ecmd; + int error; + + memset(&ecmd, 0, sizeof ecmd); + error = netdev_linux_do_ethtool(netdev, &ecmd, + ETHTOOL_GSET, "ETHTOOL_GSET"); + if (error) { + return error; + } + + /* Supported features. 
*/ + *supported = 0; + if (ecmd.supported & SUPPORTED_10baseT_Half) { + *supported |= OFPPF_10MB_HD; + } + if (ecmd.supported & SUPPORTED_10baseT_Full) { + *supported |= OFPPF_10MB_FD; + } + if (ecmd.supported & SUPPORTED_100baseT_Half) { + *supported |= OFPPF_100MB_HD; + } + if (ecmd.supported & SUPPORTED_100baseT_Full) { + *supported |= OFPPF_100MB_FD; + } + if (ecmd.supported & SUPPORTED_1000baseT_Half) { + *supported |= OFPPF_1GB_HD; + } + if (ecmd.supported & SUPPORTED_1000baseT_Full) { + *supported |= OFPPF_1GB_FD; + } + if (ecmd.supported & SUPPORTED_10000baseT_Full) { + *supported |= OFPPF_10GB_FD; + } + if (ecmd.supported & SUPPORTED_TP) { + *supported |= OFPPF_COPPER; + } + if (ecmd.supported & SUPPORTED_FIBRE) { + *supported |= OFPPF_FIBER; + } + if (ecmd.supported & SUPPORTED_Autoneg) { + *supported |= OFPPF_AUTONEG; + } + if (ecmd.supported & SUPPORTED_Pause) { + *supported |= OFPPF_PAUSE; + } + if (ecmd.supported & SUPPORTED_Asym_Pause) { + *supported |= OFPPF_PAUSE_ASYM; + } + + /* Advertised features. 
*/ + *advertised = 0; + if (ecmd.advertising & ADVERTISED_10baseT_Half) { + *advertised |= OFPPF_10MB_HD; + } + if (ecmd.advertising & ADVERTISED_10baseT_Full) { + *advertised |= OFPPF_10MB_FD; + } + if (ecmd.advertising & ADVERTISED_100baseT_Half) { + *advertised |= OFPPF_100MB_HD; + } + if (ecmd.advertising & ADVERTISED_100baseT_Full) { + *advertised |= OFPPF_100MB_FD; + } + if (ecmd.advertising & ADVERTISED_1000baseT_Half) { + *advertised |= OFPPF_1GB_HD; + } + if (ecmd.advertising & ADVERTISED_1000baseT_Full) { + *advertised |= OFPPF_1GB_FD; + } + if (ecmd.advertising & ADVERTISED_10000baseT_Full) { + *advertised |= OFPPF_10GB_FD; + } + if (ecmd.advertising & ADVERTISED_TP) { + *advertised |= OFPPF_COPPER; + } + if (ecmd.advertising & ADVERTISED_FIBRE) { + *advertised |= OFPPF_FIBER; + } + if (ecmd.advertising & ADVERTISED_Autoneg) { + *advertised |= OFPPF_AUTONEG; + } + if (ecmd.advertising & ADVERTISED_Pause) { + *advertised |= OFPPF_PAUSE; + } + if (ecmd.advertising & ADVERTISED_Asym_Pause) { + *advertised |= OFPPF_PAUSE_ASYM; + } + + /* Current settings. */ + if (ecmd.speed == SPEED_10) { + *current = ecmd.duplex ? OFPPF_10MB_FD : OFPPF_10MB_HD; + } else if (ecmd.speed == SPEED_100) { + *current = ecmd.duplex ? OFPPF_100MB_FD : OFPPF_100MB_HD; + } else if (ecmd.speed == SPEED_1000) { + *current = ecmd.duplex ? OFPPF_1GB_FD : OFPPF_1GB_HD; + } else if (ecmd.speed == SPEED_10000) { + *current = OFPPF_10GB_FD; + } else { + *current = 0; + } + + if (ecmd.port == PORT_TP) { + *current |= OFPPF_COPPER; + } else if (ecmd.port == PORT_FIBRE) { + *current |= OFPPF_FIBER; + } + + if (ecmd.autoneg) { + *current |= OFPPF_AUTONEG; + } + + /* Peer advertisements. */ + *peer = 0; /* XXX */ + + return 0; +} + +/* Set the features advertised by 'netdev' to 'advertise'. 
*/ +static int +netdev_linux_set_advertisements(struct netdev *netdev, uint32_t advertise) +{ + struct ethtool_cmd ecmd; + int error; + + memset(&ecmd, 0, sizeof ecmd); + error = netdev_linux_do_ethtool(netdev, &ecmd, + ETHTOOL_GSET, "ETHTOOL_GSET"); + if (error) { + return error; + } + + ecmd.advertising = 0; + if (advertise & OFPPF_10MB_HD) { + ecmd.advertising |= ADVERTISED_10baseT_Half; + } + if (advertise & OFPPF_10MB_FD) { + ecmd.advertising |= ADVERTISED_10baseT_Full; + } + if (advertise & OFPPF_100MB_HD) { + ecmd.advertising |= ADVERTISED_100baseT_Half; + } + if (advertise & OFPPF_100MB_FD) { + ecmd.advertising |= ADVERTISED_100baseT_Full; + } + if (advertise & OFPPF_1GB_HD) { + ecmd.advertising |= ADVERTISED_1000baseT_Half; + } + if (advertise & OFPPF_1GB_FD) { + ecmd.advertising |= ADVERTISED_1000baseT_Full; + } + if (advertise & OFPPF_10GB_FD) { + ecmd.advertising |= ADVERTISED_10000baseT_Full; + } + if (advertise & OFPPF_COPPER) { + ecmd.advertising |= ADVERTISED_TP; + } + if (advertise & OFPPF_FIBER) { + ecmd.advertising |= ADVERTISED_FIBRE; + } + if (advertise & OFPPF_AUTONEG) { + ecmd.advertising |= ADVERTISED_Autoneg; + } + if (advertise & OFPPF_PAUSE) { + ecmd.advertising |= ADVERTISED_Pause; + } + if (advertise & OFPPF_PAUSE_ASYM) { + ecmd.advertising |= ADVERTISED_Asym_Pause; + } + return netdev_linux_do_ethtool(netdev, &ecmd, + ETHTOOL_SSET, "ETHTOOL_SSET"); +} + +/* If 'netdev_name' is the name of a VLAN network device (e.g. one created with + * vconfig(8)), sets '*vlan_vid' to the VLAN VID associated with that device + * and returns 0. Otherwise returns a errno value (specifically ENOENT if + * 'netdev_name' is the name of a network device that is not a VLAN device) and + * sets '*vlan_vid' to -1. 
*/ +static int +netdev_linux_get_vlan_vid(const struct netdev *netdev, int *vlan_vid) +{ + const char *netdev_name = netdev_get_name(netdev); + struct ds line = DS_EMPTY_INITIALIZER; + FILE *stream = NULL; + int error; + char *fn; + + COVERAGE_INC(netdev_get_vlan_vid); + fn = xasprintf("/proc/net/vlan/%s", netdev_name); + stream = fopen(fn, "r"); + if (!stream) { + error = errno; + goto done; + } + + if (ds_get_line(&line, stream)) { + if (ferror(stream)) { + error = errno; + VLOG_ERR_RL(&rl, "error reading \"%s\": %s", fn, strerror(errno)); + } else { + error = EPROTO; + VLOG_ERR_RL(&rl, "unexpected end of file reading \"%s\"", fn); + } + goto done; + } + + if (!sscanf(ds_cstr(&line), "%*s VID: %d", vlan_vid)) { + error = EPROTO; + VLOG_ERR_RL(&rl, "parse error reading \"%s\" line 1: \"%s\"", + fn, ds_cstr(&line)); + goto done; + } + + error = 0; + +done: + free(fn); + if (stream) { + fclose(stream); + } + ds_destroy(&line); + if (error) { + *vlan_vid = -1; + } + return error; +} + +#define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress" +#define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst %dk mtu 65535 drop flowid :1" +/* We redirect stderr to /dev/null because we often want to remove all + * traffic control configuration on a port so its in a known state. If + * this done when there is no such configuration, tc complains, so we just + * always ignore it. + */ +#define POLICE_DEL_CMD "/sbin/tc qdisc del dev %s handle ffff: ingress 2>/dev/null" + +/* Attempts to set input rate limiting (policing) policy. */ +static int +netdev_linux_set_policing(struct netdev *netdev, + uint32_t kbits_rate, uint32_t kbits_burst) +{ + const char *netdev_name = netdev_get_name(netdev); + char command[1024]; + + COVERAGE_INC(netdev_set_policing); + if (kbits_rate) { + if (!kbits_burst) { + /* Default to 10 kilobits if not specified. 
*/ + kbits_burst = 10; + } + + /* xxx This should be more careful about only adding if it + * xxx actually exists, as opposed to always deleting it. */ + snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name); + if (system(command) == -1) { + VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name); + } + + snprintf(command, sizeof(command), POLICE_ADD_CMD, netdev_name); + if (system(command) != 0) { + VLOG_WARN_RL(&rl, "%s: problem adding policing", netdev_name); + return -1; + } + + snprintf(command, sizeof(command), POLICE_CONFIG_CMD, netdev_name, + kbits_rate, kbits_burst); + if (system(command) != 0) { + VLOG_WARN_RL(&rl, "%s: problem configuring policing", + netdev_name); + return -1; + } + } else { + snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name); + if (system(command) == -1) { + VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name); + } + } + + return 0; +} + +/* If 'netdev' has an assigned IPv4 address, stores it in '*in4' and returns 0. + * Returns EADDRNOTAVAIL if no address is assigned, or another positive errno + * value if the address cannot be retrieved. */ +static int +netdev_linux_get_in4(const struct netdev *netdev_, struct in_addr *in4) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + if (!(netdev->cache->valid & VALID_IN4)) { + const struct sockaddr_in *sin; + struct ifreq ifr; + int error; + + ifr.ifr_addr.sa_family = AF_INET; + error = netdev_linux_do_ioctl(netdev_, &ifr, + SIOCGIFADDR, "SIOCGIFADDR"); + if (error) { + return error; + } + + sin = (struct sockaddr_in *) &ifr.ifr_addr; + netdev->cache->in4 = sin->sin_addr; + netdev->cache->valid |= VALID_IN4; + } + *in4 = netdev->cache->in4; + return in4->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0; +} + +/* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If + * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. Returns 0 + * if successful, otherwise a positive errno value. 
*/ +static int +netdev_linux_set_in4(struct netdev *netdev_, struct in_addr addr, + struct in_addr mask) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + int error; + + error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", addr); + if (!error) { + netdev->cache->valid |= VALID_IN4; + netdev->cache->in4 = addr; + if (addr.s_addr != INADDR_ANY) { + error = do_set_addr(netdev_, SIOCSIFNETMASK, + "SIOCSIFNETMASK", mask); + } + } + return error; +} + +static bool +parse_if_inet6_line(const char *line, + struct in6_addr *in6, char ifname[16 + 1]) +{ + uint8_t *s6 = in6->s6_addr; +#define X8 "%2"SCNx8 + return sscanf(line, + " "X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 + "%*x %*x %*x %*x %16s\n", + &s6[0], &s6[1], &s6[2], &s6[3], + &s6[4], &s6[5], &s6[6], &s6[7], + &s6[8], &s6[9], &s6[10], &s6[11], + &s6[12], &s6[13], &s6[14], &s6[15], + ifname) == 17; +} + +/* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address; + * otherwise sets '*in6' to in6addr_any. Always returns 0. 
*/ +static int +netdev_linux_get_in6(const struct netdev *netdev_, struct in6_addr *in6) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + if (!(netdev->cache->valid & VALID_IN6)) { + FILE *file; + char line[128]; + + netdev->cache->in6 = in6addr_any; + + file = fopen("/proc/net/if_inet6", "r"); + if (file != NULL) { + const char *name = netdev_get_name(netdev_); + while (fgets(line, sizeof line, file)) { + struct in6_addr in6; + char ifname[16 + 1]; + if (parse_if_inet6_line(line, &in6, ifname) + && !strcmp(name, ifname)) + { + netdev->cache->in6 = in6; + break; + } + } + fclose(file); + } + netdev->cache->valid |= VALID_IN6; + } + *in6 = netdev->cache->in6; + return 0; +} + +static void +make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr) +{ + struct sockaddr_in sin; + memset(&sin, 0, sizeof sin); + sin.sin_family = AF_INET; + sin.sin_addr = addr; + sin.sin_port = 0; + + memset(sa, 0, sizeof *sa); + memcpy(sa, &sin, sizeof sin); +} + +static int +do_set_addr(struct netdev *netdev, + int ioctl_nr, const char *ioctl_name, struct in_addr addr) +{ + struct ifreq ifr; + strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name); + make_in4_sockaddr(&ifr.ifr_addr, addr); + return netdev_linux_do_ioctl(netdev, &ifr, ioctl_nr, ioctl_name); +} + +/* Adds 'router' as a default IP gateway. */ +static int +netdev_linux_add_router(struct netdev *netdev UNUSED, struct in_addr router) +{ + struct in_addr any = { INADDR_ANY }; + struct rtentry rt; + int error; + + memset(&rt, 0, sizeof rt); + make_in4_sockaddr(&rt.rt_dst, any); + make_in4_sockaddr(&rt.rt_gateway, router); + make_in4_sockaddr(&rt.rt_genmask, any); + rt.rt_flags = RTF_UP | RTF_GATEWAY; + COVERAGE_INC(netdev_add_router); + error = ioctl(af_inet_sock, SIOCADDRT, &rt) < 0 ? errno : 0; + if (error) { + VLOG_WARN("ioctl(SIOCADDRT): %s", strerror(error)); + } + return error; +} + +/* Looks up the ARP table entry for 'ip' on 'netdev'. 
If one exists and can be + * successfully retrieved, it stores the corresponding MAC address in 'mac' and + * returns 0. Otherwise, it returns a positive errno value; in particular, + * ENXIO indicates that there is not ARP table entry for 'ip' on 'netdev'. */ +static int +netdev_linux_arp_lookup(const struct netdev *netdev, + uint32_t ip, uint8_t mac[ETH_ADDR_LEN]) +{ + struct arpreq r; + struct sockaddr_in *pa; + int retval; + + memset(&r, 0, sizeof r); + pa = (struct sockaddr_in *) &r.arp_pa; + pa->sin_family = AF_INET; + pa->sin_addr.s_addr = ip; + pa->sin_port = 0; + r.arp_ha.sa_family = ARPHRD_ETHER; + r.arp_flags = 0; + strncpy(r.arp_dev, netdev->name, sizeof r.arp_dev); + COVERAGE_INC(netdev_arp_lookup); + retval = ioctl(af_inet_sock, SIOCGARP, &r) < 0 ? errno : 0; + if (!retval) { + memcpy(mac, r.arp_ha.sa_data, ETH_ADDR_LEN); + } else if (retval != ENXIO) { + VLOG_WARN_RL(&rl, "%s: could not look up ARP entry for "IP_FMT": %s", + netdev->name, IP_ARGS(&ip), strerror(retval)); + } + return retval; +} + +static int +nd_to_iff_flags(enum netdev_flags nd) +{ + int iff = 0; + if (nd & NETDEV_UP) { + iff |= IFF_UP; + } + if (nd & NETDEV_PROMISC) { + iff |= IFF_PROMISC; + } + return iff; +} + +static int +iff_to_nd_flags(int iff) +{ + enum netdev_flags nd = 0; + if (iff & IFF_UP) { + nd |= NETDEV_UP; + } + if (iff & IFF_PROMISC) { + nd |= NETDEV_PROMISC; + } + return nd; +} + +static int +netdev_linux_update_flags(struct netdev *netdev, enum netdev_flags off, + enum netdev_flags on, enum netdev_flags *old_flagsp) +{ + int old_flags, new_flags; + int error; + + error = get_flags(netdev, &old_flags); + if (!error) { + *old_flagsp = iff_to_nd_flags(old_flags); + new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on); + if (new_flags != old_flags) { + error = set_flags(netdev, new_flags); + } + } + return error; +} + +static void +poll_notify(struct list *list) +{ + struct netdev_linux_notifier *notifier; + LIST_FOR_EACH (notifier, struct 
netdev_linux_notifier, node, list) { + struct netdev_notifier *n = ¬ifier->notifier; + n->cb(n); + } +} + +static void +netdev_linux_poll_cb(const struct linux_netdev_change *change, + void *aux UNUSED) +{ + if (change) { + struct list *list = shash_find_data(&netdev_linux_notifiers, + change->ifname); + if (list) { + poll_notify(list); + } + } else { + struct shash_node *node; + SHASH_FOR_EACH (node, &netdev_linux_notifiers) { + poll_notify(node->data); + } + } +} + +static int +netdev_linux_poll_add(struct netdev *netdev, + void (*cb)(struct netdev_notifier *), void *aux, + struct netdev_notifier **notifierp) +{ + const char *netdev_name = netdev_get_name(netdev); + struct netdev_linux_notifier *notifier; + struct list *list; + + if (shash_is_empty(&netdev_linux_notifiers)) { + int error = linux_netdev_notifier_register(&netdev_linux_poll_notifier, + netdev_linux_poll_cb, NULL); + if (error) { + return error; + } + } + + list = shash_find_data(&netdev_linux_notifiers, netdev_name); + if (!list) { + list = xmalloc(sizeof *list); + list_init(list); + shash_add(&netdev_linux_notifiers, netdev_name, list); + } + + notifier = xmalloc(sizeof *notifier); + netdev_notifier_init(¬ifier->notifier, netdev, cb, aux); + list_push_back(list, ¬ifier->node); + *notifierp = ¬ifier->notifier; + return 0; +} + +static void +netdev_linux_poll_remove(struct netdev_notifier *notifier_) +{ + struct netdev_linux_notifier *notifier = + CONTAINER_OF(notifier_, struct netdev_linux_notifier, notifier); + struct list *list; + + /* Remove 'notifier' from its list. */ + list = list_remove(¬ifier->node); + if (list_is_empty(list)) { + /* The list is now empty. Remove it from the hash and free it. */ + const char *netdev_name = netdev_get_name(notifier->notifier.netdev); + shash_delete(&netdev_linux_notifiers, + shash_find(&netdev_linux_notifiers, netdev_name)); + free(list); + } + free(notifier); + + /* If that was the last notifier, unregister. 
*/ + if (shash_is_empty(&netdev_linux_notifiers)) { + linux_netdev_notifier_unregister(&netdev_linux_poll_notifier); + } +} + +const struct netdev_class netdev_linux_class = { + "", /* prefix */ + "linux", /* name */ + + netdev_linux_init, + netdev_linux_run, + netdev_linux_wait, + + netdev_linux_open, + netdev_linux_close, + + netdev_linux_enumerate, + + netdev_linux_recv, + netdev_linux_recv_wait, + netdev_linux_drain, + + netdev_linux_send, + netdev_linux_send_wait, + + netdev_linux_set_etheraddr, + netdev_linux_get_etheraddr, + netdev_linux_get_mtu, + netdev_linux_get_carrier, + netdev_linux_get_stats, + + netdev_linux_get_features, + netdev_linux_set_advertisements, + netdev_linux_get_vlan_vid, + netdev_linux_set_policing, + + netdev_linux_get_in4, + netdev_linux_set_in4, + netdev_linux_get_in6, + netdev_linux_add_router, + netdev_linux_arp_lookup, + + netdev_linux_update_flags, + + netdev_linux_poll_add, + netdev_linux_poll_remove, +}; + +const struct netdev_class netdev_tap_class = { + "tap", /* prefix */ + "tap", /* name */ + + netdev_linux_init, + NULL, /* run */ + NULL, /* wait */ + + netdev_linux_open, + netdev_linux_close, + + netdev_linux_enumerate, + + netdev_linux_recv, + netdev_linux_recv_wait, + netdev_linux_drain, + + netdev_linux_send, + netdev_linux_send_wait, + + netdev_linux_set_etheraddr, + netdev_linux_get_etheraddr, + netdev_linux_get_mtu, + netdev_linux_get_carrier, + netdev_linux_get_stats, + + netdev_linux_get_features, + netdev_linux_set_advertisements, + netdev_linux_get_vlan_vid, + netdev_linux_set_policing, + + netdev_linux_get_in4, + netdev_linux_set_in4, + netdev_linux_get_in6, + netdev_linux_add_router, + netdev_linux_arp_lookup, + + netdev_linux_update_flags, + + netdev_linux_poll_add, + netdev_linux_poll_remove, +}; + +static int +get_stats_via_netlink(int ifindex, struct netdev_stats *stats) +{ + static struct nl_sock *rtnl_sock; + struct ofpbuf request; + struct ofpbuf *reply; + struct ifinfomsg *ifi; + const struct 
rtnl_link_stats *rtnl_stats; + struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; + int error; + + if (!rtnl_sock) { + error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &rtnl_sock); + if (error) { + VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s", + strerror(error)); + return error; + } + } + + ofpbuf_init(&request, 0); + nl_msg_put_nlmsghdr(&request, rtnl_sock, sizeof *ifi, + RTM_GETLINK, NLM_F_REQUEST); + ifi = ofpbuf_put_zeros(&request, sizeof *ifi); + ifi->ifi_family = PF_UNSPEC; + ifi->ifi_index = ifindex; + error = nl_sock_transact(rtnl_sock, &request, &reply); + ofpbuf_uninit(&request); + if (error) { + return error; + } + + if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg), + rtnlgrp_link_policy, + attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { + ofpbuf_delete(reply); + return EPROTO; + } + + if (!attrs[IFLA_STATS]) { + VLOG_WARN_RL(&rl, "RTM_GETLINK reply lacks stats"); + return EPROTO; + } + + rtnl_stats = nl_attr_get(attrs[IFLA_STATS]); + stats->rx_packets = rtnl_stats->rx_packets; + stats->tx_packets = rtnl_stats->tx_packets; + stats->rx_bytes = rtnl_stats->rx_bytes; + stats->tx_bytes = rtnl_stats->tx_bytes; + stats->rx_errors = rtnl_stats->rx_errors; + stats->tx_errors = rtnl_stats->tx_errors; + stats->rx_dropped = rtnl_stats->rx_dropped; + stats->tx_dropped = rtnl_stats->tx_dropped; + stats->multicast = rtnl_stats->multicast; + stats->collisions = rtnl_stats->collisions; + stats->rx_length_errors = rtnl_stats->rx_length_errors; + stats->rx_over_errors = rtnl_stats->rx_over_errors; + stats->rx_crc_errors = rtnl_stats->rx_crc_errors; + stats->rx_frame_errors = rtnl_stats->rx_frame_errors; + stats->rx_fifo_errors = rtnl_stats->rx_fifo_errors; + stats->rx_missed_errors = rtnl_stats->rx_missed_errors; + stats->tx_aborted_errors = rtnl_stats->tx_aborted_errors; + stats->tx_carrier_errors = rtnl_stats->tx_carrier_errors; + stats->tx_fifo_errors = rtnl_stats->tx_fifo_errors; + stats->tx_heartbeat_errors = 
rtnl_stats->tx_heartbeat_errors; + stats->tx_window_errors = rtnl_stats->tx_window_errors; + + return 0; +} + +static int +get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats) +{ + static const char fn[] = "/proc/net/dev"; + char line[1024]; + FILE *stream; + int ln; + + stream = fopen(fn, "r"); + if (!stream) { + VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno)); + return errno; + } + + ln = 0; + while (fgets(line, sizeof line, stream)) { + if (++ln >= 3) { + char devname[16]; +#define X64 "%"SCNu64 + if (sscanf(line, + " %15[^:]:" + X64 X64 X64 X64 X64 X64 X64 "%*u" + X64 X64 X64 X64 X64 X64 X64 "%*u", + devname, + &stats->rx_bytes, + &stats->rx_packets, + &stats->rx_errors, + &stats->rx_dropped, + &stats->rx_fifo_errors, + &stats->rx_frame_errors, + &stats->multicast, + &stats->tx_bytes, + &stats->tx_packets, + &stats->tx_errors, + &stats->tx_dropped, + &stats->tx_fifo_errors, + &stats->collisions, + &stats->tx_carrier_errors) != 15) { + VLOG_WARN_RL(&rl, "%s:%d: parse error", fn, ln); + } else if (!strcmp(devname, netdev_name)) { + stats->rx_length_errors = UINT64_MAX; + stats->rx_over_errors = UINT64_MAX; + stats->rx_crc_errors = UINT64_MAX; + stats->rx_missed_errors = UINT64_MAX; + stats->tx_aborted_errors = UINT64_MAX; + stats->tx_heartbeat_errors = UINT64_MAX; + stats->tx_window_errors = UINT64_MAX; + fclose(stream); + return 0; + } + } + } + VLOG_WARN_RL(&rl, "%s: no stats for %s", fn, netdev_name); + fclose(stream); + return ENODEV; +} + +static int +get_flags(const struct netdev *netdev, int *flags) +{ + struct ifreq ifr; + int error; + + error = netdev_linux_do_ioctl(netdev, &ifr, SIOCGIFFLAGS, "SIOCGIFFLAGS"); + *flags = ifr.ifr_flags; + return error; +} + +static int +set_flags(struct netdev *netdev, int flags) +{ + struct ifreq ifr; + + ifr.ifr_flags = flags; + return netdev_linux_do_ioctl(netdev, &ifr, SIOCSIFFLAGS, "SIOCSIFFLAGS"); +} + +static int +do_get_ifindex(const char *netdev_name) +{ + struct ifreq ifr; + + 
strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); + COVERAGE_INC(netdev_get_ifindex); + if (ioctl(af_inet_sock, SIOCGIFINDEX, &ifr) < 0) { + VLOG_WARN_RL(&rl, "ioctl(SIOCGIFINDEX) on %s device failed: %s", + netdev_name, strerror(errno)); + return -errno; + } + return ifr.ifr_ifindex; +} + +static int +get_ifindex(const struct netdev *netdev_, int *ifindexp) +{ + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + *ifindexp = 0; + if (!(netdev->cache->valid & VALID_IFINDEX)) { + int ifindex = do_get_ifindex(netdev_get_name(netdev_)); + if (ifindex < 0) { + return -ifindex; + } + netdev->cache->valid |= VALID_IFINDEX; + netdev->cache->ifindex = ifindex; + } + *ifindexp = netdev->cache->ifindex; + return 0; +} + +static int +get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN]) +{ + struct ifreq ifr; + int hwaddr_family; + + memset(&ifr, 0, sizeof ifr); + strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); + COVERAGE_INC(netdev_get_hwaddr); + if (ioctl(af_inet_sock, SIOCGIFHWADDR, &ifr) < 0) { + VLOG_ERR("ioctl(SIOCGIFHWADDR) on %s device failed: %s", + netdev_name, strerror(errno)); + return errno; + } + hwaddr_family = ifr.ifr_hwaddr.sa_family; + if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) { + VLOG_WARN("%s device has unknown hardware address family %d", + netdev_name, hwaddr_family); + } + memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN); + return 0; +} + +static int +set_etheraddr(const char *netdev_name, int hwaddr_family, + const uint8_t mac[ETH_ADDR_LEN]) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof ifr); + strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); + ifr.ifr_hwaddr.sa_family = hwaddr_family; + memcpy(ifr.ifr_hwaddr.sa_data, mac, ETH_ADDR_LEN); + COVERAGE_INC(netdev_set_hwaddr); + if (ioctl(af_inet_sock, SIOCSIFHWADDR, &ifr) < 0) { + VLOG_ERR("ioctl(SIOCSIFHWADDR) on %s device failed: %s", + netdev_name, strerror(errno)); + return errno; + } + return 0; +} + +static int 
+netdev_linux_do_ethtool(struct netdev *netdev, struct ethtool_cmd *ecmd, + int cmd, const char *cmd_name) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof ifr); + strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name); + ifr.ifr_data = (caddr_t) ecmd; + + ecmd->cmd = cmd; + COVERAGE_INC(netdev_ethtool); + if (ioctl(af_inet_sock, SIOCETHTOOL, &ifr) == 0) { + return 0; + } else { + if (errno != EOPNOTSUPP) { + VLOG_WARN_RL(&rl, "ethtool command %s on network device %s " + "failed: %s", cmd_name, netdev->name, + strerror(errno)); + } else { + /* The device doesn't support this operation. That's pretty + * common, so there's no point in logging anything. */ + } + return errno; + } +} + +static int +netdev_linux_do_ioctl(const struct netdev *netdev, struct ifreq *ifr, + int cmd, const char *cmd_name) +{ + strncpy(ifr->ifr_name, netdev_get_name(netdev), sizeof ifr->ifr_name); + if (ioctl(af_inet_sock, cmd, ifr) == -1) { + VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s", + netdev_get_name(netdev), cmd_name, strerror(errno)); + return errno; + } + return 0; +} + /* rtnetlink socket. */ -static struct nl_sock *rtnl_sock; +static struct nl_sock *notify_sock; /* All registered notifiers. 
*/ static struct list all_notifiers = LIST_INITIALIZER(&all_notifiers); @@ -39,15 +1603,15 @@ static struct list all_notifiers = LIST_INITIALIZER(&all_notifiers); static void linux_netdev_report_change(const struct nlmsghdr *, const struct ifinfomsg *, struct nlattr *attrs[]); -static void linux_netdev_report_notify_error(int error); +static void linux_netdev_report_notify_error(void); int linux_netdev_notifier_register(struct linux_netdev_notifier *notifier, linux_netdev_notify_func *cb, void *aux) { - if (!rtnl_sock) { + if (!notify_sock) { int error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, - &rtnl_sock); + &notify_sock); if (error) { VLOG_WARN("could not create rtnetlink socket: %s", strerror(error)); @@ -60,7 +1624,6 @@ linux_netdev_notifier_register(struct linux_netdev_notifier *notifier, } list_push_back(&all_notifiers, &notifier->node); - notifier->error = 0; notifier->cb = cb; notifier->aux = aux; return 0; @@ -71,36 +1634,17 @@ linux_netdev_notifier_unregister(struct linux_netdev_notifier *notifier) { list_remove(&notifier->node); if (list_is_empty(&all_notifiers)) { - nl_sock_destroy(rtnl_sock); - rtnl_sock = NULL; + nl_sock_destroy(notify_sock); + notify_sock = NULL; } } -int -linux_netdev_notifier_get_error(struct linux_netdev_notifier *notifier) -{ - int error = notifier->error; - notifier->error = 0; - return error; -} - -int -linux_netdev_notifier_peek_error(const struct linux_netdev_notifier *notifier) -{ - return notifier->error; -} - -static const struct nl_policy rtnlgrp_link_policy[] = { - [IFLA_IFNAME] = { .type = NL_A_STRING }, - [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, -}; - void linux_netdev_notifier_run(void) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - if (!rtnl_sock) { + if (!notify_sock) { return; } @@ -109,7 +1653,7 @@ linux_netdev_notifier_run(void) struct ofpbuf *buf; int error; - error = nl_sock_recv(rtnl_sock, &buf, false); + error = nl_sock_recv(notify_sock, &buf, false); if (!error) { if 
(nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), rtnlgrp_link_policy, @@ -120,7 +1664,7 @@ linux_netdev_notifier_run(void) linux_netdev_report_change(buf->data, ifinfo, attrs); } else { VLOG_WARN_RL(&rl, "received bad rtnl message"); - linux_netdev_report_notify_error(ENOBUFS); + linux_netdev_report_notify_error(); } ofpbuf_delete(buf); } else if (error == EAGAIN) { @@ -132,7 +1676,7 @@ linux_netdev_notifier_run(void) VLOG_WARN_RL(&rl, "error reading rtnetlink socket: %s", strerror(error)); } - linux_netdev_report_notify_error(error); + linux_netdev_report_notify_error(); } } } @@ -140,8 +1684,8 @@ linux_netdev_notifier_run(void) void linux_netdev_notifier_wait(void) { - if (rtnl_sock) { - nl_sock_wait(rtnl_sock, POLLIN); + if (notify_sock) { + nl_sock_wait(notify_sock, POLLIN); } } @@ -163,21 +1707,17 @@ linux_netdev_report_change(const struct nlmsghdr *nlmsg, LIST_FOR_EACH (notifier, struct linux_netdev_notifier, node, &all_notifiers) { - if (!notifier->error) { - notifier->cb(&change, notifier->aux); - } + notifier->cb(&change, notifier->aux); } } static void -linux_netdev_report_notify_error(int error) +linux_netdev_report_notify_error(void) { struct linux_netdev_notifier *notifier; LIST_FOR_EACH (notifier, struct linux_netdev_notifier, node, &all_notifiers) { - if (error != ENOBUFS || !notifier->error) { - notifier->error = error; - } + notifier->cb(NULL, notifier->aux); } } diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h index 93ddfcb6..e8e204b6 100644 --- a/lib/netdev-linux.h +++ b/lib/netdev-linux.h @@ -39,7 +39,6 @@ typedef void linux_netdev_notify_func(const struct linux_netdev_change *, struct linux_netdev_notifier { struct list node; - int error; linux_netdev_notify_func *cb; void *aux; }; @@ -47,8 +46,6 @@ struct linux_netdev_notifier { int linux_netdev_notifier_register(struct linux_netdev_notifier *, linux_netdev_notify_func *, void *aux); void linux_netdev_notifier_unregister(struct linux_netdev_notifier *); -int 
linux_netdev_notifier_get_error(struct linux_netdev_notifier *); -int linux_netdev_notifier_peek_error(const struct linux_netdev_notifier *); void linux_netdev_notifier_run(void); void linux_netdev_notifier_wait(void); diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h new file mode 100644 index 00000000..752fd82a --- /dev/null +++ b/lib/netdev-provider.h @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2009 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef NETDEV_PROVIDER_H +#define NETDEV_PROVIDER_H 1 + +/* Generic interface to network devices. */ + +#include <assert.h> +#include "netdev.h" +#include "list.h" + +/* A network device (e.g. an Ethernet device). + * + * This structure should be treated as opaque by network device + * implementations. */ +struct netdev { + const struct netdev_class *class; + char *name; /* e.g. "eth0" */ + enum netdev_flags save_flags; /* Initial device flags. */ + enum netdev_flags changed_flags; /* Flags that we changed. */ + struct list node; /* Element in global list. */ +}; + +void netdev_init(struct netdev *, const char *name, + const struct netdev_class *); +static inline void netdev_assert_class(const struct netdev *netdev, + const struct netdev_class *class) +{ + assert(netdev->class == class); +} + +/* A network device notifier. + * + * Network device implementations should use netdev_notifier_init() to + * initialize this structure, but they may freely read its members after + * initialization. 
*/ +struct netdev_notifier { + struct netdev *netdev; + void (*cb)(struct netdev_notifier *); + void *aux; +}; +void netdev_notifier_init(struct netdev_notifier *, struct netdev *, + void (*cb)(struct netdev_notifier *), void *aux); + +/* Network device class structure, to be defined by each implementation of a + * network device. + * + * These functions return 0 if successful or a positive errno value on failure, + * except where otherwise noted. */ +struct netdev_class { + /* Prefix for names of netdevs in this class, e.g. "ndunix:". + * + * One netdev class may have the empty string "" as its prefix, in which + * case that netdev class is associated with netdev names that do not + * contain a colon. */ + const char *prefix; + + /* Class name, for use in error messages. */ + const char *name; + + /* Called only once, at program startup. Returning an error from this + * function will prevent any network device, of any class, from being + * opened. + * + * This function may be set to null if a network device class needs no + * initialization at program startup. */ + int (*init)(void); + + /* Performs periodic work needed by netdevs of this class. May be null if + * no periodic work is necessary. */ + void (*run)(void); + + /* Arranges for poll_block() to wake up if the "run" member function needs + * to be called. May be null if nothing is needed here. */ + void (*wait)(void); + + /* Attempts to open a network device. On success, sets '*netdevp' to the + * new network device. 'name' is the full network device name provided by + * the user. This name is useful for error messages but must not be + * modified. + * + * 'suffix' is a copy of 'name' following the netdev's 'prefix'. + * + * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order + * to capture frames of that type received on the device. It may also be + * one of the 'enum netdev_pseudo_ethertype' values to receive frames in + * one of those categories. 
*/ + int (*open)(const char *name, char *suffix, int ethertype, + struct netdev **netdevp); + + /* Closes 'netdev'. */ + void (*close)(struct netdev *netdev); + + /* Enumerates the names of all network devices of this class. + * + * The caller has already initialized 'all_names' and might already have + * added some names to it. This function should not disturb any existing + * names in 'all_names'. + * + * If this netdev class does not support enumeration, this may be a null + * pointer. */ + int (*enumerate)(struct svec *all_anmes); + + /* Attempts to receive a packet from 'netdev' into the 'size' bytes in + * 'buffer'. If successful, returns the number of bytes in the received + * packet, otherwise a negative errno value. Returns -EAGAIN immediately + * if no packet is ready to be received. */ + int (*recv)(struct netdev *netdev, void *buffer, size_t size); + + /* Registers with the poll loop to wake up from the next call to + * poll_block() when a packet is ready to be received with netdev_recv() on + * 'netdev'. */ + void (*recv_wait)(struct netdev *netdev); + + /* Discards all packets waiting to be received from 'netdev'. */ + int (*drain)(struct netdev *netdev); + + /* Sends the 'size'-byte packet in 'buffer' on 'netdev'. Returns 0 if + * successful, otherwise a positive errno value. Returns EAGAIN without + * blocking if the packet cannot be queued immediately. Returns EMSGSIZE + * if a partial packet was transmitted or if the packet is too big or too + * small to transmit on the device. + * + * The caller retains ownership of 'buffer' in all cases. + * + * The network device is expected to maintain a packet transmission queue, + * so that the caller does not ordinarily have to do additional queuing of + * packets. 
*/ + int (*send)(struct netdev *netdev, const void *buffer, size_t size); + + /* Registers with the poll loop to wake up from the next call to + * poll_block() when the packet transmission queue for 'netdev' has + * sufficient room to transmit a packet with netdev_send(). + * + * The network device is expected to maintain a packet transmission queue, + * so that the caller does not ordinarily have to do additional queuing of + * packets. Thus, this function is unlikely to ever be useful. */ + void (*send_wait)(struct netdev *netdev); + + /* Sets 'netdev''s Ethernet address to 'mac'. */ + int (*set_etheraddr)(struct netdev *netdev, const uint8_t mac[6]); + + /* Retrieves 'netdev''s Ethernet address into 'mac'. */ + int (*get_etheraddr)(const struct netdev *netdev, uint8_t mac[6]); + + /* Retrieves 'netdev''s MTU into '*mtup'. + * + * The MTU is the maximum size of transmitted (and received) packets, in + * bytes, not including the hardware header; thus, this is typically 1500 + * bytes for Ethernet devices. */ + int (*get_mtu)(const struct netdev *, int *mtup); + + /* Sets '*carrier' to true if carrier is active (link light is on) on + * 'netdev'. */ + int (*get_carrier)(const struct netdev *netdev, bool *carrier); + + /* Retrieves current device stats for 'netdev' into 'stats'. + * + * A network device that supports some statistics but not others should + * set the values of the unsupported statistics to all-1-bits + * (UINT64_MAX). */ + int (*get_stats)(const struct netdev *netdev, struct netdev_stats *stats); + + /* Stores the features supported by 'netdev' into each of '*current', + * '*advertised', '*supported', and '*peer'. Each value is a bitmap of + * "enum ofp_port_features" bits, in host byte order. 
*/ + int (*get_features)(struct netdev *netdev, + uint32_t *current, uint32_t *advertised, + uint32_t *supported, uint32_t *peer); + + /* Sets the features advertised by 'netdev' to 'advertise', which is a + * bitmap of "enum ofp_port_features" bits, in host byte order. + * + * This function may be set to null for a network device that does not + * support configuring advertisements. */ + int (*set_advertisements)(struct netdev *, uint32_t advertise); + + /* If 'netdev' is a VLAN network device (e.g. one created with vconfig(8)), + * sets '*vlan_vid' to the VLAN VID associated with that device and returns + * 0. + * + * Returns ENOENT if 'netdev' is a network device that is not a VLAN + * device. + * + * This function should be set to null if it doesn't make any sense for + * your network device (it probably doesn't). */ + int (*get_vlan_vid)(const struct netdev *netdev, int *vlan_vid); + + /* Attempts to set input rate limiting (policing) policy, such that up to + * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative + * burst size of 'kbits_burst' kb. + * + * This function may be set to null if policing is not supported. */ + int (*set_policing)(struct netdev *netdev, unsigned int kbits_rate, + unsigned int kbits_burst); + + /* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address. + * + * The following error values have well-defined meanings: + * + * - EADDRNOTAVAIL: 'netdev' has no assigned IPv4 address. + * + * - EOPNOTSUPP: No IPv4 network stack attached to 'netdev'. + * + * This function may be set to null if it would always return EOPNOTSUPP + * anyhow. */ + int (*get_in4)(const struct netdev *netdev, struct in_addr *in4); + + /* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If + * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. + * + * This function may be set to null if it would always return EOPNOTSUPP + * anyhow. 
*/ + int (*set_in4)(struct netdev *, struct in_addr addr, struct in_addr mask); + + /* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address. + * + * The following error values have well-defined meanings: + * + * - EADDRNOTAVAIL: 'netdev' has no assigned IPv6 address. + * + * - EOPNOTSUPP: No IPv6 network stack attached to 'netdev'. + * + * This function may be set to null if it would always return EOPNOTSUPP + * anyhow. */ + int (*get_in6)(const struct netdev *netdev, struct in6_addr *in6); + + /* Adds 'router' as a default IP gateway for the TCP/IP stack that + * corresponds to 'netdev'. + * + * This function may be set to null if it would always return EOPNOTSUPP + * anyhow. */ + int (*add_router)(struct netdev *netdev, struct in_addr router); + + /* Looks up the ARP table entry for 'ip' on 'netdev' and stores the + * corresponding MAC address in 'mac'. A return value of ENXIO, in + * particular, indicates that there is no ARP table entry for 'ip' on + * 'netdev'. + * + * This function may be set to null if it would always return EOPNOTSUPP + * anyhow. */ + int (*arp_lookup)(const struct netdev *, uint32_t ip, uint8_t mac[6]); + + /* Retrieves the current set of flags on 'netdev' into '*old_flags'. Then, + * turns off the flags that are set to 1 in 'off' and turns on the flags + * that are set to 1 in 'on'. (No bit will be set to 1 in both 'off' and + * 'on'; that is, off & on == 0.) + * + * This function may be invoked from a signal handler. Therefore, it + * should not do anything that is not signal-safe (such as logging). */ + int (*update_flags)(struct netdev *netdev, enum netdev_flags off, + enum netdev_flags on, enum netdev_flags *old_flags); + + /* Arranges for 'cb' to be called whenever one of the attributes of + * 'netdev' changes and sets '*notifierp' to a newly created + * netdev_notifier that represents this arrangement. 
The created notifier + * will have its 'netdev', 'cb', and 'aux' members set to the values of the + * corresponding parameters. */ + int (*poll_add)(struct netdev *netdev, + void (*cb)(struct netdev_notifier *), void *aux, + struct netdev_notifier **notifierp); + + /* Cancels poll notification for 'notifier'. */ + void (*poll_remove)(struct netdev_notifier *notifier); +}; + +extern const struct netdev_class netdev_linux_class; +extern const struct netdev_class netdev_tap_class; + +#endif /* netdev.h */ diff --git a/lib/netdev.c b/lib/netdev.c index 5df43c4e..57529147 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -19,24 +19,7 @@ #include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include @@ -46,282 +29,94 @@ #include "dynamic-string.h" #include "fatal-signal.h" #include "list.h" -#include "netdev-linux.h" -#include "netlink.h" +#include "netdev-provider.h" #include "ofpbuf.h" -#include "openflow/openflow.h" #include "packets.h" #include "poll-loop.h" #include "shash.h" -#include "socket-util.h" #include "svec.h" -/* linux/if.h defines IFF_LOWER_UP, net/if.h doesn't. - * net/if.h defines if_nameindex(), linux/if.h doesn't. - * We can't include both headers, so define IFF_LOWER_UP ourselves. */ -#ifndef IFF_LOWER_UP -#define IFF_LOWER_UP 0x10000 -#endif - -/* These were introduced in Linux 2.6.14, so they might be missing if we have - * old headers. */ -#ifndef ADVERTISED_Pause -#define ADVERTISED_Pause (1 << 13) -#endif -#ifndef ADVERTISED_Asym_Pause -#define ADVERTISED_Asym_Pause (1 << 14) -#endif - #define THIS_MODULE VLM_netdev #include "vlog.h" -struct netdev { - struct list node; - char *name; - - /* File descriptors. For ordinary network devices, the two fds below are - * the same; for tap devices, they differ. */ - int netdev_fd; /* Network device. 
*/ - int tap_fd; /* TAP character device, if any, otherwise the - * network device. */ - - /* Cached network device information. */ - int ifindex; /* -1 if not known. */ - uint8_t etheraddr[ETH_ADDR_LEN]; - struct in6_addr in6; - int speed; - int mtu; - int txqlen; - int hwaddr_family; - - int save_flags; /* Initial device flags. */ - int changed_flags; /* Flags that we changed. */ -}; - -/* Policy for RTNLGRP_LINK messages. - * - * There are *many* more fields in these messages, but currently we only care - * about interface names. */ -static const struct nl_policy rtnlgrp_link_policy[] = { - [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, - [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true, - .min_len = sizeof(struct rtnl_link_stats) }, +static const struct netdev_class *netdev_classes[] = { + &netdev_linux_class, + &netdev_tap_class, }; +enum { N_NETDEV_CLASSES = ARRAY_SIZE(netdev_classes) }; /* All open network devices. */ static struct list netdev_list = LIST_INITIALIZER(&netdev_list); -/* An AF_INET socket (used for ioctl operations). */ -static int af_inet_sock = -1; - -/* NETLINK_ROUTE socket. */ -static struct nl_sock *rtnl_sock; - -/* Can we use RTM_GETLINK to get network device statistics? (In pre-2.6.19 - * kernels, this was only available if wireless extensions were enabled.) */ -static bool use_netlink_stats; - /* This is set pretty low because we probably won't learn anything from the * additional log messages. 
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); -static void init_netdev(void); -static int do_open_netdev(const char *name, int ethertype, int tap_fd, - struct netdev **netdev_); +static void restore_all_flags(void *aux); static int restore_flags(struct netdev *netdev); -static int get_flags(const char *netdev_name, int *flagsp); -static int set_flags(const char *netdev_name, int flags); -static int do_get_ifindex(const char *netdev_name); -static int get_ifindex(const struct netdev *, int *ifindexp); -static int get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN], - int *hwaddr_familyp); -static int set_etheraddr(const char *netdev_name, int hwaddr_family, - const uint8_t[ETH_ADDR_LEN]); - -/* Obtains the IPv6 address for 'name' into 'in6'. */ -static void -get_ipv6_address(const char *name, struct in6_addr *in6) + +/* Attempts to initialize the netdev module. Returns 0 if successful, + * otherwise a positive errno value. + * + * Calling this function is optional. If not called explicitly, it will + * automatically be called upon the first attempt to open a network device. 
*/ +int +netdev_initialize(void) { - FILE *file; - char line[128]; - - file = fopen("/proc/net/if_inet6", "r"); - if (file == NULL) { - /* This most likely indicates that the host doesn't have IPv6 support, - * so it's not really a failure condition.*/ - *in6 = in6addr_any; - return; - } + static int status = -1; + if (status < 0) { + int i; + + fatal_signal_add_hook(restore_all_flags, NULL, true); - while (fgets(line, sizeof line, file)) { - uint8_t *s6 = in6->s6_addr; - char ifname[16 + 1]; - -#define X8 "%2"SCNx8 - if (sscanf(line, " "X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 - "%*x %*x %*x %*x %16s\n", - &s6[0], &s6[1], &s6[2], &s6[3], - &s6[4], &s6[5], &s6[6], &s6[7], - &s6[8], &s6[9], &s6[10], &s6[11], - &s6[12], &s6[13], &s6[14], &s6[15], - ifname) == 17 - && !strcmp(name, ifname)) - { - fclose(file); - return; + status = 0; + for (i = 0; i < N_NETDEV_CLASSES; i++) { + const struct netdev_class *class = netdev_classes[i]; + if (class->init) { + int retval = class->init(); + if (retval) { + VLOG_ERR("failed to initialize %s network device " + "class: %s", class->name, strerror(retval)); + if (!status) { + status = retval; + } + } + } } } - *in6 = in6addr_any; - - fclose(file); + return status; } -static int -do_ethtool(struct netdev *netdev, struct ethtool_cmd *ecmd, - int cmd, const char *cmd_name) +/* Performs periodic work needed by all the various kinds of netdevs. + * + * If your program opens any netdevs, it must call this function within its + * main poll loop. 
*/ +void +netdev_run(void) { - struct ifreq ifr; - - memset(&ifr, 0, sizeof ifr); - strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name); - ifr.ifr_data = (caddr_t) ecmd; - - ecmd->cmd = cmd; - COVERAGE_INC(netdev_ethtool); - if (ioctl(netdev->netdev_fd, SIOCETHTOOL, &ifr) == 0) { - return 0; - } else { - if (errno != EOPNOTSUPP) { - VLOG_WARN_RL(&rl, "ethtool command %s on network device %s " - "failed: %s", cmd_name, netdev->name, - strerror(errno)); - } else { - /* The device doesn't support this operation. That's pretty - * common, so there's no point in logging anything. */ + int i; + for (i = 0; i < N_NETDEV_CLASSES; i++) { + const struct netdev_class *class = netdev_classes[i]; + if (class->run) { + class->run(); } - return errno; } } -static int -do_get_features(struct netdev *netdev, - uint32_t *current, uint32_t *advertised, - uint32_t *supported, uint32_t *peer) +/* Arranges for poll_block() to wake up when netdev_run() needs to be called. + * + * If your program opens any netdevs, it must call this function within its + * main poll loop. 
*/ +void +netdev_wait(void) { - struct ethtool_cmd ecmd; - int error; - - *current = 0; - *supported = 0; - *advertised = 0; - *peer = 0; - - memset(&ecmd, 0, sizeof ecmd); - error = do_ethtool(netdev, &ecmd, ETHTOOL_GSET, "ETHTOOL_GSET"); - if (error) { - return error; - } - - if (ecmd.supported & SUPPORTED_10baseT_Half) { - *supported |= OFPPF_10MB_HD; - } - if (ecmd.supported & SUPPORTED_10baseT_Full) { - *supported |= OFPPF_10MB_FD; - } - if (ecmd.supported & SUPPORTED_100baseT_Half) { - *supported |= OFPPF_100MB_HD; - } - if (ecmd.supported & SUPPORTED_100baseT_Full) { - *supported |= OFPPF_100MB_FD; - } - if (ecmd.supported & SUPPORTED_1000baseT_Half) { - *supported |= OFPPF_1GB_HD; - } - if (ecmd.supported & SUPPORTED_1000baseT_Full) { - *supported |= OFPPF_1GB_FD; - } - if (ecmd.supported & SUPPORTED_10000baseT_Full) { - *supported |= OFPPF_10GB_FD; - } - if (ecmd.supported & SUPPORTED_TP) { - *supported |= OFPPF_COPPER; - } - if (ecmd.supported & SUPPORTED_FIBRE) { - *supported |= OFPPF_FIBER; - } - if (ecmd.supported & SUPPORTED_Autoneg) { - *supported |= OFPPF_AUTONEG; - } - if (ecmd.supported & SUPPORTED_Pause) { - *supported |= OFPPF_PAUSE; - } - if (ecmd.supported & SUPPORTED_Asym_Pause) { - *supported |= OFPPF_PAUSE_ASYM; - } - - /* Set the advertised features */ - if (ecmd.advertising & ADVERTISED_10baseT_Half) { - *advertised |= OFPPF_10MB_HD; - } - if (ecmd.advertising & ADVERTISED_10baseT_Full) { - *advertised |= OFPPF_10MB_FD; - } - if (ecmd.advertising & ADVERTISED_100baseT_Half) { - *advertised |= OFPPF_100MB_HD; - } - if (ecmd.advertising & ADVERTISED_100baseT_Full) { - *advertised |= OFPPF_100MB_FD; - } - if (ecmd.advertising & ADVERTISED_1000baseT_Half) { - *advertised |= OFPPF_1GB_HD; - } - if (ecmd.advertising & ADVERTISED_1000baseT_Full) { - *advertised |= OFPPF_1GB_FD; - } - if (ecmd.advertising & ADVERTISED_10000baseT_Full) { - *advertised |= OFPPF_10GB_FD; - } - if (ecmd.advertising & ADVERTISED_TP) { - *advertised |= OFPPF_COPPER; - 
} - if (ecmd.advertising & ADVERTISED_FIBRE) { - *advertised |= OFPPF_FIBER; - } - if (ecmd.advertising & ADVERTISED_Autoneg) { - *advertised |= OFPPF_AUTONEG; - } - if (ecmd.advertising & ADVERTISED_Pause) { - *advertised |= OFPPF_PAUSE; - } - if (ecmd.advertising & ADVERTISED_Asym_Pause) { - *advertised |= OFPPF_PAUSE_ASYM; - } - - /* Set the current features */ - if (ecmd.speed == SPEED_10) { - *current = (ecmd.duplex) ? OFPPF_10MB_FD : OFPPF_10MB_HD; - } - else if (ecmd.speed == SPEED_100) { - *current = (ecmd.duplex) ? OFPPF_100MB_FD : OFPPF_100MB_HD; - } - else if (ecmd.speed == SPEED_1000) { - *current = (ecmd.duplex) ? OFPPF_1GB_FD : OFPPF_1GB_HD; - } - else if (ecmd.speed == SPEED_10000) { - *current = OFPPF_10GB_FD; - } - - if (ecmd.port == PORT_TP) { - *current |= OFPPF_COPPER; - } - else if (ecmd.port == PORT_FIBRE) { - *current |= OFPPF_FIBER; - } - - if (ecmd.autoneg) { - *current |= OFPPF_AUTONEG; + int i; + for (i = 0; i < N_NETDEV_CLASSES; i++) { + const struct netdev_class *class = netdev_classes[i]; + if (class->wait) { + class->wait(); + } } - return 0; } /* Opens the network device named 'name' (e.g. "eth0") and returns zero if @@ -333,171 +128,40 @@ do_get_features(struct netdev *netdev, * the 'enum netdev_pseudo_ethertype' values to receive frames in one of those * categories. 
*/ int -netdev_open(const char *name, int ethertype, struct netdev **netdevp) +netdev_open(const char *name_, int ethertype, struct netdev **netdevp) { - if (strncmp(name, "tap:", 4)) { - return do_open_netdev(name, ethertype, -1, netdevp); - } else { - static const char tap_dev[] = "/dev/net/tun"; - struct ifreq ifr; - int error; - int tap_fd; - - tap_fd = open(tap_dev, O_RDWR); - if (tap_fd < 0) { - ovs_error(errno, "opening \"%s\" failed", tap_dev); - return errno; - } - - memset(&ifr, 0, sizeof ifr); - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - if (name) { - strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); - } - if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) { - int error = errno; - ovs_error(error, "ioctl(TUNSETIFF) on \"%s\" failed", tap_dev); - close(tap_fd); - return error; - } - - error = set_nonblocking(tap_fd); - if (error) { - ovs_error(error, "set_nonblocking on \"%s\" failed", tap_dev); - close(tap_fd); - return error; - } - - error = do_open_netdev(ifr.ifr_name, NETDEV_ETH_TYPE_NONE, tap_fd, - netdevp); - if (error) { - close(tap_fd); - } - return error; - } -} - - -static int -do_open_netdev(const char *name, int ethertype, int tap_fd, - struct netdev **netdev_) -{ - int netdev_fd; - struct sockaddr_ll sll; - struct ifreq ifr; - int ifindex = -1; - uint8_t etheraddr[ETH_ADDR_LEN]; - struct in6_addr in6; - int mtu; - int txqlen; - int hwaddr_family; + char *name = xstrdup(name_); + char *prefix, *suffix, *colon; + struct netdev *netdev = NULL; int error; - struct netdev *netdev; - - init_netdev(); - *netdev_ = NULL; - COVERAGE_INC(netdev_open); - - /* Create raw socket. */ - netdev_fd = socket(PF_PACKET, SOCK_RAW, - htons(ethertype == NETDEV_ETH_TYPE_NONE ? 0 - : ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL - : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2 - : ethertype)); - if (netdev_fd < 0) { - return errno; - } - - if (ethertype != NETDEV_ETH_TYPE_NONE) { - /* Set non-blocking mode. 
*/ - error = set_nonblocking(netdev_fd); - if (error) { - goto error_already_set; - } - - /* Get ethernet device index. */ - ifindex = do_get_ifindex(name); - if (ifindex < 0) { - return -ifindex; - } - - /* Bind to specific ethernet device. */ - memset(&sll, 0, sizeof sll); - sll.sll_family = AF_PACKET; - sll.sll_ifindex = ifindex; - if (bind(netdev_fd, (struct sockaddr *) &sll, sizeof sll) < 0) { - VLOG_ERR("bind to %s failed: %s", name, strerror(errno)); - goto error; - } - - /* Between the socket() and bind() calls above, the socket receives all - * packets of the requested type on all system interfaces. We do not - * want to receive that data, but there is no way to avoid it. So we - * must now drain out the receive queue. */ - error = drain_rcvbuf(netdev_fd); - if (error) { - goto error_already_set; - } - } + int i; - /* Get MAC address. */ - error = get_etheraddr(name, etheraddr, &hwaddr_family); + error = netdev_initialize(); if (error) { - goto error_already_set; - } - - /* Get MTU. */ - strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); - if (ioctl(netdev_fd, SIOCGIFMTU, &ifr) < 0) { - VLOG_ERR("ioctl(SIOCGIFMTU) on %s device failed: %s", - name, strerror(errno)); - goto error; + return error; } - mtu = ifr.ifr_mtu; - /* Get TX queue length. */ - if (ioctl(netdev_fd, SIOCGIFTXQLEN, &ifr) < 0) { - VLOG_ERR("ioctl(SIOCGIFTXQLEN) on %s device failed: %s", - name, strerror(errno)); - goto error; + colon = strchr(name, ':'); + if (colon) { + *colon = '\0'; + prefix = name; + suffix = colon + 1; + } else { + prefix = ""; + suffix = name; } - txqlen = ifr.ifr_qlen; - get_ipv6_address(name, &in6); - - /* Allocate network device. */ - netdev = xmalloc(sizeof *netdev); - netdev->name = xstrdup(name); - netdev->ifindex = ifindex; - netdev->txqlen = txqlen; - netdev->hwaddr_family = hwaddr_family; - netdev->netdev_fd = netdev_fd; - netdev->tap_fd = tap_fd < 0 ? 
netdev_fd : tap_fd; - memcpy(netdev->etheraddr, etheraddr, sizeof etheraddr); - netdev->mtu = mtu; - netdev->in6 = in6; - - /* Save flags to restore at close or exit. */ - error = get_flags(netdev->name, &netdev->save_flags); - if (error) { - goto error_already_set; + for (i = 0; i < N_NETDEV_CLASSES; i++) { + const struct netdev_class *class = netdev_classes[i]; + if (!strcmp(prefix, class->prefix)) { + error = class->open(name_, suffix, ethertype, &netdev); + goto exit; + } } - netdev->changed_flags = 0; - fatal_signal_block(); - list_push_back(&netdev_list, &netdev->node); - fatal_signal_unblock(); + error = EAFNOSUPPORT; - /* Success! */ - *netdev_ = netdev; - return 0; - -error: - error = errno; -error_already_set: - close(netdev_fd); - if (tap_fd >= 0) { - close(tap_fd); - } +exit: + *netdevp = error ? NULL : netdev; return error; } @@ -506,9 +170,10 @@ void netdev_close(struct netdev *netdev) { if (netdev) { - /* Bring down interface and drop promiscuous mode, if we brought up - * the interface or enabled promiscuous mode. */ + char *name; int error; + + /* Restore flags that we changed, if any. */ fatal_signal_block(); error = restore_flags(netdev); list_remove(&netdev->node); @@ -519,38 +184,62 @@ netdev_close(struct netdev *netdev) } /* Free. */ - free(netdev->name); - close(netdev->netdev_fd); - if (netdev->netdev_fd != netdev->tap_fd) { - close(netdev->tap_fd); - } - free(netdev); + name = netdev->name; + netdev->class->close(netdev); + free(name); } } -/* Checks whether a network device named 'name' exists and returns true if so, - * false otherwise. */ +/* Returns true if a network device named 'name' exists and may be opened, + * otherwise false. 
*/ bool netdev_exists(const char *name) { - struct stat s; - char *filename; + struct netdev *netdev; int error; - filename = xasprintf("/sys/class/net/%s", name); - error = stat(filename, &s); - free(filename); - return !error; + error = netdev_open(name, NETDEV_ETH_TYPE_NONE, &netdev); + if (!error) { + netdev_close(netdev); + return true; + } else { + if (error != ENODEV) { + VLOG_WARN("failed to open network device %s: %s", + name, strerror(error)); + } + return false; + } } -/* Pads 'buffer' out with zero-bytes to the minimum valid length of an - * Ethernet packet, if necessary. */ -static void -pad_to_minimum_length(struct ofpbuf *buffer) +/* Initializes 'svec' with a list of the names of all known network devices. */ +int +netdev_enumerate(struct svec *svec) { - if (buffer->size < ETH_TOTAL_MIN) { - ofpbuf_put_zeros(buffer, ETH_TOTAL_MIN - buffer->size); + int error; + int i; + + svec_init(svec); + + error = netdev_initialize(); + if (error) { + return error; } + + error = 0; + for (i = 0; i < N_NETDEV_CLASSES; i++) { + const struct netdev_class *class = netdev_classes[i]; + if (class->enumerate) { + int retval = class->enumerate(svec); + if (retval) { + VLOG_WARN("failed to enumerate %s network devices: %s", + class->name, strerror(retval)); + if (!error) { + error = retval; + } + } + } + } + return error; } /* Attempts to receive a packet from 'netdev' into 'buffer', which the caller @@ -568,31 +257,22 @@ pad_to_minimum_length(struct ofpbuf *buffer) int netdev_recv(struct netdev *netdev, struct ofpbuf *buffer) { - ssize_t n_bytes; + int retval; assert(buffer->size == 0); assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN); - do { - n_bytes = read(netdev->tap_fd, - ofpbuf_tail(buffer), ofpbuf_tailroom(buffer)); - } while (n_bytes < 0 && errno == EINTR); - if (n_bytes < 0) { - if (errno != EAGAIN) { - VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s", - strerror(errno), netdev->name); - } - return errno; - } else { + + retval = 
netdev->class->recv(netdev, + buffer->data, ofpbuf_tailroom(buffer)); + if (retval >= 0) { COVERAGE_INC(netdev_received); - buffer->size += n_bytes; - - /* When the kernel internally sends out an Ethernet frame on an - * interface, it gives us a copy *before* padding the frame to the - * minimum length. Thus, when it sends out something like an ARP - * request, we see a too-short frame. So pad it out to the minimum - * length. */ - pad_to_minimum_length(buffer); + buffer->size += retval; + if (buffer->size < ETH_TOTAL_MIN) { + ofpbuf_put_zeros(buffer, ETH_TOTAL_MIN - buffer->size); + } return 0; + } else { + return -retval; } } @@ -601,19 +281,14 @@ netdev_recv(struct netdev *netdev, struct ofpbuf *buffer) void netdev_recv_wait(struct netdev *netdev) { - poll_fd_wait(netdev->tap_fd, POLLIN); + netdev->class->recv_wait(netdev); } /* Discards all packets waiting to be received from 'netdev'. */ int netdev_drain(struct netdev *netdev) { - if (netdev->tap_fd != netdev->netdev_fd) { - drain_fd(netdev->tap_fd, netdev->txqlen); - return 0; - } else { - return drain_rcvbuf(netdev->netdev_fd); - } + return netdev->class->drain(netdev); } /* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive @@ -628,32 +303,11 @@ netdev_drain(struct netdev *netdev) int netdev_send(struct netdev *netdev, const struct ofpbuf *buffer) { - ssize_t n_bytes; - - do { - n_bytes = write(netdev->tap_fd, buffer->data, buffer->size); - } while (n_bytes < 0 && errno == EINTR); - - if (n_bytes < 0) { - /* The Linux AF_PACKET implementation never blocks waiting for room - * for packets, instead returning ENOBUFS. Translate this into EAGAIN - * for the caller. 
*/ - if (errno == ENOBUFS) { - return EAGAIN; - } else if (errno != EAGAIN) { - VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", - netdev->name, strerror(errno)); - } - return errno; - } else if (n_bytes != buffer->size) { - VLOG_WARN_RL(&rl, - "send partial Ethernet packet (%d bytes of %zu) on %s", - (int) n_bytes, buffer->size, netdev->name); - return EMSGSIZE; - } else { + int error = netdev->class->send(netdev, buffer->data, buffer->size); + if (!error) { COVERAGE_INC(netdev_sent); - return 0; } + return error; } /* Registers with the poll loop to wake up from the next call to poll_block() @@ -666,12 +320,7 @@ netdev_send(struct netdev *netdev, const struct ofpbuf *buffer) void netdev_send_wait(struct netdev *netdev) { - if (netdev->tap_fd == netdev->netdev_fd) { - poll_fd_wait(netdev->tap_fd, POLLOUT); - } else { - /* TAP device always accepts packets.*/ - poll_immediate_wake(); - } + return netdev->class->send_wait(netdev); } /* Attempts to set 'netdev''s MAC address to 'mac'. Returns 0 if successful, @@ -679,11 +328,7 @@ netdev_send_wait(struct netdev *netdev) int netdev_set_etheraddr(struct netdev *netdev, const uint8_t mac[ETH_ADDR_LEN]) { - int error = set_etheraddr(netdev->name, netdev->hwaddr_family, mac); - if (!error) { - memcpy(netdev->etheraddr, mac, ETH_ADDR_LEN); - } - return error; + return netdev->class->set_etheraddr(netdev, mac); } /* Retrieves 'netdev''s MAC address. 
If successful, returns 0 and copies the @@ -692,8 +337,7 @@ netdev_set_etheraddr(struct netdev *netdev, const uint8_t mac[ETH_ADDR_LEN]) int netdev_get_etheraddr(const struct netdev *netdev, uint8_t mac[ETH_ADDR_LEN]) { - memcpy(mac, netdev->etheraddr, ETH_ADDR_LEN); - return 0; + return netdev->class->get_etheraddr(netdev, mac); } /* Returns the name of the network device that 'netdev' represents, @@ -714,8 +358,13 @@ netdev_get_name(const struct netdev *netdev) int netdev_get_mtu(const struct netdev *netdev, int *mtup) { - *mtup = netdev->mtu; - return 0; + int error = netdev->class->get_mtu(netdev, mtup); + if (error) { + VLOG_WARN_RL(&rl, "failed to retrieve MTU for network device %s: %s", + netdev_get_name(netdev), strerror(error)); + *mtup = ETH_PAYLOAD_MAX; + } + return error; } /* Stores the features supported by 'netdev' into each of '*current', @@ -729,123 +378,45 @@ netdev_get_features(struct netdev *netdev, uint32_t *supported, uint32_t *peer) { uint32_t dummy[4]; - return do_get_features(netdev, - current ? current : &dummy[0], - advertised ? advertised : &dummy[1], - supported ? supported : &dummy[2], - peer ? peer : &dummy[3]); + return netdev->class->get_features(netdev, + current ? current : &dummy[0], + advertised ? advertised : &dummy[1], + supported ? supported : &dummy[2], + peer ? peer : &dummy[3]); } -/* Set the features advertised by 'netdev' to 'advertise'. */ +/* Set the features advertised by 'netdev' to 'advertise'. Returns 0 if + * successful, otherwise a positive errno value. 
*/ int netdev_set_advertisements(struct netdev *netdev, uint32_t advertise) { - struct ethtool_cmd ecmd; - int error; - - memset(&ecmd, 0, sizeof ecmd); - error = do_ethtool(netdev, &ecmd, ETHTOOL_GSET, "ETHTOOL_GSET"); - if (error) { - return error; - } - - ecmd.advertising = 0; - if (advertise & OFPPF_10MB_HD) { - ecmd.advertising |= ADVERTISED_10baseT_Half; - } - if (advertise & OFPPF_10MB_FD) { - ecmd.advertising |= ADVERTISED_10baseT_Full; - } - if (advertise & OFPPF_100MB_HD) { - ecmd.advertising |= ADVERTISED_100baseT_Half; - } - if (advertise & OFPPF_100MB_FD) { - ecmd.advertising |= ADVERTISED_100baseT_Full; - } - if (advertise & OFPPF_1GB_HD) { - ecmd.advertising |= ADVERTISED_1000baseT_Half; - } - if (advertise & OFPPF_1GB_FD) { - ecmd.advertising |= ADVERTISED_1000baseT_Full; - } - if (advertise & OFPPF_10GB_FD) { - ecmd.advertising |= ADVERTISED_10000baseT_Full; - } - if (advertise & OFPPF_COPPER) { - ecmd.advertising |= ADVERTISED_TP; - } - if (advertise & OFPPF_FIBER) { - ecmd.advertising |= ADVERTISED_FIBRE; - } - if (advertise & OFPPF_AUTONEG) { - ecmd.advertising |= ADVERTISED_Autoneg; - } - if (advertise & OFPPF_PAUSE) { - ecmd.advertising |= ADVERTISED_Pause; - } - if (advertise & OFPPF_PAUSE_ASYM) { - ecmd.advertising |= ADVERTISED_Asym_Pause; - } - return do_ethtool(netdev, &ecmd, ETHTOOL_SSET, "ETHTOOL_SSET"); + return (netdev->class->set_advertisements + ? netdev->class->set_advertisements(netdev, advertise) + : EOPNOTSUPP); } -/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address (if - * 'in4' is non-null) and returns 0. Otherwise, returns a positive errno value - * and sets '*in4' to INADDR_ANY (0). */ +/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address and + * returns 0. Otherwise, returns a positive errno value and sets '*in4' to 0 + * (INADDR_ANY). + * + * The following error values have well-defined meanings: + * + * - EADDRNOTAVAIL: 'netdev' has no assigned IPv4 address. 
+ * + * - EOPNOTSUPP: No IPv4 network stack attached to 'netdev'. + * + * 'in4' may be null, in which case the address itself is not reported. */ int netdev_get_in4(const struct netdev *netdev, struct in_addr *in4) { - const char *netdev_name = netdev_get_name(netdev); - struct ifreq ifr; - struct in_addr ip = { INADDR_ANY }; - int error; - - init_netdev(); - - strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); - ifr.ifr_addr.sa_family = AF_INET; - COVERAGE_INC(netdev_get_in4); - if (ioctl(af_inet_sock, SIOCGIFADDR, &ifr) == 0) { - struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr; - ip = sin->sin_addr; - error = ip.s_addr != INADDR_ANY ? 0 : EADDRNOTAVAIL; - } else { - VLOG_DBG_RL(&rl, "%s: ioctl(SIOCGIFADDR) failed: %s", - netdev_name, strerror(errno)); - error = errno; - } - if (in4) { - *in4 = ip; - } - return error; -} - -static void -make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr) -{ - struct sockaddr_in sin; - memset(&sin, 0, sizeof sin); - sin.sin_family = AF_INET; - sin.sin_addr = addr; - sin.sin_port = 0; - - memset(sa, 0, sizeof *sa); - memcpy(sa, &sin, sizeof sin); -} - -static int -do_set_addr(struct netdev *netdev, int sock, - int ioctl_nr, const char *ioctl_name, struct in_addr addr) -{ - struct ifreq ifr; + struct in_addr dummy; int error; - strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name); - make_in4_sockaddr(&ifr.ifr_addr, addr); - COVERAGE_INC(netdev_set_in4); - error = ioctl(sock, ioctl_nr, &ifr) < 0 ? errno : 0; - if (error) { - VLOG_WARN("ioctl(%s): %s", ioctl_name, strerror(error)); + error = (netdev->class->get_in4 + ? netdev->class->get_in4(netdev, in4 ? 
in4 : &dummy) + : EOPNOTSUPP); + if (error && in4) { + in4->s_addr = 0; } return error; } @@ -856,86 +427,46 @@ do_set_addr(struct netdev *netdev, int sock, int netdev_set_in4(struct netdev *netdev, struct in_addr addr, struct in_addr mask) { - int error; - - error = do_set_addr(netdev, af_inet_sock, - SIOCSIFADDR, "SIOCSIFADDR", addr); - if (!error && addr.s_addr != INADDR_ANY) { - error = do_set_addr(netdev, af_inet_sock, - SIOCSIFNETMASK, "SIOCSIFNETMASK", mask); - } - return error; + return (netdev->class->set_in4 + ? netdev->class->set_in4(netdev, addr, mask) + : EOPNOTSUPP); } /* Adds 'router' as a default IP gateway for the TCP/IP stack that corresponds * to 'netdev'. */ int -netdev_add_router(struct netdev *netdev UNUSED, struct in_addr router) +netdev_add_router(struct netdev *netdev, struct in_addr router) { - struct in_addr any = { INADDR_ANY }; - struct rtentry rt; - int error; - - memset(&rt, 0, sizeof rt); - make_in4_sockaddr(&rt.rt_dst, any); - make_in4_sockaddr(&rt.rt_gateway, router); - make_in4_sockaddr(&rt.rt_genmask, any); - rt.rt_flags = RTF_UP | RTF_GATEWAY; COVERAGE_INC(netdev_add_router); - error = ioctl(af_inet_sock, SIOCADDRT, &rt) < 0 ? errno : 0; - if (error) { - VLOG_WARN("ioctl(SIOCADDRT): %s", strerror(error)); - } - return error; + return (netdev->class->add_router + ? netdev->class->add_router(netdev, router) + : EOPNOTSUPP); } -/* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address (if - * 'in6' is non-null) and returns true. Otherwise, returns false. */ -bool -netdev_get_in6(const struct netdev *netdev, struct in6_addr *in6) -{ - if (in6) { - *in6 = netdev->in6; - } - return memcmp(&netdev->in6, &in6addr_any, sizeof netdev->in6) != 0; -} - -/* Obtains the current flags for 'netdev' and stores them into '*flagsp'. - * Returns 0 if successful, otherwise a positive errno value. On failure, - * stores 0 into '*flagsp'. */ +/* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address and + * returns 0. 
Otherwise, returns a positive errno value and sets '*in6' to + * all-zero-bits (in6addr_any). + * + * The following error values have well-defined meanings: + * + * - EADDRNOTAVAIL: 'netdev' has no assigned IPv6 address. + * + * - EOPNOTSUPP: No IPv6 network stack attached to 'netdev'. + * + * 'in6' may be null, in which case the address itself is not reported. */ int -netdev_get_flags(const struct netdev *netdev, enum netdev_flags *flagsp) +netdev_get_in6(const struct netdev *netdev, struct in6_addr *in6) { - int error, flags; - - init_netdev(); - - *flagsp = 0; - error = get_flags(netdev_get_name(netdev), &flags); - if (error) { - return error; - } + struct in6_addr dummy; + int error; - if (flags & IFF_UP) { - *flagsp |= NETDEV_UP; + error = (netdev->class->get_in6 + ? netdev->class->get_in6(netdev, in6 ? in6 : &dummy) + : EOPNOTSUPP); + if (error && in6) { + memset(in6, 0, sizeof *in6); } - if (flags & IFF_PROMISC) { - *flagsp |= NETDEV_PROMISC; - } - return 0; -} - -static int -nd_to_iff_flags(enum netdev_flags nd) -{ - int iff = 0; - if (nd & NETDEV_UP) { - iff |= IFF_UP; - } - if (nd & NETDEV_PROMISC) { - iff |= IFF_PROMISC; - } - return iff; + return error; } /* On 'netdev', turns off the flags in 'off' and then turns on the flags in @@ -944,26 +475,44 @@ nd_to_iff_flags(enum netdev_flags nd) * successful, otherwise a positive errno value. 
*/ static int do_update_flags(struct netdev *netdev, enum netdev_flags off, - enum netdev_flags on, bool permanent) + enum netdev_flags on, enum netdev_flags *old_flagsp, + bool permanent) { - int old_flags, new_flags; + enum netdev_flags old_flags; int error; - error = get_flags(netdev->name, &old_flags); + error = netdev->class->update_flags(netdev, off & ~on, on, &old_flags); if (error) { - return error; - } - - new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on); - if (!permanent) { - netdev->changed_flags |= new_flags ^ old_flags; + VLOG_WARN_RL(&rl, "failed to %s flags for network device %s: %s", + off || on ? "set" : "get", netdev_get_name(netdev), + strerror(error)); + old_flags = 0; + } else if ((off || on) && !permanent) { + enum netdev_flags new_flags = (old_flags & ~off) | on; + enum netdev_flags changed_flags = old_flags ^ new_flags; + if (changed_flags) { + if (!netdev->changed_flags) { + netdev->save_flags = old_flags; + } + netdev->changed_flags |= changed_flags; + } } - if (new_flags != old_flags) { - error = set_flags(netdev->name, new_flags); + if (old_flagsp) { + *old_flagsp = old_flags; } return error; } +/* Obtains the current flags for 'netdev' and stores them into '*flagsp'. + * Returns 0 if successful, otherwise a positive errno value. On failure, + * stores 0 into '*flagsp'. */ +int +netdev_get_flags(const struct netdev *netdev_, enum netdev_flags *flagsp) +{ + struct netdev *netdev = (struct netdev *) netdev_; + return do_update_flags(netdev, 0, 0, flagsp, false); +} + /* Sets the flags for 'netdev' to 'flags'. * If 'permanent' is true, the changes will persist; otherwise, they * will be reverted when 'netdev' is closed or the program exits. @@ -972,7 +521,7 @@ int netdev_set_flags(struct netdev *netdev, enum netdev_flags flags, bool permanent) { - return do_update_flags(netdev, -1, flags, permanent); + return do_update_flags(netdev, -1, flags, NULL, permanent); } /* Turns on the specified 'flags' on 'netdev'. 
@@ -983,7 +532,7 @@ int netdev_turn_flags_on(struct netdev *netdev, enum netdev_flags flags, bool permanent) { - return do_update_flags(netdev, 0, flags, permanent); + return do_update_flags(netdev, 0, flags, NULL, permanent); } /* Turns off the specified 'flags' on 'netdev'. @@ -994,209 +543,37 @@ int netdev_turn_flags_off(struct netdev *netdev, enum netdev_flags flags, bool permanent) { - return do_update_flags(netdev, flags, 0, permanent); + return do_update_flags(netdev, flags, 0, NULL, permanent); } /* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be * successfully retrieved, it stores the corresponding MAC address in 'mac' and * returns 0. Otherwise, it returns a positive errno value; in particular, - * ENXIO indicates that there is not ARP table entry for 'ip' on 'netdev'. */ + * ENXIO indicates that there is no ARP table entry for 'ip' on 'netdev'. */ int -netdev_arp_lookup(const struct netdev *netdev, uint32_t ip, - uint8_t mac[ETH_ADDR_LEN]) +netdev_arp_lookup(const struct netdev *netdev, + uint32_t ip, uint8_t mac[ETH_ADDR_LEN]) { - const char *netdev_name = netdev_get_name(netdev); - struct arpreq r; - struct sockaddr_in *pa; - int retval; - - init_netdev(); - - memset(&r, 0, sizeof r); - pa = (struct sockaddr_in *) &r.arp_pa; - pa->sin_family = AF_INET; - pa->sin_addr.s_addr = ip; - pa->sin_port = 0; - r.arp_ha.sa_family = ARPHRD_ETHER; - r.arp_flags = 0; - strncpy(r.arp_dev, netdev_name, sizeof r.arp_dev); - COVERAGE_INC(netdev_arp_lookup); - retval = ioctl(af_inet_sock, SIOCGARP, &r) < 0 ? 
errno : 0; - if (!retval) { - memcpy(mac, r.arp_ha.sa_data, ETH_ADDR_LEN); - } else if (retval != ENXIO) { - VLOG_WARN_RL(&rl, "%s: could not look up ARP entry for "IP_FMT": %s", - netdev_name, IP_ARGS(&ip), strerror(retval)); - } - return retval; -} - -static int -get_stats_via_netlink(int ifindex, struct netdev_stats *stats) -{ - struct ofpbuf request; - struct ofpbuf *reply; - struct ifinfomsg *ifi; - const struct rtnl_link_stats *rtnl_stats; - struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; - int error; - - ofpbuf_init(&request, 0); - nl_msg_put_nlmsghdr(&request, rtnl_sock, sizeof *ifi, - RTM_GETLINK, NLM_F_REQUEST); - ifi = ofpbuf_put_zeros(&request, sizeof *ifi); - ifi->ifi_family = PF_UNSPEC; - ifi->ifi_index = ifindex; - error = nl_sock_transact(rtnl_sock, &request, &reply); - ofpbuf_uninit(&request); + int error = (netdev->class->arp_lookup + ? netdev->class->arp_lookup(netdev, ip, mac) + : EOPNOTSUPP); if (error) { - return error; - } - - if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg), - rtnlgrp_link_policy, - attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { - ofpbuf_delete(reply); - return EPROTO; - } - - if (!attrs[IFLA_STATS]) { - VLOG_WARN_RL(&rl, "RTM_GETLINK reply lacks stats"); - return EPROTO; + memset(mac, 0, ETH_ADDR_LEN); } - - rtnl_stats = nl_attr_get(attrs[IFLA_STATS]); - stats->rx_packets = rtnl_stats->rx_packets; - stats->tx_packets = rtnl_stats->tx_packets; - stats->rx_bytes = rtnl_stats->rx_bytes; - stats->tx_bytes = rtnl_stats->tx_bytes; - stats->rx_errors = rtnl_stats->rx_errors; - stats->tx_errors = rtnl_stats->tx_errors; - stats->rx_dropped = rtnl_stats->rx_dropped; - stats->tx_dropped = rtnl_stats->tx_dropped; - stats->multicast = rtnl_stats->multicast; - stats->collisions = rtnl_stats->collisions; - stats->rx_length_errors = rtnl_stats->rx_length_errors; - stats->rx_over_errors = rtnl_stats->rx_over_errors; - stats->rx_crc_errors = rtnl_stats->rx_crc_errors; - stats->rx_frame_errors = 
rtnl_stats->rx_frame_errors; - stats->rx_fifo_errors = rtnl_stats->rx_fifo_errors; - stats->rx_missed_errors = rtnl_stats->rx_missed_errors; - stats->tx_aborted_errors = rtnl_stats->tx_aborted_errors; - stats->tx_carrier_errors = rtnl_stats->tx_carrier_errors; - stats->tx_fifo_errors = rtnl_stats->tx_fifo_errors; - stats->tx_heartbeat_errors = rtnl_stats->tx_heartbeat_errors; - stats->tx_window_errors = rtnl_stats->tx_window_errors; - - return 0; -} - -static int -get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats) -{ - static const char fn[] = "/proc/net/dev"; - char line[1024]; - FILE *stream; - int ln; - - stream = fopen(fn, "r"); - if (!stream) { - VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno)); - return errno; - } - - ln = 0; - while (fgets(line, sizeof line, stream)) { - if (++ln >= 3) { - char devname[16]; -#define X64 "%"SCNu64 - if (sscanf(line, - " %15[^:]:" - X64 X64 X64 X64 X64 X64 X64 "%*u" - X64 X64 X64 X64 X64 X64 X64 "%*u", - devname, - &stats->rx_bytes, - &stats->rx_packets, - &stats->rx_errors, - &stats->rx_dropped, - &stats->rx_fifo_errors, - &stats->rx_frame_errors, - &stats->multicast, - &stats->tx_bytes, - &stats->tx_packets, - &stats->tx_errors, - &stats->tx_dropped, - &stats->tx_fifo_errors, - &stats->collisions, - &stats->tx_carrier_errors) != 15) { - VLOG_WARN_RL(&rl, "%s:%d: parse error", fn, ln); - } else if (!strcmp(devname, netdev_name)) { - stats->rx_length_errors = UINT64_MAX; - stats->rx_over_errors = UINT64_MAX; - stats->rx_crc_errors = UINT64_MAX; - stats->rx_missed_errors = UINT64_MAX; - stats->tx_aborted_errors = UINT64_MAX; - stats->tx_heartbeat_errors = UINT64_MAX; - stats->tx_window_errors = UINT64_MAX; - fclose(stream); - return 0; - } - } - } - VLOG_WARN_RL(&rl, "%s: no stats for %s", fn, netdev_name); - fclose(stream); - return ENODEV; + return error; } -/* Sets 'carrier' to true if carrier is active (link light is on) on +/* Sets 'carrier' to true if carrier is active (link light is on) 
on * 'netdev'. */ int netdev_get_carrier(const struct netdev *netdev, bool *carrier) { - char line[8]; - int retval; - int error; - char *fn; - int fd; - - *carrier = false; - - fn = xasprintf("/sys/class/net/%s/carrier", netdev_get_name(netdev)); - fd = open(fn, O_RDONLY); - if (fd < 0) { - error = errno; - VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(error)); - goto exit; - } - - retval = read(fd, line, sizeof line); - if (retval < 0) { - error = errno; - if (error == EINVAL) { - /* This is the normal return value when we try to check carrier if - * the network device is not up. */ - } else { - VLOG_WARN_RL(&rl, "%s: read failed: %s", fn, strerror(error)); - } - goto exit_close; - } else if (retval == 0) { - error = EPROTO; - VLOG_WARN_RL(&rl, "%s: unexpected end of file", fn); - goto exit_close; - } - - if (line[0] != '0' && line[0] != '1') { - error = EPROTO; - VLOG_WARN_RL(&rl, "%s: value is %c (expected 0 or 1)", fn, line[0]); - goto exit_close; + int error = (netdev->class->get_carrier + ? netdev->class->get_carrier(netdev, carrier) + : EOPNOTSUPP); + if (error) { + *carrier = false; } - *carrier = line[0] != '0'; - error = 0; - -exit_close: - close(fd); -exit: - free(fn); return error; } @@ -1207,98 +584,42 @@ netdev_get_stats(const struct netdev *netdev, struct netdev_stats *stats) int error; COVERAGE_INC(netdev_get_stats); - if (use_netlink_stats) { - int ifindex; - - error = get_ifindex(netdev, &ifindex); - if (!error) { - error = get_stats_via_netlink(ifindex, stats); - } - } else { - error = get_stats_via_proc(netdev->name, stats); - } - + error = (netdev->class->get_stats + ? 
netdev->class->get_stats(netdev, stats) + : EOPNOTSUPP); if (error) { memset(stats, 0xff, sizeof *stats); } return error; } -#define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress" -#define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst %dk mtu 65535 drop flowid :1" -/* We redirect stderr to /dev/null because we often want to remove all - * traffic control configuration on a port so its in a known state. If - * this done when there is no such configuration, tc complains, so we just - * always ignore it. - */ -#define POLICE_DEL_CMD "/sbin/tc qdisc del dev %s handle ffff: ingress 2>/dev/null" - -/* Attempts to set input rate limiting (policing) policy. */ +/* Attempts to set input rate limiting (policing) policy, such that up to + * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative burst + * size of 'kbits_burst' kb. */ int netdev_set_policing(struct netdev *netdev, uint32_t kbits_rate, uint32_t kbits_burst) { - const char *netdev_name = netdev_get_name(netdev); - char command[1024]; - - init_netdev(); - - COVERAGE_INC(netdev_set_policing); - if (kbits_rate) { - if (!kbits_burst) { - /* Default to 10 kilobits if not specified. */ - kbits_burst = 10; - } - - /* xxx This should be more careful about only adding if it - * xxx actually exists, as opposed to always deleting it.
*/ - snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name); - if (system(command) == -1) { - VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name); - } - - snprintf(command, sizeof(command), POLICE_ADD_CMD, netdev_name); - if (system(command) != 0) { - VLOG_WARN_RL(&rl, "%s: problem adding policing", netdev_name); - return -1; - } - - snprintf(command, sizeof(command), POLICE_CONFIG_CMD, netdev_name, - kbits_rate, kbits_burst); - if (system(command) != 0) { - VLOG_WARN_RL(&rl, "%s: problem configuring policing", - netdev_name); - return -1; - } - } else { - snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name); - if (system(command) == -1) { - VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name); - } - } - - return 0; + return (netdev->class->set_policing + ? netdev->class->set_policing(netdev, kbits_rate, kbits_burst) + : EOPNOTSUPP); } -/* Initializes 'svec' with a list of the names of all known network devices. */ -void -netdev_enumerate(struct svec *svec) +/* If 'netdev' is a VLAN network device (e.g. one created with vconfig(8)), + * sets '*vlan_vid' to the VLAN VID associated with that device and returns 0. + * Otherwise returns an errno value (specifically ENOENT if 'netdev' is a + * network device that is not a VLAN device) and sets '*vlan_vid' to + * 0. */ +int +netdev_get_vlan_vid(const struct netdev *netdev, int *vlan_vid) { - struct if_nameindex *names; - - svec_init(svec); - names = if_nameindex(); - if (names) { - size_t i; - - for (i = 0; names[i].if_name != NULL; i++) { - svec_add(svec, names[i].if_name); - } - if_freenameindex(names); - } else { - VLOG_WARN("could not obtain list of network device names: %s", - strerror(errno)); + int error = (netdev->class->get_vlan_vid + ?
netdev->class->get_vlan_vid(netdev, vlan_vid) + : ENOENT); + if (error) { + *vlan_vid = 0; } + return error; } /* Returns a network device that has 'in4' as its IP address, if one exists, @@ -1328,209 +649,162 @@ exit: svec_destroy(&dev_list); return netdev; } - -/* If 'netdev' is a VLAN network device (e.g. one created with vconfig(8)), - * sets '*vlan_vid' to the VLAN VID associated with that device and returns 0. - * Otherwise returns a errno value (specifically ENOENT if 'netdev_name' is the - * name of a network device that is not a VLAN device) and sets '*vlan_vid' to - * -1. */ -int -netdev_get_vlan_vid(const struct netdev *netdev, int *vlan_vid) + +/* Initializes 'netdev' as a netdev named 'name' of the specified 'class'. + * + * This function adds 'netdev' to a netdev-owned linked list, so it is very + * important that 'netdev' only be freed after calling netdev_close(). */ +void +netdev_init(struct netdev *netdev, const char *name, + const struct netdev_class *class) { - struct ds line = DS_EMPTY_INITIALIZER; - FILE *stream = NULL; - int error; - char *fn; - - COVERAGE_INC(netdev_get_vlan_vid); - fn = xasprintf("/proc/net/vlan/%s", netdev_get_name(netdev)); - stream = fopen(fn, "r"); - if (!stream) { - error = errno; - goto done; - } - - if (ds_get_line(&line, stream)) { - if (ferror(stream)) { - error = errno; - VLOG_ERR_RL(&rl, "error reading \"%s\": %s", fn, strerror(errno)); - } else { - error = EPROTO; - VLOG_ERR_RL(&rl, "unexpected end of file reading \"%s\"", fn); - } - goto done; - } - - if (!sscanf(ds_cstr(&line), "%*s VID: %d", vlan_vid)) { - error = EPROTO; - VLOG_ERR_RL(&rl, "parse error reading \"%s\" line 1: \"%s\"", - fn, ds_cstr(&line)); - goto done; - } - - error = 0; + netdev->class = class; + netdev->name = xstrdup(name); + netdev->save_flags = 0; + netdev->changed_flags = 0; + list_push_back(&netdev_list, &netdev->node); +} -done: - free(fn); - if (stream) { - fclose(stream); - } - ds_destroy(&line); - if (error) { - *vlan_vid = -1; - 
} - return error; +/* Initializes 'notifier' as a netdev notifier for 'netdev', for which + * notification will consist of calling 'cb', with auxiliary data 'aux'. */ +void +netdev_notifier_init(struct netdev_notifier *notifier, struct netdev *netdev, + void (*cb)(struct netdev_notifier *), void *aux) +{ + notifier->netdev = netdev; + notifier->cb = cb; + notifier->aux = aux; } +/* Tracks changes in the status of a set of network devices. */ struct netdev_monitor { - struct linux_netdev_notifier notifier; struct shash polled_netdevs; struct shash changed_netdevs; }; -static void netdev_monitor_change(const struct linux_netdev_change *change, - void *monitor); - -int -netdev_monitor_create(struct netdev_monitor **monitorp) +/* Creates and returns a new structure for monitoring changes in the status of + * network devices. */ +struct netdev_monitor * +netdev_monitor_create(void) { - struct netdev_monitor *monitor; - int error; - - monitor = xmalloc(sizeof *monitor); - error = linux_netdev_notifier_register(&monitor->notifier, - netdev_monitor_change, monitor); - if (error) { - free(monitor); - return error; - } + struct netdev_monitor *monitor = xmalloc(sizeof *monitor); shash_init(&monitor->polled_netdevs); shash_init(&monitor->changed_netdevs); - *monitorp = monitor; - return 0; + return monitor; } +/* Destroys 'monitor'.
*/ void netdev_monitor_destroy(struct netdev_monitor *monitor) { if (monitor) { - linux_netdev_notifier_unregister(&monitor->notifier); + struct shash_node *node; + + SHASH_FOR_EACH (node, &monitor->polled_netdevs) { + struct netdev_notifier *notifier = node->data; + notifier->netdev->class->poll_remove(notifier); + } + shash_destroy(&monitor->polled_netdevs); + shash_destroy(&monitor->changed_netdevs); free(monitor); } } -void +static void +netdev_monitor_cb(struct netdev_notifier *notifier) +{ + struct netdev_monitor *monitor = notifier->aux; + const char *name = netdev_get_name(notifier->netdev); + if (!shash_find(&monitor->changed_netdevs, name)) { + shash_add(&monitor->changed_netdevs, name, NULL); + } +} + +/* Attempts to add 'netdev' as a netdev monitored by 'monitor'. Returns 0 if + * successful, otherwise a positive errno value. + * + * Adding a given 'netdev' to a monitor multiple times is equivalent to adding + * it once. */ +int netdev_monitor_add(struct netdev_monitor *monitor, struct netdev *netdev) { - if (!shash_find(&monitor->polled_netdevs, netdev_get_name(netdev))) { - shash_add(&monitor->polled_netdevs, netdev_get_name(netdev), NULL); + const char *netdev_name = netdev_get_name(netdev); + int error = 0; + if (!shash_find(&monitor->polled_netdevs, netdev_name) + && netdev->class->poll_add) + { + struct netdev_notifier *notifier; + error = netdev->class->poll_add(netdev, netdev_monitor_cb, monitor, + &notifier); + if (!error) { + assert(notifier->netdev == netdev); + shash_add(&monitor->polled_netdevs, netdev_name, notifier); + } } + return error; } +/* Removes 'netdev' from the set of netdevs monitored by 'monitor'. (This has + * no effect if 'netdev' is not in the set of devices monitored by + * 'monitor'.)
*/ void netdev_monitor_remove(struct netdev_monitor *monitor, struct netdev *netdev) { + const char *netdev_name = netdev_get_name(netdev); struct shash_node *node; - node = shash_find(&monitor->polled_netdevs, netdev_get_name(netdev)); + node = shash_find(&monitor->polled_netdevs, netdev_name); if (node) { + /* Cancel future notifications. */ + struct netdev_notifier *notifier = node->data; + netdev->class->poll_remove(notifier); shash_delete(&monitor->polled_netdevs, node); - node = shash_find(&monitor->changed_netdevs, netdev_get_name(netdev)); + + /* Drop any pending notification. */ + node = shash_find(&monitor->changed_netdevs, netdev_name); if (node) { shash_delete(&monitor->changed_netdevs, node); } } } +/* Checks for changes to netdevs in the set monitored by 'monitor'. If any of + * the attributes (Ethernet address, carrier status, speed or peer-advertised + * speed, flags, etc.) of a network device monitored by 'monitor' has changed, + * sets '*devnamep' to the name of a device that has changed and returns 0. + * The caller is responsible for freeing '*devnamep' (with free()). + * + * If no devices have changed, sets '*devnamep' to NULL and returns EAGAIN. + */ int netdev_monitor_poll(struct netdev_monitor *monitor, char **devnamep) { - int error = linux_netdev_notifier_get_error(&monitor->notifier); - *devnamep = NULL; - if (!error) { - struct shash_node *node = shash_first(&monitor->changed_netdevs); - if (!node) { - return EAGAIN; - } + struct shash_node *node = shash_first(&monitor->changed_netdevs); + if (!node) { + *devnamep = NULL; + return EAGAIN; + } else { *devnamep = xstrdup(node->name); shash_delete(&monitor->changed_netdevs, node); - } else { - shash_clear(&monitor->changed_netdevs); + return 0; } - return error; } +/* Registers with the poll loop to wake up from the next call to poll_block() + * when netdev_monitor_poll(monitor) would indicate that a device has + * changed. 
*/ void netdev_monitor_poll_wait(const struct netdev_monitor *monitor) { - if (!shash_is_empty(&monitor->changed_netdevs) - || linux_netdev_notifier_peek_error(&monitor->notifier)) { + if (!shash_is_empty(&monitor->changed_netdevs)) { poll_immediate_wake(); } else { - linux_netdev_notifier_wait(); - } -} - -static void -netdev_monitor_change(const struct linux_netdev_change *change, void *monitor_) -{ - struct netdev_monitor *monitor = monitor_; - if (shash_find(&monitor->polled_netdevs, change->ifname) - && !shash_find(&monitor->changed_netdevs, change->ifname)) { - shash_add(&monitor->changed_netdevs, change->ifname, NULL); + /* XXX Nothing needed here for netdev_linux, but maybe other netdev + * classes need help. */ } } -static void restore_all_flags(void *aux); - -/* Set up a signal hook to restore network device flags on program - * termination. */ -static void -init_netdev(void) -{ - static bool inited; - if (!inited) { - int ifindex; - int error; - - inited = true; - - fatal_signal_add_hook(restore_all_flags, NULL, true); - - af_inet_sock = socket(AF_INET, SOCK_DGRAM, 0); - if (af_inet_sock < 0) { - ovs_fatal(errno, "socket(AF_INET)"); - } - - error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &rtnl_sock); - if (error) { - ovs_fatal(error, "socket(AF_NETLINK, NETLINK_ROUTE)"); - } - - /* Decide on the netdev_get_stats() implementation to use. Netlink is - * preferable, so if that works, we'll use it. 
*/ - ifindex = do_get_ifindex("lo"); - if (ifindex < 0) { - VLOG_WARN("failed to get ifindex for lo, " - "obtaining netdev stats from proc"); - use_netlink_stats = false; - } else { - struct netdev_stats stats; - error = get_stats_via_netlink(ifindex, &stats); - if (!error) { - VLOG_DBG("obtaining netdev stats via rtnetlink"); - use_netlink_stats = true; - } else { - VLOG_INFO("RTM_GETLINK failed (%s), obtaining netdev stats " - "via proc (you are probably running a pre-2.6.19 " - "kernel)", strerror(error)); - use_netlink_stats = false; - } - } - } -} - /* Restore the network device flags on 'netdev' to those that were active * before we changed them. Returns 0 if successful, otherwise a positive * errno value. @@ -1539,27 +813,13 @@ init_netdev(void) static int restore_flags(struct netdev *netdev) { - struct ifreq ifr; - int restore_flags; - - /* Get current flags. */ - strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name); - COVERAGE_INC(netdev_get_flags); - if (ioctl(netdev->netdev_fd, SIOCGIFFLAGS, &ifr) < 0) { - return errno; - } - - /* Restore flags that we might have changed, if necessary. 
*/ - restore_flags = netdev->changed_flags & (IFF_PROMISC | IFF_UP); - if ((ifr.ifr_flags ^ netdev->save_flags) & restore_flags) { - ifr.ifr_flags &= ~restore_flags; - ifr.ifr_flags |= netdev->save_flags & restore_flags; - COVERAGE_INC(netdev_set_flags); - if (ioctl(netdev->netdev_fd, SIOCSIFFLAGS, &ifr) < 0) { - return errno; - } + if (netdev->changed_flags) { + enum netdev_flags restore = netdev->save_flags & netdev->changed_flags; + enum netdev_flags old_flags; + return netdev->class->update_flags(netdev, + netdev->changed_flags & ~restore, + restore, &old_flags); } - return 0; } @@ -1573,109 +833,3 @@ restore_all_flags(void *aux UNUSED) restore_flags(netdev); } } - -static int -get_flags(const char *netdev_name, int *flags) -{ - struct ifreq ifr; - strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); - COVERAGE_INC(netdev_get_flags); - if (ioctl(af_inet_sock, SIOCGIFFLAGS, &ifr) < 0) { - VLOG_ERR("ioctl(SIOCGIFFLAGS) on %s device failed: %s", - netdev_name, strerror(errno)); - return errno; - } - *flags = ifr.ifr_flags; - return 0; -} - -static int -set_flags(const char *netdev_name, int flags) -{ - struct ifreq ifr; - strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); - ifr.ifr_flags = flags; - COVERAGE_INC(netdev_set_flags); - if (ioctl(af_inet_sock, SIOCSIFFLAGS, &ifr) < 0) { - VLOG_ERR("ioctl(SIOCSIFFLAGS) on %s device failed: %s", - netdev_name, strerror(errno)); - return errno; - } - return 0; -} - -static int -do_get_ifindex(const char *netdev_name) -{ - struct ifreq ifr; - - strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); - COVERAGE_INC(netdev_get_ifindex); - if (ioctl(af_inet_sock, SIOCGIFINDEX, &ifr) < 0) { - VLOG_WARN_RL(&rl, "ioctl(SIOCGIFINDEX) on %s device failed: %s", - netdev_name, strerror(errno)); - return -errno; - } - return ifr.ifr_ifindex; -} - -static int -get_ifindex(const struct netdev *netdev, int *ifindexp) -{ - *ifindexp = 0; - if (netdev->ifindex < 0) { - int ifindex = do_get_ifindex(netdev->name); - if (ifindex < 
0) { - return -ifindex; - } - ((struct netdev *) netdev)->ifindex = ifindex; - } - *ifindexp = netdev->ifindex; - return 0; -} - -static int -get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN], - int *hwaddr_familyp) -{ - struct ifreq ifr; - - *hwaddr_familyp = 0; - memset(&ifr, 0, sizeof ifr); - strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); - COVERAGE_INC(netdev_get_hwaddr); - if (ioctl(af_inet_sock, SIOCGIFHWADDR, &ifr) < 0) { - VLOG_ERR("ioctl(SIOCGIFHWADDR) on %s device failed: %s", - netdev_name, strerror(errno)); - return errno; - } - if (hwaddr_familyp) { - int hwaddr_family = ifr.ifr_hwaddr.sa_family; - *hwaddr_familyp = hwaddr_family; - if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) { - VLOG_WARN("%s device has unknown hardware address family %d", - netdev_name, hwaddr_family); - } - } - memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN); - return 0; -} - -static int -set_etheraddr(const char *netdev_name, int hwaddr_family, - const uint8_t mac[ETH_ADDR_LEN]) -{ - struct ifreq ifr; - - memset(&ifr, 0, sizeof ifr); - strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); - ifr.ifr_hwaddr.sa_family = hwaddr_family; - memcpy(ifr.ifr_hwaddr.sa_data, mac, ETH_ADDR_LEN); - COVERAGE_INC(netdev_set_hwaddr); - if (ioctl(af_inet_sock, SIOCSIFHWADDR, &ifr) < 0) { - VLOG_ERR("ioctl(SIOCSIFHWADDR) on %s device failed: %s", - netdev_name, strerror(errno)); - return errno; - } - return 0; -} diff --git a/lib/netdev.h b/lib/netdev.h index 5c98b156..b66d7bc0 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -44,6 +44,9 @@ enum netdev_pseudo_ethertype { NETDEV_ETH_TYPE_802_2 /* Receive all IEEE 802.2 frames. */ }; +/* Network device statistics. + * + * Values of unsupported statistics are set to all-1-bits (UINT64_MAX). */ struct netdev_stats { uint64_t rx_packets; /* Total packets received. */ uint64_t tx_packets; /* Total packets transmitted. 
*/ @@ -74,47 +77,57 @@ struct netdev_stats { struct netdev; +int netdev_initialize(void); +void netdev_run(void); +void netdev_wait(void); + int netdev_open(const char *name, int ethertype, struct netdev **); void netdev_close(struct netdev *); -bool netdev_exists(const char *netdev_name); +bool netdev_exists(const char *name); + +int netdev_enumerate(struct svec *); + +const char *netdev_get_name(const struct netdev *); +int netdev_get_mtu(const struct netdev *, int *mtup); int netdev_recv(struct netdev *, struct ofpbuf *); void netdev_recv_wait(struct netdev *); int netdev_drain(struct netdev *); + int netdev_send(struct netdev *, const struct ofpbuf *); void netdev_send_wait(struct netdev *); + int netdev_set_etheraddr(struct netdev *, const uint8_t mac[6]); int netdev_get_etheraddr(const struct netdev *, uint8_t mac[6]); -const char *netdev_get_name(const struct netdev *); -int netdev_get_mtu(const struct netdev *, int *mtup); + +int netdev_get_carrier(const struct netdev *, bool *carrier); int netdev_get_features(struct netdev *, uint32_t *current, uint32_t *advertised, uint32_t *supported, uint32_t *peer); int netdev_set_advertisements(struct netdev *, uint32_t advertise); + int netdev_get_in4(const struct netdev *, struct in_addr *); int netdev_set_in4(struct netdev *, struct in_addr addr, struct in_addr mask); +int netdev_get_in6(const struct netdev *, struct in6_addr *); int netdev_add_router(struct netdev *, struct in_addr router); -bool netdev_get_in6(const struct netdev *, struct in6_addr *); +int netdev_arp_lookup(const struct netdev *, uint32_t ip, uint8_t mac[6]); + int netdev_get_flags(const struct netdev *, enum netdev_flags *); int netdev_set_flags(struct netdev *, enum netdev_flags, bool permanent); int netdev_turn_flags_on(struct netdev *, enum netdev_flags, bool permanent); int netdev_turn_flags_off(struct netdev *, enum netdev_flags, bool permanent); -int netdev_arp_lookup(const struct netdev *, uint32_t ip, uint8_t mac[6]); -int 
netdev_get_carrier(const struct netdev *, bool *carrier); + int netdev_get_stats(const struct netdev *, struct netdev_stats *); int netdev_set_policing(struct netdev *, uint32_t kbits_rate, uint32_t kbits_burst); -void netdev_enumerate(struct svec *); -struct netdev *netdev_find_dev_by_in4(const struct in_addr *); - int netdev_get_vlan_vid(const struct netdev *, int *vlan_vid); +struct netdev *netdev_find_dev_by_in4(const struct in_addr *); -struct netdev_monitor; -int netdev_monitor_create(struct netdev_monitor **); +struct netdev_monitor *netdev_monitor_create(void); void netdev_monitor_destroy(struct netdev_monitor *); -void netdev_monitor_add(struct netdev_monitor *, struct netdev *); +int netdev_monitor_add(struct netdev_monitor *, struct netdev *); void netdev_monitor_remove(struct netdev_monitor *, struct netdev *); int netdev_monitor_poll(struct netdev_monitor *, char **devnamep); void netdev_monitor_poll_wait(const struct netdev_monitor *); diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 56432194..55eb2c28 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -259,7 +259,6 @@ int ofproto_create(const char *datapath, const struct ofhooks *ofhooks, void *aux, struct ofproto **ofprotop) { - struct netdev_monitor *netdev_monitor; struct odp_stats stats; struct ofproto *p; struct dpif *dpif; @@ -290,15 +289,6 @@ ofproto_create(const char *datapath, const struct ofhooks *ofhooks, void *aux, dpif_flow_flush(dpif); dpif_recv_purge(dpif); - /* Arrange to monitor datapath ports for status changes. */ - error = netdev_monitor_create(&netdev_monitor); - if (error) { - VLOG_ERR("failed to starting monitoring datapath %s: %s", - datapath, strerror(error)); - dpif_close(dpif); - return error; - } - /* Initialize settings. */ p = xcalloc(1, sizeof *p); p->fallback_dpid = pick_fallback_dpid(); @@ -310,7 +300,7 @@ ofproto_create(const char *datapath, const struct ofhooks *ofhooks, void *aux, /* Initialize datapath. 
*/ p->dpif = dpif; - p->netdev_monitor = netdev_monitor; + p->netdev_monitor = netdev_monitor_create(); port_array_init(&p->ports); shash_init(&p->port_by_name); p->max_ports = stats.max_ports; diff --git a/utilities/ovs-openflowd.c b/utilities/ovs-openflowd.c index f60dea5a..5dd77c05 100644 --- a/utilities/ovs-openflowd.c +++ b/utilities/ovs-openflowd.c @@ -195,10 +195,12 @@ main(int argc, char *argv[]) } unixctl_server_run(unixctl); dp_run(); + netdev_run(); ofproto_wait(ofproto); unixctl_server_wait(unixctl); dp_wait(); + netdev_wait(); poll_block(); } diff --git a/vswitchd/ovs-brcompatd.c b/vswitchd/ovs-brcompatd.c index 306de136..70570e92 100644 --- a/vswitchd/ovs-brcompatd.c +++ b/vswitchd/ovs-brcompatd.c @@ -38,7 +38,6 @@ #include "coverage.h" #include "daemon.h" #include "dirs.h" -#include "dpif.h" #include "dynamic-string.h" #include "fatal-signal.h" #include "fault.h" @@ -911,6 +910,7 @@ main(int argc, char *argv[]) for (;;) { unixctl_server_run(unixctl); brc_recv_update(); + netdev_run(); /* If 'prune_timeout' is non-zero, we actively prune from the * config file any 'bridge..port' entries that are no @@ -932,6 +932,7 @@ main(int argc, char *argv[]) nl_sock_wait(brc_sock, POLLIN); unixctl_server_wait(unixctl); + netdev_wait(); poll_block(); } diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c index 3ee29c77..a7b43f7b 100644 --- a/vswitchd/ovs-vswitchd.c +++ b/vswitchd/ovs-vswitchd.c @@ -32,6 +32,7 @@ #include "fault.h" #include "leak-checker.h" #include "mgmt.h" +#include "netdev.h" #include "ovs-vswitchd.h" #include "poll-loop.h" #include "proc-net-compat.h" @@ -100,6 +101,7 @@ main(int argc, char *argv[]) } unixctl_server_run(unixctl); dp_run(); + netdev_run(); if (need_reconfigure) { poll_immediate_wake(); @@ -109,6 +111,7 @@ main(int argc, char *argv[]) bridge_wait(); unixctl_server_wait(unixctl); dp_wait(); + netdev_wait(); poll_block(); } -- 2.30.2