*/
#include <config.h>
-
-#include "netdev-linux.h"
-
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include "openflow/openflow.h"
#include "packets.h"
#include "poll-loop.h"
+#include "rtnetlink.h"
#include "socket-util.h"
#include "shash.h"
#include "svec.h"
VALID_IN4 = 1 << 2,
VALID_IN6 = 1 << 3,
VALID_MTU = 1 << 4,
- VALID_CARRIER = 1 << 5
+ VALID_CARRIER = 1 << 5,
+ VALID_IS_INTERNAL = 1 << 6
};
/* Cached network device information. */
int ifindex;
uint8_t etheraddr[ETH_ADDR_LEN];
- struct in_addr in4;
+ struct in_addr address, netmask;
struct in6_addr in6;
int mtu;
int carrier;
+ bool is_internal;
};
static struct shash cache_map = SHASH_INITIALIZER(&cache_map);
-static struct linux_netdev_notifier netdev_linux_cache_notifier;
-
-/* Policy for RTNLGRP_LINK messages.
- *
- * There are *many* more fields in these messages, but currently we only care
- * about interface names. */
-static const struct nl_policy rtnlgrp_link_policy[] = {
- [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false },
- [IFLA_MASTER] = { .type = NL_A_U32, .optional = true },
- [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true,
- .min_len = sizeof(struct rtnl_link_stats) },
-};
+static struct rtnetlink_notifier netdev_linux_cache_notifier;
/* An AF_INET socket (used for ioctl operations). */
static int af_inet_sock = -1;
static struct shash netdev_linux_notifiers =
SHASH_INITIALIZER(&netdev_linux_notifiers);
-static struct linux_netdev_notifier netdev_linux_poll_notifier;
+static struct rtnetlink_notifier netdev_linux_poll_notifier;
/* This is set pretty low because we probably won't learn anything from the
* additional log messages. */
int cmd, const char *cmd_name);
static int netdev_linux_do_ioctl(const struct netdev *, struct ifreq *,
int cmd, const char *cmd_name);
+static int netdev_linux_get_ipv4(const struct netdev *, struct in_addr *,
+ int cmd, const char *cmd_name);
static int get_flags(const struct netdev *, int *flagsp);
static int set_flags(struct netdev *, int flags);
static int do_get_ifindex(const char *netdev_name);
+/* Delegates to rtnetlink_notifier_run(); this function does nothing else. */
static void
netdev_linux_run(void)
{
- linux_netdev_notifier_run();
+ rtnetlink_notifier_run();
}
+/* Delegates to rtnetlink_notifier_wait(); this function does nothing else. */
static void
netdev_linux_wait(void)
{
- linux_netdev_notifier_wait();
+ rtnetlink_notifier_wait();
}
static void
-netdev_linux_cache_cb(const struct linux_netdev_change *change,
+netdev_linux_cache_cb(const struct rtnetlink_change *change,
void *aux UNUSED)
{
struct netdev_linux_cache *cache;
netdev->cache = shash_find_data(&cache_map, suffix);
if (!netdev->cache) {
if (shash_is_empty(&cache_map)) {
- int error = linux_netdev_notifier_register(
+ int error = rtnetlink_notifier_register(
&netdev_linux_cache_notifier, netdev_linux_cache_cb, NULL);
if (error) {
netdev_close(&netdev->netdev);
free(netdev->cache);
if (shash_is_empty(&cache_map)) {
- linux_netdev_notifier_unregister(&netdev_linux_cache_notifier);
+ rtnetlink_notifier_unregister(&netdev_linux_cache_notifier);
}
}
if (netdev->netdev_fd >= 0) {
* XXX All of the members of struct netdev_stats are 64 bits wide, but on
* 32-bit architectures the Linux network stats are only 32 bits. */
static int
-netdev_linux_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
+netdev_linux_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
{
+ struct netdev_linux *netdev = netdev_linux_cast(netdev_);
static int use_netlink_stats = -1;
int error;
+ struct netdev_stats raw_stats;
+ struct netdev_stats *collect_stats = stats;
COVERAGE_INC(netdev_get_stats);
+
+ /* Work out, once per cache entry, whether this is an internal port.  A
+ * device with an open tap fd is always treated as internal; otherwise the
+ * device is internal if ethtool reports its driver as "openvswitch".  An
+ * ethtool failure leaves the device classified as not internal. */
+ if (!(netdev->cache->valid & VALID_IS_INTERNAL)) {
+ netdev->cache->is_internal = (netdev->tap_fd != -1);
+
+ if (!netdev->cache->is_internal) {
+ struct ethtool_drvinfo drvinfo;
+
+ memset(&drvinfo, 0, sizeof drvinfo);
+ error = netdev_linux_do_ethtool(&netdev->netdev,
+ (struct ethtool_cmd *)&drvinfo,
+ ETHTOOL_GDRVINFO,
+ "ETHTOOL_GDRVINFO");
+
+ if (!error) {
+ netdev->cache->is_internal = !strcmp(drvinfo.driver,
+ "openvswitch");
+ }
+ }
+
+ netdev->cache->valid |= VALID_IS_INTERNAL;
+ }
+
+ /* For internal ports, collect into a scratch buffer so that the counters
+ * can be swapped into '*stats' below. */
+ if (netdev->cache->is_internal) {
+ collect_stats = &raw_stats;
+ }
+
if (use_netlink_stats < 0) {
use_netlink_stats = check_for_working_netlink_stats();
}
if (use_netlink_stats) {
int ifindex;
- error = get_ifindex(netdev, &ifindex);
+ error = get_ifindex(&netdev->netdev, &ifindex);
if (!error) {
- error = get_stats_via_netlink(ifindex, stats);
+ error = get_stats_via_netlink(ifindex, collect_stats);
}
} else {
- error = get_stats_via_proc(netdev->name, stats);
+ error = get_stats_via_proc(netdev->netdev.name, collect_stats);
+ }
+
+ /* If this port is an internal port then the transmit and receive stats
+ * will appear to be swapped relative to the other ports since we are the
+ * one sending the data, not a remote computer. For consistency, we swap
+ * them back here.
+ *
+ * Only do this when collection succeeded: on failure 'raw_stats' may never
+ * have been written, and copying it would hand the caller indeterminate
+ * values. */
+ if (!error && netdev->cache->is_internal) {
+ stats->rx_packets = raw_stats.tx_packets;
+ stats->tx_packets = raw_stats.rx_packets;
+ stats->rx_bytes = raw_stats.tx_bytes;
+ stats->tx_bytes = raw_stats.rx_bytes;
+ stats->rx_errors = raw_stats.tx_errors;
+ stats->tx_errors = raw_stats.rx_errors;
+ stats->rx_dropped = raw_stats.tx_dropped;
+ stats->tx_dropped = raw_stats.rx_dropped;
+ stats->multicast = raw_stats.multicast;
+ stats->collisions = raw_stats.collisions;
+ stats->rx_length_errors = 0;
+ stats->rx_over_errors = 0;
+ stats->rx_crc_errors = 0;
+ stats->rx_frame_errors = 0;
+ stats->rx_fifo_errors = 0;
+ stats->rx_missed_errors = 0;
+ stats->tx_aborted_errors = 0;
+ stats->tx_carrier_errors = 0;
+ stats->tx_fifo_errors = 0;
+ stats->tx_heartbeat_errors = 0;
+ stats->tx_window_errors = 0;
}
+
return error;
}
return 0;
}
-/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address (if
- * 'in4' is non-null) and returns true. Otherwise, returns false. */
+/* Stores the IPv4 address of 'netdev_' in '*address' and its netmask in
+ * '*netmask'.  Both values are served from the netdev's cache; when the cache
+ * is not marked VALID_IN4 they are first fetched with SIOCGIFADDR and
+ * SIOCGIFNETMASK, and a failure of either request is returned immediately
+ * without writing the caller's outputs.
+ *
+ * Returns 0 if successful, or EADDRNOTAVAIL if the address is INADDR_ANY (in
+ * which case '*address' and '*netmask' have still been written).  Both
+ * pointers are dereferenced unconditionally. */
static int
-netdev_linux_get_in4(const struct netdev *netdev_, struct in_addr *in4)
+netdev_linux_get_in4(const struct netdev *netdev_,
+ struct in_addr *address, struct in_addr *netmask)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
if (!(netdev->cache->valid & VALID_IN4)) {
- const struct sockaddr_in *sin;
- struct ifreq ifr;
int error;
- ifr.ifr_addr.sa_family = AF_INET;
- error = netdev_linux_do_ioctl(netdev_, &ifr,
+ error = netdev_linux_get_ipv4(netdev_, &netdev->cache->address,
SIOCGIFADDR, "SIOCGIFADDR");
if (error) {
return error;
}
- sin = (struct sockaddr_in *) &ifr.ifr_addr;
- netdev->cache->in4 = sin->sin_addr;
+ error = netdev_linux_get_ipv4(netdev_, &netdev->cache->netmask,
+ SIOCGIFNETMASK, "SIOCGIFNETMASK");
+ if (error) {
+ return error;
+ }
+
netdev->cache->valid |= VALID_IN4;
}
- *in4 = netdev->cache->in4;
- return in4->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0;
+ *address = netdev->cache->address;
+ *netmask = netdev->cache->netmask;
+ return address->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0;
}
-/* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If
- * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. Returns a
- * positive errno value. */
static int
-netdev_linux_set_in4(struct netdev *netdev_, struct in_addr addr,
- struct in_addr mask)
+netdev_linux_set_in4(struct netdev *netdev_, struct in_addr address,
+ struct in_addr netmask)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
int error;
- error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", addr);
+ error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", address);
if (!error) {
netdev->cache->valid |= VALID_IN4;
- netdev->cache->in4 = addr;
- if (addr.s_addr != INADDR_ANY) {
+ netdev->cache->address = address;
+ netdev->cache->netmask = netmask;
+ if (address.s_addr != INADDR_ANY) {
error = do_set_addr(netdev_, SIOCSIFNETMASK,
- "SIOCSIFNETMASK", mask);
+ "SIOCSIFNETMASK", netmask);
}
}
return error;
return error;
}
+/* Finds the route in /proc/net/route that would be used to reach 'host'.
+ *
+ * Lines are examined in file order and the first route that is up and whose
+ * destination matches 'host' under the route's own mask wins; no explicit
+ * longest-prefix comparison is done here.
+ *
+ * On success, returns 0, stores the gateway address in '*next_hop' (0 if
+ * 'host' is directly reachable) and stores a copy of the output interface's
+ * name, made with xstrdup(), in '*netdev_name' (presumably the caller must
+ * free() it -- confirm against callers).
+ *
+ * On failure, returns a positive errno value -- the fopen() error, or ENXIO
+ * if no route matches -- and '*netdev_name' is NULL. */
+static int
+netdev_linux_get_next_hop(const struct in_addr *host, struct in_addr *next_hop,
+                          char **netdev_name)
+{
+    static const char fn[] = "/proc/net/route";
+    FILE *stream;
+    char line[256];
+    int ln;
+
+    *netdev_name = NULL;
+    stream = fopen(fn, "r");
+    if (stream == NULL) {
+        /* Save errno first: logging may call library functions that
+         * overwrite it before we get to return it. */
+        int error = errno;
+
+        VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(error));
+        return error;
+    }
+
+    ln = 0;
+    while (fgets(line, sizeof line, stream)) {
+        /* Line 1 is skipped (it is the column header in /proc/net/route). */
+        if (++ln >= 2) {
+            char iface[17];
+            uint32_t dest, gateway, mask;
+            int refcnt, metric, mtu;
+            unsigned int flags, use, window, irtt;
+
+            if (sscanf(line,
+                       "%16s %"SCNx32" %"SCNx32" %04X %d %u %d %"SCNx32
+                       " %d %u %u\n",
+                       iface, &dest, &gateway, &flags, &refcnt,
+                       &use, &metric, &mask, &mtu, &window, &irtt) != 11) {
+
+                VLOG_WARN_RL(&rl, "%s: could not parse line %d: %s",
+                             fn, ln, line);
+                continue;
+            }
+            if (!(flags & RTF_UP)) {
+                /* Skip routes that aren't up. */
+                continue;
+            }
+
+            /* The output of 'dest', 'mask', and 'gateway' were given in
+             * network byte order, so we don't need any endian
+             * conversions here. */
+            if ((dest & mask) == (host->s_addr & mask)) {
+                if (!gateway) {
+                    /* The host is directly reachable. */
+                    next_hop->s_addr = 0;
+                } else {
+                    /* To reach the host, we must go through a gateway. */
+                    next_hop->s_addr = gateway;
+                }
+                *netdev_name = xstrdup(iface);
+                fclose(stream);
+                return 0;
+            }
+        }
+    }
+
+    fclose(stream);
+    return ENXIO;
+}
+
/* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be
* successfully retrieved, it stores the corresponding MAC address in 'mac' and
* returns 0. Otherwise, it returns a positive errno value; in particular,
}
static void
-netdev_linux_poll_cb(const struct linux_netdev_change *change,
+netdev_linux_poll_cb(const struct rtnetlink_change *change,
void *aux UNUSED)
{
if (change) {
struct list *list;
if (shash_is_empty(&netdev_linux_notifiers)) {
- int error = linux_netdev_notifier_register(&netdev_linux_poll_notifier,
+ int error = rtnetlink_notifier_register(&netdev_linux_poll_notifier,
netdev_linux_poll_cb, NULL);
if (error) {
return error;
/* If that was the last notifier, unregister. */
if (shash_is_empty(&netdev_linux_notifiers)) {
- linux_netdev_notifier_unregister(&netdev_linux_poll_notifier);
+ rtnetlink_notifier_unregister(&netdev_linux_poll_notifier);
}
}
netdev_linux_set_in4,
netdev_linux_get_in6,
netdev_linux_add_router,
+ netdev_linux_get_next_hop,
netdev_linux_arp_lookup,
netdev_linux_update_flags,
netdev_linux_set_in4,
netdev_linux_get_in6,
netdev_linux_add_router,
+ netdev_linux_get_next_hop,
netdev_linux_arp_lookup,
netdev_linux_update_flags,
static int
get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
{
+ /* Policy for RTNLGRP_LINK messages.
+ *
+ * There are *many* more fields in these messages, but currently we only
+ * care about these fields. */
+ static const struct nl_policy rtnlgrp_link_policy[] = {
+ [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false },
+ [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true,
+ .min_len = sizeof(struct rtnl_link_stats) },
+ };
+
+
static struct nl_sock *rtnl_sock;
struct ofpbuf request;
struct ofpbuf *reply;
}
return 0;
}
-\f
-/* rtnetlink socket. */
-static struct nl_sock *notify_sock;
-
-/* All registered notifiers. */
-static struct list all_notifiers = LIST_INITIALIZER(&all_notifiers);
-
-static void linux_netdev_report_change(const struct nlmsghdr *,
- const struct ifinfomsg *,
- struct nlattr *attrs[]);
-static void linux_netdev_report_notify_error(void);
-
-int
-linux_netdev_notifier_register(struct linux_netdev_notifier *notifier,
- linux_netdev_notify_func *cb, void *aux)
-{
- if (!notify_sock) {
- int error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0,
- &notify_sock);
- if (error) {
- VLOG_WARN("could not create rtnetlink socket: %s",
- strerror(error));
- return error;
- }
- } else {
- /* Catch up on notification work so that the new notifier won't
- * receive any stale notifications. */
- linux_netdev_notifier_run();
- }
-
- list_push_back(&all_notifiers, &notifier->node);
- notifier->cb = cb;
- notifier->aux = aux;
- return 0;
-}
-void
-linux_netdev_notifier_unregister(struct linux_netdev_notifier *notifier)
-{
- list_remove(&notifier->node);
- if (list_is_empty(&all_notifiers)) {
- nl_sock_destroy(notify_sock);
- notify_sock = NULL;
- }
-}
-
-void
-linux_netdev_notifier_run(void)
-{
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-
- if (!notify_sock) {
- return;
- }
-
- for (;;) {
- struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
- struct ofpbuf *buf;
- int error;
-
- error = nl_sock_recv(notify_sock, &buf, false);
- if (!error) {
- if (nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
- rtnlgrp_link_policy,
- attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
- struct ifinfomsg *ifinfo;
-
- ifinfo = (void *) ((char *) buf->data + NLMSG_HDRLEN);
- linux_netdev_report_change(buf->data, ifinfo, attrs);
- } else {
- VLOG_WARN_RL(&rl, "received bad rtnl message");
- linux_netdev_report_notify_error();
- }
- ofpbuf_delete(buf);
- } else if (error == EAGAIN) {
- return;
- } else {
- if (error == ENOBUFS) {
- VLOG_WARN_RL(&rl, "rtnetlink receive buffer overflowed");
- } else {
- VLOG_WARN_RL(&rl, "error reading rtnetlink socket: %s",
- strerror(error));
- }
- linux_netdev_report_notify_error();
- }
- }
-}
-
-void
-linux_netdev_notifier_wait(void)
-{
- if (notify_sock) {
- nl_sock_wait(notify_sock, POLLIN);
- }
-}
-
-static void
-linux_netdev_report_change(const struct nlmsghdr *nlmsg,
- const struct ifinfomsg *ifinfo,
- struct nlattr *attrs[])
-{
- struct linux_netdev_notifier *notifier;
- struct linux_netdev_change change;
-
- COVERAGE_INC(linux_netdev_changed);
-
- change.nlmsg_type = nlmsg->nlmsg_type;
- change.ifi_index = ifinfo->ifi_index;
- change.ifname = nl_attr_get_string(attrs[IFLA_IFNAME]);
- change.master_ifindex = (attrs[IFLA_MASTER]
- ? nl_attr_get_u32(attrs[IFLA_MASTER]) : 0);
-
- LIST_FOR_EACH (notifier, struct linux_netdev_notifier, node,
- &all_notifiers) {
- notifier->cb(&change, notifier->aux);
- }
-}
-
-static void
-linux_netdev_report_notify_error(void)
+/* Issues ioctl 'cmd' on 'netdev' through netdev_linux_do_ioctl(), using an
+ * ifreq whose address family is preset to AF_INET, and on success copies the
+ * IPv4 address found in the returned ifreq into '*ip'.  'cmd_name' is passed
+ * through to netdev_linux_do_ioctl() unchanged.
+ *
+ * Returns 0 if successful, otherwise the error returned by
+ * netdev_linux_do_ioctl(), in which case '*ip' is left untouched. */
+static int
+netdev_linux_get_ipv4(const struct netdev *netdev, struct in_addr *ip,
+ int cmd, const char *cmd_name)
{
- struct linux_netdev_notifier *notifier;
+ struct ifreq ifr;
+ int error;
- LIST_FOR_EACH (notifier, struct linux_netdev_notifier, node,
- &all_notifiers) {
- notifier->cb(NULL, notifier->aux);
+ ifr.ifr_addr.sa_family = AF_INET;
+ error = netdev_linux_do_ioctl(netdev, &ifr, cmd, cmd_name);
+ if (!error) {
+ const struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr;
+ *ip = sin->sin_addr;
}
+ return error;
}