X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=lib%2Fnetdev-linux.c;h=2faffa346011d046b541e9b54b0419c6c512e09e;hb=85ab0a021523e51435539af0e6a138c73c9846a2;hp=6189bf79065a834ef0ed382671fafcc1996a4776;hpb=8b61709d5ec6c4ef58a04fcaefde617ff63fa10d;p=openvswitch diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 6189bf79..2faffa34 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -15,9 +15,6 @@ */ #include - -#include "netdev-linux.h" - #include #include #include @@ -53,6 +50,7 @@ #include "openflow/openflow.h" #include "packets.h" #include "poll-loop.h" +#include "rtnetlink.h" #include "socket-util.h" #include "shash.h" #include "svec.h" @@ -87,7 +85,8 @@ enum { VALID_IN4 = 1 << 2, VALID_IN6 = 1 << 3, VALID_MTU = 1 << 4, - VALID_CARRIER = 1 << 5 + VALID_CARRIER = 1 << 5, + VALID_IS_INTERNAL = 1 << 6 }; /* Cached network device information. */ @@ -98,25 +97,15 @@ struct netdev_linux_cache { int ifindex; uint8_t etheraddr[ETH_ADDR_LEN]; - struct in_addr in4; + struct in_addr address, netmask; struct in6_addr in6; int mtu; int carrier; + bool is_internal; }; static struct shash cache_map = SHASH_INITIALIZER(&cache_map); -static struct linux_netdev_notifier netdev_linux_cache_notifier; - -/* Policy for RTNLGRP_LINK messages. - * - * There are *many* more fields in these messages, but currently we only care - * about interface names. */ -static const struct nl_policy rtnlgrp_link_policy[] = { - [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, - [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, - [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true, - .min_len = sizeof(struct rtnl_link_stats) }, -}; +static struct rtnetlink_notifier netdev_linux_cache_notifier; /* An AF_INET socket (used for ioctl operations). 
*/ static int af_inet_sock = -1; @@ -128,7 +117,7 @@ struct netdev_linux_notifier { static struct shash netdev_linux_notifiers = SHASH_INITIALIZER(&netdev_linux_notifiers); -static struct linux_netdev_notifier netdev_linux_poll_notifier; +static struct rtnetlink_notifier netdev_linux_poll_notifier; /* This is set pretty low because we probably won't learn anything from the * additional log messages. */ @@ -138,6 +127,8 @@ static int netdev_linux_do_ethtool(struct netdev *, struct ethtool_cmd *, int cmd, const char *cmd_name); static int netdev_linux_do_ioctl(const struct netdev *, struct ifreq *, int cmd, const char *cmd_name); +static int netdev_linux_get_ipv4(const struct netdev *, struct in_addr *, + int cmd, const char *cmd_name); static int get_flags(const struct netdev *, int *flagsp); static int set_flags(struct netdev *, int flags); static int do_get_ifindex(const char *netdev_name); @@ -175,17 +166,17 @@ netdev_linux_init(void) static void netdev_linux_run(void) { - linux_netdev_notifier_run(); + rtnetlink_notifier_run(); } static void netdev_linux_wait(void) { - linux_netdev_notifier_wait(); + rtnetlink_notifier_wait(); } static void -netdev_linux_cache_cb(const struct linux_netdev_change *change, +netdev_linux_cache_cb(const struct rtnetlink_change *change, void *aux UNUSED) { struct netdev_linux_cache *cache; @@ -219,7 +210,7 @@ netdev_linux_open(const char *name, char *suffix, int ethertype, netdev->cache = shash_find_data(&cache_map, suffix); if (!netdev->cache) { if (shash_is_empty(&cache_map)) { - int error = linux_netdev_notifier_register( + int error = rtnetlink_notifier_register( &netdev_linux_cache_notifier, netdev_linux_cache_cb, NULL); if (error) { netdev_close(&netdev->netdev); @@ -336,7 +327,7 @@ netdev_linux_close(struct netdev *netdev_) free(netdev->cache); if (shash_is_empty(&cache_map)) { - linux_netdev_notifier_unregister(&netdev_linux_cache_notifier); + rtnetlink_notifier_unregister(&netdev_linux_cache_notifier); } } if 
(netdev->netdev_fd >= 0) { @@ -636,25 +627,83 @@ check_for_working_netlink_stats(void) * XXX All of the members of struct netdev_stats are 64 bits wide, but on * 32-bit architectures the Linux network stats are only 32 bits. */ static int -netdev_linux_get_stats(const struct netdev *netdev, struct netdev_stats *stats) +netdev_linux_get_stats(const struct netdev *netdev_, struct netdev_stats *stats) { + struct netdev_linux *netdev = netdev_linux_cast(netdev_); static int use_netlink_stats = -1; int error; + struct netdev_stats raw_stats; + struct netdev_stats *collect_stats = stats; COVERAGE_INC(netdev_get_stats); + + if (!(netdev->cache->valid & VALID_IS_INTERNAL)) { + netdev->cache->is_internal = (netdev->tap_fd != -1); + + if (!netdev->cache->is_internal) { + struct ethtool_drvinfo drvinfo; + + memset(&drvinfo, 0, sizeof drvinfo); + error = netdev_linux_do_ethtool(&netdev->netdev, + (struct ethtool_cmd *)&drvinfo, + ETHTOOL_GDRVINFO, + "ETHTOOL_GDRVINFO"); + + if (!error) { + netdev->cache->is_internal = !strcmp(drvinfo.driver, + "openvswitch"); + } + } + + netdev->cache->valid |= VALID_IS_INTERNAL; + } + + if (netdev->cache->is_internal) { + collect_stats = &raw_stats; + } + if (use_netlink_stats < 0) { use_netlink_stats = check_for_working_netlink_stats(); } if (use_netlink_stats) { int ifindex; - error = get_ifindex(netdev, &ifindex); + error = get_ifindex(&netdev->netdev, &ifindex); if (!error) { - error = get_stats_via_netlink(ifindex, stats); + error = get_stats_via_netlink(ifindex, collect_stats); } } else { - error = get_stats_via_proc(netdev->name, stats); + error = get_stats_via_proc(netdev->netdev.name, collect_stats); + } + + /* If this port is an internal port then the transmit and receive stats + * will appear to be swapped relative to the other ports since we are the + * one sending the data, not a remote computer. For consistency, we swap + * them back here. 
*/ + if (netdev->cache->is_internal) { + stats->rx_packets = raw_stats.tx_packets; + stats->tx_packets = raw_stats.rx_packets; + stats->rx_bytes = raw_stats.tx_bytes; + stats->tx_bytes = raw_stats.rx_bytes; + stats->rx_errors = raw_stats.tx_errors; + stats->tx_errors = raw_stats.rx_errors; + stats->rx_dropped = raw_stats.tx_dropped; + stats->tx_dropped = raw_stats.rx_dropped; + stats->multicast = raw_stats.multicast; + stats->collisions = raw_stats.collisions; + stats->rx_length_errors = 0; + stats->rx_over_errors = 0; + stats->rx_crc_errors = 0; + stats->rx_frame_errors = 0; + stats->rx_fifo_errors = 0; + stats->rx_missed_errors = 0; + stats->tx_aborted_errors = 0; + stats->tx_carrier_errors = 0; + stats->tx_fifo_errors = 0; + stats->tx_heartbeat_errors = 0; + stats->tx_window_errors = 0; } + return error; } @@ -948,49 +997,48 @@ netdev_linux_set_policing(struct netdev *netdev, return 0; } -/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address (if - * 'in4' is non-null) and returns true. Otherwise, returns false. */ static int -netdev_linux_get_in4(const struct netdev *netdev_, struct in_addr *in4) +netdev_linux_get_in4(const struct netdev *netdev_, + struct in_addr *address, struct in_addr *netmask) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); if (!(netdev->cache->valid & VALID_IN4)) { - const struct sockaddr_in *sin; - struct ifreq ifr; int error; - ifr.ifr_addr.sa_family = AF_INET; - error = netdev_linux_do_ioctl(netdev_, &ifr, + error = netdev_linux_get_ipv4(netdev_, &netdev->cache->address, SIOCGIFADDR, "SIOCGIFADDR"); if (error) { return error; } - sin = (struct sockaddr_in *) &ifr.ifr_addr; - netdev->cache->in4 = sin->sin_addr; + error = netdev_linux_get_ipv4(netdev_, &netdev->cache->netmask, + SIOCGIFNETMASK, "SIOCGIFNETMASK"); + if (error) { + return error; + } + netdev->cache->valid |= VALID_IN4; } - *in4 = netdev->cache->in4; - return in4->s_addr == INADDR_ANY ? 
EADDRNOTAVAIL : 0; + *address = netdev->cache->address; + *netmask = netdev->cache->netmask; + return address->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0; } -/* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If - * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. Returns a - * positive errno value. */ static int -netdev_linux_set_in4(struct netdev *netdev_, struct in_addr addr, - struct in_addr mask) +netdev_linux_set_in4(struct netdev *netdev_, struct in_addr address, + struct in_addr netmask) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); int error; - error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", addr); + error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", address); if (!error) { netdev->cache->valid |= VALID_IN4; - netdev->cache->in4 = addr; - if (addr.s_addr != INADDR_ANY) { + netdev->cache->address = address; + netdev->cache->netmask = netmask; + if (address.s_addr != INADDR_ANY) { error = do_set_addr(netdev_, SIOCSIFNETMASK, - "SIOCSIFNETMASK", mask); + "SIOCSIFNETMASK", netmask); } } return error; @@ -1089,6 +1137,67 @@ netdev_linux_add_router(struct netdev *netdev UNUSED, struct in_addr router) return error; } +static int +netdev_linux_get_next_hop(const struct in_addr *host, struct in_addr *next_hop, + char **netdev_name) +{ + static const char fn[] = "/proc/net/route"; + FILE *stream; + char line[256]; + int ln; + + *netdev_name = NULL; + stream = fopen(fn, "r"); + if (stream == NULL) { + VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno)); + return errno; + } + + ln = 0; + while (fgets(line, sizeof line, stream)) { + if (++ln >= 2) { + char iface[17]; + uint32_t dest, gateway, mask; + int refcnt, metric, mtu; + unsigned int flags, use, window, irtt; + + if (sscanf(line, + "%16s %"SCNx32" %"SCNx32" %04X %d %u %d %"SCNx32 + " %d %u %u\n", + iface, &dest, &gateway, &flags, &refcnt, + &use, &metric, &mask, &mtu, &window, &irtt) != 11) { + + VLOG_WARN_RL(&rl, "%s: could not parse line %d: 
%s", + fn, ln, line); + continue; + } + if (!(flags & RTF_UP)) { + /* Skip routes that aren't up. */ + continue; + } + + /* The output of 'dest', 'mask', and 'gateway' were given in + * network byte order, so we don't need any endian + * conversions here. */ + if ((dest & mask) == (host->s_addr & mask)) { + if (!gateway) { + /* The host is directly reachable. */ + next_hop->s_addr = 0; + } else { + /* To reach the host, we must go through a gateway. */ + next_hop->s_addr = gateway; + } + *netdev_name = xstrdup(iface); + fclose(stream); + return 0; + } + } + } + + fclose(stream); + return ENXIO; +} + /* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be * successfully retrieved, it stores the corresponding MAC address in 'mac' and * returns 0. Otherwise, it returns a positive errno value; in particular, @@ -1175,7 +1284,7 @@ poll_notify(struct list *list) } static void -netdev_linux_poll_cb(const struct linux_netdev_change *change, +netdev_linux_poll_cb(const struct rtnetlink_change *change, void *aux UNUSED) { if (change) { @@ -1202,7 +1311,7 @@ netdev_linux_poll_add(struct netdev *netdev, struct list *list; if (shash_is_empty(&netdev_linux_notifiers)) { - int error = linux_netdev_notifier_register(&netdev_linux_poll_notifier, + int error = rtnetlink_notifier_register(&netdev_linux_poll_notifier, netdev_linux_poll_cb, NULL); if (error) { return error; @@ -1243,7 +1352,7 @@ netdev_linux_poll_remove(struct netdev_notifier *notifier_) /* If that was the last notifier, unregister.
*/ if (shash_is_empty(&netdev_linux_notifiers)) { - linux_netdev_notifier_unregister(&netdev_linux_poll_notifier); + rtnetlink_notifier_unregister(&netdev_linux_poll_notifier); } } @@ -1282,6 +1391,7 @@ const struct netdev_class netdev_linux_class = { netdev_linux_set_in4, netdev_linux_get_in6, netdev_linux_add_router, + netdev_linux_get_next_hop, netdev_linux_arp_lookup, netdev_linux_update_flags, @@ -1325,6 +1435,7 @@ const struct netdev_class netdev_tap_class = { netdev_linux_set_in4, netdev_linux_get_in6, netdev_linux_add_router, + netdev_linux_get_next_hop, netdev_linux_arp_lookup, netdev_linux_update_flags, @@ -1336,6 +1447,17 @@ const struct netdev_class netdev_tap_class = { static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats) { + /* Policy for RTNLGRP_LINK messages. + * + * There are *many* more fields in these messages, but currently we only + * care about these fields. */ + static const struct nl_policy rtnlgrp_link_policy[] = { + [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, + [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true, + .min_len = sizeof(struct rtnl_link_stats) }, + }; + + static struct nl_sock *rtnl_sock; struct ofpbuf request; struct ofpbuf *reply; @@ -1593,131 +1715,19 @@ netdev_linux_do_ioctl(const struct netdev *netdev, struct ifreq *ifr, } return 0; } - -/* rtnetlink socket. */ -static struct nl_sock *notify_sock; - -/* All registered notifiers. 
*/ -static struct list all_notifiers = LIST_INITIALIZER(&all_notifiers); - -static void linux_netdev_report_change(const struct nlmsghdr *, - const struct ifinfomsg *, - struct nlattr *attrs[]); -static void linux_netdev_report_notify_error(void); - -int -linux_netdev_notifier_register(struct linux_netdev_notifier *notifier, - linux_netdev_notify_func *cb, void *aux) -{ - if (!notify_sock) { - int error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, - &notify_sock); - if (error) { - VLOG_WARN("could not create rtnetlink socket: %s", - strerror(error)); - return error; - } - } else { - /* Catch up on notification work so that the new notifier won't - * receive any stale notifications. */ - linux_netdev_notifier_run(); - } - - list_push_back(&all_notifiers, &notifier->node); - notifier->cb = cb; - notifier->aux = aux; - return 0; -} -void -linux_netdev_notifier_unregister(struct linux_netdev_notifier *notifier) -{ - list_remove(&notifier->node); - if (list_is_empty(&all_notifiers)) { - nl_sock_destroy(notify_sock); - notify_sock = NULL; - } -} - -void -linux_netdev_notifier_run(void) -{ - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - - if (!notify_sock) { - return; - } - - for (;;) { - struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; - struct ofpbuf *buf; - int error; - - error = nl_sock_recv(notify_sock, &buf, false); - if (!error) { - if (nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), - rtnlgrp_link_policy, - attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { - struct ifinfomsg *ifinfo; - - ifinfo = (void *) ((char *) buf->data + NLMSG_HDRLEN); - linux_netdev_report_change(buf->data, ifinfo, attrs); - } else { - VLOG_WARN_RL(&rl, "received bad rtnl message"); - linux_netdev_report_notify_error(); - } - ofpbuf_delete(buf); - } else if (error == EAGAIN) { - return; - } else { - if (error == ENOBUFS) { - VLOG_WARN_RL(&rl, "rtnetlink receive buffer overflowed"); - } else { - VLOG_WARN_RL(&rl, "error reading rtnetlink socket: %s", -
strerror(error)); - } - linux_netdev_report_notify_error(); - } - } -} - -void -linux_netdev_notifier_wait(void) -{ - if (notify_sock) { - nl_sock_wait(notify_sock, POLLIN); - } -} - -static void -linux_netdev_report_change(const struct nlmsghdr *nlmsg, - const struct ifinfomsg *ifinfo, - struct nlattr *attrs[]) -{ - struct linux_netdev_notifier *notifier; - struct linux_netdev_change change; - - COVERAGE_INC(linux_netdev_changed); - - change.nlmsg_type = nlmsg->nlmsg_type; - change.ifi_index = ifinfo->ifi_index; - change.ifname = nl_attr_get_string(attrs[IFLA_IFNAME]); - change.master_ifindex = (attrs[IFLA_MASTER] - ? nl_attr_get_u32(attrs[IFLA_MASTER]) : 0); - - LIST_FOR_EACH (notifier, struct linux_netdev_notifier, node, - &all_notifiers) { - notifier->cb(&change, notifier->aux); - } -} - -static void -linux_netdev_report_notify_error(void) +static int +netdev_linux_get_ipv4(const struct netdev *netdev, struct in_addr *ip, + int cmd, const char *cmd_name) { - struct linux_netdev_notifier *notifier; + struct ifreq ifr; + int error; - LIST_FOR_EACH (notifier, struct linux_netdev_notifier, node, - &all_notifiers) { - notifier->cb(NULL, notifier->aux); + ifr.ifr_addr.sa_family = AF_INET; + error = netdev_linux_do_ioctl(netdev, &ifr, cmd, cmd_name); + if (!error) { + const struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr; + *ip = sin->sin_addr; } + return error; }