X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fnetdev-linux.c;h=c33405fd33cf9b9f53db8ff0f5e4188c51703fea;hb=8b681e6fdffe4ebd68dc259544abc87d4cccf0cb;hp=fbca67601e4cb725ba245148b89146834229ccf9;hpb=46097491e4ab6b957bb11690d86df24af19b5655;p=openvswitch diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index fbca6760..c33405fd 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -15,9 +15,6 @@ */ #include - -#include "netdev-linux.h" - #include #include #include @@ -53,6 +50,7 @@ #include "openflow/openflow.h" #include "packets.h" #include "poll-loop.h" +#include "rtnetlink.h" #include "socket-util.h" #include "shash.h" #include "svec.h" @@ -87,7 +85,8 @@ enum { VALID_IN4 = 1 << 2, VALID_IN6 = 1 << 3, VALID_MTU = 1 << 4, - VALID_CARRIER = 1 << 5 + VALID_CARRIER = 1 << 5, + VALID_IS_INTERNAL = 1 << 6 }; /* Cached network device information. */ @@ -98,26 +97,16 @@ struct netdev_linux_cache { int ifindex; uint8_t etheraddr[ETH_ADDR_LEN]; - struct in_addr in4; + struct in_addr address, netmask; struct in6_addr in6; int mtu; int carrier; + bool is_internal; }; static struct shash cache_map = SHASH_INITIALIZER(&cache_map); static struct rtnetlink_notifier netdev_linux_cache_notifier; -/* Policy for RTNLGRP_LINK messages. - * - * There are *many* more fields in these messages, but currently we only care - * about interface names. */ -static const struct nl_policy rtnlgrp_link_policy[] = { - [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, - [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, - [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true, - .min_len = sizeof(struct rtnl_link_stats) }, -}; - /* An AF_INET socket (used for ioctl operations). 
*/ static int af_inet_sock = -1; @@ -138,6 +127,8 @@ static int netdev_linux_do_ethtool(struct netdev *, struct ethtool_cmd *, int cmd, const char *cmd_name); static int netdev_linux_do_ioctl(const struct netdev *, struct ifreq *, int cmd, const char *cmd_name); +static int netdev_linux_get_ipv4(const struct netdev *, struct in_addr *, + int cmd, const char *cmd_name); static int get_flags(const struct netdev *, int *flagsp); static int set_flags(struct netdev *, int flags); static int do_get_ifindex(const char *netdev_name); @@ -212,7 +203,7 @@ netdev_linux_open(const char *name, char *suffix, int ethertype, int error; /* Allocate network device. */ - netdev = xcalloc(1, sizeof *netdev); + netdev = xzalloc(sizeof *netdev); netdev_init(&netdev->netdev, suffix, &netdev_linux_class); netdev->netdev_fd = -1; netdev->tap_fd = -1; @@ -248,9 +239,11 @@ netdev_linux_open(const char *name, char *suffix, int ethertype, /* Create tap device. */ ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - error = netdev_linux_do_ioctl(&netdev->netdev, &ifr, - TUNSETIFF, "TUNSETIFF"); - if (error) { + strncpy(ifr.ifr_name, suffix, sizeof ifr.ifr_name); + if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) { + VLOG_WARN("%s: creating tap device failed: %s", suffix, + strerror(errno)); + error = errno; goto error; } @@ -377,7 +370,7 @@ netdev_linux_recv(struct netdev *netdev_, void *data, size_t size) if (netdev->tap_fd < 0) { /* Device was opened with NETDEV_ETH_TYPE_NONE. 
*/ - return EAGAIN; + return -EAGAIN; } for (;;) { @@ -389,7 +382,7 @@ netdev_linux_recv(struct netdev *netdev_, void *data, size_t size) VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s", strerror(errno), netdev_get_name(netdev_)); } - return errno; + return -errno; } } } @@ -499,9 +492,17 @@ netdev_linux_set_etheraddr(struct netdev *netdev_, const uint8_t mac[ETH_ADDR_LEN]) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); - int error = set_etheraddr(netdev_get_name(netdev_), ARPHRD_ETHER, mac); - if (!error) { - memcpy(netdev->cache->etheraddr, mac, ETH_ADDR_LEN); + int error; + + if (!(netdev->cache->valid & VALID_ETHERADDR) + || !eth_addr_equals(netdev->cache->etheraddr, mac)) { + error = set_etheraddr(netdev_get_name(netdev_), ARPHRD_ETHER, mac); + if (!error) { + netdev->cache->valid |= VALID_ETHERADDR; + memcpy(netdev->cache->etheraddr, mac, ETH_ADDR_LEN); + } + } else { + error = 0; } return error; } @@ -636,25 +637,83 @@ check_for_working_netlink_stats(void) * XXX All of the members of struct netdev_stats are 64 bits wide, but on * 32-bit architectures the Linux network stats are only 32 bits. 
*/ static int -netdev_linux_get_stats(const struct netdev *netdev, struct netdev_stats *stats) +netdev_linux_get_stats(const struct netdev *netdev_, struct netdev_stats *stats) { + struct netdev_linux *netdev = netdev_linux_cast(netdev_); static int use_netlink_stats = -1; int error; + struct netdev_stats raw_stats; + struct netdev_stats *collect_stats = stats; COVERAGE_INC(netdev_get_stats); + + if (!(netdev->cache->valid & VALID_IS_INTERNAL)) { + netdev->cache->is_internal = (netdev->tap_fd != -1); + + if (!netdev->cache->is_internal) { + struct ethtool_drvinfo drvinfo; + + memset(&drvinfo, 0, sizeof drvinfo); + error = netdev_linux_do_ethtool(&netdev->netdev, + (struct ethtool_cmd *)&drvinfo, + ETHTOOL_GDRVINFO, + "ETHTOOL_GDRVINFO"); + + if (!error) { + netdev->cache->is_internal = !strcmp(drvinfo.driver, + "openvswitch"); + } + } + + netdev->cache->valid |= VALID_IS_INTERNAL; + } + + if (netdev->cache->is_internal) { + collect_stats = &raw_stats; + } + if (use_netlink_stats < 0) { use_netlink_stats = check_for_working_netlink_stats(); } if (use_netlink_stats) { int ifindex; - error = get_ifindex(netdev, &ifindex); + error = get_ifindex(&netdev->netdev, &ifindex); if (!error) { - error = get_stats_via_netlink(ifindex, stats); + error = get_stats_via_netlink(ifindex, collect_stats); } } else { - error = get_stats_via_proc(netdev->name, stats); + error = get_stats_via_proc(netdev->netdev.name, collect_stats); + } + + /* If this port is an internal port then the transmit and receive stats + * will appear to be swapped relative to the other ports since we are the + * one sending the data, not a remote computer. For consistency, we swap + * them back here. 
*/ + if (netdev->cache->is_internal) { + stats->rx_packets = raw_stats.tx_packets; + stats->tx_packets = raw_stats.rx_packets; + stats->rx_bytes = raw_stats.tx_bytes; + stats->tx_bytes = raw_stats.rx_bytes; + stats->rx_errors = raw_stats.tx_errors; + stats->tx_errors = raw_stats.rx_errors; + stats->rx_dropped = raw_stats.tx_dropped; + stats->tx_dropped = raw_stats.rx_dropped; + stats->multicast = raw_stats.multicast; + stats->collisions = raw_stats.collisions; + stats->rx_length_errors = 0; + stats->rx_over_errors = 0; + stats->rx_crc_errors = 0; + stats->rx_frame_errors = 0; + stats->rx_fifo_errors = 0; + stats->rx_missed_errors = 0; + stats->tx_aborted_errors = 0; + stats->tx_carrier_errors = 0; + stats->tx_fifo_errors = 0; + stats->tx_heartbeat_errors = 0; + stats->tx_window_errors = 0; } + return error; } @@ -948,49 +1007,48 @@ netdev_linux_set_policing(struct netdev *netdev, return 0; } -/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address (if - * 'in4' is non-null) and returns true. Otherwise, returns false. */ static int -netdev_linux_get_in4(const struct netdev *netdev_, struct in_addr *in4) +netdev_linux_get_in4(const struct netdev *netdev_, + struct in_addr *address, struct in_addr *netmask) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); if (!(netdev->cache->valid & VALID_IN4)) { - const struct sockaddr_in *sin; - struct ifreq ifr; int error; - ifr.ifr_addr.sa_family = AF_INET; - error = netdev_linux_do_ioctl(netdev_, &ifr, + error = netdev_linux_get_ipv4(netdev_, &netdev->cache->address, SIOCGIFADDR, "SIOCGIFADDR"); if (error) { return error; } - sin = (struct sockaddr_in *) &ifr.ifr_addr; - netdev->cache->in4 = sin->sin_addr; + error = netdev_linux_get_ipv4(netdev_, &netdev->cache->netmask, + SIOCGIFNETMASK, "SIOCGIFNETMASK"); + if (error) { + return error; + } + netdev->cache->valid |= VALID_IN4; } - *in4 = netdev->cache->in4; - return in4->s_addr == INADDR_ANY ? 
EADDRNOTAVAIL : 0; + *address = netdev->cache->address; + *netmask = netdev->cache->netmask; + return address->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0; } -/* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If - * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. Returns a - * positive errno value. */ static int -netdev_linux_set_in4(struct netdev *netdev_, struct in_addr addr, - struct in_addr mask) +netdev_linux_set_in4(struct netdev *netdev_, struct in_addr address, + struct in_addr netmask) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); int error; - error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", addr); + error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", address); if (!error) { netdev->cache->valid |= VALID_IN4; - netdev->cache->in4 = addr; - if (addr.s_addr != INADDR_ANY) { + netdev->cache->address = address; + netdev->cache->netmask = netmask; + if (address.s_addr != INADDR_ANY) { error = do_set_addr(netdev_, SIOCSIFNETMASK, - "SIOCSIFNETMASK", mask); + "SIOCSIFNETMASK", netmask); } } return error; @@ -1089,6 +1147,67 @@ netdev_linux_add_router(struct netdev *netdev UNUSED, struct in_addr router) return error; } +static int +netdev_linux_get_next_hop(const struct in_addr *host, struct in_addr *next_hop, + char **netdev_name) +{ + static const char fn[] = "/proc/net/route"; + FILE *stream; + char line[256]; + int ln; + + *netdev_name = NULL; + stream = fopen(fn, "r"); + if (stream == NULL) { + VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno)); + return errno; + } + + ln = 0; + while (fgets(line, sizeof line, stream)) { + if (++ln >= 2) { + char iface[17]; + uint32_t dest, gateway, mask; + int refcnt, metric, mtu; + unsigned int flags, use, window, irtt; + + if (sscanf(line, + "%16s %"SCNx32" %"SCNx32" %04X %d %u %d %"SCNx32 + " %d %u %u\n", + iface, &dest, &gateway, &flags, &refcnt, + &use, &metric, &mask, &mtu, &window, &irtt) != 11) { + + VLOG_WARN_RL(&rl, "%s: could not parse line %d: 
%s", + fn, ln, line); + continue; + } + if (!(flags & RTF_UP)) { + /* Skip routes that aren't up. */ + continue; + } + + /* The output of 'dest', 'mask', and 'gateway' were given in + * network byte order, so we don't need need any endian + * conversions here. */ + if ((dest & mask) == (host->s_addr & mask)) { + if (!gateway) { + /* The host is directly reachable. */ + next_hop->s_addr = 0; + } else { + /* To reach the host, we must go through a gateway. */ + next_hop->s_addr = gateway; + } + *netdev_name = xstrdup(iface); + fclose(stream); + return 0; + } + } + } + + fclose(stream); + return ENXIO; +} + /* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be * successfully retrieved, it stores the corresponding MAC address in 'mac' and * returns 0. Otherwise, it returns a positive errno value; in particular, @@ -1282,6 +1401,7 @@ const struct netdev_class netdev_linux_class = { netdev_linux_set_in4, netdev_linux_get_in6, netdev_linux_add_router, + netdev_linux_get_next_hop, netdev_linux_arp_lookup, netdev_linux_update_flags, @@ -1325,6 +1445,7 @@ const struct netdev_class netdev_tap_class = { netdev_linux_set_in4, netdev_linux_get_in6, netdev_linux_add_router, + netdev_linux_get_next_hop, netdev_linux_arp_lookup, netdev_linux_update_flags, @@ -1336,6 +1457,17 @@ const struct netdev_class netdev_tap_class = { static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats) { + /* Policy for RTNLGRP_LINK messages. + * + * There are *many* more fields in these messages, but currently we only + * care about these fields. 
*/ static const struct nl_policy rtnlgrp_link_policy[] = { [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true, .min_len = sizeof(struct rtnl_link_stats) }, }; + + static struct nl_sock *rtnl_sock; struct ofpbuf request; struct ofpbuf *reply; @@ -1374,6 +1506,7 @@ get_stats_via_netlink(int ifindex, struct netdev_stats *stats) if (!attrs[IFLA_STATS]) { VLOG_WARN_RL(&rl, "RTM_GETLINK reply lacks stats"); + ofpbuf_delete(reply); return EPROTO; } @@ -1400,6 +1533,8 @@ get_stats_via_netlink(int ifindex, struct netdev_stats *stats) stats->tx_heartbeat_errors = rtnl_stats->tx_heartbeat_errors; stats->tx_window_errors = rtnl_stats->tx_window_errors; + ofpbuf_delete(reply); + return 0; } @@ -1593,131 +1728,19 @@ netdev_linux_do_ioctl(const struct netdev *netdev, struct ifreq *ifr, } return 0; } - -/* rtnetlink socket. */ -static struct nl_sock *notify_sock; - -/* All registered notifiers. */ -static struct list all_notifiers = LIST_INITIALIZER(&all_notifiers); - -static void rtnetlink_report_change(const struct nlmsghdr *, - const struct ifinfomsg *, - struct nlattr *attrs[]); -static void rtnetlink_report_notify_error(void); - -int -rtnetlink_notifier_register(struct rtnetlink_notifier *notifier, - rtnetlink_notify_func *cb, void *aux) -{ - if (!notify_sock) { - int error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, - &notify_sock); - if (error) { - VLOG_WARN("could not create rtnetlink socket: %s", - strerror(error)); - return error; - } - } else { - /* Catch up on notification work so that the new notifier won't - * receive any stale notifications.
*/ - rtnetlink_notifier_run(); - } - - list_push_back(&all_notifiers, &notifier->node); - notifier->cb = cb; - notifier->aux = aux; - return 0; -} - -void -rtnetlink_notifier_unregister(struct rtnetlink_notifier *notifier) -{ - list_remove(&notifier->node); - if (list_is_empty(&all_notifiers)) { - nl_sock_destroy(notify_sock); - notify_sock = NULL; - } -} - -void -rtnetlink_notifier_run(void) -{ - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - - if (!notify_sock) { - return; - } - - for (;;) { - struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; - struct ofpbuf *buf; - int error; - error = nl_sock_recv(notify_sock, &buf, false); - if (!error) { - if (nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), - rtnlgrp_link_policy, - attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { - struct ifinfomsg *ifinfo; - - ifinfo = (void *) ((char *) buf->data + NLMSG_HDRLEN); - rtnetlink_report_change(buf->data, ifinfo, attrs); - } else { - VLOG_WARN_RL(&rl, "received bad rtnl message"); - rtnetlink_report_notify_error(); - } - ofpbuf_delete(buf); - } else if (error == EAGAIN) { - return; - } else { - if (error == ENOBUFS) { - VLOG_WARN_RL(&rl, "rtnetlink receive buffer overflowed"); - } else { - VLOG_WARN_RL(&rl, "error reading rtnetlink socket: %s", - strerror(error)); - } - rtnetlink_report_notify_error(); - } - } -} - -void -rtnetlink_notifier_wait(void) -{ - if (notify_sock) { - nl_sock_wait(notify_sock, POLLIN); - } -} - -static void -rtnetlink_report_change(const struct nlmsghdr *nlmsg, - const struct ifinfomsg *ifinfo, - struct nlattr *attrs[]) -{ - struct rtnetlink_notifier *notifier; - struct rtnetlink_change change; - - COVERAGE_INC(rtnetlink_changed); - - change.nlmsg_type = nlmsg->nlmsg_type; - change.ifi_index = ifinfo->ifi_index; - change.ifname = nl_attr_get_string(attrs[IFLA_IFNAME]); - change.master_ifindex = (attrs[IFLA_MASTER] - ?
nl_attr_get_u32(attrs[IFLA_MASTER]) : 0); - - LIST_FOR_EACH (notifier, struct rtnetlink_notifier, node, - &all_notifiers) { - notifier->cb(&change, notifier->aux); - } -} - -static void -rtnetlink_report_notify_error(void) +static int +netdev_linux_get_ipv4(const struct netdev *netdev, struct in_addr *ip, + int cmd, const char *cmd_name) { - struct rtnetlink_notifier *notifier; + struct ifreq ifr; + int error; - LIST_FOR_EACH (notifier, struct rtnetlink_notifier, node, - &all_notifiers) { - notifier->cb(NULL, notifier->aux); + ifr.ifr_addr.sa_family = AF_INET; + error = netdev_linux_do_ioctl(netdev, &ifr, cmd, cmd_name); + if (!error) { + const struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr; + *ip = sin->sin_addr; } + return error; }