X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fnetdev-linux.c;h=736b588a8a11638555e83a047960402f20ac8755;hb=ac3005057c9baf5fab366e31e2c18b26685a73e2;hp=53eb28bc61e5bc0e53b4518a009c6086636f20e0;hpb=0b0544d706d10516d3122fbcce8f1dc1dec6cb92;p=openvswitch diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 53eb28bc..736b588a 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -75,7 +75,7 @@ #endif static struct rtnetlink_notifier netdev_linux_cache_notifier; -static struct shash cache_map = SHASH_INITIALIZER(&cache_map); +static int cache_notifier_refcount; enum { VALID_IFINDEX = 1 << 0, @@ -112,12 +112,7 @@ struct netdev_dev_linux { struct netdev_linux { struct netdev netdev; - - /* File descriptors. For ordinary network devices, the two fds below are - * the same; for tap devices, they differ. */ - int netdev_fd; /* Network device. */ - int tap_fd; /* TAP character device, if any, otherwise the - * network device. */ + int fd; }; /* An AF_INET socket (used for ioctl operations). */ @@ -128,10 +123,12 @@ struct gre_config { uint32_t remote_ip; uint32_t in_key; uint32_t out_key; + uint8_t tos; bool have_in_key; bool have_out_key; bool in_csum; bool out_csum; + bool pmtud; }; static struct { @@ -221,28 +218,34 @@ netdev_linux_wait(void) static void netdev_linux_cache_cb(const struct rtnetlink_change *change, - void *aux UNUSED) + void *aux OVS_UNUSED) { struct netdev_dev_linux *dev; if (change) { - dev = shash_find_data(&cache_map, change->ifname); - if (dev) { + struct netdev_dev *base_dev = netdev_dev_from_name(change->ifname); + if (base_dev) { + dev = netdev_dev_linux_cast(base_dev); dev->cache_valid = 0; } } else { + struct shash device_shash; struct shash_node *node; - SHASH_FOR_EACH (node, &cache_map) { + + shash_init(&device_shash); + netdev_dev_get_devices(&netdev_linux_class, &device_shash); + SHASH_FOR_EACH (node, &device_shash) { dev = node->data; dev->cache_valid = 0; } + shash_destroy(&device_shash); } } /* The arguments are marked as unused to prevent warnings on platforms where * the Netlink interface isn't supported. */ static int -setup_gre_netlink(const char *name UNUSED, struct gre_config *config UNUSED, - bool create UNUSED) +setup_gre_netlink(const char *name OVS_UNUSED, + struct gre_config *config OVS_UNUSED, bool create OVS_UNUSED) { #ifdef GRE_IOCTL_ONLY return EOPNOTSUPP; @@ -255,7 +258,6 @@ setup_gre_netlink(const char *name UNUSED, struct gre_config *config UNUSED, struct nlattr *info_data_hdr; uint16_t iflags = 0; uint16_t oflags = 0; - uint8_t pmtudisc = 0; VLOG_DBG("%s: attempting to create gre device using netlink", name); @@ -314,9 +316,9 @@ setup_gre_netlink(const char *name UNUSED, struct gre_config *config UNUSED, nl_msg_put_u16(&request, IFLA_GRE_OFLAGS, oflags); nl_msg_put_u32(&request, IFLA_GRE_LOCAL, config->local_ip); nl_msg_put_u32(&request, IFLA_GRE_REMOTE, config->remote_ip); - nl_msg_put_u8(&request, IFLA_GRE_PMTUDISC, pmtudisc); - nl_msg_put_u8(&request, IFLA_GRE_TTL, 0); - nl_msg_put_u8(&request, IFLA_GRE_TOS, 0); + nl_msg_put_u8(&request, IFLA_GRE_PMTUDISC, config->pmtud); + nl_msg_put_u8(&request, IFLA_GRE_TTL, IPDEFTTL); + nl_msg_put_u8(&request, IFLA_GRE_TOS, config->tos); info_data_hdr->nla_len = (char *)ofpbuf_tail(&request) - (char *)info_data_hdr; @@ -355,6 +357,8 @@ setup_gre_ioctl(const char *name, struct gre_config *config, bool create) p.iph.protocol = IPPROTO_GRE; p.iph.saddr = config->local_ip; p.iph.daddr = config->remote_ip; + p.iph.ttl = IPDEFTTL; + p.iph.tos = config->tos; if (config->have_in_key) { p.i_flags |= GRE_KEY; @@ -372,6 +376,10 @@ setup_gre_ioctl(const char *name, struct gre_config *config, bool create) p.o_flags |= GRE_CSUM; } + if (config->pmtud) { + p.iph.frag_off = htons(IP_DONT_FRAGMENT); + } + strncpy(ifr.ifr_name, create ? GRE_IOCTL_DEVICE : name, IFNAMSIZ); ifr.ifr_ifru.ifru_data = (void *)&p; @@ -396,7 +404,7 @@ setup_gre_ioctl(const char *name, struct gre_config *config, bool create) /* The arguments are marked as unused to prevent warnings on platforms where * the Netlink interface isn't supported. */ static bool -check_gre_device_netlink(const char *name UNUSED) +check_gre_device_netlink(const char *name OVS_UNUSED) { #ifdef GRE_IOCTL_ONLY return false; @@ -485,6 +493,7 @@ setup_gre(const char *name, const struct shash *args, bool create) memset(&config, 0, sizeof config); config.in_csum = true; config.out_csum = true; + config.pmtud = true; SHASH_FOR_EACH (node, args) { if (!strcmp(node->name, "remote_ip")) { @@ -510,11 +519,17 @@ setup_gre(const char *name, const struct shash *args, bool create) } else if (!strcmp(node->name, "out_key")) { config.have_out_key = true; config.out_key = htonl(atoi(node->data)); + } else if (!strcmp(node->name, "tos")) { + config.tos = atoi(node->data); } else if (!strcmp(node->name, "csum")) { if (!strcmp(node->data, "false")) { config.in_csum = false; config.out_csum = false; } + } else if (!strcmp(node->name, "pmtud")) { + if (!strcmp(node->data, "false")) { + config.pmtud = false; + } } else { VLOG_WARN("unknown gre argument '%s'", node->name); } @@ -568,7 +583,7 @@ error: /* Creates the netdev device of 'type' with 'name'. */ static int -netdev_linux_create_system(const char *name, const char *type UNUSED, +netdev_linux_create_system(const char *name, const char *type OVS_UNUSED, const struct shash *args, struct netdev_dev **netdev_devp) { struct netdev_dev_linux *netdev_dev; @@ -578,24 +593,30 @@ netdev_linux_create_system(const char *name, const char *type UNUSED, VLOG_WARN("%s: arguments for system devices should be empty", name); } - if (shash_is_empty(&cache_map)) { + if (!cache_notifier_refcount) { error = rtnetlink_notifier_register(&netdev_linux_cache_notifier, netdev_linux_cache_cb, NULL); if (error) { return error; } } + cache_notifier_refcount++; netdev_dev = xzalloc(sizeof *netdev_dev); - netdev_dev->shash_node = shash_add(&cache_map, name, &netdev_dev); - netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_linux_class); + *netdev_devp = &netdev_dev->netdev_dev; return 0; } +/* For most types of netdevs we open the device for each call of + * netdev_open(). However, this is not the case with tap devices, + * since it is only possible to open the device once. In this + * situation we share a single file descriptor, and consequently + * buffers, across all readers. Therefore once data is read it will + * be unavailable to other reads for tap devices. */ static int -netdev_linux_create_tap(const char *name, const char *type UNUSED, +netdev_linux_create_tap(const char *name, const char *type OVS_UNUSED, const struct shash *args, struct netdev_dev **netdev_devp) { struct netdev_dev_linux *netdev_dev; @@ -662,7 +683,7 @@ if_up(const char *name) } static int -netdev_linux_create_gre(const char *name, const char *type UNUSED, +netdev_linux_create_gre(const char *name, const char *type OVS_UNUSED, const struct shash *args, struct netdev_dev **netdev_devp) { struct netdev_dev_linux *netdev_dev; @@ -701,7 +722,7 @@ netdev_linux_reconfigure_gre(struct netdev_dev *netdev_dev_, /* The arguments are marked as unused to prevent warnings on platforms where * the Netlink interface isn't supported. */ static int -destroy_gre_netlink(const char *name UNUSED) +destroy_gre_netlink(const char *name OVS_UNUSED) { #ifdef GRE_IOCTL_ONLY return EOPNOTSUPP; @@ -786,9 +807,9 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_) const char *type = netdev_dev_get_type(netdev_dev_); if (!strcmp(type, "system")) { - shash_delete(&cache_map, netdev_dev->shash_node); + cache_notifier_refcount--; - if (shash_is_empty(&cache_map)) { + if (!cache_notifier_refcount) { rtnetlink_notifier_unregister(&netdev_linux_cache_notifier); } } else if (!strcmp(type, "tap")) { @@ -801,56 +822,27 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_) } static int -netdev_linux_open(struct netdev_dev *netdev_dev, int ethertype, +netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype, struct netdev **netdevp) { + struct netdev_dev_linux *netdev_dev = netdev_dev_linux_cast(netdev_dev_); struct netdev_linux *netdev; enum netdev_flags flags; int error; /* Allocate network device. */ netdev = xzalloc(sizeof *netdev); - netdev_init(&netdev->netdev, netdev_dev); - netdev->netdev_fd = -1; - netdev->tap_fd = -1; - - if (!strcmp(netdev_dev_get_type(netdev_dev), "tap")) { - static const char tap_dev[] = "/dev/net/tun"; - struct ifreq ifr; - - /* Open tap device. */ - netdev->tap_fd = open(tap_dev, O_RDWR); - if (netdev->tap_fd < 0) { - error = errno; - VLOG_WARN("opening \"%s\" failed: %s", tap_dev, strerror(error)); - goto error; - } - - /* Create tap device. */ - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - strncpy(ifr.ifr_name, netdev_dev_get_name(netdev_dev), - sizeof ifr.ifr_name); - if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) { - VLOG_WARN("%s: creating tap device failed: %s", - netdev_dev_get_name(netdev_dev), - strerror(errno)); - error = errno; - goto error; - } - - /* Make non-blocking. */ - error = set_nonblocking(netdev->tap_fd); - if (error) { - goto error; - } - } + netdev->fd = -1; + netdev_init(&netdev->netdev, netdev_dev_); error = netdev_get_flags(&netdev->netdev, &flags); if (error == ENODEV) { goto error; } - if (netdev->tap_fd >= 0 || ethertype != NETDEV_ETH_TYPE_NONE) { + if (!strcmp(netdev_dev_get_type(netdev_dev_), "tap")) { + netdev->fd = netdev_dev->state.tap.fd; + } else if (ethertype != NETDEV_ETH_TYPE_NONE) { struct sockaddr_ll sll; int protocol; int ifindex; @@ -859,17 +851,14 @@ netdev_linux_open(struct netdev_dev *netdev_dev, int ethertype, protocol = (ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2 : ethertype); - netdev->netdev_fd = socket(PF_PACKET, SOCK_RAW, htons(protocol)); - if (netdev->netdev_fd < 0) { + netdev->fd = socket(PF_PACKET, SOCK_RAW, htons(protocol)); + if (netdev->fd < 0) { error = errno; goto error; } - if (netdev->tap_fd < 0) { - netdev->tap_fd = netdev->netdev_fd; - } /* Set non-blocking mode. */ - error = set_nonblocking(netdev->netdev_fd); + error = set_nonblocking(netdev->fd); if (error) { goto error; } @@ -884,10 +873,10 @@ netdev_linux_open(struct netdev_dev *netdev_dev, int ethertype, memset(&sll, 0, sizeof sll); sll.sll_family = AF_PACKET; sll.sll_ifindex = ifindex; - if (bind(netdev->netdev_fd, + if (bind(netdev->fd, (struct sockaddr *) &sll, sizeof sll) < 0) { error = errno; - VLOG_ERR("bind to %s failed: %s", netdev_dev_get_name(netdev_dev), + VLOG_ERR("bind to %s failed: %s", netdev_dev_get_name(netdev_dev_), strerror(error)); goto error; } @@ -896,7 +885,7 @@ netdev_linux_open(struct netdev_dev *netdev_dev, int ethertype, * packets of the requested type on all system interfaces. We do not * want to receive that data, but there is no way to avoid it. So we * must now drain out the receive queue. */ - error = drain_rcvbuf(netdev->netdev_fd); + error = drain_rcvbuf(netdev->fd); if (error) { goto error; } @@ -916,11 +905,8 @@ netdev_linux_close(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); - if (netdev->netdev_fd >= 0) { - close(netdev->netdev_fd); - } - if (netdev->tap_fd >= 0 && netdev->netdev_fd != netdev->tap_fd) { - close(netdev->tap_fd); + if (netdev->fd > 0 && strcmp(netdev_get_type(netdev_), "tap")) { + close(netdev->fd); } free(netdev); } @@ -952,13 +938,13 @@ netdev_linux_recv(struct netdev *netdev_, void *data, size_t size) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); - if (netdev->tap_fd < 0) { + if (netdev->fd < 0) { /* Device was opened with NETDEV_ETH_TYPE_NONE. */ return -EAGAIN; } for (;;) { - ssize_t retval = read(netdev->tap_fd, data, size); + ssize_t retval = read(netdev->fd, data, size); if (retval >= 0) { return retval; } else if (errno != EINTR) { @@ -977,8 +963,8 @@ static void netdev_linux_recv_wait(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); - if (netdev->tap_fd >= 0) { - poll_fd_wait(netdev->tap_fd, POLLIN); + if (netdev->fd >= 0) { + poll_fd_wait(netdev->fd, POLLIN); } } @@ -987,19 +973,19 @@ static int netdev_linux_drain(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); - if (netdev->tap_fd < 0 && netdev->netdev_fd < 0) { + if (netdev->fd < 0) { return 0; - } else if (netdev->tap_fd != netdev->netdev_fd) { + } else if (!strcmp(netdev_get_type(netdev_), "tap")) { struct ifreq ifr; int error = netdev_linux_do_ioctl(netdev_get_name(netdev_), &ifr, SIOCGIFTXQLEN, "SIOCGIFTXQLEN"); if (error) { return error; } - drain_fd(netdev->tap_fd, ifr.ifr_qlen); + drain_fd(netdev->fd, ifr.ifr_qlen); return 0; } else { - return drain_rcvbuf(netdev->netdev_fd); + return drain_rcvbuf(netdev->fd); } } @@ -1019,12 +1005,12 @@ netdev_linux_send(struct netdev *netdev_, const void *data, size_t size) /* XXX should support sending even if 'ethertype' was NETDEV_ETH_TYPE_NONE. */ - if (netdev->tap_fd < 0) { + if (netdev->fd < 0) { return EPIPE; } for (;;) { - ssize_t retval = write(netdev->tap_fd, data, size); + ssize_t retval = write(netdev->fd, data, size); if (retval < 0) { /* The Linux AF_PACKET implementation never blocks waiting for room * for packets, instead returning ENOBUFS. Translate this into @@ -1059,10 +1045,10 @@ static void netdev_linux_send_wait(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); - if (netdev->tap_fd < 0 && netdev->netdev_fd < 0) { + if (netdev->fd < 0) { /* Nothing to do. */ - } else if (netdev->tap_fd == netdev->netdev_fd) { - poll_fd_wait(netdev->tap_fd, POLLOUT); + } else if (strcmp(netdev_get_type(netdev_), "tap")) { + poll_fd_wait(netdev->fd, POLLOUT); } else { /* TAP device always accepts packets.*/ poll_immediate_wake(); @@ -1251,9 +1237,7 @@ netdev_linux_get_stats(const struct netdev *netdev_, COVERAGE_INC(netdev_get_stats); if (!(netdev_dev->cache_valid & VALID_IS_INTERNAL)) { - netdev_dev->is_internal = !strcmp(netdev_get_type(netdev_), - "tap"); - + netdev_dev->is_internal = !strcmp(netdev_get_type(netdev_), "tap"); if (!netdev_dev->is_internal) { struct ethtool_drvinfo drvinfo; @@ -1294,7 +1278,7 @@ netdev_linux_get_stats(const struct netdev *netdev_, * will appear to be swapped relative to the other ports since we are the * one sending the data, not a remote computer. For consistency, we swap * them back here. */ - if (netdev_dev->is_internal) { + if (!error && netdev_dev->is_internal) { stats->rx_packets = raw_stats.tx_packets; stats->tx_packets = raw_stats.rx_packets; stats->rx_bytes = raw_stats.tx_bytes; @@ -1576,8 +1560,8 @@ netdev_linux_set_policing(struct netdev *netdev, COVERAGE_INC(netdev_set_policing); if (kbits_rate) { if (!kbits_burst) { - /* Default to 10 kilobits if not specified. */ - kbits_burst = 10; + /* Default to 1000 kilobits if not specified. */ + kbits_burst = 1000; } /* xxx This should be more careful about only adding if it @@ -1737,7 +1721,7 @@ do_set_addr(struct netdev *netdev, /* Adds 'router' as a default IP gateway. */ static int -netdev_linux_add_router(struct netdev *netdev UNUSED, struct in_addr router) +netdev_linux_add_router(struct netdev *netdev OVS_UNUSED, struct in_addr router) { struct in_addr any = { INADDR_ANY }; struct rtentry rt; @@ -1904,7 +1888,7 @@ poll_notify(struct list *list) static void netdev_linux_poll_cb(const struct rtnetlink_change *change, - void *aux UNUSED) + void *aux OVS_UNUSED) { if (change) { struct list *list = shash_find_data(&netdev_linux_notifiers,