X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fnetdev-linux.c;h=2941bf98e32ad1d228f71e6f0675d1c4b52c9a55;hb=b5e80aecc4b10f73196f272598bac0644f196471;hp=d45349b29b7f3f84229f32cbf5c33daa3c66888a;hpb=88258e0034cc7ca6ffde0974f1fb92d26289ad6a;p=openvswitch diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index d45349b2..2941bf98 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -75,7 +75,7 @@ #endif static struct rtnetlink_notifier netdev_linux_cache_notifier; -static struct shash cache_map = SHASH_INITIALIZER(&cache_map); +static int cache_notifier_refcount; enum { VALID_IFINDEX = 1 << 0, @@ -91,6 +91,10 @@ struct tap_state { int fd; }; +struct patch_state { + char *peer; +}; + struct netdev_dev_linux { struct netdev_dev netdev_dev; @@ -107,6 +111,7 @@ struct netdev_dev_linux { union { struct tap_state tap; + struct patch_state patch; } state; }; @@ -123,10 +128,12 @@ struct gre_config { uint32_t remote_ip; uint32_t in_key; uint32_t out_key; + uint8_t tos; bool have_in_key; bool have_out_key; bool in_csum; bool out_csum; + bool pmtud; }; static struct { @@ -150,6 +157,7 @@ static struct rtnetlink_notifier netdev_linux_poll_notifier; * additional log messages. 
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); +static int if_up(const char *name); static int destroy_gre(const char *name); static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *, int cmd, const char *cmd_name); @@ -175,7 +183,7 @@ netdev_dev_linux_cast(const struct netdev_dev *netdev_dev) { const char *type = netdev_dev_get_type(netdev_dev); assert(!strcmp(type, "system") || !strcmp(type, "tap") - || !strcmp(type, "gre")); + || !strcmp(type, "gre") || !strcmp(type, "patch")); return CONTAINER_OF(netdev_dev, struct netdev_dev_linux, netdev_dev); } @@ -184,7 +192,7 @@ netdev_linux_cast(const struct netdev *netdev) { const char *type = netdev_get_type(netdev); assert(!strcmp(type, "system") || !strcmp(type, "tap") - || !strcmp(type, "gre")); + || !strcmp(type, "gre") || !strcmp(type, "patch")); return CONTAINER_OF(netdev, struct netdev_linux, netdev); } @@ -216,28 +224,34 @@ netdev_linux_wait(void) static void netdev_linux_cache_cb(const struct rtnetlink_change *change, - void *aux UNUSED) + void *aux OVS_UNUSED) { struct netdev_dev_linux *dev; if (change) { - dev = shash_find_data(&cache_map, change->ifname); - if (dev) { + struct netdev_dev *base_dev = netdev_dev_from_name(change->ifname); + if (base_dev) { + dev = netdev_dev_linux_cast(base_dev); dev->cache_valid = 0; } } else { + struct shash device_shash; struct shash_node *node; - SHASH_FOR_EACH (node, &cache_map) { + + shash_init(&device_shash); + netdev_dev_get_devices(&netdev_linux_class, &device_shash); + SHASH_FOR_EACH (node, &device_shash) { dev = node->data; dev->cache_valid = 0; } + shash_destroy(&device_shash); } } /* The arguments are marked as unused to prevent warnings on platforms where * the Netlink interface isn't supported. 
*/ static int -setup_gre_netlink(const char *name UNUSED, struct gre_config *config UNUSED, - bool create UNUSED) +setup_gre_netlink(const char *name OVS_UNUSED, + struct gre_config *config OVS_UNUSED, bool create OVS_UNUSED) { #ifdef GRE_IOCTL_ONLY return EOPNOTSUPP; @@ -250,7 +264,6 @@ setup_gre_netlink(const char *name UNUSED, struct gre_config *config UNUSED, struct nlattr *info_data_hdr; uint16_t iflags = 0; uint16_t oflags = 0; - uint8_t pmtudisc = 0; VLOG_DBG("%s: attempting to create gre device using netlink", name); @@ -309,9 +322,9 @@ setup_gre_netlink(const char *name UNUSED, struct gre_config *config UNUSED, nl_msg_put_u16(&request, IFLA_GRE_OFLAGS, oflags); nl_msg_put_u32(&request, IFLA_GRE_LOCAL, config->local_ip); nl_msg_put_u32(&request, IFLA_GRE_REMOTE, config->remote_ip); - nl_msg_put_u8(&request, IFLA_GRE_PMTUDISC, pmtudisc); - nl_msg_put_u8(&request, IFLA_GRE_TTL, 0); - nl_msg_put_u8(&request, IFLA_GRE_TOS, 0); + nl_msg_put_u8(&request, IFLA_GRE_PMTUDISC, config->pmtud); + nl_msg_put_u8(&request, IFLA_GRE_TTL, IPDEFTTL); + nl_msg_put_u8(&request, IFLA_GRE_TOS, config->tos); info_data_hdr->nla_len = (char *)ofpbuf_tail(&request) - (char *)info_data_hdr; @@ -350,6 +363,8 @@ setup_gre_ioctl(const char *name, struct gre_config *config, bool create) p.iph.protocol = IPPROTO_GRE; p.iph.saddr = config->local_ip; p.iph.daddr = config->remote_ip; + p.iph.ttl = IPDEFTTL; + p.iph.tos = config->tos; if (config->have_in_key) { p.i_flags |= GRE_KEY; @@ -367,6 +382,10 @@ setup_gre_ioctl(const char *name, struct gre_config *config, bool create) p.o_flags |= GRE_CSUM; } + if (config->pmtud) { + p.iph.frag_off = htons(IP_DONT_FRAGMENT); + } + strncpy(ifr.ifr_name, create ? GRE_IOCTL_DEVICE : name, IFNAMSIZ); ifr.ifr_ifru.ifru_data = (void *)&p; @@ -391,7 +410,7 @@ setup_gre_ioctl(const char *name, struct gre_config *config, bool create) /* The arguments are marked as unused to prevent warnings on platforms where * the Netlink interface isn't supported. 
*/ static bool -check_gre_device_netlink(const char *name UNUSED) +check_gre_device_netlink(const char *name OVS_UNUSED) { #ifdef GRE_IOCTL_ONLY return false; @@ -480,6 +499,7 @@ setup_gre(const char *name, const struct shash *args, bool create) memset(&config, 0, sizeof config); config.in_csum = true; config.out_csum = true; + config.pmtud = true; SHASH_FOR_EACH (node, args) { if (!strcmp(node->name, "remote_ip")) { @@ -505,11 +525,17 @@ setup_gre(const char *name, const struct shash *args, bool create) } else if (!strcmp(node->name, "out_key")) { config.have_out_key = true; config.out_key = htonl(atoi(node->data)); + } else if (!strcmp(node->name, "tos")) { + config.tos = atoi(node->data); } else if (!strcmp(node->name, "csum")) { if (!strcmp(node->data, "false")) { config.in_csum = false; config.out_csum = false; } + } else if (!strcmp(node->name, "pmtud")) { + if (!strcmp(node->data, "false")) { + config.pmtud = false; + } } else { VLOG_WARN("unknown gre argument '%s'", node->name); } @@ -561,9 +587,109 @@ error: return error; } +/* A veth may be created using the 'command' "+<name>,<peer>". A veth may + * be destroyed by using the 'command' "-<name>", where <name> can be + * either side of the device. + */ +static int +modify_veth(const char *format, ...) +{ + FILE *veth_file; + va_list args; + int retval; + + veth_file = fopen("/sys/class/net/veth_pairs", "w"); + if (!veth_file) { + VLOG_WARN_RL(&rl, "could not open veth device. Are you running a " + "supported XenServer with the kernel module loaded?"); + return ENODEV; + } + setvbuf(veth_file, NULL, _IONBF, 0); + + va_start(args, format); + retval = vfprintf(veth_file, format, args); + va_end(args); + + fclose(veth_file); + if (retval < 0) { + VLOG_WARN_RL(&rl, "could not destroy patch: %s", strerror(errno)); + return errno; + } + + return 0; +} + +static int +create_patch(const char *name, const char *peer) +{ + int retval; + struct netdev_dev *peer_nd; + + + /* Only create the veth if the peer didn't already do it. 
*/ + peer_nd = netdev_dev_from_name(peer); + if (peer_nd) { + if (!strcmp("patch", netdev_dev_get_type(peer_nd))) { + struct netdev_dev_linux *ndl = netdev_dev_linux_cast(peer_nd); + if (!strcmp(name, ndl->state.patch.peer)) { + return 0; + } else { + VLOG_WARN_RL(&rl, "peer '%s' already paired with '%s'", + peer, ndl->state.patch.peer); + return EINVAL; + } + } else { + VLOG_WARN_RL(&rl, "peer '%s' exists and is not a patch", peer); + return EINVAL; + } + } + + retval = modify_veth("+%s,%s", name, peer); + if (retval) { + return retval; + } + + retval = if_up(name); + if (retval) { + return retval; + } + + retval = if_up(peer); + if (retval) { + return retval; + } + + return 0; +} + +static int +setup_patch(const char *name, const struct shash *args, char **peer_) +{ + const char *peer; + + peer = shash_find_data(args, "peer"); + if (!peer) { + VLOG_WARN("patch type requires valid 'peer' argument"); + return EINVAL; + } + + if (shash_count(args) > 1) { + VLOG_WARN("patch type takes only a 'peer' argument"); + return EINVAL; + } + + if (strlen(peer) >= IFNAMSIZ) { + VLOG_WARN_RL(&rl, "patch 'peer' arg too long"); + return EINVAL; + } + + *peer_ = xstrdup(peer); + return create_patch(name, peer); +} + /* Creates the netdev device of 'type' with 'name'. 
*/ static int -netdev_linux_create_system(const char *name, const char *type UNUSED, +netdev_linux_create_system(const char *name, const char *type OVS_UNUSED, const struct shash *args, struct netdev_dev **netdev_devp) { struct netdev_dev_linux *netdev_dev; @@ -573,18 +699,18 @@ netdev_linux_create_system(const char *name, const char *type UNUSED, VLOG_WARN("%s: arguments for system devices should be empty", name); } - if (shash_is_empty(&cache_map)) { + if (!cache_notifier_refcount) { error = rtnetlink_notifier_register(&netdev_linux_cache_notifier, netdev_linux_cache_cb, NULL); if (error) { return error; } } + cache_notifier_refcount++; netdev_dev = xzalloc(sizeof *netdev_dev); - netdev_dev->shash_node = shash_add(&cache_map, name, &netdev_dev); - netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_linux_class); + *netdev_devp = &netdev_dev->netdev_dev; return 0; } @@ -596,7 +722,7 @@ netdev_linux_create_system(const char *name, const char *type UNUSED, * buffers, across all readers. Therefore once data is read it will * be unavailable to other reads for tap devices. 
*/ static int -netdev_linux_create_tap(const char *name, const char *type UNUSED, +netdev_linux_create_tap(const char *name, const char *type OVS_UNUSED, const struct shash *args, struct netdev_dev **netdev_devp) { struct netdev_dev_linux *netdev_dev; @@ -663,7 +789,7 @@ if_up(const char *name) } static int -netdev_linux_create_gre(const char *name, const char *type UNUSED, +netdev_linux_create_gre(const char *name, const char *type OVS_UNUSED, const struct shash *args, struct netdev_dev **netdev_devp) { struct netdev_dev_linux *netdev_dev; @@ -690,6 +816,28 @@ error: return error; } +static int +netdev_linux_create_patch(const char *name, const char *type OVS_UNUSED, + const struct shash *args, struct netdev_dev **netdev_devp) +{ + struct netdev_dev_linux *netdev_dev; + char *peer = NULL; + int error; + + error = setup_patch(name, args, &peer); + if (error) { + free(peer); + return error; + } + + netdev_dev = xzalloc(sizeof *netdev_dev); + netdev_dev->state.patch.peer = peer; + netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_patch_class); + *netdev_devp = &netdev_dev->netdev_dev; + + return 0; +} + static int netdev_linux_reconfigure_gre(struct netdev_dev *netdev_dev_, const struct shash *args) @@ -702,7 +850,7 @@ netdev_linux_reconfigure_gre(struct netdev_dev *netdev_dev_, /* The arguments are marked as unused to prevent warnings on platforms where * the Netlink interface isn't supported. */ static int -destroy_gre_netlink(const char *name UNUSED) +destroy_gre_netlink(const char *name OVS_UNUSED) { #ifdef GRE_IOCTL_ONLY return EOPNOTSUPP; @@ -779,6 +927,19 @@ destroy_gre(const char *name) } } +static void +destroy_patch(struct netdev_dev_linux *netdev_dev) +{ + const char *name = netdev_dev_get_name(&netdev_dev->netdev_dev); + struct patch_state *state = &netdev_dev->state.patch; + + /* Only destroy veth if 'peer' doesn't exist as an existing netdev. 
*/ + if (!netdev_dev_from_name(state->peer)) { + modify_veth("-%s", name); + } + free(state->peer); +} + /* Destroys the netdev device 'netdev_dev_'. */ static void netdev_linux_destroy(struct netdev_dev *netdev_dev_) @@ -787,15 +948,17 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_) const char *type = netdev_dev_get_type(netdev_dev_); if (!strcmp(type, "system")) { - shash_delete(&cache_map, netdev_dev->shash_node); + cache_notifier_refcount--; - if (shash_is_empty(&cache_map)) { + if (!cache_notifier_refcount) { rtnetlink_notifier_unregister(&netdev_linux_cache_notifier); } } else if (!strcmp(type, "tap")) { destroy_tap(netdev_dev); } else if (!strcmp(type, "gre")) { destroy_gre(netdev_dev_get_name(&netdev_dev->netdev_dev)); + } else if (!strcmp(type, "patch")) { + destroy_patch(netdev_dev); } free(netdev_dev_); @@ -812,6 +975,7 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype, /* Allocate network device. */ netdev = xzalloc(sizeof *netdev); + netdev->fd = -1; netdev_init(&netdev->netdev, netdev_dev_); error = netdev_get_flags(&netdev->netdev, &flags); @@ -868,8 +1032,6 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype, if (error) { goto error; } - } else { - netdev->fd = -1; } *netdevp = &netdev->netdev; @@ -886,7 +1048,7 @@ netdev_linux_close(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); - if (netdev->fd >= 0 && strcmp(netdev_get_type(netdev_), "tap")) { + if (netdev->fd > 0 && strcmp(netdev_get_type(netdev_), "tap")) { close(netdev->fd); } free(netdev); @@ -1218,9 +1380,7 @@ netdev_linux_get_stats(const struct netdev *netdev_, COVERAGE_INC(netdev_get_stats); if (!(netdev_dev->cache_valid & VALID_IS_INTERNAL)) { - netdev_dev->is_internal = !strcmp(netdev_get_type(netdev_), - "tap"); - + netdev_dev->is_internal = !strcmp(netdev_get_type(netdev_), "tap"); if (!netdev_dev->is_internal) { struct ethtool_drvinfo drvinfo; @@ -1261,7 +1421,7 @@ netdev_linux_get_stats(const struct netdev 
*netdev_, * will appear to be swapped relative to the other ports since we are the * one sending the data, not a remote computer. For consistency, we swap * them back here. */ - if (netdev_dev->is_internal) { + if (!error && netdev_dev->is_internal) { stats->rx_packets = raw_stats.tx_packets; stats->tx_packets = raw_stats.rx_packets; stats->rx_bytes = raw_stats.tx_bytes; @@ -1543,8 +1703,8 @@ netdev_linux_set_policing(struct netdev *netdev, COVERAGE_INC(netdev_set_policing); if (kbits_rate) { if (!kbits_burst) { - /* Default to 10 kilobits if not specified. */ - kbits_burst = 10; + /* Default to 1000 kilobits if not specified. */ + kbits_burst = 1000; } /* xxx This should be more careful about only adding if it @@ -1704,7 +1864,7 @@ do_set_addr(struct netdev *netdev, /* Adds 'router' as a default IP gateway. */ static int -netdev_linux_add_router(struct netdev *netdev UNUSED, struct in_addr router) +netdev_linux_add_router(struct netdev *netdev OVS_UNUSED, struct in_addr router) { struct in_addr any = { INADDR_ANY }; struct rtentry rt; @@ -1871,7 +2031,7 @@ poll_notify(struct list *list) static void netdev_linux_poll_cb(const struct rtnetlink_change *change, - void *aux UNUSED) + void *aux OVS_UNUSED) { if (change) { struct list *list = shash_find_data(&netdev_linux_notifiers, @@ -2085,6 +2245,54 @@ const struct netdev_class netdev_gre_class = { netdev_linux_poll_add, netdev_linux_poll_remove, }; + +const struct netdev_class netdev_patch_class = { + "patch", + + netdev_linux_init, + netdev_linux_run, + netdev_linux_wait, + + netdev_linux_create_patch, + netdev_linux_destroy, + NULL, /* reconfigure */ + + netdev_linux_open, + netdev_linux_close, + + NULL, /* enumerate */ + + netdev_linux_recv, + netdev_linux_recv_wait, + netdev_linux_drain, + + netdev_linux_send, + netdev_linux_send_wait, + + netdev_linux_set_etheraddr, + netdev_linux_get_etheraddr, + netdev_linux_get_mtu, + netdev_linux_get_ifindex, + netdev_linux_get_carrier, + netdev_linux_get_stats, + + 
netdev_linux_get_features, + netdev_linux_set_advertisements, + netdev_linux_get_vlan_vid, + netdev_linux_set_policing, + + netdev_linux_get_in4, + netdev_linux_set_in4, + netdev_linux_get_in6, + netdev_linux_add_router, + netdev_linux_get_next_hop, + netdev_linux_arp_lookup, + + netdev_linux_update_flags, + + netdev_linux_poll_add, + netdev_linux_poll_remove, +}; static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats)