X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fnetdev-linux.c;h=2fa05b66e3e2162f346a0369d1e2c641a3a15c76;hb=1e82e503c5358f8dce9eb2105448f0ec894d57bc;hp=c44d6baf69950f376ab8e927a90b752bef4838c1;hpb=8e46022197dd9f04a3e73f741482d98972f6b70a;p=openvswitch diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index c44d6baf..2fa05b66 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -81,15 +82,13 @@ enum { VALID_IN6 = 1 << 3, VALID_MTU = 1 << 4, VALID_CARRIER = 1 << 5, - VALID_IS_PSEUDO = 1 << 6 /* Represents is_internal and is_tap. */ + VALID_IS_PSEUDO = 1 << 6, /* Represents is_internal and is_tap. */ + VALID_POLICING = 1 << 7 }; struct tap_state { int fd; -}; - -struct patch_state { - char *peer; + bool opened; }; struct netdev_dev_linux { @@ -108,10 +107,11 @@ struct netdev_dev_linux { int carrier; bool is_internal; /* Is this an openvswitch internal device? */ bool is_tap; /* Is this a tuntap device? */ + uint32_t kbits_rate; /* Policing data. */ + uint32_t kbits_burst; union { struct tap_state tap; - struct patch_state patch; } state; }; @@ -239,123 +239,6 @@ netdev_linux_cache_cb(const struct rtnetlink_change *change, } } -static int -if_up(const char *name) -{ - struct ifreq ifr; - - strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); - ifr.ifr_flags = IFF_UP; - - if (ioctl(af_inet_sock, SIOCSIFFLAGS, &ifr) == -1) { - VLOG_DBG_RL(&rl, "%s: failed to bring device up: %s", - name, strerror(errno)); - return errno; - } - - return 0; -} - -/* A veth may be created using the 'command' "+,". A veth may - * be destroyed by using the 'command' "-", where can be - * either side of the device. - */ -static int -modify_veth(const char *format, ...) -{ - FILE *veth_file; - va_list args; - int retval; - - veth_file = fopen("/sys/class/net/veth_pairs", "w"); - if (!veth_file) { - VLOG_WARN_RL(&rl, "could not open veth device. Are you running a " - "supported XenServer with the kernel module loaded?"); - return ENODEV; - } - setvbuf(veth_file, NULL, _IONBF, 0); - - va_start(args, format); - retval = vfprintf(veth_file, format, args); - va_end(args); - - fclose(veth_file); - if (retval < 0) { - VLOG_WARN_RL(&rl, "could not destroy patch: %s", strerror(errno)); - return errno; - } - - return 0; -} - -static int -create_patch(const char *name, const char *peer) -{ - int retval; - struct netdev_dev *peer_nd; - - - /* Only create the veth if the peer didn't already do it. */ - peer_nd = netdev_dev_from_name(peer); - if (peer_nd) { - if (!strcmp("patch", netdev_dev_get_type(peer_nd))) { - struct netdev_dev_linux *ndl = netdev_dev_linux_cast(peer_nd); - if (!strcmp(name, ndl->state.patch.peer)) { - return 0; - } else { - VLOG_WARN_RL(&rl, "peer '%s' already paired with '%s'", - peer, ndl->state.patch.peer); - return EINVAL; - } - } else { - VLOG_WARN_RL(&rl, "peer '%s' exists and is not a patch", peer); - return EINVAL; - } - } - - retval = modify_veth("+%s,%s", name, peer); - if (retval) { - return retval; - } - - retval = if_up(name); - if (retval) { - return retval; - } - - retval = if_up(peer); - if (retval) { - return retval; - } - - return 0; -} - -static int -setup_patch(const char *name, const struct shash *args, char **peer_) -{ - const char *peer; - - peer = shash_find_data(args, "peer"); - if (!peer) { - VLOG_WARN("patch type requires valid 'peer' argument"); - return EINVAL; - } - - if (shash_count(args) > 1) { - VLOG_WARN("patch type takes only a 'peer' argument"); - return EINVAL; - } - - if (strlen(peer) >= IFNAMSIZ) { - VLOG_WARN_RL(&rl, "patch 'peer' arg too long"); - return EINVAL; - } - - *peer_ = xstrdup(peer); - return create_patch(name, peer); -} - /* Creates the netdev device of 'type' with 'name'. */ static int netdev_linux_create_system(const char *name, const char *type OVS_UNUSED, @@ -440,28 +323,6 @@ error: return error; } -static int -netdev_linux_create_patch(const char *name, const char *type OVS_UNUSED, - const struct shash *args, struct netdev_dev **netdev_devp) -{ - struct netdev_dev_linux *netdev_dev; - char *peer = NULL; - int error; - - error = setup_patch(name, args, &peer); - if (error) { - free(peer); - return error; - } - - netdev_dev = xzalloc(sizeof *netdev_dev); - netdev_dev->state.patch.peer = peer; - netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_patch_class); - *netdev_devp = &netdev_dev->netdev_dev; - - return 0; -} - static void destroy_tap(struct netdev_dev_linux *netdev_dev) { @@ -472,19 +333,6 @@ destroy_tap(struct netdev_dev_linux *netdev_dev) } } -static void -destroy_patch(struct netdev_dev_linux *netdev_dev) -{ - const char *name = netdev_dev_get_name(&netdev_dev->netdev_dev); - struct patch_state *state = &netdev_dev->state.patch; - - /* Only destroy veth if 'peer' doesn't exist as an existing netdev. */ - if (!netdev_dev_from_name(state->peer)) { - modify_veth("-%s", name); - } - free(state->peer); -} - /* Destroys the netdev device 'netdev_dev_'. */ static void netdev_linux_destroy(struct netdev_dev *netdev_dev_) @@ -500,8 +348,6 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_) } } else if (!strcmp(type, "tap")) { destroy_tap(netdev_dev); - } else if (!strcmp(type, "patch")) { - destroy_patch(netdev_dev); } free(netdev_dev); @@ -526,8 +372,15 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype, goto error; } - if (!strcmp(netdev_dev_get_type(netdev_dev_), "tap")) { + if (!strcmp(netdev_dev_get_type(netdev_dev_), "tap") && + !netdev_dev->state.tap.opened) { + + /* We assume that the first user of the tap device is the primary user + * and give them the tap FD. Subsequent users probably just expect + * this to be a system device so open it normally to avoid send/receive + * directions appearing to be reversed. */ netdev->fd = netdev_dev->state.tap.fd; + netdev_dev->state.tap.opened = true; } else if (ethertype != NETDEV_ETH_TYPE_NONE) { struct sockaddr_ll sll; int protocol; @@ -934,6 +787,14 @@ netdev_linux_update_is_pseudo(struct netdev_dev_linux *netdev_dev) } } +static void +swap_uint64(uint64_t *a, uint64_t *b) +{ + *a ^= *b; + *b ^= *a; + *a ^= *b; +} + /* Retrieves current device stats for 'netdev'. * * XXX All of the members of struct netdev_stats are 64 bits wide, but on @@ -946,16 +807,9 @@ netdev_linux_get_stats(const struct netdev *netdev_, netdev_dev_linux_cast(netdev_get_dev(netdev_)); static int use_netlink_stats = -1; int error; - struct netdev_stats raw_stats; - struct netdev_stats *collect_stats = stats; COVERAGE_INC(netdev_get_stats); - netdev_linux_update_is_pseudo(netdev_dev); - if (netdev_dev->is_internal) { - collect_stats = &raw_stats; - } - if (use_netlink_stats < 0) { use_netlink_stats = check_for_working_netlink_stats(); } @@ -964,27 +818,22 @@ netdev_linux_get_stats(const struct netdev *netdev_, error = get_ifindex(netdev_, &ifindex); if (!error) { - error = get_stats_via_netlink(ifindex, collect_stats); + error = get_stats_via_netlink(ifindex, stats); } } else { - error = get_stats_via_proc(netdev_get_name(netdev_), collect_stats); + error = get_stats_via_proc(netdev_get_name(netdev_), stats); } /* If this port is an internal port then the transmit and receive stats * will appear to be swapped relative to the other ports since we are the * one sending the data, not a remote computer. For consistency, we swap * them back here. */ + netdev_linux_update_is_pseudo(netdev_dev); if (!error && (netdev_dev->is_internal || netdev_dev->is_tap)) { - stats->rx_packets = raw_stats.tx_packets; - stats->tx_packets = raw_stats.rx_packets; - stats->rx_bytes = raw_stats.tx_bytes; - stats->tx_bytes = raw_stats.rx_bytes; - stats->rx_errors = raw_stats.tx_errors; - stats->tx_errors = raw_stats.rx_errors; - stats->rx_dropped = raw_stats.tx_dropped; - stats->tx_dropped = raw_stats.rx_dropped; - stats->multicast = raw_stats.multicast; - stats->collisions = raw_stats.collisions; + swap_uint64(&stats->rx_packets, &stats->tx_packets); + swap_uint64(&stats->rx_bytes, &stats->tx_bytes); + swap_uint64(&stats->rx_errors, &stats->tx_errors); + swap_uint64(&stats->rx_dropped, &stats->tx_dropped); stats->rx_length_errors = 0; stats->rx_over_errors = 0; stats->rx_crc_errors = 0; @@ -1273,28 +1122,60 @@ done: #define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress" #define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst %dk mtu 65535 drop flowid :1" -/* We redirect stderr to /dev/null because we often want to remove all - * traffic control configuration on a port so its in a known state. If - * this done when there is no such configuration, tc complains, so we just - * always ignore it. - */ -#define POLICE_DEL_CMD "/sbin/tc qdisc del dev %s handle ffff: ingress 2>/dev/null" /* Remove ingress policing from 'netdev'. Returns 0 if successful, otherwise a - * positive errno value. */ + * positive errno value. + * + * This function is equivalent to running + * /sbin/tc qdisc del dev %s handle ffff: ingress + * but it is much, much faster. + */ static int netdev_linux_remove_policing(struct netdev *netdev) { + struct netdev_dev_linux *netdev_dev = + netdev_dev_linux_cast(netdev_get_dev(netdev)); const char *netdev_name = netdev_get_name(netdev); - char command[1024]; - /* xxx This should be more careful about only adding if it - * xxx actually exists, as opposed to always deleting it. */ - snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name); - if (system(command) == -1) { - VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name); - return ECHILD; + struct ofpbuf request; + struct ofpbuf *reply; + struct tcmsg *tcmsg; + struct nl_sock *rtnl_sock; + int ifindex; + int error; + + error = get_ifindex(netdev, &ifindex); + if (error) { + return error; + } + + error = get_rtnl_sock(&rtnl_sock); + if (error) { + return error; + } + + ofpbuf_init(&request, 0); + nl_msg_put_nlmsghdr(&request, rtnl_sock, sizeof *tcmsg, + RTM_DELQDISC, NLM_F_REQUEST); + tcmsg = ofpbuf_put_zeros(&request, sizeof *tcmsg); + tcmsg->tcm_family = AF_UNSPEC; + tcmsg->tcm_ifindex = ifindex; + tcmsg->tcm_handle = 0xffff0000; + tcmsg->tcm_parent = TC_H_INGRESS; + nl_msg_put_string(&request, TCA_KIND, "ingress"); + nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0); + error = nl_sock_transact(rtnl_sock, &request, &reply); + ofpbuf_uninit(&request); + ofpbuf_delete(reply); + if (error && error != ENOENT && error != EINVAL) { + VLOG_WARN_RL(&rl, "%s: removing policing failed: %s", + netdev_name, strerror(error)); + return error; } + + netdev_dev->kbits_rate = 0; + netdev_dev->kbits_burst = 0; + netdev_dev->cache_valid |= VALID_POLICING; return 0; } @@ -1303,18 +1184,26 @@ static int netdev_linux_set_policing(struct netdev *netdev, uint32_t kbits_rate, uint32_t kbits_burst) { + struct netdev_dev_linux *netdev_dev = + netdev_dev_linux_cast(netdev_get_dev(netdev)); const char *netdev_name = netdev_get_name(netdev); char command[1024]; COVERAGE_INC(netdev_set_policing); + kbits_burst = (!kbits_rate ? 0 /* Force to 0 if no rate specified. */ + : !kbits_burst ? 1000 /* Default to 1000 kbits if 0. */ + : kbits_burst); /* Stick with user-specified value. */ + + if (netdev_dev->cache_valid & VALID_POLICING + && netdev_dev->kbits_rate == kbits_rate + && netdev_dev->kbits_burst == kbits_burst) { + /* Assume that settings haven't changed since we last set them. */ + return 0; + } + netdev_linux_remove_policing(netdev); if (kbits_rate) { - if (!kbits_burst) { - /* Default to 1000 kilobits if not specified. */ - kbits_burst = 1000; - } - snprintf(command, sizeof(command), POLICE_ADD_CMD, netdev_name); if (system(command) != 0) { VLOG_WARN_RL(&rl, "%s: problem adding policing", netdev_name); @@ -1328,6 +1217,10 @@ netdev_linux_set_policing(struct netdev *netdev, netdev_name); return -1; } + + netdev_dev->kbits_rate = kbits_rate; + netdev_dev->kbits_burst = kbits_burst; + netdev_dev->cache_valid |= VALID_POLICING; } return 0; @@ -1796,55 +1689,6 @@ const struct netdev_class netdev_tap_class = { netdev_linux_poll_remove, }; -const struct netdev_class netdev_patch_class = { - "patch", - - netdev_linux_init, - netdev_linux_run, - netdev_linux_wait, - - netdev_linux_create_patch, - netdev_linux_destroy, - NULL, /* reconfigure */ - - netdev_linux_open, - netdev_linux_close, - - NULL, /* enumerate */ - - netdev_linux_recv, - netdev_linux_recv_wait, - netdev_linux_drain, - - netdev_linux_send, - netdev_linux_send_wait, - - netdev_linux_set_etheraddr, - netdev_linux_get_etheraddr, - netdev_linux_get_mtu, - netdev_linux_get_ifindex, - netdev_linux_get_carrier, - netdev_linux_get_stats, - NULL, /* set_stats */ - - netdev_linux_get_features, - netdev_linux_set_advertisements, - netdev_linux_get_vlan_vid, - netdev_linux_set_policing, - - netdev_linux_get_in4, - netdev_linux_set_in4, - netdev_linux_get_in6, - netdev_linux_add_router, - netdev_linux_get_next_hop, - netdev_linux_arp_lookup, - - netdev_linux_update_flags, - - netdev_linux_poll_add, - netdev_linux_poll_remove, -}; - static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats)