X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fnetdev-linux.c;h=9a6d70a29dc578d0579df831b5d0220a840e787b;hb=8bf4bbe390af3f370e7e95d9237572ff750047a8;hp=e86a160c8e17da10d506ab277950e75e0faf1448;hpb=8722022c0c0d29d3f998dc26c50944c456e56646;p=openvswitch diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index e86a160c..9a6d70a2 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -81,17 +82,14 @@ enum { VALID_IN6 = 1 << 3, VALID_MTU = 1 << 4, VALID_CARRIER = 1 << 5, - VALID_IS_PSEUDO = 1 << 6 /* Represents is_internal and is_tap. */ + VALID_IS_PSEUDO = 1 << 6, /* Represents is_internal and is_tap. */ + VALID_POLICING = 1 << 7 }; struct tap_state { int fd; }; -struct patch_state { - char *peer; -}; - struct netdev_dev_linux { struct netdev_dev netdev_dev; @@ -108,10 +106,11 @@ struct netdev_dev_linux { int carrier; bool is_internal; /* Is this an openvswitch internal device? */ bool is_tap; /* Is this a tuntap device? */ + uint32_t kbits_rate; /* Policing data. */ + uint32_t kbits_burst; union { struct tap_state tap; - struct patch_state patch; } state; }; @@ -156,6 +155,7 @@ static int set_etheraddr(const char *netdev_name, int hwaddr_family, const uint8_t[ETH_ADDR_LEN]); static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats); static int get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats); +static int get_rtnl_sock(struct nl_sock **); static bool is_netdev_linux_class(const struct netdev_class *netdev_class) @@ -238,123 +238,6 @@ netdev_linux_cache_cb(const struct rtnetlink_change *change, } } -static int -if_up(const char *name) -{ - struct ifreq ifr; - - strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); - ifr.ifr_flags = IFF_UP; - - if (ioctl(af_inet_sock, SIOCSIFFLAGS, &ifr) == -1) { - VLOG_DBG_RL(&rl, "%s: failed to bring device up: %s", - name, strerror(errno)); - return errno; - } - - return 0; -} - -/* A veth may be created using the 'command' "+,". A veth may - * be destroyed by using the 'command' "-", where can be - * either side of the device. - */ -static int -modify_veth(const char *format, ...) -{ - FILE *veth_file; - va_list args; - int retval; - - veth_file = fopen("/sys/class/net/veth_pairs", "w"); - if (!veth_file) { - VLOG_WARN_RL(&rl, "could not open veth device. Are you running a " - "supported XenServer with the kernel module loaded?"); - return ENODEV; - } - setvbuf(veth_file, NULL, _IONBF, 0); - - va_start(args, format); - retval = vfprintf(veth_file, format, args); - va_end(args); - - fclose(veth_file); - if (retval < 0) { - VLOG_WARN_RL(&rl, "could not destroy patch: %s", strerror(errno)); - return errno; - } - - return 0; -} - -static int -create_patch(const char *name, const char *peer) -{ - int retval; - struct netdev_dev *peer_nd; - - - /* Only create the veth if the peer didn't already do it. */ - peer_nd = netdev_dev_from_name(peer); - if (peer_nd) { - if (!strcmp("patch", netdev_dev_get_type(peer_nd))) { - struct netdev_dev_linux *ndl = netdev_dev_linux_cast(peer_nd); - if (!strcmp(name, ndl->state.patch.peer)) { - return 0; - } else { - VLOG_WARN_RL(&rl, "peer '%s' already paired with '%s'", - peer, ndl->state.patch.peer); - return EINVAL; - } - } else { - VLOG_WARN_RL(&rl, "peer '%s' exists and is not a patch", peer); - return EINVAL; - } - } - - retval = modify_veth("+%s,%s", name, peer); - if (retval) { - return retval; - } - - retval = if_up(name); - if (retval) { - return retval; - } - - retval = if_up(peer); - if (retval) { - return retval; - } - - return 0; -} - -static int -setup_patch(const char *name, const struct shash *args, char **peer_) -{ - const char *peer; - - peer = shash_find_data(args, "peer"); - if (!peer) { - VLOG_WARN("patch type requires valid 'peer' argument"); - return EINVAL; - } - - if (shash_count(args) > 1) { - VLOG_WARN("patch type takes only a 'peer' argument"); - return EINVAL; - } - - if (strlen(peer) >= IFNAMSIZ) { - VLOG_WARN_RL(&rl, "patch 'peer' arg too long"); - return EINVAL; - } - - *peer_ = xstrdup(peer); - return create_patch(name, peer); -} - /* Creates the netdev device of 'type' with 'name'. */ static int netdev_linux_create_system(const char *name, const char *type OVS_UNUSED, @@ -439,28 +322,6 @@ error: return error; } -static int -netdev_linux_create_patch(const char *name, const char *type OVS_UNUSED, - const struct shash *args, struct netdev_dev **netdev_devp) -{ - struct netdev_dev_linux *netdev_dev; - char *peer = NULL; - int error; - - error = setup_patch(name, args, &peer); - if (error) { - free(peer); - return error; - } - - netdev_dev = xzalloc(sizeof *netdev_dev); - netdev_dev->state.patch.peer = peer; - netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_patch_class); - *netdev_devp = &netdev_dev->netdev_dev; - - return 0; -} - static void destroy_tap(struct netdev_dev_linux *netdev_dev) { @@ -471,19 +332,6 @@ destroy_tap(struct netdev_dev_linux *netdev_dev) } } -static void -destroy_patch(struct netdev_dev_linux *netdev_dev) -{ - const char *name = netdev_dev_get_name(&netdev_dev->netdev_dev); - struct patch_state *state = &netdev_dev->state.patch; - - /* Only destroy veth if 'peer' doesn't exist as an existing netdev. */ - if (!netdev_dev_from_name(state->peer)) { - modify_veth("-%s", name); - } - free(state->peer); -} - /* Destroys the netdev device 'netdev_dev_'. */ static void netdev_linux_destroy(struct netdev_dev *netdev_dev_) @@ -499,8 +347,6 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_) } } else if (!strcmp(type, "tap")) { destroy_tap(netdev_dev); - } else if (!strcmp(type, "patch")) { - destroy_patch(netdev_dev); } free(netdev_dev); @@ -1272,35 +1118,88 @@ done: #define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress" #define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst %dk mtu 65535 drop flowid :1" -/* We redirect stderr to /dev/null because we often want to remove all - * traffic control configuration on a port so its in a known state. If - * this done when there is no such configuration, tc complains, so we just - * always ignore it. + +/* Remove ingress policing from 'netdev'. Returns 0 if successful, otherwise a + * positive errno value. + * + * This function is equivalent to running + * /sbin/tc qdisc del dev %s handle ffff: ingress + * but it is much, much faster. */ -#define POLICE_DEL_CMD "/sbin/tc qdisc del dev %s handle ffff: ingress 2>/dev/null" +static int +netdev_linux_remove_policing(struct netdev *netdev) +{ + struct netdev_dev_linux *netdev_dev = + netdev_dev_linux_cast(netdev_get_dev(netdev)); + const char *netdev_name = netdev_get_name(netdev); + + struct ofpbuf request; + struct ofpbuf *reply; + struct tcmsg *tcmsg; + struct nl_sock *rtnl_sock; + int ifindex; + int error; + + error = get_ifindex(netdev, &ifindex); + if (error) { + return error; + } + + error = get_rtnl_sock(&rtnl_sock); + if (error) { + return error; + } + + ofpbuf_init(&request, 0); + nl_msg_put_nlmsghdr(&request, rtnl_sock, sizeof *tcmsg, + RTM_DELQDISC, NLM_F_REQUEST); + tcmsg = ofpbuf_put_zeros(&request, sizeof *tcmsg); + tcmsg->tcm_family = AF_UNSPEC; + tcmsg->tcm_ifindex = ifindex; + tcmsg->tcm_handle = 0xffff0000; + tcmsg->tcm_parent = TC_H_INGRESS; + nl_msg_put_string(&request, TCA_KIND, "ingress"); + nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0); + error = nl_sock_transact(rtnl_sock, &request, &reply); + ofpbuf_uninit(&request); + ofpbuf_delete(reply); + if (error && error != ENOENT && error != EINVAL) { + VLOG_WARN_RL(&rl, "%s: removing policing failed: %s", + netdev_name, strerror(error)); + return error; + } + + netdev_dev->kbits_rate = 0; + netdev_dev->kbits_burst = 0; + netdev_dev->cache_valid |= VALID_POLICING; + return 0; +} /* Attempts to set input rate limiting (policing) policy. */ static int netdev_linux_set_policing(struct netdev *netdev, uint32_t kbits_rate, uint32_t kbits_burst) { + struct netdev_dev_linux *netdev_dev = + netdev_dev_linux_cast(netdev_get_dev(netdev)); const char *netdev_name = netdev_get_name(netdev); char command[1024]; COVERAGE_INC(netdev_set_policing); - if (kbits_rate) { - if (!kbits_burst) { - /* Default to 1000 kilobits if not specified. */ - kbits_burst = 1000; - } - /* xxx This should be more careful about only adding if it - * xxx actually exists, as opposed to always deleting it. */ - snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name); - if (system(command) == -1) { - VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name); - } + kbits_burst = (!kbits_rate ? 0 /* Force to 0 if no rate specified. */ + : !kbits_burst ? 1000 /* Default to 1000 kbits if 0. */ + : kbits_burst); /* Stick with user-specified value. */ + + if (netdev_dev->cache_valid & VALID_POLICING + && netdev_dev->kbits_rate == kbits_rate + && netdev_dev->kbits_burst == kbits_burst) { + /* Assume that settings haven't changed since we last set them. */ + return 0; + } + netdev_linux_remove_policing(netdev); + if (kbits_rate) { snprintf(command, sizeof(command), POLICE_ADD_CMD, netdev_name); if (system(command) != 0) { VLOG_WARN_RL(&rl, "%s: problem adding policing", netdev_name); @@ -1314,11 +1213,10 @@ netdev_linux_set_policing(struct netdev *netdev, netdev_name); return -1; } - } else { - snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name); - if (system(command) == -1) { - VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name); - } + + netdev_dev->kbits_rate = kbits_rate; + netdev_dev->kbits_burst = kbits_burst; + netdev_dev->cache_valid |= VALID_POLICING; } return 0; @@ -1787,55 +1685,6 @@ const struct netdev_class netdev_tap_class = { netdev_linux_poll_remove, }; -const struct netdev_class netdev_patch_class = { - "patch", - - netdev_linux_init, - netdev_linux_run, - netdev_linux_wait, - - netdev_linux_create_patch, - netdev_linux_destroy, - NULL, /* reconfigure */ - - netdev_linux_open, - netdev_linux_close, - - NULL, /* enumerate */ - - netdev_linux_recv, - netdev_linux_recv_wait, - netdev_linux_drain, - - netdev_linux_send, - netdev_linux_send_wait, - - netdev_linux_set_etheraddr, - netdev_linux_get_etheraddr, - netdev_linux_get_mtu, - netdev_linux_get_ifindex, - netdev_linux_get_carrier, - netdev_linux_get_stats, - NULL, /* set_stats */ - - netdev_linux_get_features, - netdev_linux_set_advertisements, - netdev_linux_get_vlan_vid, - netdev_linux_set_policing, - - netdev_linux_get_in4, - netdev_linux_set_in4, - netdev_linux_get_in6, - netdev_linux_add_router, - netdev_linux_get_next_hop, - netdev_linux_arp_lookup, - - netdev_linux_update_flags, - - netdev_linux_poll_add, - netdev_linux_poll_remove, -}; - static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats) @@ -1850,8 +1699,7 @@ get_stats_via_netlink(int ifindex, struct netdev_stats *stats) .min_len = sizeof(struct rtnl_link_stats) }, }; - - static struct nl_sock *rtnl_sock; + struct nl_sock *rtnl_sock; struct ofpbuf request; struct ofpbuf *reply; struct ifinfomsg *ifi; @@ -1859,13 +1707,9 @@ get_stats_via_netlink(int ifindex, struct netdev_stats *stats) struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; int error; - if (!rtnl_sock) { - error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &rtnl_sock); - if (error) { - VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s", - strerror(error)); - return error; - } + error = get_rtnl_sock(&rtnl_sock); + if (error) { + return error; } ofpbuf_init(&request, 0); @@ -2129,3 +1973,26 @@ netdev_linux_get_ipv4(const struct netdev *netdev, struct in_addr *ip, } return error; } + +/* Obtains a Netlink routing socket that is not subscribed to any multicast + * groups. Returns 0 if successful, otherwise a positive errno value. Stores + * the socket in '*rtnl_sockp' if successful, otherwise a null pointer. */ +static int +get_rtnl_sock(struct nl_sock **rtnl_sockp) +{ + static struct nl_sock *sock; + int error; + + if (!sock) { + error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &sock); + if (error) { + VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s", + strerror(error)); + } + } else { + error = 0; + } + + *rtnl_sockp = sock; + return error; +}