#include <linux/ip.h>
#include <linux/types.h>
#include <linux/ethtool.h>
+#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/sockios.h>
#include <linux/version.h>
VALID_IN6 = 1 << 3,
VALID_MTU = 1 << 4,
VALID_CARRIER = 1 << 5,
- VALID_IS_PSEUDO = 1 << 6 /* Represents is_internal and is_tap. */
+ VALID_IS_PSEUDO = 1 << 6, /* Represents is_internal and is_tap. */
+ VALID_POLICING = 1 << 7
};
struct tap_state {
int fd;
};
-struct patch_state {
- char *peer;
-};
-
struct netdev_dev_linux {
struct netdev_dev netdev_dev;
int carrier;
bool is_internal; /* Is this an openvswitch internal device? */
bool is_tap; /* Is this a tuntap device? */
+ uint32_t kbits_rate; /* Policing data: rate last applied by the policing functions, 0 after removal. Consulted only while VALID_POLICING is set in cache_valid. */
+ uint32_t kbits_burst; /* Policing data: burst last applied alongside kbits_rate, 0 after removal. */
union {
struct tap_state tap;
- struct patch_state patch;
} state;
};
const uint8_t[ETH_ADDR_LEN]);
static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats);
static int get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats);
+static int get_rtnl_sock(struct nl_sock **);
static bool
is_netdev_linux_class(const struct netdev_class *netdev_class)
}
}
-static int
-if_up(const char *name)
-{
- struct ifreq ifr;
-
- strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
- ifr.ifr_flags = IFF_UP;
-
- if (ioctl(af_inet_sock, SIOCSIFFLAGS, &ifr) == -1) {
- VLOG_DBG_RL(&rl, "%s: failed to bring device up: %s",
- name, strerror(errno));
- return errno;
- }
-
- return 0;
-}
-
-/* A veth may be created using the 'command' "+<name>,<peer>". A veth may
- * be destroyed by using the 'command' "-<name>", where <name> can be
- * either side of the device.
- */
-static int
-modify_veth(const char *format, ...)
-{
- FILE *veth_file;
- va_list args;
- int retval;
-
- veth_file = fopen("/sys/class/net/veth_pairs", "w");
- if (!veth_file) {
- VLOG_WARN_RL(&rl, "could not open veth device. Are you running a "
- "supported XenServer with the kernel module loaded?");
- return ENODEV;
- }
- setvbuf(veth_file, NULL, _IONBF, 0);
-
- va_start(args, format);
- retval = vfprintf(veth_file, format, args);
- va_end(args);
-
- fclose(veth_file);
- if (retval < 0) {
- VLOG_WARN_RL(&rl, "could not destroy patch: %s", strerror(errno));
- return errno;
- }
-
- return 0;
-}
-
-static int
-create_patch(const char *name, const char *peer)
-{
- int retval;
- struct netdev_dev *peer_nd;
-
-
- /* Only create the veth if the peer didn't already do it. */
- peer_nd = netdev_dev_from_name(peer);
- if (peer_nd) {
- if (!strcmp("patch", netdev_dev_get_type(peer_nd))) {
- struct netdev_dev_linux *ndl = netdev_dev_linux_cast(peer_nd);
- if (!strcmp(name, ndl->state.patch.peer)) {
- return 0;
- } else {
- VLOG_WARN_RL(&rl, "peer '%s' already paired with '%s'",
- peer, ndl->state.patch.peer);
- return EINVAL;
- }
- } else {
- VLOG_WARN_RL(&rl, "peer '%s' exists and is not a patch", peer);
- return EINVAL;
- }
- }
-
- retval = modify_veth("+%s,%s", name, peer);
- if (retval) {
- return retval;
- }
-
- retval = if_up(name);
- if (retval) {
- return retval;
- }
-
- retval = if_up(peer);
- if (retval) {
- return retval;
- }
-
- return 0;
-}
-
-static int
-setup_patch(const char *name, const struct shash *args, char **peer_)
-{
- const char *peer;
-
- peer = shash_find_data(args, "peer");
- if (!peer) {
- VLOG_WARN("patch type requires valid 'peer' argument");
- return EINVAL;
- }
-
- if (shash_count(args) > 1) {
- VLOG_WARN("patch type takes only a 'peer' argument");
- return EINVAL;
- }
-
- if (strlen(peer) >= IFNAMSIZ) {
- VLOG_WARN_RL(&rl, "patch 'peer' arg too long");
- return EINVAL;
- }
-
- *peer_ = xstrdup(peer);
- return create_patch(name, peer);
-}
-
/* Creates the netdev device of 'type' with 'name'. */
static int
netdev_linux_create_system(const char *name, const char *type OVS_UNUSED,
return error;
}
-static int
-netdev_linux_create_patch(const char *name, const char *type OVS_UNUSED,
- const struct shash *args, struct netdev_dev **netdev_devp)
-{
- struct netdev_dev_linux *netdev_dev;
- char *peer = NULL;
- int error;
-
- error = setup_patch(name, args, &peer);
- if (error) {
- free(peer);
- return error;
- }
-
- netdev_dev = xzalloc(sizeof *netdev_dev);
- netdev_dev->state.patch.peer = peer;
- netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_patch_class);
- *netdev_devp = &netdev_dev->netdev_dev;
-
- return 0;
-}
-
static void
destroy_tap(struct netdev_dev_linux *netdev_dev)
{
}
}
-static void
-destroy_patch(struct netdev_dev_linux *netdev_dev)
-{
- const char *name = netdev_dev_get_name(&netdev_dev->netdev_dev);
- struct patch_state *state = &netdev_dev->state.patch;
-
- /* Only destroy veth if 'peer' doesn't exist as an existing netdev. */
- if (!netdev_dev_from_name(state->peer)) {
- modify_veth("-%s", name);
- }
- free(state->peer);
-}
-
/* Destroys the netdev device 'netdev_dev_'. */
static void
netdev_linux_destroy(struct netdev_dev *netdev_dev_)
}
} else if (!strcmp(type, "tap")) {
destroy_tap(netdev_dev);
- } else if (!strcmp(type, "patch")) {
- destroy_patch(netdev_dev);
}
free(netdev_dev);
#define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress"
#define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst %dk mtu 65535 drop flowid :1"
-/* We redirect stderr to /dev/null because we often want to remove all
- * traffic control configuration on a port so its in a known state. If
- * this done when there is no such configuration, tc complains, so we just
- * always ignore it.
+
+/* Remove ingress policing from 'netdev'. Returns 0 if successful, otherwise a
+ * positive errno value.
+ *
+ * This function is equivalent to running
+ * /sbin/tc qdisc del dev %s handle ffff: ingress
+ * but it is much, much faster.
+ *
+ * The removal is done by sending an RTM_DELQDISC request for the "ingress"
+ * qdisc over the shared routing socket obtained from get_rtnl_sock().
+ *
+ * ENOENT and EINVAL results from the transaction are deliberately treated as
+ * success (presumably they mean that no ingress qdisc was configured, which
+ * mirrors the old behavior of ignoring tc's complaints -- confirm). Any other
+ * error is logged with a rate-limited warning and returned.
+ *
+ * On success the cached policing settings in the netdev_dev are reset to 0
+ * and VALID_POLICING is set in 'cache_valid'. If the ifindex or the socket
+ * cannot be obtained, or the transaction fails with another error, the cache
+ * is left untouched.
*/
-#define POLICE_DEL_CMD "/sbin/tc qdisc del dev %s handle ffff: ingress 2>/dev/null"
+static int
+netdev_linux_remove_policing(struct netdev *netdev)
+{
+ struct netdev_dev_linux *netdev_dev =
+ netdev_dev_linux_cast(netdev_get_dev(netdev));
+ const char *netdev_name = netdev_get_name(netdev); /* Used only for logging. */
+
+ struct ofpbuf request;
+ struct ofpbuf *reply;
+ struct tcmsg *tcmsg;
+ struct nl_sock *rtnl_sock;
+ int ifindex;
+ int error;
+
+ error = get_ifindex(netdev, &ifindex);
+ if (error) {
+ return error;
+ }
+
+ error = get_rtnl_sock(&rtnl_sock);
+ if (error) {
+ return error;
+ }
+
+ ofpbuf_init(&request, 0); /* Build the request: nlmsghdr, tcmsg, then attributes. */
+ nl_msg_put_nlmsghdr(&request, rtnl_sock, sizeof *tcmsg,
+ RTM_DELQDISC, NLM_F_REQUEST);
+ tcmsg = ofpbuf_put_zeros(&request, sizeof *tcmsg);
+ tcmsg->tcm_family = AF_UNSPEC;
+ tcmsg->tcm_ifindex = ifindex;
+ tcmsg->tcm_handle = 0xffff0000; /* Corresponds to "handle ffff:" in the tc command above. */
+ tcmsg->tcm_parent = TC_H_INGRESS;
+ nl_msg_put_string(&request, TCA_KIND, "ingress");
+ nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0); /* Empty options attribute. */
+ error = nl_sock_transact(rtnl_sock, &request, &reply);
+ ofpbuf_uninit(&request);
+ ofpbuf_delete(reply); /* Reply contents are never read; only 'error' matters. */
+ if (error && error != ENOENT && error != EINVAL) { /* ENOENT/EINVAL are tolerated. */
+ VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
+ netdev_name, strerror(error));
+ return error;
+ }
+
+ netdev_dev->kbits_rate = 0; /* Record "no policing" in the cache. */
+ netdev_dev->kbits_burst = 0;
+ netdev_dev->cache_valid |= VALID_POLICING;
+ return 0;
+}
/* Attempts to set input rate limiting (policing) policy. */
static int
netdev_linux_set_policing(struct netdev *netdev,
uint32_t kbits_rate, uint32_t kbits_burst)
{
+ struct netdev_dev_linux *netdev_dev =
+ netdev_dev_linux_cast(netdev_get_dev(netdev));
const char *netdev_name = netdev_get_name(netdev);
char command[1024];
COVERAGE_INC(netdev_set_policing);
- if (kbits_rate) {
- if (!kbits_burst) {
- /* Default to 1000 kilobits if not specified. */
- kbits_burst = 1000;
- }
- /* xxx This should be more careful about only adding if it
- * xxx actually exists, as opposed to always deleting it. */
- snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name);
- if (system(command) == -1) {
- VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name);
- }
+ kbits_burst = (!kbits_rate ? 0 /* Force to 0 if no rate specified. */
+ : !kbits_burst ? 1000 /* Default to 1000 kbits if 0. */
+ : kbits_burst); /* Stick with user-specified value. */
+
+ if (netdev_dev->cache_valid & VALID_POLICING
+ && netdev_dev->kbits_rate == kbits_rate
+ && netdev_dev->kbits_burst == kbits_burst) {
+ /* Assume that settings haven't changed since we last set them. */
+ return 0;
+ }
+ netdev_linux_remove_policing(netdev);
+ if (kbits_rate) {
snprintf(command, sizeof(command), POLICE_ADD_CMD, netdev_name);
if (system(command) != 0) {
VLOG_WARN_RL(&rl, "%s: problem adding policing", netdev_name);
netdev_name);
return -1;
}
- } else {
- snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name);
- if (system(command) == -1) {
- VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name);
- }
+
+ netdev_dev->kbits_rate = kbits_rate;
+ netdev_dev->kbits_burst = kbits_burst;
+ netdev_dev->cache_valid |= VALID_POLICING;
}
return 0;
netdev_linux_poll_remove,
};
-const struct netdev_class netdev_patch_class = {
- "patch",
-
- netdev_linux_init,
- netdev_linux_run,
- netdev_linux_wait,
-
- netdev_linux_create_patch,
- netdev_linux_destroy,
- NULL, /* reconfigure */
-
- netdev_linux_open,
- netdev_linux_close,
-
- NULL, /* enumerate */
-
- netdev_linux_recv,
- netdev_linux_recv_wait,
- netdev_linux_drain,
-
- netdev_linux_send,
- netdev_linux_send_wait,
-
- netdev_linux_set_etheraddr,
- netdev_linux_get_etheraddr,
- netdev_linux_get_mtu,
- netdev_linux_get_ifindex,
- netdev_linux_get_carrier,
- netdev_linux_get_stats,
- NULL, /* set_stats */
-
- netdev_linux_get_features,
- netdev_linux_set_advertisements,
- netdev_linux_get_vlan_vid,
- netdev_linux_set_policing,
-
- netdev_linux_get_in4,
- netdev_linux_set_in4,
- netdev_linux_get_in6,
- netdev_linux_add_router,
- netdev_linux_get_next_hop,
- netdev_linux_arp_lookup,
-
- netdev_linux_update_flags,
-
- netdev_linux_poll_add,
- netdev_linux_poll_remove,
-};
-
\f
static int
get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
.min_len = sizeof(struct rtnl_link_stats) },
};
-
- static struct nl_sock *rtnl_sock;
+ struct nl_sock *rtnl_sock;
struct ofpbuf request;
struct ofpbuf *reply;
struct ifinfomsg *ifi;
struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
int error;
- if (!rtnl_sock) {
- error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &rtnl_sock);
- if (error) {
- VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s",
- strerror(error));
- return error;
- }
+ error = get_rtnl_sock(&rtnl_sock);
+ if (error) {
+ return error;
}
ofpbuf_init(&request, 0);
}
return error;
}
+
+/* Obtains a Netlink routing socket that is not subscribed to any multicast
+ * groups. Returns 0 if successful, otherwise a positive errno value. Stores
+ * the socket in '*rtnl_sockp' if successful, otherwise a null pointer.
+ *
+ * The socket is created on the first call and kept in a function-local static
+ * variable, so every later call returns the same socket without creating a
+ * new one. Creation failures are logged with a rate-limited error message.
+ *
+ * NOTE(review): the "null pointer on failure" guarantee and a retry on the
+ * next call both rely on nl_sock_create() leaving 'sock' null when it fails
+ * -- confirm against its implementation. */
+static int
+get_rtnl_sock(struct nl_sock **rtnl_sockp)
+{
+ static struct nl_sock *sock; /* Shared socket; null until successfully created. */
+ int error;
+
+ if (!sock) {
+ error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &sock);
+ if (error) {
+ VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s",
+ strerror(error));
+ }
+ } else {
+ error = 0; /* Socket already exists; nothing to do. */
+ }
+
+ *rtnl_sockp = sock;
+ return error;
+}