#include <linux/ip.h>
#include <linux/types.h>
#include <linux/ethtool.h>
+#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/sockios.h>
#include <linux/version.h>
VALID_IN6 = 1 << 3,
VALID_MTU = 1 << 4,
VALID_CARRIER = 1 << 5,
- VALID_IS_PSEUDO = 1 << 6 /* Represents is_internal and is_tap. */
+ VALID_IS_PSEUDO = 1 << 6, /* Represents is_internal and is_tap. */
+ VALID_POLICING = 1 << 7
};
struct tap_state {
int carrier;
bool is_internal; /* Is this an openvswitch internal device? */
bool is_tap; /* Is this a tuntap device? */
+ uint32_t kbits_rate; /* Policing data. */
+ uint32_t kbits_burst;
union {
struct tap_state tap;
const uint8_t[ETH_ADDR_LEN]);
static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats);
static int get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats);
+static int get_rtnl_sock(struct nl_sock **);
static bool
is_netdev_linux_class(const struct netdev_class *netdev_class)
#define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress"
#define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst %dk mtu 65535 drop flowid :1"
-/* We redirect stderr to /dev/null because we often want to remove all
- * traffic control configuration on a port so its in a known state. If
- * this done when there is no such configuration, tc complains, so we just
- * always ignore it.
+
+/* Remove ingress policing from 'netdev'. Returns 0 if successful, otherwise a
+ * positive errno value.
+ *
+ * This function is equivalent to running
+ * /sbin/tc qdisc del dev %s handle ffff: ingress
+ * but it is much, much faster.
+ *
+ * The request is an RTM_DELQDISC message sent over the socket obtained from
+ * get_rtnl_sock(). Failures from get_ifindex() or get_rtnl_sock() are
+ * returned to the caller as-is. An ENOENT result from the transaction is
+ * treated as success. On success the cached policing rate and burst are
+ * reset to 0 and VALID_POLICING is set in 'cache_valid'; on any other
+ * transaction error a rate-limited warning is logged and the cache is left
+ * unchanged.
*/
-#define POLICE_DEL_CMD "/sbin/tc qdisc del dev %s handle ffff: ingress 2>/dev/null"
+static int
+netdev_linux_remove_policing(struct netdev *netdev)
+{
+ struct netdev_dev_linux *netdev_dev =
+ netdev_dev_linux_cast(netdev_get_dev(netdev));
+ const char *netdev_name = netdev_get_name(netdev);
+
+ struct ofpbuf request;
+ struct ofpbuf *reply;
+ struct tcmsg *tcmsg;
+ struct nl_sock *rtnl_sock;
+ int ifindex;
+ int error;
+
+ error = get_ifindex(netdev, &ifindex);
+ if (error) {
+ return error;
+ }
+
+ error = get_rtnl_sock(&rtnl_sock);
+ if (error) {
+ return error;
+ }
+
+ ofpbuf_init(&request, 0); /* Build the RTM_DELQDISC request: header, tcmsg, then attributes. */
+ nl_msg_put_nlmsghdr(&request, rtnl_sock, sizeof *tcmsg,
+ RTM_DELQDISC, NLM_F_REQUEST);
+ tcmsg = ofpbuf_put_zeros(&request, sizeof *tcmsg);
+ tcmsg->tcm_family = AF_UNSPEC;
+ tcmsg->tcm_ifindex = ifindex;
+ tcmsg->tcm_handle = 0xffff0000; /* Presumably the "ffff:" handle of the tc command above (major 0xffff, minor 0) -- confirm. */
+ tcmsg->tcm_parent = TC_H_INGRESS;
+ nl_msg_put_string(&request, TCA_KIND, "ingress");
+ nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0); /* TCA_OPTIONS attribute with an empty payload. */
+ error = nl_sock_transact(rtnl_sock, &request, &reply);
+ ofpbuf_uninit(&request);
+ ofpbuf_delete(reply); /* NOTE(review): assumes nl_sock_transact() always stores into 'reply' (null on error) -- confirm. */
+ if (error && error != ENOENT) { /* ENOENT is tolerated; presumably it means no ingress qdisc was installed. */
+ VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
+ netdev_name, strerror(error));
+ return error;
+ }
+
+ netdev_dev->kbits_rate = 0; /* Record "no policing" in the cache. */
+ netdev_dev->kbits_burst = 0;
+ netdev_dev->cache_valid |= VALID_POLICING;
+ return 0;
+}
/* Attempts to set input rate limiting (policing) policy. */
static int
netdev_linux_set_policing(struct netdev *netdev,
uint32_t kbits_rate, uint32_t kbits_burst)
{
+ struct netdev_dev_linux *netdev_dev =
+ netdev_dev_linux_cast(netdev_get_dev(netdev));
const char *netdev_name = netdev_get_name(netdev);
char command[1024];
COVERAGE_INC(netdev_set_policing);
- if (kbits_rate) {
- if (!kbits_burst) {
- /* Default to 1000 kilobits if not specified. */
- kbits_burst = 1000;
- }
- /* xxx This should be more careful about only adding if it
- * xxx actually exists, as opposed to always deleting it. */
- snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name);
- if (system(command) == -1) {
- VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name);
- }
+ kbits_burst = (!kbits_rate ? 0 /* Force to 0 if no rate specified. */
+ : !kbits_burst ? 1000 /* Default to 1000 kbits if 0. */
+ : kbits_burst); /* Stick with user-specified value. */
+ if (netdev_dev->cache_valid & VALID_POLICING
+ && netdev_dev->kbits_rate == kbits_rate
+ && netdev_dev->kbits_burst == kbits_burst) {
+ /* Assume that settings haven't changed since we last set them. */
+ return 0;
+ }
+
+ netdev_linux_remove_policing(netdev);
+ if (kbits_rate) {
snprintf(command, sizeof(command), POLICE_ADD_CMD, netdev_name);
if (system(command) != 0) {
VLOG_WARN_RL(&rl, "%s: problem adding policing", netdev_name);
netdev_name);
return -1;
}
- } else {
- snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name);
- if (system(command) == -1) {
- VLOG_WARN_RL(&rl, "%s: problem removing policing", netdev_name);
- }
+
+ netdev_dev->kbits_rate = kbits_rate;
+ netdev_dev->kbits_burst = kbits_burst;
+ netdev_dev->cache_valid |= VALID_POLICING;
}
return 0;
.min_len = sizeof(struct rtnl_link_stats) },
};
-
- static struct nl_sock *rtnl_sock;
+ struct nl_sock *rtnl_sock;
struct ofpbuf request;
struct ofpbuf *reply;
struct ifinfomsg *ifi;
struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
int error;
- if (!rtnl_sock) {
- error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &rtnl_sock);
- if (error) {
- VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s",
- strerror(error));
- return error;
- }
+ error = get_rtnl_sock(&rtnl_sock);
+ if (error) {
+ return error;
}
ofpbuf_init(&request, 0);
}
return error;
}
+
+/* Obtains a Netlink routing socket that is not subscribed to any multicast
+ * groups. Returns 0 if successful, otherwise a positive errno value. Stores
+ * the socket in '*rtnl_sockp' if successful, otherwise a null pointer.
+ *
+ * The socket is created with nl_sock_create() on the first call and kept in
+ * a function-local static, so later calls hand back the same socket without
+ * creating a new one. If creation fails, a rate-limited error is logged and
+ * the error is returned. Creation is retried on the next call as long as the
+ * static is still null, which presumably holds when nl_sock_create() fails
+ * (TODO confirm). */
+static int
+get_rtnl_sock(struct nl_sock **rtnl_sockp)
+{
+ static struct nl_sock *sock; /* Created on first use, then reused by every caller. */
+ int error;
+
+ if (!sock) {
+ error = nl_sock_create(NETLINK_ROUTE, 0, 0, 0, &sock);
+ if (error) {
+ VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s",
+ strerror(error));
+ }
+ } else {
+ error = 0;
+ }
+
+ *rtnl_sockp = sock; /* NOTE(review): null on failure only if nl_sock_create() leaves 'sock' null -- confirm. */
+ return error;
+}