X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fnetdev-linux.c;h=6ad08520050307f64aabf784e2dbaf86e862091d;hb=dd1ba5b3f4425c8eba008d1a93b044da63466812;hp=90e88c76d9efcf3c1c8a7a8291963be6fbc84d15;hpb=1f6e0fbd81b8f1786be0232f83792170a9f476d3;p=openvswitch diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 90e88c76..6ad08520 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011 Nicira Networks. + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -326,6 +327,9 @@ static unsigned int tc_buffer_per_jiffy(unsigned int rate); static struct tcmsg *tc_make_request(const struct netdev *, int type, unsigned int flags, struct ofpbuf *); static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp); +static int tc_add_del_ingress_qdisc(struct netdev *netdev, bool add); +static int tc_add_policer(struct netdev *netdev, int kbits_rate, + int kbits_burst); static int tc_parse_qdisc(const struct ofpbuf *, const char **kind, struct nlattr **options); @@ -364,7 +368,7 @@ struct netdev_dev_linux { struct in_addr address, netmask; struct in6_addr in6; int mtu; - bool carrier; + unsigned int ifi_flags; long long int carrier_resets; uint32_t kbits_rate; /* Policing data. */ uint32_t kbits_burst; @@ -399,8 +403,8 @@ static int netdev_linux_do_ioctl(const char *name, struct ifreq *, int cmd, const char *cmd_name); static int netdev_linux_get_ipv4(const struct netdev *, struct in_addr *, int cmd, const char *cmd_name); -static int get_flags(const struct netdev *, int *flagsp); -static int set_flags(struct netdev *, int flags); +static int get_flags(const struct netdev_dev *, unsigned int *flags); +static int set_flags(struct netdev *, unsigned int flags); static int do_get_ifindex(const char *netdev_name); static int get_ifindex(const struct netdev *, int *ifindexp); static int do_set_addr(struct netdev *netdev, @@ -411,7 +415,6 @@ static int set_etheraddr(const char *netdev_name, int hwaddr_family, const uint8_t[ETH_ADDR_LEN]); static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats); static int get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats); -static int get_carrier_via_sysfs(const char *name, bool *carrier); static int af_packet_sock(void); static void netdev_linux_miimon_run(void); static void netdev_linux_miimon_wait(void); @@ -480,12 +483,18 @@ netdev_linux_wait(void) } static void -netdev_dev_linux_changed(struct netdev_dev_linux *dev) +netdev_dev_linux_changed(struct netdev_dev_linux *dev, unsigned int ifi_flags) { dev->change_seq++; if (!dev->change_seq) { dev->change_seq++; } + + if ((dev->ifi_flags ^ ifi_flags) & IFF_RUNNING) { + dev->carrier_resets++; + } + dev->ifi_flags = ifi_flags; + dev->cache_valid = 0; } @@ -502,13 +511,7 @@ netdev_linux_cache_cb(const struct rtnetlink_link_change *change, if (is_netdev_linux_class(netdev_class)) { dev = netdev_dev_linux_cast(base_dev); - - if (dev->carrier != change->running) { - dev->carrier = change->running; - dev->carrier_resets++; - } - - netdev_dev_linux_changed(dev); + netdev_dev_linux_changed(dev, change->ifi_flags); } } } else { @@ -518,17 +521,12 @@ netdev_linux_cache_cb(const struct rtnetlink_link_change *change, shash_init(&device_shash); netdev_dev_get_devices(&netdev_linux_class, &device_shash); SHASH_FOR_EACH (node, &device_shash) { - bool carrier; + unsigned int flags; dev = node->data; - get_carrier_via_sysfs(node->name, &carrier); - if (dev->carrier != carrier) { - dev->carrier = carrier; - dev->carrier_resets++; - } - - netdev_dev_linux_changed(dev); + get_flags(&dev->netdev_dev, &flags); + netdev_dev_linux_changed(dev, flags); } shash_destroy(&device_shash); } @@ -579,7 +577,7 @@ netdev_linux_create(const struct netdev_class *class, const char *name, netdev_dev = xzalloc(sizeof *netdev_dev); netdev_dev->change_seq = 1; netdev_dev_init(&netdev_dev->netdev_dev, name, class); - get_carrier_via_sysfs(name, &netdev_dev->carrier); + get_flags(&netdev_dev->netdev_dev, &netdev_dev->ifi_flags); *netdev_devp = &netdev_dev->netdev_dev; return 0; @@ -798,9 +796,9 @@ netdev_linux_recv(struct netdev *netdev_, void *data, size_t size) } for (;;) { - ssize_t retval = read(netdev->fd, data, size); + ssize_t retval = recv(netdev->fd, data, size, MSG_TRUNC); if (retval >= 0) { - return retval; + return retval <= size ? retval : -EMSGSIZE; } else if (errno != EINTR) { if (errno != EAGAIN) { VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s", @@ -1026,6 +1024,10 @@ netdev_linux_set_mtu(const struct netdev *netdev_, int mtu) struct ifreq ifr; int error; + if (netdev_dev->cache_valid & VALID_MTU && + netdev_dev->mtu == mtu) { + return 0; + } ifr.ifr_mtu = mtu; error = netdev_linux_do_ioctl(netdev_get_name(netdev_), &ifr, SIOCSIFMTU, "SIOCSIFMTU"); @@ -1058,7 +1060,7 @@ netdev_linux_get_carrier(const struct netdev *netdev_, bool *carrier) if (netdev_dev->miimon_interval > 0) { *carrier = netdev_dev->miimon; } else { - *carrier = netdev_dev->carrier; + *carrier = (netdev_dev->ifi_flags & IFF_RUNNING) != 0; } return 0; @@ -1164,7 +1166,7 @@ netdev_linux_miimon_run(void) netdev_linux_get_miimon(dev->netdev_dev.name, &miimon); if (miimon != dev->miimon) { dev->miimon = miimon; - netdev_dev_linux_changed(dev); + netdev_dev_linux_changed(dev, dev->ifi_flags); } timer_set_duration(&dev->miimon_timer, dev->miimon_interval); @@ -1240,8 +1242,8 @@ get_stats_via_vport(const struct netdev *netdev_, error = netdev_vport_get_stats(netdev_, stats); if (error) { - VLOG_WARN_RL(&rl, "%s: obtaining netdev stats via vport failed %d", - netdev_get_name(netdev_), error); + VLOG_WARN_RL(&rl, "%s: obtaining netdev stats via vport failed " + "(%s)", netdev_get_name(netdev_), strerror(error)); } netdev_dev->have_vport_stats = !error; netdev_dev->cache_valid |= VALID_HAVE_VPORT_STATS; @@ -1564,50 +1566,8 @@ netdev_linux_set_advertisements(struct netdev *netdev, uint32_t advertise) ETHTOOL_SSET, "ETHTOOL_SSET"); } -#define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress" -#define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst %dk mtu 65535 drop flowid :1" - -/* Remove ingress policing from 'netdev'. Returns 0 if successful, otherwise a - * positive errno value. - * - * This function is equivalent to running - * /sbin/tc qdisc del dev %s handle ffff: ingress - * but it is much, much faster. - */ -static int -netdev_linux_remove_policing(struct netdev *netdev) -{ - struct netdev_dev_linux *netdev_dev = - netdev_dev_linux_cast(netdev_get_dev(netdev)); - const char *netdev_name = netdev_get_name(netdev); - - struct ofpbuf request; - struct tcmsg *tcmsg; - int error; - - tcmsg = tc_make_request(netdev, RTM_DELQDISC, 0, &request); - if (!tcmsg) { - return ENODEV; - } - tcmsg->tcm_handle = tc_make_handle(0xffff, 0); - tcmsg->tcm_parent = TC_H_INGRESS; - nl_msg_put_string(&request, TCA_KIND, "ingress"); - nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0); - - error = tc_transact(&request, NULL); - if (error && error != ENOENT && error != EINVAL) { - VLOG_WARN_RL(&rl, "%s: removing policing failed: %s", - netdev_name, strerror(error)); - return error; - } - - netdev_dev->kbits_rate = 0; - netdev_dev->kbits_burst = 0; - netdev_dev->cache_valid |= VALID_POLICING; - return 0; -} - -/* Attempts to set input rate limiting (policing) policy. */ +/* Attempts to set input rate limiting (policing) policy. Returns 0 if + * successful, otherwise a positive errno value. */ static int netdev_linux_set_policing(struct netdev *netdev, uint32_t kbits_rate, uint32_t kbits_burst) @@ -1615,7 +1575,7 @@ netdev_linux_set_policing(struct netdev *netdev, struct netdev_dev_linux *netdev_dev = netdev_dev_linux_cast(netdev_get_dev(netdev)); const char *netdev_name = netdev_get_name(netdev); - char command[1024]; + int error; COVERAGE_INC(netdev_set_policing); @@ -1630,27 +1590,34 @@ netdev_linux_set_policing(struct netdev *netdev, return 0; } - netdev_linux_remove_policing(netdev); + /* Remove any existing ingress qdisc. */ + error = tc_add_del_ingress_qdisc(netdev, false); + if (error) { + VLOG_WARN_RL(&rl, "%s: removing policing failed: %s", + netdev_name, strerror(error)); + return error; + } + if (kbits_rate) { - snprintf(command, sizeof(command), POLICE_ADD_CMD, netdev_name); - if (system(command) != 0) { - VLOG_WARN_RL(&rl, "%s: problem adding policing", netdev_name); - return -1; + error = tc_add_del_ingress_qdisc(netdev, true); + if (error) { + VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s", + netdev_name, strerror(error)); + return error; } - snprintf(command, sizeof(command), POLICE_CONFIG_CMD, netdev_name, - kbits_rate, kbits_burst); - if (system(command) != 0) { - VLOG_WARN_RL(&rl, "%s: problem configuring policing", - netdev_name); - return -1; + error = tc_add_policer(netdev, kbits_rate, kbits_burst); + if (error){ + VLOG_WARN_RL(&rl, "%s: adding policing action failed: %s", + netdev_name, strerror(error)); + return error; } - - netdev_dev->kbits_rate = kbits_rate; - netdev_dev->kbits_burst = kbits_burst; - netdev_dev->cache_valid |= VALID_POLICING; } + netdev_dev->kbits_rate = kbits_rate; + netdev_dev->kbits_burst = kbits_burst; + netdev_dev->cache_valid |= VALID_POLICING; + return 0; } @@ -2240,16 +2207,17 @@ static int netdev_linux_update_flags(struct netdev *netdev, enum netdev_flags off, enum netdev_flags on, enum netdev_flags *old_flagsp) { + struct netdev_dev_linux *netdev_dev; int old_flags, new_flags; - int error; + int error = 0; - error = get_flags(netdev, &old_flags); - if (!error) { - *old_flagsp = iff_to_nd_flags(old_flags); - new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on); - if (new_flags != old_flags) { - error = set_flags(netdev, new_flags); - } + netdev_dev = netdev_dev_linux_cast(netdev_get_dev(netdev)); + old_flags = netdev_dev->ifi_flags; + *old_flagsp = iff_to_nd_flags(old_flags); + new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on); + if (new_flags != old_flags) { + error = set_flags(netdev, new_flags); + get_flags(&netdev_dev->netdev_dev, &netdev_dev->ifi_flags); } return error; } @@ -3491,6 +3459,107 @@ tc_transact(struct ofpbuf *request, struct ofpbuf **replyp) return error; } +/* Adds or deletes a root ingress qdisc on 'netdev'. We use this for + * policing configuration. + * + * This function is equivalent to running the following when 'add' is true: + * /sbin/tc qdisc add dev handle ffff: ingress + * + * This function is equivalent to running the following when 'add' is false: + * /sbin/tc qdisc del dev handle ffff: ingress + * + * The configuration and stats may be seen with the following command: + * /sbin/tc -s qdisc show dev + * + * Returns 0 if successful, otherwise a positive errno value. + */ +static int +tc_add_del_ingress_qdisc(struct netdev *netdev, bool add) +{ + struct ofpbuf request; + struct tcmsg *tcmsg; + int error; + int type = add ? RTM_NEWQDISC : RTM_DELQDISC; + int flags = add ? NLM_F_EXCL | NLM_F_CREATE : 0; + + tcmsg = tc_make_request(netdev, type, flags, &request); + if (!tcmsg) { + return ENODEV; + } + tcmsg->tcm_handle = tc_make_handle(0xffff, 0); + tcmsg->tcm_parent = TC_H_INGRESS; + nl_msg_put_string(&request, TCA_KIND, "ingress"); + nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0); + + error = tc_transact(&request, NULL); + if (error) { + /* If we're deleting the qdisc, don't worry about some of the + * error conditions. */ + if (!add && (error == ENOENT || error == EINVAL)) { + return 0; + } + return error; + } + + return 0; +} + +/* Adds a policer to 'netdev' with a rate of 'kbits_rate' and a burst size + * of 'kbits_burst'. + * + * This function is equivalent to running: + * /sbin/tc filter add dev parent ffff: protocol all prio 49 + * basic police rate kbit burst k + * mtu 65535 drop + * + * The configuration and stats may be seen with the following command: + * /sbin/tc -s filter show eth0 parent ffff: + * + * Returns 0 if successful, otherwise a positive errno value. + */ +static int +tc_add_policer(struct netdev *netdev, int kbits_rate, int kbits_burst) +{ + struct tc_police tc_police; + struct ofpbuf request; + struct tcmsg *tcmsg; + size_t basic_offset; + size_t police_offset; + int error; + int mtu = 65535; + + memset(&tc_police, 0, sizeof tc_police); + tc_police.action = TC_POLICE_SHOT; + tc_police.mtu = mtu; + tc_fill_rate(&tc_police.rate, kbits_rate/8 * 1000, mtu); + tc_police.burst = tc_bytes_to_ticks(tc_police.rate.rate, + kbits_burst * 1024); + + tcmsg = tc_make_request(netdev, RTM_NEWTFILTER, + NLM_F_EXCL | NLM_F_CREATE, &request); + if (!tcmsg) { + return ENODEV; + } + tcmsg->tcm_parent = tc_make_handle(0xffff, 0); + tcmsg->tcm_info = tc_make_handle(49, + (OVS_FORCE uint16_t) htons(ETH_P_ALL)); + + nl_msg_put_string(&request, TCA_KIND, "basic"); + basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS); + police_offset = nl_msg_start_nested(&request, TCA_BASIC_POLICE); + nl_msg_put_unspec(&request, TCA_POLICE_TBF, &tc_police, sizeof tc_police); + tc_put_rtab(&request, TCA_POLICE_RATE, &tc_police.rate); + nl_msg_end_nested(&request, police_offset); + nl_msg_end_nested(&request, basic_offset); + + error = tc_transact(&request, NULL); + if (error) { + return error; + } + + return 0; +} + static void read_psched(void) { @@ -4144,71 +4213,22 @@ get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats) } static int -get_carrier_via_sysfs(const char *name, bool *carrier) -{ - char line[8]; - int retval; - - int error = 0; - char *fn = NULL; - int fd = -1; - - *carrier = false; - - fn = xasprintf("/sys/class/net/%s/carrier", name); - fd = open(fn, O_RDONLY); - if (fd < 0) { - error = errno; - VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(error)); - goto exit; - } - - retval = read(fd, line, sizeof line); - if (retval < 0) { - error = errno; - if (error == EINVAL) { - /* This is the normal return value when we try to check carrier if - * the network device is not up. */ - } else { - VLOG_WARN_RL(&rl, "%s: read failed: %s", fn, strerror(error)); - } - goto exit; - } else if (retval == 0) { - error = EPROTO; - VLOG_WARN_RL(&rl, "%s: unexpected end of file", fn); - goto exit; - } - - if (line[0] != '0' && line[0] != '1') { - error = EPROTO; - VLOG_WARN_RL(&rl, "%s: value is %c (expected 0 or 1)", fn, line[0]); - goto exit; - } - *carrier = line[0] != '0'; - error = 0; - -exit: - if (fd >= 0) { - close(fd); - } - free(fn); - return error; -} - -static int -get_flags(const struct netdev *netdev, int *flags) +get_flags(const struct netdev_dev *dev, unsigned int *flags) { struct ifreq ifr; int error; - error = netdev_linux_do_ioctl(netdev_get_name(netdev), &ifr, SIOCGIFFLAGS, + *flags = 0; + error = netdev_linux_do_ioctl(dev->name, &ifr, SIOCGIFFLAGS, "SIOCGIFFLAGS"); - *flags = ifr.ifr_flags; + if (!error) { + *flags = ifr.ifr_flags; + } return error; } static int -set_flags(struct netdev *netdev, int flags) +set_flags(struct netdev *netdev, unsigned int flags) { struct ifreq ifr;