From f6eb6b202533ad393b8d1d8e6ae6e502cbf19caf Mon Sep 17 00:00:00 2001 From: Giuseppe Lettieri Date: Wed, 25 Jul 2012 22:51:05 +0200 Subject: [PATCH] netdev implementation for FreeBSD This patch adds new netdev classes that implement "system" and "tap" devices on FreeBSD using the libpcap library. This enables the use of the "netdev" datapath_type of Open vSwitch on FreeBSD. Signed-off-by: Gaetano Catalli Signed-off-by: Ed Maste Signed-off-by: Giuseppe Lettieri Signed-off-by: Ben Pfaff --- INSTALL.Linux => INSTALL | 39 +- INSTALL.KVM | 6 +- INSTALL.Libvirt | 2 +- INSTALL.RHEL | 2 +- INSTALL.SSL | 4 +- INSTALL.XenServer | 6 +- INSTALL.bridge | 2 +- INSTALL.userspace | 6 +- Makefile.am | 2 +- NEWS | 2 + NOTICE | 3 + README | 4 +- acinclude.m4 | 2 +- configure.ac | 1 + debian/copyright.in | 1 + lib/automake.mk | 3 + lib/netdev-bsd.c | 1485 ++++++++++++++++++++++++++++++++++ lib/netdev-provider.h | 3 + lib/netdev.c | 4 + lib/route-table.h | 1 + lib/rtbsd.c | 167 ++++ lib/rtbsd.h | 58 ++ ofproto/ofproto-dpif-sflow.c | 1 + 23 files changed, 1770 insertions(+), 34 deletions(-) rename INSTALL.Linux => INSTALL (91%) create mode 100644 lib/netdev-bsd.c create mode 100644 lib/rtbsd.c create mode 100644 lib/rtbsd.h diff --git a/INSTALL.Linux b/INSTALL similarity index 91% rename from INSTALL.Linux rename to INSTALL index 78152b69..671c4a49 100644 --- a/INSTALL.Linux +++ b/INSTALL @@ -1,9 +1,9 @@ - How to Install Open vSwitch on Linux - ==================================== + How to Install Open vSwitch on Linux and FreeBSD + ================================================ This document describes how to build and install Open vSwitch on a -generic Linux host. If you want to install Open vSwitch on a Citrix -XenServer, see INSTALL.XenServer instead. +generic Linux or FreeBSD host. If you want to install Open vSwitch on +a Citrix XenServer, see INSTALL.XenServer instead. This version of Open vSwitch may be built manually with "configure" and "make", as described below. 
You may also build Debian packages by @@ -29,9 +29,9 @@ you will need the following software: libssl is installed, then Open vSwitch will automatically build with support for it. -To compile the kernel module, you must also install the following. If -you cannot build or install the kernel module, you may use the -userspace-only implementation, at a cost in performance. The +To compile the kernel module on Linux, you must also install the +following. If you cannot build or install the kernel module, you may +use the userspace-only implementation, at a cost in performance. The userspace implementation may also lack some features. Refer to INSTALL.userspace for more information. @@ -119,14 +119,14 @@ following software: - libssl compatible with the libssl used for build, if OpenSSL was used for the build. - - The Linux kernel version configured as part of the build. + - On Linux, the same kernel version configured as part of the build. - - For optional support of ingress policing, the "tc" program from - iproute2 (part of all major distributions and available at + - For optional support of ingress policing on Linux, the "tc" program + from iproute2 (part of all major distributions and available at http://www.linux-foundation.org/en/Net:Iproute2). -You should ensure that /dev/urandom exists. To support TAP devices, -you must also ensure that /dev/net/tun exists. +On Linux you should ensure that /dev/urandom exists. To support TAP +devices, you must also ensure that /dev/net/tun exists. To run the ovsdbmonitor tool, the machine must also have the following software: @@ -144,13 +144,13 @@ software: (On Debian "lenny" the above can be installed with "apt-get install python-json python-qt4 python-zopeinterface python-twisted-conch".) 
-Building and Installing Open vSwitch for Linux -============================================== +Building and Installing Open vSwitch for Linux or FreeBSD +========================================================= Once you have installed all the prerequisites listed above in the Base Prerequisites section, follow the procedure below to build. -1. If you pulled the sources directly from an Open vSwitch Git tree, +1. If you pulled the sources directly from an Open vSwitch Git tree, run boot.sh in the top source directory: % ./boot.sh @@ -191,10 +191,13 @@ Prerequisites section, follow the procedure below to build. additional environment variables. For a full list, invoke configure with the --help option. -3. Run make in the top source directory: +3. Run make in the top source directory: % make + On FreeBSD you may need to use GNU make (gmake) or NetBSD make + (bmake) instead of the native make. + For improved warnings if you installed "sparse" (see "Prerequisites"), add C=1 to the "make" command line. @@ -322,7 +325,7 @@ also upgrade the database schema: - If there is no important data in your database, then you may delete the database file and recreate it with ovsdb-tool, following the instructions under "Building and Installing Open - vSwitch for Linux". + vSwitch for Linux or FreeBSD". - If you want to preserve the contents of your database, back it up first, then use "ovsdb-tool convert" to upgrade it, e.g.: @@ -330,7 +333,7 @@ also upgrade the database schema: % ovsdb-tool convert /usr/local/etc/openvswitch/conf.db vswitchd/vswitch.ovsschema 4. Start the Open vSwitch daemons as described under "Building and - Installing Open vSwitch for Linux" above. + Installing Open vSwitch for Linux or FreeBSD" above. Bug Reporting ------------- diff --git a/INSTALL.KVM b/INSTALL.KVM index 61620e51..18a7bd4f 100644 --- a/INSTALL.KVM +++ b/INSTALL.KVM @@ -3,12 +3,12 @@ This document describes how to use Open vSwitch with the Kernel-based Virtual Machine (KVM). 
This document assumes that you have read and -followed INSTALL.Linux to get Open vSwitch setup on your Linux system. +followed INSTALL to get Open vSwitch setup on your Linux system. Setup ----- -First, follow the setup instructions in INSTALL.Linux to get a working +First, follow the setup instructions in INSTALL to get a working Open vSwitch installation. KVM uses tunctl to handle various bridging modes, which you can @@ -43,7 +43,7 @@ switch='br0' ovs-vsctl del-port ${switch} $1 -------------------------------------------------------------------- -At the end of INSTALL.Linux, it describes basic usage of creating +At the end of INSTALL, it describes basic usage of creating bridges and ports. If you haven't already, create a bridge named br0 with the following command: diff --git a/INSTALL.Libvirt b/INSTALL.Libvirt index 4f318313..1bc45d5e 100644 --- a/INSTALL.Libvirt +++ b/INSTALL.Libvirt @@ -4,7 +4,7 @@ This document describes how to use Open vSwitch with Libvirt 0.9.11 or later. The Open vSwitch support in Libvirt 0.9.11 eliminates the need to use OVS Linux Bridge compatibility layer (brcompatd) and interface up/down -scripts. This document assumes that you followed INSTALL.Linux or installed +scripts. This document assumes that you followed INSTALL or installed Open vSwitch from distribution packaging such as a .deb or .rpm. The Open vSwitch support is included by default in Libvirt 0.9.11. Consult www.libvirt.org for instructions on how to build the latest Libvirt, if your diff --git a/INSTALL.RHEL b/INSTALL.RHEL index 039a8570..ff79c894 100644 --- a/INSTALL.RHEL +++ b/INSTALL.RHEL @@ -3,7 +3,7 @@ This document describes how to build and install Open vSwitch on a Red Hat Enterprise Linux (RHEL) host. If you want to install Open vSwitch -on a generic Linux host, see INSTALL.Linux instead. +on a generic Linux host, see INSTALL instead. We have tested these instructions with RHEL 5.6 and RHEL 6.0. 
diff --git a/INSTALL.SSL b/INSTALL.SSL index f322b413..8511b69e 100644 --- a/INSTALL.SSL +++ b/INSTALL.SSL @@ -7,8 +7,8 @@ with OpenSSL. SSL support ensures integrity and confidentiality of the OpenFlow connections, increasing network security. This file explains how to configure an Open vSwitch to connect to an -OpenFlow controller over SSL. Refer to INSTALL.Linux for instructions -on building Open vSwitch with SSL support. +OpenFlow controller over SSL. Refer to INSTALL for instructions on +building Open vSwitch with SSL support. Open vSwitch uses TLS version 1.0 or later (TLSv1), as specified by RFC 2246, which is very similar to SSL version 3.0. TLSv1 was diff --git a/INSTALL.XenServer b/INSTALL.XenServer index e45d5bca..7a4dd76e 100644 --- a/INSTALL.XenServer +++ b/INSTALL.XenServer @@ -3,7 +3,7 @@ This document describes how to build and install Open vSwitch on a Citrix XenServer host. If you want to install Open vSwitch on a -generic Linux host, see INSTALL.Linux instead. +generic Linux or BSD host, see INSTALL instead. These instructions have been tested with XenServer 5.6 FP1. @@ -19,8 +19,8 @@ RPMs for Citrix XenServer is the DDK VM available from Citrix. ./configure; make dist" in the Git tree. You cannot run this in the DDK VM, because it lacks tools that are necessary to bootstrap the Open vSwitch distribution. Instead, you must run this on a - machine that has the tools listed in INSTALL.Linux as prerequisites - for building from a Git tree. + machine that has the tools listed in INSTALL as prerequisites for + building from a Git tree. 2. Copy the distribution tarball into /usr/src/redhat/SOURCES inside the DDK VM. diff --git a/INSTALL.bridge b/INSTALL.bridge index dec09d8d..af20bff0 100644 --- a/INSTALL.bridge +++ b/INSTALL.bridge @@ -25,7 +25,7 @@ that the Open vSwitch kernel modules are loaded before the Linux kernel bridge module. 1. 
Build, install, and start up the Open vSwitch kernel modules and - userspace programs as described in INSTALL.Linux. + userspace programs as described in INSTALL. It is important to run "make install", because some Open vSwitch programs expect to find files in locations selected at installation diff --git a/INSTALL.userspace b/INSTALL.userspace index 10511b16..296f0771 100644 --- a/INSTALL.userspace +++ b/INSTALL.userspace @@ -17,7 +17,7 @@ Building and Installing ----------------------- The requirements and procedure for building, installing, and -configuring Open vSwitch are the same as those given in INSTALL.Linux. +configuring Open vSwitch are the same as those given in INSTALL. You may omit configuring, building, and installing the kernel module, and the related requirements. @@ -31,6 +31,10 @@ The tun device must also exist as /dev/net/tun. If it does not exist, then create /dev/net (if necessary) with "mkdir /dev/net", then create /dev/net/tun with "mknod /dev/net/tun c 10 200". +On FreeBSD, the userspace switch additionally requires the kernel +tap(4) driver to be available, either built into the kernel or loaded +as a module. + Using the Userspace Datapath with ovs-vswitchd ---------------------------------------------- diff --git a/Makefile.am b/Makefile.am index 50c4b142..9cad47b7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -42,9 +42,9 @@ EXTRA_DIST = \ CodingStyle \ DESIGN \ FAQ \ + INSTALL \ INSTALL.KVM \ INSTALL.Libvirt \ - INSTALL.Linux \ INSTALL.RHEL \ INSTALL.SSL \ INSTALL.XenServer \ diff --git a/NEWS b/NEWS index cc52322a..34fa0ea9 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,7 @@ post-v1.8.0 ------------------------ + - FreeBSD is now a supported platform, thanks to code contributions from + Gaetano Catalli, Ed Maste, and Giuseppe Lettieri. - ovs-bugtool: New --ovs option to report only OVS related information. - New %t and %T log escapes to identify the subprogram within a cooperating group of processes or threads that emitted a log message. 
diff --git a/NOTICE b/NOTICE index 912d7b87..dafd25fd 100644 --- a/NOTICE +++ b/NOTICE @@ -4,6 +4,9 @@ available at http://www.apache.org/licenses/LICENSE-2.0.html Open vSwitch Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira, Inc. +Open vSwitch BSD port +Copyright (c) 2011 Gaetano Catalli + Apache Portable Runtime Copyright 2008 The Apache Software Foundation. diff --git a/README b/README index aa317820..7c680d49 100644 --- a/README +++ b/README @@ -88,7 +88,7 @@ Open vSwitch also provides some tools: What other documentation is available? -------------------------------------- -To install Open vSwitch on a regular Linux machine, read INSTALL.Linux. +To install Open vSwitch on a regular Linux machine, read INSTALL. For answers to common questions, read FAQ. @@ -101,7 +101,7 @@ or resource pool, read INSTALL.XenServer. To build RPMs for installing Open vSwitch on a Red Hat Enterprise Linux host, read INSTALL.RHEL. -To use Open vSwitch with KVM on Linux, read INSTALL.Linux, then +To use Open vSwitch with KVM on Linux, read INSTALL, then INSTALL.KVM. To use Open vSwitch with Libvirt, read INSTALL.Libvirt. diff --git a/acinclude.m4 b/acinclude.m4 index c332b41a..d502b000 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -288,7 +288,7 @@ AC_DEFUN([OVS_CHECK_IF_PACKET], [Define to 1 if net/if_packet.h is available.]) fi]) -dnl Checks for net/if_dl.h +dnl Checks for net/if_dl.h. 
AC_DEFUN([OVS_CHECK_IF_DL], [AC_CHECK_HEADER([net/if_dl.h], [HAVE_IF_DL=yes], diff --git a/configure.ac b/configure.ac index 6006766d..b18b2596 100644 --- a/configure.ac +++ b/configure.ac @@ -41,6 +41,7 @@ AC_SYS_LARGEFILE AC_SEARCH_LIBS([pow], [m]) AC_SEARCH_LIBS([clock_gettime], [rt]) AC_SEARCH_LIBS([timer_create], [rt]) +AC_SEARCH_LIBS([pcap_open_live], [pcap]) OVS_CHECK_COVERAGE OVS_CHECK_NDEBUG diff --git a/debian/copyright.in b/debian/copyright.in index 71aa6803..16e55599 100644 --- a/debian/copyright.in +++ b/debian/copyright.in @@ -12,6 +12,7 @@ Upstream Copyright Holders: Copyright (c) 2010 Jean Tourrilhes - HP-Labs. Copyright (c) 2008,2009,2010 Citrix Systems, Inc. and authors listed above. + Copyright (c) 2011 Gaetano Catalli License: diff --git a/lib/automake.mk b/lib/automake.mk index b727fed6..5223423f 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -244,6 +244,9 @@ endif if HAVE_IF_DL lib_libopenvswitch_a_SOURCES += \ + lib/netdev-bsd.c \ + lib/rtbsd.c \ + lib/rtbsd.h \ lib/route-table-bsd.c endif diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c new file mode 100644 index 00000000..0b1a37c3 --- /dev/null +++ b/lib/netdev-bsd.c @@ -0,0 +1,1485 @@ +/* + * Copyright (c) 2011 Gaetano Catalli. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include /* NOTE(review): header names of the <...> includes were lost in extraction */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "rtbsd.h"
+#include "coverage.h"
+#include "dynamic-string.h"
+#include "fatal-signal.h"
+#include "netdev-provider.h"
+#include "ofpbuf.h"
+#include "openflow/openflow.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "socket-util.h"
+#include "shash.h"
+#include "svec.h"
+#include "vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(netdev_bsd);
+
+
+/*
+ * This file implements objects to access interfaces.
+ * Externally, interfaces are represented by two structures:
+ *   + struct netdev_dev, representing a network device,
+ *     containing e.g. name and a refcount;
+ *     We can have private variables by embedding the
+ *     struct netdev_dev into our own structure
+ *     (e.g. netdev_dev_bsd)
+ *
+ *   + struct netdev, representing an instance of an open netdev_dev.
+ *     The structure refers to the 'struct netdev_dev'
+ *     representing the device. Again, private information
+ *     such as file descriptor etc. are stored in our
+ *     own struct netdev_bsd which includes a struct netdev.
+ *
+ * Both 'struct netdev' and 'struct netdev_dev' are referenced
+ * in containers which hold pointers to the data structures.
+ * We can reach our own struct netdev_XXX_bsd by putting a
+ * struct netdev_XXX within our own struct, and using CONTAINER_OF
+ * to access the parent structure.
+ */
+struct netdev_bsd {
+    struct netdev netdev;   /* Generic part; netdev_bsd_cast() recovers the
+                               enclosing structure from it. */
+
+    int netdev_fd;   /* Selectable file descriptor for the network device.
+                        This descriptor will be used for polling operations.
+                        -1 until the device is listened on or, for the first
+                        opener of a tap device, set to the tap descriptor. */
+
+    pcap_t *pcap_handle;  /* Packet capture descriptor for a system network
+                             device */
+};
+
+struct netdev_dev_bsd {
+    struct netdev_dev netdev_dev;   /* Generic part; see netdev_dev_bsd_cast(). */
+    unsigned int cache_valid;       /* Bitmask of VALID_* flags: which of the
+                                       cached fields below are current. */
+    unsigned int change_seq;        /* Bumped by netdev_dev_bsd_changed();
+                                       never left at 0. */
+
+    int ifindex;
+    uint8_t etheraddr[ETH_ADDR_LEN];
+    struct in_addr in4;
+    struct in6_addr in6;
+    int mtu;
+    int carrier;
+
+    bool tap_opened;    /* Set once the first opener has taken 'tap_fd'. */
+    int tap_fd;         /* TAP character device, if any */
+};
+
+
+enum {
+    VALID_IFINDEX = 1 << 0,
+    VALID_ETHERADDR = 1 << 1,
+    VALID_IN4 = 1 << 2,
+    VALID_IN6 = 1 << 3,
+    VALID_MTU = 1 << 4,
+    VALID_CARRIER = 1 << 5
+};
+
+/* An AF_INET socket (used for ioctl operations). */
+static int af_inet_sock = -1;
+
+#define PCAP_SNAPLEN 2048   /* Snapshot length passed to pcap_open_live(). */
+
+
+/*
+ * Notifier used to invalidate device information in case of status change.
+ *
+ * It will be registered with 'rtbsd_notifier_register()' when the first
+ * device is created by a call to either 'netdev_bsd_create_tap()' or
+ * 'netdev_bsd_create_system()'.
+ *
+ * The callback associated with this notifier ('netdev_bsd_cache_cb()') will
+ * invalidate cached information about the device.
+ */
+static struct rtbsd_notifier netdev_bsd_cache_notifier;
+static int cache_notifier_refcount;
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+
+/* Forward declarations for helpers used before their definition. */
+static int netdev_bsd_init(void);
+static int netdev_bsd_do_ioctl(const struct netdev *netdev, struct ifreq *ifr,
+                               unsigned long cmd, const char *cmd_name);
+static void destroy_tap(int fd, const char *name);
+static int get_flags(const struct netdev *netdev, int *flagsp);
+static int set_flags(struct netdev *netdev, int flags);
+static int do_set_addr(struct netdev *netdev,
+                       int ioctl_nr, const char *ioctl_name,
+                       struct in_addr addr);
+static int get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN]);
+static int set_etheraddr(const char *netdev_name, int hwaddr_family,
+                         int hwaddr_len, const uint8_t mac[ETH_ADDR_LEN]);
+static int get_ifindex(const struct netdev *netdev, int *ifindexp);
+
+/* Returns true if 'netdev_class' is served by this provider.  A class is
+ * recognized by its 'init' member being netdev_bsd_init(). */
+static bool
+is_netdev_bsd_class(const struct netdev_class *netdev_class)
+{
+    return netdev_bsd_init == netdev_class->init;
+}
+
+/* Converts the generic 'netdev' into the 'struct netdev_bsd' that embeds it.
+ * Asserts that 'netdev' belongs to a class of this provider. */
+static struct netdev_bsd *
+netdev_bsd_cast(const struct netdev *netdev)
+{
+    assert(is_netdev_bsd_class(netdev_dev_get_class(netdev_get_dev(netdev))));
+
+    return CONTAINER_OF(netdev, struct netdev_bsd, netdev);
+}
+
+/* Converts the generic 'netdev_dev' into the 'struct netdev_dev_bsd' that
+ * embeds it.  Asserts that 'netdev_dev' belongs to a class of this
+ * provider. */
+static struct netdev_dev_bsd *
+netdev_dev_bsd_cast(const struct netdev_dev *netdev_dev)
+{
+    assert(is_netdev_bsd_class(netdev_dev_get_class(netdev_dev)));
+
+    return CONTAINER_OF(netdev_dev, struct netdev_dev_bsd, netdev_dev);
+}
+
+/* Creates, on the first call only, the AF_INET socket used for ioctl
+ * operations.  Returns 0 on success, otherwise a positive errno value.  The
+ * outcome of the first attempt is remembered and returned by every later
+ * call without retrying. */
+static int
+netdev_bsd_init(void)
+{
+    static int status = -1;     /* Negative means "not attempted yet". */
+
+    if (status < 0) {
+        af_inet_sock = socket(AF_INET, SOCK_DGRAM, 0);
+        if (af_inet_sock >= 0) {
+            status = 0;
+        } else {
+            status = errno;
+        }
+
+        if (status != 0) {
+            VLOG_ERR("failed to create inet socket: %s", strerror(status));
+        }
+    }
+
+    return status;
+}
+
+/*
+ * Perform periodic work needed by netdev. In BSD netdevs it checks for any
+ * interface status changes, and eventually calls all the user callbacks.
+ */
+static void
+netdev_bsd_run(void)
+{
+    rtbsd_notifier_run();
+}
+
+/*
+ * Arranges for poll_block() to wake up if the "run" member function needs to
+ * be called.
+ */
+static void
+netdev_bsd_wait(void)
+{
+    rtbsd_notifier_wait();
+}
+
+/* Advances 'dev''s change sequence number.  The value 0 is skipped when the
+ * counter wraps around, so 'change_seq' is never left at 0. */
+static void
+netdev_dev_bsd_changed(struct netdev_dev_bsd *dev)
+{
+    dev->change_seq++;
+    if (!dev->change_seq) {
+        dev->change_seq++;
+    }
+}
+
+/* Invalidate cache in case of interface status change.
+ *
+ * With a non-null 'change', only the device named 'change->if_name' is
+ * invalidated (if it exists and belongs to this provider).  With a null
+ * 'change', every device of 'netdev_bsd_class' is invalidated. */
+static void
+netdev_bsd_cache_cb(const struct rtbsd_change *change,
+                    void *aux OVS_UNUSED)
+{
+    struct netdev_dev_bsd *dev;
+
+    if (change) {
+        struct netdev_dev *base_dev = netdev_dev_from_name(change->if_name);
+
+        if (base_dev) {
+            const struct netdev_class *netdev_class =
+                                                netdev_dev_get_class(base_dev);
+
+            if (is_netdev_bsd_class(netdev_class)) {
+                dev = netdev_dev_bsd_cast(base_dev);
+                dev->cache_valid = 0;
+                netdev_dev_bsd_changed(dev);
+            }
+        }
+    } else {
+        /*
+         * XXX the API is lacking, we should be able to iterate on the list of
+         * netdevs without having to store the info in a temp shash.
+         */
+        struct shash device_shash;
+        struct shash_node *node;
+
+        shash_init(&device_shash);
+        netdev_dev_get_devices(&netdev_bsd_class, &device_shash);
+        SHASH_FOR_EACH (node, &device_shash) {
+            dev = node->data;
+            dev->cache_valid = 0;
+            netdev_dev_bsd_changed(dev);
+        }
+        shash_destroy(&device_shash);
+    }
+}
+
+/* Takes a reference on the cache notifier, registering it with rtbsd when
+ * the first reference is taken.  Returns 0 on success, otherwise the error
+ * returned by rtbsd_notifier_register() (no reference is taken then). */
+static int
+cache_notifier_ref(void)
+{
+    int ret = 0;
+
+    if (!cache_notifier_refcount) {
+        ret = rtbsd_notifier_register(&netdev_bsd_cache_notifier,
+                                                netdev_bsd_cache_cb, NULL);
+        if (ret) {
+            return ret;
+        }
+    }
+    cache_notifier_refcount++;
+    return 0;
+}
+
+/* Drops a reference on the cache notifier, unregistering it when the last
+ * reference goes away.  Always returns 0. */
+static int
+cache_notifier_unref(void)
+{
+    cache_notifier_refcount--;
+    if (cache_notifier_refcount == 0) {
+        rtbsd_notifier_unregister(&netdev_bsd_cache_notifier);
+    }
+    return 0;
+}
+
+/* Allocate a netdev_dev_bsd structure for a "system" device named 'name' and
+ * store its generic part in '*netdev_devp'.  Returns 0 on success, otherwise
+ * a positive errno value. */
+static int
+netdev_bsd_create_system(const struct netdev_class *class, const char *name,
+                         struct netdev_dev **netdev_devp)
+{
+    struct netdev_dev_bsd *netdev_dev;
+    int error;
+
+    error = cache_notifier_ref();
+    if (error) {
+        return error;
+    }
+
+    netdev_dev = xzalloc(sizeof *netdev_dev);
+    netdev_dev->change_seq = 1;
+    /* A system device has no TAP descriptor; do not leave a valid-looking
+     * descriptor number (0) behind. */
+    netdev_dev->tap_fd = -1;
+    netdev_dev_init(&netdev_dev->netdev_dev, name, class);
+    *netdev_devp = &netdev_dev->netdev_dev;
+
+    return 0;
+}
+
+/*
+ * Allocate a netdev_dev_bsd structure with 'tap' class.
+ *
+ * Opens /dev/tap, renames the resulting interface to 'name', makes the
+ * descriptor non-blocking and brings the interface up.  On success stores
+ * the generic part of the new device in '*netdev_devp' and returns 0.  On
+ * failure returns a positive errno value and releases everything acquired
+ * so far, including the TAP descriptor.
+ */
+static int
+netdev_bsd_create_tap(const struct netdev_class *class, const char *name,
+                      struct netdev_dev **netdev_devp)
+{
+    struct netdev_dev_bsd *netdev_dev = NULL;
+    int error = 0;
+    struct ifreq ifr;
+
+    error = cache_notifier_ref();
+    if (error) {
+        goto error;
+    }
+
+    /* allocate the device structure and set the internal flag */
+    netdev_dev = xzalloc(sizeof *netdev_dev);
+    netdev_dev->change_seq = 1;
+
+    memset(&ifr, 0, sizeof(ifr));
+
+    /* Create a tap device by opening /dev/tap.  The TAPGIFNAME ioctl is used
+     * to retrieve the name of the tap device. */
+    netdev_dev->tap_fd = open("/dev/tap", O_RDWR);
+    if (netdev_dev->tap_fd < 0) {
+        error = errno;
+        VLOG_WARN("opening \"/dev/tap\" failed: %s", strerror(error));
+        goto error_undef_notifier;
+    }
+
+    /* Retrieve tap name (e.g. tap0) */
+    if (ioctl(netdev_dev->tap_fd, TAPGIFNAME, &ifr) == -1) {
+        /* The interface name is unknown here, so destroy_tap() cannot be
+         * used; at least do not leak the descriptor. */
+        error = errno;
+        close(netdev_dev->tap_fd);
+        goto error_undef_notifier;
+    }
+
+    /* Change the name of the tap device */
+    ifr.ifr_data = (void *)name;
+    if (ioctl(af_inet_sock, SIOCSIFNAME, &ifr) == -1) {
+        error = errno;
+        destroy_tap(netdev_dev->tap_fd, ifr.ifr_name);
+        goto error_undef_notifier;
+    }
+
+    /* set non-blocking. */
+    error = set_nonblocking(netdev_dev->tap_fd);
+    if (error) {
+        destroy_tap(netdev_dev->tap_fd, name);
+        goto error_undef_notifier;
+    }
+
+    /* Turn device UP */
+    ifr.ifr_flags = (uint16_t)IFF_UP;
+    ifr.ifr_flagshigh = 0;
+    strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
+    /* strncpy() does not terminate the destination when 'name' fills it. */
+    ifr.ifr_name[sizeof ifr.ifr_name - 1] = '\0';
+    if (ioctl(af_inet_sock, SIOCSIFFLAGS, &ifr) == -1) {
+        error = errno;
+        destroy_tap(netdev_dev->tap_fd, name);
+        goto error_undef_notifier;
+    }
+
+    /* initialize the device structure and
+     * link the structure to its netdev */
+    netdev_dev_init(&netdev_dev->netdev_dev, name, class);
+    *netdev_devp = &netdev_dev->netdev_dev;
+
+    return 0;
+
+error_undef_notifier:
+    cache_notifier_unref();
+error:
+    free(netdev_dev);
+    return error;
+}
+
+/* Releases 'netdev_dev_': drops the notifier reference, destroys the TAP
+ * interface when the device is of type "tap", and frees the structure. */
+static void
+netdev_bsd_destroy(struct netdev_dev *netdev_dev_)
+{
+    struct netdev_dev_bsd *netdev_dev = netdev_dev_bsd_cast(netdev_dev_);
+
+    cache_notifier_unref();
+
+    if (netdev_dev->tap_fd >= 0 &&
+            !strcmp(netdev_dev_get_type(netdev_dev_), "tap")) {
+        destroy_tap(netdev_dev->tap_fd, netdev_dev_get_name(netdev_dev_));
+    }
+    free(netdev_dev);
+}
+
+
+/* Opens 'netdev_dev_' and stores the new instance in '*netdevp'.  Returns 0
+ * on success.  Only ENXIO from reading the interface flags is treated as a
+ * failure; in that case the instance is torn down and ENXIO is returned. */
+static int
+netdev_bsd_open_system(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
+{
+    struct netdev_dev_bsd *netdev_dev = netdev_dev_bsd_cast(netdev_dev_);
+    struct netdev_bsd *netdev;
+    int error;
+    enum netdev_flags flags;
+
+    /* Allocate network device. */
+    netdev = xcalloc(1, sizeof *netdev);
+    netdev->netdev_fd = -1;
+    netdev_init(&netdev->netdev, netdev_dev_);
+
+    /* Verify that the netdev really exists by attempting to read its flags */
+    error = netdev_get_flags(&netdev->netdev, &flags);
+    if (error == ENXIO) {
+        goto error;
+    }
+
+    /* The first user that opens a tap port(from dpif_create_and_open()) will
+     * receive the file descriptor associated with the tap device. Instead, the
+     * following users will open the tap device as a normal 'system' device. */
+    if (!strcmp(netdev_dev_get_type(netdev_dev_), "tap") &&
+            !netdev_dev->tap_opened) {
+        netdev_dev->tap_opened = true;
+        netdev->netdev_fd = netdev_dev->tap_fd;
+    }
+
+    *netdevp = &netdev->netdev;
+    return 0;
+
+error:
+    netdev_uninit(&netdev->netdev, true);
+    return error;
+}
+
+
+
+/* Close a 'netdev'.
+ *
+ * The capture handle is closed whenever one exists.  It is created only by
+ * netdev_bsd_listen(), which also happens for a "tap" device that was opened
+ * as a system device, so the decision must not depend on the device type. */
+static void
+netdev_bsd_close(struct netdev *netdev_)
+{
+    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
+
+    if (netdev->pcap_handle) {
+        pcap_close(netdev->pcap_handle);
+    }
+
+    free(netdev);
+}
+
+/* Prepares 'netdev_' for packet reception by opening a pcap capture on the
+ * interface.  Does nothing if the instance already has a descriptor (an
+ * earlier listen, or the tap descriptor).  Returns 0 on success, otherwise a
+ * positive errno value; on failure the capture handle is closed and the
+ * instance is left without descriptor or handle. */
+static int
+netdev_bsd_listen(struct netdev *netdev_)
+{
+    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
+    const char *name = netdev_get_name(netdev_);
+    char errbuf[PCAP_ERRBUF_SIZE];
+    pcap_t *pcap;
+    int error;
+    int fd;
+    int one = 1;
+
+    if (netdev->netdev_fd >= 0) {
+        return 0;
+    }
+
+    /* open the pcap device. The device is opened in non-promiscuous mode
+     * because the interface flags are manually set by the caller. */
+    errbuf[0] = '\0';
+    pcap = pcap_open_live(name, PCAP_SNAPLEN, 0, 1000, errbuf);
+    if (pcap == NULL) {
+        VLOG_ERR("%s: pcap_open_live failed: %s", name, errbuf);
+        return EIO;
+    } else if (errbuf[0] != '\0') {
+        VLOG_WARN("%s: pcap_open_live: %s", name, errbuf);
+    }
+
+    netdev_dev_bsd_changed(netdev_dev_bsd_cast(netdev_get_dev(netdev_)));
+
+    /* The pcap calls below report failure through their return value and are
+     * not guaranteed to set errno.  errno is cleared first and EIO is used
+     * as a fallback so that a failure is never reported as 0. */
+
+    /* Descriptor to be stored in netdev->netdev_fd.  It is owned by the pcap
+     * handle and must not be closed separately. */
+    errno = 0;
+    fd = pcap_get_selectable_fd(pcap);
+    if (fd == -1) {
+        error = errno ? errno : EIO;
+        goto error;
+    }
+
+    /* Set non-blocking mode. Also the BIOCIMMEDIATE ioctl must be called
+     * on the file descriptor returned by pcap_get_selectable_fd to achieve
+     * a real non-blocking behaviour.*/
+    errno = 0;
+    if (pcap_setnonblock(pcap, 1, errbuf) == -1) {
+        error = errno ? errno : EIO;
+        goto error;
+    }
+
+    /* This call assure that reads return immediately upon packet reception.
+     * Otherwise, a read will block until either the kernel buffer becomes
+     * full or a timeout occurs. */
+    if (ioctl(fd, BIOCIMMEDIATE, &one) < 0) {
+        error = errno;      /* Saved before logging can overwrite it. */
+        VLOG_ERR("ioctl(BIOCIMMEDIATE) on %s device failed: %s",
+                 name, strerror(error));
+        goto error;
+    }
+
+    /* Capture only incoming packets */
+    errno = 0;
+    if (pcap_setdirection(pcap, PCAP_D_IN) == -1) {
+        error = errno ? errno : EIO;
+        goto error;
+    }
+
+    netdev->pcap_handle = pcap;
+    netdev->netdev_fd = fd;
+    return 0;
+
+error:
+    pcap_close(pcap);
+    return error;
+}
+
+
+/* The recv callback of the netdev class returns the number of bytes of the
+ * received packet.
+ *
+ * This can be done by the pcap_next() function. Unfortunately pcap_next() does
+ * not make difference between a missing packet on the capture interface and
+ * an error during the file capture. We can use the pcap_dispatch() function
+ * instead, which is able to distinguish between errors and null packet.
+ *
+ * To make pcap_dispatch() return the number of bytes read from the interface
+ * we need to define the following callback and argument.
+ */
+struct pcap_arg {
+    void *data;     /* Destination buffer supplied by the caller. */
+    int size;       /* Capacity of 'data', in bytes. */
+    int retval;     /* Set by proc_pkt(): number of bytes stored in 'data'. */
+};
+
+/*
+ * This callback will be executed on every captured packet.
+ *
+ * Only 'hdr->caplen' bytes are available at 'packet'; 'hdr->len' is the
+ * length of the packet on the wire and can be larger when the packet exceeds
+ * the capture snapshot length.  At most 'caplen' bytes are therefore copied,
+ * further limited by the room in the destination buffer.  In either case of
+ * truncation we hand back the truncated packet.
+ *
+ * The argument args->retval is the number of bytes copied.
+ */
+static void
+proc_pkt(u_char *args_, const struct pcap_pkthdr *hdr, const u_char *packet)
+{
+    struct pcap_arg *args = (struct pcap_arg *)args_;
+    size_t room = args->size > 0 ? (size_t) args->size : 0;
+    size_t n_bytes = hdr->caplen < room ? hdr->caplen : room;
+
+    if (n_bytes < hdr->len) {
+        VLOG_WARN_RL(&rl, "packet truncated");
+    }
+
+    /* copy the packet to our buffer */
+    memcpy(args->data, packet, n_bytes);
+    args->retval = (int) n_bytes;   /* n_bytes <= room, which fits in int. */
+}
+
+/*
+ * This function attempts to receive a packet from the specified network
+ * device. It is assumed that the network device is a system device or a tap
+ * device opened as a system one. In this case the read operation is performed
+ * on the 'netdev' pcap descriptor.
+ *
+ * Returns the number of bytes stored in 'data', or -EAGAIN when the device
+ * has no descriptor yet or pcap_dispatch() delivered no packet.
+ */
+static int
+netdev_bsd_recv_system(struct netdev_bsd *netdev, void *data, size_t size)
+{
+    struct pcap_arg arg;
+    int ret;
+
+    if (netdev->netdev_fd < 0) {
+        return -EAGAIN;
+    }
+
+    /* prepare the pcap argument to store the packet */
+    arg.size = size;
+    arg.data = data;
+    arg.retval = 0;
+
+    for (;;) {
+        ret = pcap_dispatch(netdev->pcap_handle, 1, proc_pkt, (u_char *)&arg);
+
+        if (ret > 0) {
+            return arg.retval;  /* arg.retval < 0 is handled in the caller */
+        }
+        if (ret == -1) {
+            if (errno == EINTR) {
+                continue;
+            }
+        }
+
+        return -EAGAIN;
+    }
+}
+
+/*
+ * This function attempts to receive a packet from the specified network
+ * device. It is assumed that the network device is a tap device and the
+ * 'netdev_fd' member of the 'netdev' structure is initialized with the tap
+ * file descriptor.
+ *
+ * Returns the number of bytes read, or a negative errno value (-EAGAIN when
+ * the device has no descriptor or no packet is waiting).
+ */
+static int
+netdev_bsd_recv_tap(struct netdev_bsd *netdev, void *data, size_t size)
+{
+    if (netdev->netdev_fd < 0) {
+        return -EAGAIN;
+    }
+
+    for (;;) {
+        ssize_t retval = read(netdev->netdev_fd, data, size);
+        if (retval >= 0) {
+            return retval;
+        } else if (errno != EINTR) {
+            /* Saved first: logging may overwrite errno. */
+            int error = errno;
+
+            if (error != EAGAIN) {
+                /* Arguments follow the format: device name, then error. */
+                VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
+                             netdev_get_name(&netdev->netdev),
+                             strerror(error));
+            }
+            return -error;
+        }
+    }
+}
+
+
+/*
+ * According with the nature of the device a different function must be called.
+ * If the device is the bridge local port the 'netdev_bsd_recv_tap' function
+ * must be called, otherwise the 'netdev_bsd_recv_system' function is called.
+ *
+ * type!="tap"                        --->  system device.
+ * type=="tap" && netdev_fd == tap_fd --->  internal tap device
+ * type=="tap" && netdev_fd != tap_fd --->  internal tap device
+ *                                          opened as a system
+ *                                          device.
+ */
+static int
+netdev_bsd_recv(struct netdev *netdev_, void* data, size_t size)
+{
+    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
+    struct netdev_dev_bsd * netdev_dev =
+        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+
+    if (!strcmp(netdev_get_type(netdev_), "tap") &&
+            netdev->netdev_fd == netdev_dev->tap_fd) {
+        return netdev_bsd_recv_tap(netdev, data, size);
+    } else {
+        return netdev_bsd_recv_system(netdev, data, size);
+    }
+}
+
+
+/*
+ * Registers with the poll loop to wake up from the next call to poll_block()
+ * when a packet is ready to be received with netdev_recv() on 'netdev'.
+ * Nothing is registered while the device has no descriptor.
+ */
+static void
+netdev_bsd_recv_wait(struct netdev *netdev_)
+{
+    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
+
+    if (netdev->netdev_fd >= 0) {
+        poll_fd_wait(netdev->netdev_fd, POLLIN);
+    }
+}
+
+/* Discards all packets waiting to be received from 'netdev'.
+ *
+ * Returns 0 on success, otherwise the positive errno value of the failed
+ * BIOCFLUSH ioctl. */
+static int
+netdev_bsd_drain(struct netdev *netdev_)
+{
+    struct ifreq ifr;
+    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
+
+    /* Bounded copy: zero the request, then leave the final byte untouched so
+     * the name is always terminated. */
+    memset(&ifr, 0, sizeof ifr);
+    strncpy(ifr.ifr_name, netdev_get_name(netdev_), sizeof ifr.ifr_name - 1);
+    if (ioctl(netdev->netdev_fd, BIOCFLUSH, &ifr) == -1) {
+        int error = errno;  /* Saved first: logging may overwrite errno. */
+
+        VLOG_DBG_RL(&rl, "%s: ioctl(BIOCFLUSH) failed: %s",
+                    netdev_get_name(netdev_), strerror(error));
+        return error;
+    }
+    return 0;
+}
+
+/*
+ * Send a packet on the specified network device. The device could be either a
+ * system or a tap device.
+ *
+ * Returns 0 on success, EPIPE when the device has no descriptor, EMSGSIZE
+ * when only part of the packet was written, otherwise the positive errno
+ * value of the failed write.
+ */
+static int
+netdev_bsd_send(struct netdev *netdev_, const void *data, size_t size)
+{
+    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
+    struct netdev_dev_bsd * netdev_dev =
+        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+
+    if (netdev->netdev_fd < 0) {
+        return EPIPE;
+    }
+
+    for (;;) {
+        ssize_t retval;
+        if (!strcmp(netdev_get_type(netdev_), "tap") &&
+                netdev_dev->tap_fd == netdev->netdev_fd) {
+            retval = write(netdev->netdev_fd, data, size);
+        } else {
+            retval = pcap_inject(netdev->pcap_handle, data, size);
+        }
+        if (retval < 0) {
+            int error = errno;  /* Saved first: logging may overwrite errno. */
+
+            if (error == EINTR) {
+                continue;
+            } else if (error != EAGAIN) {
+                VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
+                             netdev_get_name(netdev_), strerror(error));
+            }
+            return error;
+        } else if (retval != size) {
+            VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of "
+                         "%zu) on %s", retval, size,
+                         netdev_get_name(netdev_));
+           return EMSGSIZE;
+        } else {
+            return 0;
+        }
+    }
+}
+
+/*
+ * Registers with the poll loop to wake up from the next call to poll_block()
+ * when the packet transmission queue has sufficient room to transmit a packet
+ * with netdev_send().
+ */
+static void
+netdev_bsd_send_wait(struct netdev *netdev_)
+{
+    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
+
+    if (netdev->netdev_fd < 0) { /* Nothing to do. */
+        return;
+    }
+
+    if (strcmp(netdev_get_type(netdev_), "tap")) {
+        poll_fd_wait(netdev->netdev_fd, POLLOUT);
+    } else {
+        /* TAP device always accepts packets. */
+        poll_immediate_wake();
+    }
+}
+
+/*
+ * Attempts to set 'netdev''s MAC address to 'mac'.  Returns 0 if successful,
+ * otherwise a positive errno value.
+ *
+ * Nothing is done when the cached address is valid and already equals 'mac'.
+ */
+static int
+netdev_bsd_set_etheraddr(struct netdev *netdev_,
+                         const uint8_t mac[ETH_ADDR_LEN])
+{
+    struct netdev_dev_bsd *netdev_dev =
+                                netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+    int error;
+
+    if (!(netdev_dev->cache_valid & VALID_ETHERADDR)
+        || !eth_addr_equals(netdev_dev->etheraddr, mac)) {
+        error = set_etheraddr(netdev_get_name(netdev_), AF_LINK, ETH_ADDR_LEN,
+                              mac);
+        if (!error) {
+            netdev_dev->cache_valid |= VALID_ETHERADDR;
+            memcpy(netdev_dev->etheraddr, mac, ETH_ADDR_LEN);
+            netdev_dev_bsd_changed(netdev_dev);
+        }
+    } else {
+        error = 0;
+    }
+    return error;
+}
+
+/*
+ * Copies 'netdev''s MAC address into 'mac', reading it from the device only
+ * when the cached copy is not valid.  Returns 0 if successful, otherwise the
+ * error returned by get_etheraddr().
+ */
+static int
+netdev_bsd_get_etheraddr(const struct netdev *netdev_,
+                         uint8_t mac[ETH_ADDR_LEN])
+{
+    struct netdev_dev_bsd *netdev_dev =
+        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+
+    if (!(netdev_dev->cache_valid & VALID_ETHERADDR)) {
+        int error = get_etheraddr(netdev_get_name(netdev_),
+                                  netdev_dev->etheraddr);
+        if (error) {
+            return error;
+        }
+        netdev_dev->cache_valid |= VALID_ETHERADDR;
+    }
+    memcpy(mac, netdev_dev->etheraddr, ETH_ADDR_LEN);
+
+    return 0;
+}
+
+/*
+ * Returns the maximum size of transmitted (and received) packets on 'netdev',
+ * in bytes, not including the hardware header; thus, this is typically 1500
+ * bytes for Ethernet devices.
+ */
+static int
+netdev_bsd_get_mtu(const struct netdev *netdev_, int *mtup)
+{
+    struct netdev_dev_bsd *netdev_dev =
+        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+
+    if (!(netdev_dev->cache_valid & VALID_MTU)) {
+        struct ifreq ifr;
+        int error;
+
+        error = netdev_bsd_do_ioctl(netdev_, &ifr, SIOCGIFMTU, "SIOCGIFMTU");
+        if (error) {
+            return error;
+        }
+        netdev_dev->mtu = ifr.ifr_mtu;
+        netdev_dev->cache_valid |= VALID_MTU;
+    }
+
+    *mtup = netdev_dev->mtu;
+    return 0;
+}
+
+/*
+ * Returns the interface index of 'netdev' if successful, otherwise a negative
+ * errno value.
+ */
+static int
+netdev_bsd_get_ifindex(const struct netdev *netdev)
+{
+    int ifindex, error;
+
+    error = get_ifindex(netdev, &ifindex);
+    return error ? -error : ifindex;
+}
+
+/*
+ * Stores in '*carrier' whether 'netdev_' reports an active medium, querying
+ * SIOCGIFMEDIA and caching the answer if the cache is not valid.  Returns 0
+ * if successful, otherwise a positive errno value.
+ */
+static int
+netdev_bsd_get_carrier(const struct netdev *netdev_, bool *carrier)
+{
+    struct netdev_dev_bsd *netdev_dev =
+        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+
+    if (!(netdev_dev->cache_valid & VALID_CARRIER)) {
+        struct ifmediareq ifmr;
+
+        /* The request is zeroed and one byte is held back, so the name is
+         * always NUL-terminated. */
+        memset(&ifmr, 0, sizeof(ifmr));
+        strncpy(ifmr.ifm_name, netdev_get_name(netdev_),
+                sizeof ifmr.ifm_name - 1);
+
+        if (ioctl(af_inet_sock, SIOCGIFMEDIA, &ifmr) == -1) {
+            /* Capture errno before logging, which may overwrite it. */
+            int error = errno;
+
+            VLOG_DBG_RL(&rl, "%s: ioctl(SIOCGIFMEDIA) failed: %s",
+                        netdev_get_name(netdev_), strerror(error));
+            return error;
+        }
+
+        netdev_dev->carrier = (ifmr.ifm_status & IFM_ACTIVE) == IFM_ACTIVE;
+        netdev_dev->cache_valid |= VALID_CARRIER;
+
+        /* If the interface doesn't report whether the media is active,
+         * just assume it is active. */
+        if ((ifmr.ifm_status & IFM_AVALID) == 0) {
+            netdev_dev->carrier = true;
+        }
+    }
+    *carrier = netdev_dev->carrier;
+
+    return 0;
+}
+
+/* Retrieves current device stats for 'netdev'.
*/
+static int
+netdev_bsd_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
+{
+    int if_count, i;
+    int mib[6];
+    size_t len;
+    struct ifmibdata ifmd;
+
+
+    mib[0] = CTL_NET;
+    mib[1] = PF_LINK;
+    mib[2] = NETLINK_GENERIC;
+    mib[3] = IFMIB_SYSTEM;
+    mib[4] = IFMIB_IFCOUNT;
+
+    len = sizeof(if_count);
+
+    if (sysctl(mib, 5, &if_count, &len, NULL, 0) == -1) {
+        /* Capture errno before logging, which may overwrite it. */
+        int error = errno;
+
+        VLOG_DBG_RL(&rl, "%s: sysctl failed: %s",
+                    netdev_get_name(netdev_), strerror(error));
+        return error;
+    }
+
+    mib[5] = IFDATA_GENERAL;
+    mib[3] = IFMIB_IFDATA;
+    for (i = 1; i <= if_count; i++) {
+        /* sysctl() overwrites 'len' with the size it returned, so it has to
+         * be reset before every query. */
+        len = sizeof(ifmd);
+        mib[4] = i;             /* Row of the interface table. */
+        if (sysctl(mib, 6, &ifmd, &len, NULL, 0) == -1) {
+            int error = errno;
+
+            /* NOTE(review): a row whose interface was destroyed presumably
+             * fails with ENOENT; skip it instead of giving up -- confirm
+             * against ifmib(4). */
+            if (error == ENOENT) {
+                continue;
+            }
+            VLOG_DBG_RL(&rl, "%s: sysctl failed: %s",
+                        netdev_get_name(netdev_), strerror(error));
+            return error;
+        } else if (!strcmp(ifmd.ifmd_name, netdev_get_name(netdev_))) {
+            stats->rx_packets = ifmd.ifmd_data.ifi_ipackets;
+            stats->tx_packets = ifmd.ifmd_data.ifi_opackets;
+            stats->rx_bytes = ifmd.ifmd_data.ifi_ibytes;
+            stats->tx_bytes = ifmd.ifmd_data.ifi_obytes;
+            stats->rx_errors = ifmd.ifmd_data.ifi_ierrors;
+            stats->tx_errors = ifmd.ifmd_data.ifi_oerrors;
+            stats->rx_dropped = ifmd.ifmd_data.ifi_iqdrops;
+            stats->tx_dropped = 0;
+            stats->multicast = ifmd.ifmd_data.ifi_imcasts;
+            stats->collisions = ifmd.ifmd_data.ifi_collisions;
+
+            stats->rx_length_errors = 0;
+            stats->rx_over_errors = 0;
+            stats->rx_crc_errors = 0;
+            stats->rx_frame_errors = 0;
+            stats->rx_fifo_errors = 0;
+            stats->rx_missed_errors = 0;
+
+            stats->tx_aborted_errors = 0;
+            stats->tx_carrier_errors = 0;
+            stats->tx_fifo_errors = 0;
+            stats->tx_heartbeat_errors = 0;
+            stats->tx_window_errors = 0;
+            return 0;
+        }
+    }
+
+    /* No row matched the device name: report that rather than returning
+     * success with '*stats' never written. */
+    return ENODEV;
+}
+
+/*
+ * Translates the ifmedia word 'media' into a bitmap of NETDEV_F_* bits
+ * describing its speed, duplex and medium.  An autoselect subtype yields
+ * NETDEV_F_AUTONEG; a subtype that is not recognized yields 0.
+ */
+static uint32_t
+netdev_bsd_parse_media(int media)
+{
+    uint32_t supported = 0;
+    bool half_duplex = (media & IFM_HDX) != 0;
+
+    switch (IFM_SUBTYPE(media)) {
+    case IFM_AUTO:
+        /* Handled inside the switch: a check placed after it can never be
+         * reached, because 'default' returns first. */
+        supported |= NETDEV_F_AUTONEG;
+        break;
+
+    case IFM_10_2:
+    case IFM_10_5:
+    case IFM_10_STP:
+    case IFM_10_T:
+        supported |= half_duplex ? NETDEV_F_10MB_HD : NETDEV_F_10MB_FD;
+        supported |= NETDEV_F_COPPER;
+        break;
+
+    case IFM_10_FL:
+        supported |= half_duplex ? NETDEV_F_10MB_HD : NETDEV_F_10MB_FD;
+        supported |= NETDEV_F_FIBER;
+        break;
+
+    case IFM_100_T2:
+    case IFM_100_T4:
+    case IFM_100_TX:
+    case IFM_100_VG:
+        supported |= half_duplex ? NETDEV_F_100MB_HD : NETDEV_F_100MB_FD;
+        supported |= NETDEV_F_COPPER;
+        break;
+
+    case IFM_100_FX:
+        supported |= half_duplex ? NETDEV_F_100MB_HD : NETDEV_F_100MB_FD;
+        supported |= NETDEV_F_FIBER;
+        break;
+
+    case IFM_1000_CX:
+    case IFM_1000_T:
+        supported |= half_duplex ? NETDEV_F_1GB_HD : NETDEV_F_1GB_FD;
+        supported |= NETDEV_F_COPPER;
+        break;
+
+    case IFM_1000_LX:
+    case IFM_1000_SX:
+        supported |= half_duplex ? NETDEV_F_1GB_HD : NETDEV_F_1GB_FD;
+        supported |= NETDEV_F_FIBER;
+        break;
+
+    case IFM_10G_CX4:
+        supported |= NETDEV_F_10GB_FD;
+        supported |= NETDEV_F_COPPER;
+        break;
+
+    case IFM_10G_LR:
+    case IFM_10G_SR:
+        supported |= NETDEV_F_10GB_FD;
+        supported |= NETDEV_F_FIBER;
+        break;
+
+    default:
+        return 0;
+    }
+
+    /* XXX NETDEV_F_PAUSE is not reported yet; it would be derived from
+     * 'media & IFM_ETH_FMASK'. */
+
+    return supported;
+}
+
+/*
+ * Stores the features supported by 'netdev' into each of '*current',
+ * '*advertised', '*supported', and '*peer' that are non-null.  Each value is a
+ * bitmap of "enum ofp_port_features" bits, in host byte order.  Returns 0 if
+ * successful, otherwise a positive errno value.  On failure, all of the
+ * passed-in values are set to 0.
+ */
+static int
+netdev_bsd_get_features(const struct netdev *netdev,
+                        enum netdev_features *current, uint32_t *advertised,
+                        enum netdev_features *supported, uint32_t *peer)
+{
+    struct ifmediareq ifmr;
+    int *media_list;
+    int i;
+    int error;
+
+
+    /* XXX Look into SIOCGIFCAP instead of SIOCGIFMEDIA */
+
+    /* The request is zeroed and one byte is held back, so the name is always
+     * NUL-terminated. */
+    memset(&ifmr, 0, sizeof(ifmr));
+    strncpy(ifmr.ifm_name, netdev_get_name(netdev), sizeof ifmr.ifm_name - 1);
+
+    /* We make two SIOCGIFMEDIA ioctl calls.  The first to determine the
+     * number of supported modes, and a second with a buffer to retrieve
+     * them. */
+    if (ioctl(af_inet_sock, SIOCGIFMEDIA, &ifmr) == -1) {
+        /* Capture errno before logging, which may overwrite it. */
+        error = errno;
+        VLOG_DBG_RL(&rl, "%s: ioctl(SIOCGIFMEDIA) failed: %s",
+                    netdev_get_name(netdev), strerror(error));
+        return error;
+    }
+
+    /* Compare the media type as a whole.  '!' binds tighter than '&', so
+     * "!IFM_TYPE(x) & IFM_ETHER" is always zero and rejects nothing. */
+    if (IFM_TYPE(ifmr.ifm_current) != IFM_ETHER) {
+        VLOG_DBG_RL(&rl, "%s: doesn't appear to be ethernet",
+                    netdev_get_name(netdev));
+        return EINVAL;
+    }
+
+    media_list = xcalloc(ifmr.ifm_count, sizeof(int));
+    ifmr.ifm_ulist = media_list;
+
+    if (ioctl(af_inet_sock, SIOCGIFMEDIA, &ifmr) == -1) {
+        error = errno;
+        VLOG_DBG_RL(&rl, "%s: ioctl(SIOCGIFMEDIA) failed: %s",
+                    netdev_get_name(netdev), strerror(error));
+        goto cleanup;
+    }
+
+    /* Current settings. */
+    *current = netdev_bsd_parse_media(ifmr.ifm_active);
+
+    /* Advertised features. */
+    *advertised = netdev_bsd_parse_media(ifmr.ifm_current);
+
+    /* Supported features. */
+    *supported = 0;
+    for (i = 0; i < ifmr.ifm_count; i++) {
+        *supported |= netdev_bsd_parse_media(ifmr.ifm_ulist[i]);
+    }
+
+    /* Peer advertisements. */
+    *peer = 0;   /* XXX */
+
+    error = 0;
+cleanup:
+    free(media_list);
+    return error;
+}
+
+/*
+ * If 'netdev' has an assigned IPv4 address, stores that address in '*in4' and
+ * returns 0.  Otherwise, returns a positive errno value (EADDRNOTAVAIL when
+ * the address is INADDR_ANY).
+ */
+static int
+netdev_bsd_get_in4(const struct netdev *netdev_, struct in_addr *in4,
+                   struct in_addr *netmask)
+{
+    struct netdev_dev_bsd *netdev_dev =
+        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+    const struct sockaddr_in *sin;
+    struct ifreq ifr;
+    int error;
+
+    if (!(netdev_dev->cache_valid & VALID_IN4)) {
+        memset(&ifr, 0, sizeof ifr);
+        ifr.ifr_addr.sa_family = AF_INET;
+        error = netdev_bsd_do_ioctl(netdev_, &ifr,
+                                    SIOCGIFADDR, "SIOCGIFADDR");
+        if (error) {
+            return error;
+        }
+
+        sin = (struct sockaddr_in *) &ifr.ifr_addr;
+        netdev_dev->in4 = sin->sin_addr;
+        netdev_dev->cache_valid |= VALID_IN4;
+    }
+
+    /* Only the address is cached, so the netmask is queried on every call.
+     * That way '*netmask' is written on the cached path too. */
+    memset(&ifr, 0, sizeof ifr);
+    ifr.ifr_addr.sa_family = AF_INET;
+    error = netdev_bsd_do_ioctl(netdev_, &ifr,
+                                SIOCGIFNETMASK, "SIOCGIFNETMASK");
+    if (error) {
+        return error;
+    }
+    sin = (struct sockaddr_in *) &ifr.ifr_addr;
+    *netmask = sin->sin_addr;
+    *in4 = netdev_dev->in4;
+
+    return in4->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0;
+}
+
+/*
+ * Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask.  If
+ * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared.  Returns 0 if
+ * successful, otherwise a positive errno value.
+ */
+static int
+netdev_bsd_set_in4(struct netdev *netdev_, struct in_addr addr,
+                   struct in_addr mask)
+{
+    struct netdev_dev_bsd *netdev_dev =
+        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+    int error;
+
+    error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", addr);
+    if (!error) {
+        netdev_dev->cache_valid |= VALID_IN4;
+        netdev_dev->in4 = addr;
+        if (addr.s_addr != INADDR_ANY) {
+            error = do_set_addr(netdev_, SIOCSIFNETMASK,
+                                "SIOCSIFNETMASK", mask);
+        }
+        netdev_dev_bsd_changed(netdev_dev);
+    }
+    return error;
+}
+
+/*
+ * Stores in '*in6' the first IPv6 address that getifaddrs() lists for
+ * 'netdev_', caching it for later calls.  Returns 0 if successful,
+ * EADDRNOTAVAIL if the device has no IPv6 address, otherwise a positive errno
+ * value.
+ */
+static int
+netdev_bsd_get_in6(const struct netdev *netdev_, struct in6_addr *in6)
+{
+    struct netdev_dev_bsd *netdev_dev =
+        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+
+    if (!(netdev_dev->cache_valid & VALID_IN6)) {
+        const char *netdev_name = netdev_get_name(netdev_);
+        struct ifaddrs *ifa, *head;
+        bool found = false;
+
+        if (getifaddrs(&head) != 0) {
+            /* Capture errno before logging, which may overwrite it. */
+            int error = errno;
+
+            VLOG_ERR("getifaddrs on %s device failed: %s", netdev_name,
+                     strerror(error));
+            return error;
+        }
+
+        for (ifa = head; ifa; ifa = ifa->ifa_next) {
+            /* 'ifa_addr' can be null, so test it before dereferencing. */
+            if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6 &&
+                    !strcmp(ifa->ifa_name, netdev_name)) {
+                const struct sockaddr_in6 *sin6 =
+                    (const struct sockaddr_in6 *) ifa->ifa_addr;
+
+                /* Copy exactly one in6_addr.  'sin6_len' is the length of
+                 * the whole sockaddr and would overrun 'in6'. */
+                netdev_dev->in6 = sin6->sin6_addr;
+                netdev_dev->cache_valid |= VALID_IN6;
+                found = true;
+                break;
+            }
+        }
+
+        /* Release the list on every path, including "not found". */
+        freeifaddrs(head);
+        if (!found) {
+            return EADDRNOTAVAIL;
+        }
+    }
+    *in6 = netdev_dev->in6;
+    return 0;
+}
+
+/* Fills '*sa' with an AF_INET socket address holding 'addr' and port 0. */
+static void
+make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr)
+{
+    struct sockaddr_in sin;
+    memset(&sin, 0, sizeof sin);
+    sin.sin_family = AF_INET;
+    sin.sin_addr = addr;
+    sin.sin_port = 0;
+
+    memset(sa, 0, sizeof *sa);
+    memcpy(sa, &sin, sizeof sin);
+}
+
+/*
+ * Issues the address-setting ioctl 'ioctl_nr' (named 'ioctl_name' in log
+ * messages) on 'netdev' with 'addr' as its argument.  Returns 0 if
+ * successful, otherwise a positive errno value.
+ */
+static int
+do_set_addr(struct netdev *netdev,
+            int ioctl_nr, const char *ioctl_name, struct in_addr addr)
+{
+    struct ifreq ifr;
+    make_in4_sockaddr(&ifr.ifr_addr, addr);
+    return netdev_bsd_do_ioctl(netdev, &ifr, ioctl_nr, ioctl_name);
+}
+
+static int
+nd_to_iff_flags(enum
netdev_flags nd)
+{
+    /* Translates NETDEV_* flag bits into the matching IFF_* interface
+     * flags. */
+    int iff = 0;
+    if (nd & NETDEV_UP) {
+        iff |= IFF_UP;
+    }
+    if (nd & NETDEV_PROMISC) {
+        iff |= IFF_PROMISC;
+        iff |= IFF_PPROMISC;
+    }
+    return iff;
+}
+
+/* Translates IFF_* interface flags into the matching NETDEV_* flag bits. */
+static int
+iff_to_nd_flags(int iff)
+{
+    enum netdev_flags nd = 0;
+    if (iff & IFF_UP) {
+        nd |= NETDEV_UP;
+    }
+    if (iff & IFF_PROMISC) {
+        nd |= NETDEV_PROMISC;
+    }
+    return nd;
+}
+
+/*
+ * Clears the 'off' flags and sets the 'on' flags on 'netdev', storing the
+ * previous flags in '*old_flagsp'.  The device is only written when the
+ * resulting flags differ from the current ones.  Returns 0 if successful,
+ * otherwise a positive errno value.
+ */
+static int
+netdev_bsd_update_flags(struct netdev *netdev, enum netdev_flags off,
+                        enum netdev_flags on, enum netdev_flags *old_flagsp)
+{
+    int old_flags, new_flags;
+    int error;
+
+    error = get_flags(netdev, &old_flags);
+    if (!error) {
+        *old_flagsp = iff_to_nd_flags(old_flags);
+        new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on);
+        if (new_flags != old_flags) {
+            error = set_flags(netdev, new_flags);
+            netdev_dev_bsd_changed(netdev_dev_bsd_cast(netdev_get_dev(netdev)));
+        }
+    }
+    return error;
+}
+
+/* Returns the change sequence number kept by the device behind 'netdev'. */
+static unsigned int
+netdev_bsd_change_seq(const struct netdev *netdev)
+{
+    return netdev_dev_bsd_cast(netdev_get_dev(netdev))->change_seq;
+}
+
+
+/* Provider for "system" devices.  Entries are positional; NULL marks an
+ * operation that this provider does not implement. */
+const struct netdev_class netdev_bsd_class = {
+    "system",
+
+    netdev_bsd_init,
+    netdev_bsd_run,
+    netdev_bsd_wait,
+    netdev_bsd_create_system,
+    netdev_bsd_destroy,
+    NULL, /* get_config */
+    NULL, /* set_config */
+    netdev_bsd_open_system,
+    netdev_bsd_close,
+
+    netdev_bsd_listen,
+
+    netdev_bsd_recv,
+    netdev_bsd_recv_wait,
+    netdev_bsd_drain,
+
+    netdev_bsd_send,
+    netdev_bsd_send_wait,
+
+    netdev_bsd_set_etheraddr,
+    netdev_bsd_get_etheraddr,
+    netdev_bsd_get_mtu,
+    NULL, /* set_mtu */
+    netdev_bsd_get_ifindex,
+    netdev_bsd_get_carrier,
+    NULL, /* get_carrier_resets */
+    NULL, /* set_miimon_interval */
+    netdev_bsd_get_stats,
+    NULL, /* set_stats */
+
+    netdev_bsd_get_features,
+    NULL, /* set_advertisement */
+    NULL, /* set_policing */
+    NULL, /* get_qos_type */
+    NULL, /* get_qos_capabilities */
+    NULL, /* get_qos */
+    NULL, /* set_qos */
+    NULL, /* get_queue */
+    NULL, /* set_queue */
+    NULL, /* delete_queue */
+    NULL, /* get_queue_stats */
+    NULL, /* dump_queue */
+    NULL, /* dump_queue_stats */
+
+    netdev_bsd_get_in4,
+    netdev_bsd_set_in4,
+    netdev_bsd_get_in6,
+    NULL, /* add_router */
+    NULL, /* get_next_hop */
+    NULL, /* get_drv_info */
+    NULL, /* arp_lookup */
+
+    netdev_bsd_update_flags,
+
+    netdev_bsd_change_seq
+};
+
+/* Provider for "tap" devices.  It differs from the "system" provider only in
+ * its type name and its create function. */
+const struct netdev_class netdev_tap_class = {
+    "tap",
+
+    netdev_bsd_init,
+    netdev_bsd_run,
+    netdev_bsd_wait,
+    netdev_bsd_create_tap,
+    netdev_bsd_destroy,
+    NULL, /* get_config */
+    NULL, /* set_config */
+    netdev_bsd_open_system,
+    netdev_bsd_close,
+
+    netdev_bsd_listen,
+
+    netdev_bsd_recv,
+    netdev_bsd_recv_wait,
+    netdev_bsd_drain,
+
+    netdev_bsd_send,
+    netdev_bsd_send_wait,
+
+    netdev_bsd_set_etheraddr,
+    netdev_bsd_get_etheraddr,
+    netdev_bsd_get_mtu,
+    NULL, /* set_mtu */
+    netdev_bsd_get_ifindex,
+    netdev_bsd_get_carrier,
+    NULL, /* get_carrier_resets */
+    NULL, /* set_miimon_interval */
+    netdev_bsd_get_stats,
+    NULL, /* set_stats */
+
+    netdev_bsd_get_features,
+    NULL, /* set_advertisement */
+    NULL, /* set_policing */
+    NULL, /* get_qos_type */
+    NULL, /* get_qos_capabilities */
+    NULL, /* get_qos */
+    NULL, /* set_qos */
+    NULL, /* get_queue */
+    NULL, /* set_queue */
+    NULL, /* delete_queue */
+    NULL, /* get_queue_stats */
+    NULL, /* dump_queue */
+    NULL, /* dump_queue_stats */
+
+    netdev_bsd_get_in4,
+    netdev_bsd_set_in4,
+    netdev_bsd_get_in6,
+    NULL, /* add_router */
+    NULL, /* get_next_hop */
+    NULL, /* get_drv_info */
+    NULL, /* arp_lookup */
+
+    netdev_bsd_update_flags,
+
+    netdev_bsd_change_seq
+};
+
+
+/* Closes the tap descriptor 'fd' and asks the kernel to destroy the
+ * interface called 'name'. */
+static void
+destroy_tap(int fd, const char *name)
+{
+    struct ifreq ifr;
+
+    close(fd);
+    /* Bounded copy: zero the request first and leave room for the
+     * terminating NUL so an over-long name cannot overflow 'ifr_name'. */
+    memset(&ifr, 0, sizeof ifr);
+    strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name - 1);
+    /* XXX What to do if this call fails?
*/
+    ioctl(af_inet_sock, SIOCIFDESTROY, &ifr);
+}
+
+/*
+ * Stores the interface flags of 'netdev' in '*flags', combining the high and
+ * low 16-bit halves reported by SIOCGIFFLAGS.  Returns 0 if successful;
+ * otherwise returns a positive errno value and stores 0 in '*flags'.
+ */
+static int
+get_flags(const struct netdev *netdev, int *flags)
+{
+    struct ifreq ifr;
+    int error;
+
+    memset(&ifr, 0, sizeof ifr);
+    error = netdev_bsd_do_ioctl(netdev, &ifr, SIOCGIFFLAGS, "SIOCGIFFLAGS");
+    if (error) {
+        /* 'ifr' holds no flags after a failed ioctl; do not decode it. */
+        *flags = 0;
+        return error;
+    }
+
+    /* Mask each half before shifting so a negative short is never shifted. */
+    *flags = ((unsigned int) (ifr.ifr_flagshigh & 0xFFFF) << 16)
+             | (ifr.ifr_flags & 0xFFFF);
+
+    return 0;
+}
+
+/*
+ * Sets the interface flags of 'netdev' to 'flags', split into the low and
+ * high 16-bit halves that SIOCSIFFLAGS expects.  Returns 0 if successful,
+ * otherwise a positive errno value.
+ */
+static int
+set_flags(struct netdev *netdev, int flags)
+{
+    struct ifreq ifr;
+
+    memset(&ifr, 0, sizeof ifr);
+    ifr.ifr_flags = 0x0000FFFF & flags;
+    ifr.ifr_flagshigh = (0xFFFF0000 & flags) >> 16;
+
+    return netdev_bsd_do_ioctl(netdev, &ifr, SIOCSIFFLAGS, "SIOCSIFFLAGS");
+}
+
+/*
+ * Stores the interface index of 'netdev_' in '*ifindexp', looking it up by
+ * name and caching it if the cache is not valid.  Returns 0 if successful;
+ * otherwise returns a positive errno value and stores 0 in '*ifindexp'.
+ */
+static int
+get_ifindex(const struct netdev *netdev_, int *ifindexp)
+{
+    struct netdev_dev_bsd *netdev_dev =
+        netdev_dev_bsd_cast(netdev_get_dev(netdev_));
+    *ifindexp = 0;
+    if (!(netdev_dev->cache_valid & VALID_IFINDEX)) {
+        int ifindex = if_nametoindex(netdev_get_name(netdev_));
+        if (ifindex <= 0) {
+            return errno;
+        }
+        netdev_dev->cache_valid |= VALID_IFINDEX;
+        netdev_dev->ifindex = ifindex;
+    }
+    *ifindexp = netdev_dev->ifindex;
+    return 0;
+}
+
+/*
+ * Copies the link-level address of the device called 'netdev_name' into
+ * 'ea'.  At most ETH_ADDR_LEN bytes are copied; a shorter address is padded
+ * with zeros.  Returns 0 if successful, ENODEV if the device has no AF_LINK
+ * address, otherwise a positive errno value.
+ */
+static int
+get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN])
+{
+    struct ifaddrs *head;
+    struct ifaddrs *ifa;
+    struct sockaddr_dl *sdl;
+
+    if (getifaddrs(&head) != 0) {
+        /* Capture errno before logging, which may overwrite it. */
+        int error = errno;
+
+        VLOG_ERR("getifaddrs on %s device failed: %s", netdev_name,
+                 strerror(error));
+        return error;
+    }
+
+    for (ifa = head; ifa; ifa = ifa->ifa_next) {
+        /* 'ifa_addr' can be null, so test it before dereferencing. */
+        if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_LINK) {
+            if (!strcmp(ifa->ifa_name, netdev_name)) {
+                size_t n;
+
+                sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+                /* Never copy more than 'ea' can hold, whatever address
+                 * length the link layer reports. */
+                n = sdl->sdl_alen < ETH_ADDR_LEN ? sdl->sdl_alen
+                                                 : ETH_ADDR_LEN;
+                memset(ea, 0, ETH_ADDR_LEN);
+                memcpy(ea, LLADDR(sdl), n);
+                freeifaddrs(head);
+                return 0;
+            }
+        }
+    }
+
+    VLOG_ERR("could not find ethernet address for %s device", netdev_name);
+    freeifaddrs(head);
+    return ENODEV;
+}
+
+/*
+ * Sets the link-level address of the device called 'netdev_name' to the
+ * 'hwaddr_len' bytes at 'mac', using address family 'hwaddr_family'.  Returns
+ * 0 if successful, otherwise a positive errno value.
+ */
+static int
+set_etheraddr(const char *netdev_name, int hwaddr_family,
+              int hwaddr_len, const uint8_t mac[ETH_ADDR_LEN])
+{
+    struct ifreq ifr;
+
+    /* The request is zeroed and one byte is held back, so the name is always
+     * NUL-terminated. */
+    memset(&ifr, 0, sizeof ifr);
+    strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name - 1);
+    ifr.ifr_addr.sa_family = hwaddr_family;
+    ifr.ifr_addr.sa_len = hwaddr_len;
+    memcpy(ifr.ifr_addr.sa_data, mac, hwaddr_len);
+    if (ioctl(af_inet_sock, SIOCSIFLLADDR, &ifr) < 0) {
+        /* Capture errno before logging, which may overwrite it. */
+        int error = errno;
+
+        VLOG_ERR("ioctl(SIOCSIFLLADDR) on %s device failed: %s",
+                 netdev_name, strerror(error));
+        return error;
+    }
+    return 0;
+}
+
+/*
+ * Fills in the interface name of '*ifr' from 'netdev' and issues ioctl 'cmd'
+ * (named 'cmd_name' in log messages) on the module's AF_INET socket.  Returns
+ * 0 if successful, otherwise a positive errno value.
+ */
+static int
+netdev_bsd_do_ioctl(const struct netdev *netdev, struct ifreq *ifr,
+                    unsigned long cmd, const char *cmd_name)
+{
+    strncpy(ifr->ifr_name, netdev_get_name(netdev), sizeof ifr->ifr_name);
+    /* strncpy() does not terminate the buffer when the name fills it. */
+    ifr->ifr_name[sizeof ifr->ifr_name - 1] = '\0';
+    if (ioctl(af_inet_sock, cmd, ifr) == -1) {
+        /* Capture errno before logging, which may overwrite it. */
+        int error = errno;
+
+        VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s",
+                    netdev_get_name(netdev), cmd_name, strerror(error));
+        return error;
+    }
+    return 0;
+}
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 5fb1bbf2..94f60af7 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -596,6 +596,9 @@ const struct netdev_class *netdev_lookup_provider(const char *type); extern const struct netdev_class netdev_linux_class; extern const struct netdev_class netdev_internal_class; extern const struct netdev_class netdev_tap_class; +#ifdef __FreeBSD__ +extern const struct netdev_class netdev_bsd_class; +#endif #ifdef __cplusplus } diff --git a/lib/netdev.c b/lib/netdev.c index 1b76785a..ac98cb52 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -80,6 +80,10 @@ netdev_initialize(void) netdev_register_provider(&netdev_internal_class); netdev_register_provider(&netdev_tap_class); netdev_vport_register(); +#endif +#ifdef __FreeBSD__ + netdev_register_provider(&netdev_tap_class); + netdev_register_provider(&netdev_bsd_class); #endif } } diff --git a/lib/route-table.h b/lib/route-table.h index 1479249c..804cb3ff 100644 --- a/lib/route-table.h +++ b/lib/route-table.h @@ -17,6 +17,7 @@ #ifndef ROUTE_TABLE_H #define ROUTE_TABLE_H 1 +#include #include #include #include diff --git a/lib/rtbsd.c b/lib/rtbsd.c new file mode 100644 index 00000000..c5fe03a3 --- /dev/null +++ b/lib/rtbsd.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2011 Gaetano
Catalli.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <unistd.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <poll.h>
+
+#include "coverage.h"
+#include "socket-util.h"
+#include "poll-loop.h"
+#include "vlog.h"
+#include "rtbsd.h"
+
+VLOG_DEFINE_THIS_MODULE(rtbsd);
+COVERAGE_DEFINE(rtbsd_changed);
+
+/* PF_ROUTE socket. */
+static int notify_sock = -1;
+
+/* All registered notifiers. */
+static struct list all_notifiers = LIST_INITIALIZER(&all_notifiers);
+
+static void rtbsd_report_change(const struct if_msghdr *);
+static void rtbsd_report_notify_error(void);
+
+/* Registers 'cb' to be called with auxiliary data 'aux' with network device
+ * change notifications.  The notifier is stored in 'notifier', which the
+ * caller must not modify or free.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. */
+int
+rtbsd_notifier_register(struct rtbsd_notifier *notifier,
+                        rtbsd_notify_func *cb, void *aux)
+{
+    if (notify_sock < 0) {
+        int error;
+
+        notify_sock = socket(PF_ROUTE, SOCK_RAW, 0);
+        if (notify_sock < 0) {
+            /* Capture errno before logging, which may overwrite it. */
+            error = errno;
+            VLOG_WARN("could not create PF_ROUTE socket: %s",
+                      strerror(error));
+            return error;
+        }
+        error = set_nonblocking(notify_sock);
+        if (error) {
+            VLOG_WARN("error set_nonblocking PF_ROUTE socket: %s",
+                    strerror(error));
+            /* Do not keep a blocking socket around: a later registration
+             * would find 'notify_sock' open and read from it. */
+            close(notify_sock);
+            notify_sock = -1;
+            return error;
+        }
+    } else {
+        /* Catch up on notification work so that the new notifier won't
+         * receive any stale notifications. XXX*/
+        rtbsd_notifier_run();
+    }
+
+    list_push_back(&all_notifiers, &notifier->node);
+    notifier->cb = cb;
+    notifier->aux = aux;
+    return 0;
+}
+
+/* Cancels notification on 'notifier', which must have previously been
+ * registered with rtbsd_notifier_register().  Closes the PF_ROUTE socket
+ * when the last notifier is removed. */
+void
+rtbsd_notifier_unregister(struct rtbsd_notifier *notifier)
+{
+    list_remove(&notifier->node);
+    if (list_is_empty(&all_notifiers)) {
+        close(notify_sock);
+        notify_sock = -1;
+    }
+}
+
+/* Calls all of the registered notifiers, passing along any as-yet-unreported
+ * netdev change events. */
+void
+rtbsd_notifier_run(void)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+    struct if_msghdr msg;
+    if (notify_sock < 0) {
+        return;
+    }
+
+    for (;;) {
+        ssize_t retval;
+
+        /* read from PF_ROUTE socket */
+        retval = read(notify_sock, (char *)&msg, sizeof(msg));
+        if (retval >= 0) {
+            /* Only a message that filled the whole header is decoded, so a
+             * short read is never mistaken for an RTM_IFINFO message.
+             * XXX check for bad packets */
+            if ((size_t) retval >= sizeof msg
+                && msg.ifm_type == RTM_IFINFO) {
+                rtbsd_report_change(&msg);
+            }
+        } else if (errno == EAGAIN) {
+            return;
+        } else {
+            /* Capture errno before logging, which may overwrite it. */
+            int error = errno;
+
+            if (error == ENOBUFS) {
+                VLOG_WARN_RL(&rl, "PF_ROUTE receive buffer overflowed");
+            } else {
+                VLOG_WARN_RL(&rl, "error reading PF_ROUTE socket: %s",
+                             strerror(error));
+            }
+            rtbsd_report_notify_error();
+        }
+    }
+}
+
+/* Causes poll_block() to wake up when network device change notifications are
+ * ready.
*/
+void
+rtbsd_notifier_wait(void)
+{
+    if (notify_sock >= 0) {
+        poll_fd_wait(notify_sock, POLLIN);
+    }
+}
+
+/* Builds a 'struct rtbsd_change' from 'msg' and passes it to every registered
+ * notifier. */
+static void
+rtbsd_report_change(const struct if_msghdr *msg)
+{
+    struct rtbsd_notifier *notifier;
+    struct rtbsd_change change;
+
+    COVERAGE_INC(rtbsd_changed);
+
+    change.msg_type = msg->ifm_type; //XXX
+    change.if_index = msg->ifm_index;
+    if (!if_indextoname(msg->ifm_index, change.if_name)) {
+        /* The index could not be resolved to a name.  Hand the callbacks an
+         * empty string rather than an uninitialized buffer. */
+        change.if_name[0] = '\0';
+    }
+    change.master_ifindex = 0; //XXX
+
+    LIST_FOR_EACH (notifier, node, &all_notifiers) {
+        notifier->cb(&change, notifier->aux);
+    }
+}
+
+/* If an error occurs the notifiers' callbacks are called with NULL changes */
+static void
+rtbsd_report_notify_error(void)
+{
+    struct rtbsd_notifier *notifier;
+
+    LIST_FOR_EACH (notifier, node, &all_notifiers) {
+        notifier->cb(NULL, notifier->aux);
+    }
+}
diff --git a/lib/rtbsd.h b/lib/rtbsd.h new file mode 100644 index 00000000..60bfae98 --- /dev/null +++ b/lib/rtbsd.h @@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2011 Gaetano Catalli.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RTBSD_H
+#define RTBSD_H 1
+
+#include "list.h"
+
+/*
+ * A digested version of a message received from a PF_ROUTE socket which
+ * indicates that a network device has been created or destroyed or changed.
+ */
+struct rtbsd_change {
+    /* Copied from struct if_msghdr. */
+    int msg_type;               /* e.g. XXX. */
+
+    /* Copied from struct if_msghdr. */
+    int if_index;               /* Index of network device. */
+
+    char if_name[IF_NAMESIZE];  /* Name of network device. */
+    int master_ifindex;         /* Ifindex of datapath master (0 if none). */
+};
+
+/*
+ * Function called to report that a netdev has changed.  'change' describes the
+ * specific change.  It may be null if the buffer of change information
+ * overflowed, in which case the function must assume that every device may
+ * have changed.  'aux' is as specified in the call to
+ * rtbsd_notifier_register().
+ */
+typedef void rtbsd_notify_func(const struct rtbsd_change *, void *aux);
+
+struct rtbsd_notifier {
+    struct list node;
+    rtbsd_notify_func *cb;
+    void *aux;
+};
+
+int rtbsd_notifier_register(struct rtbsd_notifier *,
+                            rtbsd_notify_func *, void *aux);
+void rtbsd_notifier_unregister(struct rtbsd_notifier *);
+void rtbsd_notifier_run(void);
+void rtbsd_notifier_wait(void);
+
+#endif /* rtbsd.h */
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index 611f89a0..23f54989 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -18,6 +18,7 @@ #include #include "ofproto-dpif-sflow.h" #include +#include #include #include #include "collectors.h" -- 2.30.2