From: Martin Casado Date: Tue, 4 Mar 2008 21:12:53 +0000 (-0800) Subject: Initial import X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=468e00132f76a6d057da1520873e7a468ccae422;p=openvswitch Initial import --- diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..76ecbb77 --- /dev/null +++ b/COPYING @@ -0,0 +1,367 @@ +All source files are Copyright (C) 2007 Board of Trustees, Leland +Stanford Jr. University and covered by the following licence. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation files + (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + +Files in the datapath/ and its sub-directories are covered under the GNU +General Public License Version 2. Included below: + + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 00000000..b8bcaa3d --- /dev/null +++ b/ChangeLog @@ -0,0 +1,55 @@ +v0.1.8 - 03 Mar 2008 +-------------------- + - Added support for cross-compilation. 
+ - Various bug fixes and tweaks + +v0.1.7 - 07 Feb 2008 +-------------------- + - Allow permanent flow entries to be set + - Added patch for tcpdump that allows parsing of OpenFlow messages + - Various bug fixes and tweaks + +v0.1.6 - 05 Feb 2008 +-------------------- + - Added support for Linux 2.6.24 + - Set nwsrc/nwdst fields in flow structs on ARP packets + - Various bug fixes and tweaks + +v0.1.5 - 17 Jan 2008 +-------------------- + - Added support for Linux 2.4.20 + - Added support for GCC 2.95 + +v0.1.4 - 15 Jan 2008 +-------------------- + - Decode and print port_status messages + - Fixed build problems on big-endian systems + - Fixed compatibility for older 2.6 kernels + - Various bug fixes and tweaks + +v0.1.3 - 08 Jan 2008 +-------------------- + - Added support for flow expiration messages + - Decode and print all datapath-generated messages in dpctl's "monitor" + - Added "--noflow" option to controller + - Various bug fixes and tweaks + +v0.1.2 - 07 Jan 2008 +-------------------- + - Fixed distribution to include ofp_pcap.h + - Removed autoconf C++ checks + +v0.1.1 - 18 Dec 2007 +-------------------- + - Fixed support for Linux 2.4.35 and 2.6.22 + - Added support for Linux 2.6.15 + - Added "vlogconf" utility to modify logging configuration + - Added better support for SNAP headers + - Improved printing of flow information in dpctl + - Made kernel code consistently use tabs instead of spaces + - Removed libpcap requirement for building + - Various bug fixes and tweaks + +v0.1.0 - 30 Nov 2007 +-------------------- + - Initial release diff --git a/INSTALL b/INSTALL new file mode 100644 index 00000000..e10aec69 --- /dev/null +++ b/INSTALL @@ -0,0 +1,156 @@ + Installation Instructions for OpenFlow Reference Release v0.1.5 + +This document describes how to build, install, and execute the v0.1.5 +reference implementation of OpenFlow. 
Please send any comments to: + + <info@openflowswitch.org> + +Setting up the Kernel Build Environment +--------------------------------------- + +The datapath kernel module must be compiled against a kernel build +directory for the Linux version the module is to run on. The datapath +module has been mainly tested on Linux 2.6.23. Support for Linux 2.4 +is also in place, although it has only been lightly tested under 2.4.35. + +For example, if compiling on Debian or Ubuntu, the Linux headers +and image packages must be installed (apt-get install +linux-headers-<version> linux-image-<version>). + +Note: the OpenFlow datapath requires that bridging support has been +configured in the kernel, but not enabled or in use. If the bridge +module is running (check with "lsmod | grep bridge"), you must remove +it ("rmmod bridge") before starting the datapath. + +Building the Code +----------------- + +1. In the top source directory, configure the package, passing the + location of the kernel build directory as an argument. Use + --with-l26 for Linux 2.6, --with-l24 for Linux 2.4: + + For example, if compiling for a running instance of Linux 2.6: + % ./configure --with-l26=/lib/modules/`uname -r`/build + + Or if compiling for a running instance of Linux 2.4: + % ./configure --with-l24=/lib/modules/`uname -r`/build + + To use a specific C compiler for compiling OpenFlow user programs, + also specify it on the configure command line, like so: + % ./configure CC=gcc-4.2 + +2. Run make in the top source directory: + + % make + + The following binaries will be built: + + Datapath kernel module: + ./datapath/linux-2.6/openflow_mod.ko (If compiling for Linux 2.6) + ./datapath/linux-2.4/openflow_mod.o (If compiling for Linux 2.4) + + Secure channel executable: + ./secchan/secchan + + Controller executable: + ./controller/controller + + Datapath administration utility: + ./utilities/dpctl + +3. (Optional) Run "make install" to install the executables and + manpages into the running system, by default under /usr/local. 
+ +Installing the datapath +----------------------- + +To run the module, simply insmod it: + + (Linux 2.6) + % insmod datapath/linux-2.6/openflow_mod.ko + + (Linux 2.4) + % insmod datapath/linux-2.4/compat24_mod.o + % insmod datapath/linux-2.4/openflow_mod.o + + +Testing the datapath +-------------------- + +Once the OpenFlow datapath has been installed (you can verify that it is +running if it appears in lsmod's listing), you can configure it using +the dpctl command line utility. + +1. Create a datapath instance. The command below creates a datapath with + ID 0 (see dpctl(8) for more detailed usage information). + + % dpctl adddp 0 + + (note, while in principle openflow_mod supports multiple datapaths + within the same host, this is rarely useful in practice) + +2. Use dpctl to attach the datapath to physical interfaces on the + machine. Say, for example, you want to create a trivial 2-port + switch using interfaces eth1 and eth2, you would issue the following + commands: + + % dpctl addif 0 eth1 + % dpctl addif 0 eth2 + + You can verify that the interfaces were successfully added by asking + dpctl to print the current status of datapath 0: + + % dpctl show 0 + +3. (Optional) You can manually add flows to the datapath to test using + dpctl add-flows and view them using dpctl dump-flows. See dpctl(8) + for more details. + +4. The simplest way to test the datapath is to run the provided sample + controller on the host machine to manage the datapath directly using + netlink. + + % controller -v nl:0 + + Once the controller is running, the datapath should operate like a + learning Ethernet switch. You may monitor the flows in the datapath + flow table using "dpctl dump-flows" command. + +Running the datapath with a remote controller +--------------------------------------------- + +1. Start the datapath and attach it to two or more physical ports as + described in the previous section. 
+ + Note: The current version of the secure channel and controller + require at least one interface not be connected to the datapath + to be functional. This interface will be used for communication + between the secure channel and the controller. Future releases will + support in-band control communication. + +2. Run the controller in passive tcp mode on the host which will act as + the controller. In the example below, the controller will bind to + port 975 (the default) awaiting connections from secure channels. + + % controller -v ptcp: + + (See controller(8) for more details) + + Make sure the machine hosting the controller is reachable by the switch. + +3. Run secchan on the datapath host to start the secure channel + connecting the datapath to a remote controller. (See secchan(8) + for usage details). The channel should be configured to connect to + the controller's IP address on the port configured in step 2. + + If the controller is running on host 192.168.1.2 port 975 (the + default port) and the datapath ID is 0, the secchan invocation + would look like: + + % secchan -v nl:0 tcp:192.168.1.2 + +Bug Reporting +------------- + +Please report problems to: +info@openflowswitch.org diff --git a/Make.vars b/Make.vars new file mode 100644 index 00000000..9b8b342f --- /dev/null +++ b/Make.vars @@ -0,0 +1,17 @@ +# -*- makefile -*- + +if HAVE_NETLINK +AM_CPPFLAGS = -DHAVE_NETLINK=1 +endif + +COMMON_FLAGS = -DVERSION=\"$(VERSION)\" +if NDEBUG +COMMON_FLAGS += -DNDEBUG -fomit-frame-pointer +endif + +AM_CFLAGS = $(COMMON_FLAGS) +AM_CFLAGS += -Wstrict-prototypes -I $(top_srcdir)/include + +if !NDEBUG +AM_LDFLAGS = -export-dynamic +endif diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 00000000..aee1a9dc --- /dev/null +++ b/Makefile.am @@ -0,0 +1,2 @@ +AUTOMAKE_OPTIONS=foreign +SUBDIRS = lib datapath secchan controller utilities man include third-party diff --git a/README b/README index e69de29b..b5268917 100644 --- a/README +++ b/README @@ -0,0 +1,199 
@@ + OpenFlow Reference Release v0.1.5 + +What is OpenFlow? +----------------- + +OpenFlow is a flow-based switch specification designed to enable +researchers to run experiments in live networks. OpenFlow is based on a +simple Ethernet flow switch that exposes a standardized interface for +adding and removing flow entries. + +An OpenFlow Switch consists of three parts: (1) A Flow Table in which +each flow entry is associated with an action telling the switch how to +process the flow, (2) A Secure Channel connecting the switch to a remote +process (a controller), allowing commands and packets to be sent between +the controller and the switch, and (3) An OpenFlow Protocol +implementation, providing an open and standard way for a controller to +talk to the switch. + +An OpenFlow Switch can thus serve as a simple datapath element that +forwards packets between ports according to flow actions defined by +the controller using OpenFlow commands. Example actions are: + + - Forward this flow's packets to the given port(s) + - Drop this flow's packets + - Encapsulate and forward this flow's packets to the controller. + +The OpenFlow Switch is defined in detail in the OpenFlow Switch +Specification [2]. + +What's here? +------------ + +This software is a reference implementation of an OpenFlow Switch kernel +module for the Linux operating system, a secure channel implementation, +and an example controller that performs switching with MAC learning. + +The rest of this file contains the following sections: + + - Description of the directory hierarchy + - Platform support + - Quickstart build and install instructions + - Shortcomings + - References + +Directory Hierarchy +------------------- + + Source: + + datapath/ Linux kernel module implementing an OpenFlow Flow Table + that incoming packets are checked against. 
The + kernel module uses netlink (a socket protocol for + user-kernel communication, described in RFC 3549) to + pass OpenFlow messages with the secure channel to be + relayed to the controller. + + secchan/ A Secure Channel that connects to a kernel datapath + via netlink and a remote controller via TCP, + relaying OpenFlow packets received on one end to the + other. (The current implementation does not + support SSL, but this will be added in future releases.) + + controller/ A simple controller that connects to a datapath via + a Secure Channel, commanding the datapath to act as + a regular MAC learning switch. + + utilities/ Contains the source for "dpctl", a command-line utility + for controlling the OpenFlow datapath kernel module. + With it, you can add physical ports to the datapath, + add flows, monitor received packets, and query the + datapath state. + + include/ Header files common to the datapath, secure channel, + and controller components. + + lib/ Implementation files common to the datapath, secure + channel, and controller components. + + third-party/ Contains third-party software that may be useful for + debugging. Currently, it only contains a patch to + allow tcpdump to parse OpenFlow messages. + + Documentation: + + README Text file describing this OpenFlow implementation, + aka this document. + + INSTALLATION Detailed configure, build, and installation + instructions + + man/ Man pages describing how to administer datapath, + secure channel, and controller. + +Platform support +---------------- + + The datapath kernel module supports Linux 2.6.15 and above; however, + testing has focused on Linux 2.6.23. Support for Linux 2.4.20 and + above is also in place, although testing has focused on Linux 2.6. + + Components have been built and tested on Debian and Ubuntu. 
+ + If you are able to build/run the code on platforms not mentioned + here, or have problems with a supported system, please report your + experiences to: + + info@openflowswitch.org + + GCC is required for compilation. + + +Building and Installing (Quick Start) +------------------------------------- + + Building the datapath module requires that the source for the + currently running Linux kernel be installed on the machine and + configured. + + The following instructions assume the Linux 2.6 source is located in + /usr/src/linux-2.6.23 and Linux 2.4 in /usr/src/linux-2.4.35 + + 1. ./configure the package, passing the location of one or more + kernel source trees on the command line: + + For example, if compiling for Linux 2.6: + % ./configure --with-l26=/usr/src/linux-2.6.23 + + Or compiling for Linux 2.4: + % ./configure --with-l24=/usr/src/linux-2.4.35 + + 2. Run make: + + % make + + The following binaries should be built. + + Datapath kernel module: + ./datapath/linux-2.6/openflow_mod.ko (If compiling for Linux 2.6) + ./datapath/linux-2.4/openflow_mod.o (If compiling for Linux 2.4) + + Secure channel executable: + ./secchan/secchan + + Controller executable: + ./controller/controller + + dpctl utility: + ./utilities/dpctl + + 3. Optionally you can "make install" to install binaries and the + man pages (/usr/local/ is the default prefix). If you just want + access to the man pages without installing, set your MANPATH to + include the openflow/ source root. + + 4. Insert the datapath kernel module into the running Linux instance. + + (Linux 2.6) + % insmod datapath/linux-2.6/openflow_mod.ko + + (Linux 2.4) + % insmod datapath/linux-2.4/openflow_mod.o + + 5. Create datapaths by running dpctl on the Linux host (see man + dpctl(8)). Start the controller on a remote host with + controller (see man controller(8)). Start the Secure Channel + on the datapath host to connect the datapath to the controller + with secchan (see man secchan(8)). 
+ + For more detailed installation instructions, refer to [3]. + + +Bugs/Shortcomings +----------------- + +- The current flowtable does not support all statistics messages + mentioned in the Type 0 OpenFlow spec +- The secure channel and sample controller don't support SSL +- The flowtable does not support the "normal processing" action +- Configure/build system does not support a separate build directory for + the datapath. ./configure must be run from the source root. +- dpctl dump-flows may freeze when large numbers of flows are in the + flow table. This has no effect on the datapath + +References +---------- + + [1] OpenFlow: Enabling Innovation in College Networks. Whitepaper. + + + [2] OpenFlow Switch Specification. + + + [3] Installation Instructions: INSTALL + +Contact +------- + +e-mail: info@openflowswitch.org +www: http://openflowswitch.org/alpha/ diff --git a/acinclude.m4 b/acinclude.m4 new file mode 100644 index 00000000..1cca91a7 --- /dev/null +++ b/acinclude.m4 @@ -0,0 +1,42 @@ +dnl ================================================================================= +dnl Distributed under the terms of the GNU GPL version 2. 
+dnl Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior University +dnl ================================================================================= + + +dnl -- +dnl CHECK_LINUX(OPTION, VERSION, NAME, VARIABLE, CONDITIONAL) +dnl +dnl Configure linux kernel source tree +dnl -- +AC_DEFUN([CHECK_LINUX], [ + AC_ARG_WITH([$1], + [AC_HELP_STRING([--with-$1=/path/to/linux-$3], + [Specify the linux $3 kernel sources])], + [path="$withval"], [path=])dnl + if test -n "$path"; then + path=`eval echo "$path"` + + AC_MSG_CHECKING([for $path directory]) + if test -d "$path"; then + AC_MSG_RESULT([yes]) + $4=$path + AC_SUBST($4) + else + AC_MSG_RESULT([no]) + AC_MSG_ERROR([source dir $path doesn't exist]) + fi + + AC_MSG_CHECKING([for $path kernel version]) + version=`grep '^PATCHLEVEL = ' "$path/Makefile" | sed 's/PATCHLEVEL = '//` + AC_MSG_RESULT([2.$version]) + if test "2.$version" != '$2'; then + AC_MSG_ERROR([Linux kernel source in $path is not version $2]) + fi + if ! test -e "$path"/include/linux/version.h || \ + ! 
test -e "$path"/include/linux/autoconf.h; then + AC_MSG_ERROR([Linux kernel source in $path is not configured]) + fi + fi + AM_CONDITIONAL($5, test -n "$path") +]) diff --git a/configure.ac b/configure.ac new file mode 100644 index 00000000..4ec1972a --- /dev/null +++ b/configure.ac @@ -0,0 +1,55 @@ +AC_PREREQ(2.59) +AC_INIT(openflow, v0.1.8, info@openflowswitch.org) +AM_INIT_AUTOMAKE + +AC_PROG_CC +AC_PROG_CPP + +AC_USE_SYSTEM_EXTENSIONS + +AC_PROG_LIBTOOL + +AC_ARG_ENABLE( + [ndebug], + [AC_HELP_STRING([--enable-ndebug], + [Disable debugging features for max performance])], + [case "${enableval}" in # ( + yes) ndebug=true ;; # ( + no) ndebug=false ;; # ( + *) AC_MSG_ERROR([bad value ${enableval} for --enable-ndebug]) ;; + esac], + [ndebug=false]) +AM_CONDITIONAL([NDEBUG], [test x$ndebug = xtrue]) + +CHECK_LINUX(l26, 2.6, 2.6, KSRC26, L26_ENABLED) +CHECK_LINUX(uml, 2.6, 2.6-uml, KSRCUML, UML_ENABLED) +CHECK_LINUX(l24, 2.4, 2.4, KSRC24, L24_ENABLED) + +AC_CHECK_HEADER([linux/netlink.h], + [HAVE_NETLINK=yes], + [HAVE_NETLINK=no], + [#include ]) +AM_CONDITIONAL([HAVE_NETLINK], [test "$HAVE_NETLINK" = yes]) + +AC_CHECK_LIB([socket], [connect]) +AC_CHECK_LIB([resolv], [gethostbyname]) +AC_CHECK_LIB([dl], [dladdr]) + +CFLAGS="$CFLAGS -Wall -Wno-sign-compare" + +AC_CONFIG_FILES([Makefile +man/Makefile +man/man8/Makefile +datapath/Makefile +lib/Makefile +include/Makefile +controller/Makefile +utilities/Makefile +secchan/Makefile +datapath/tests/Makefile +third-party/Makefile +datapath/linux-2.6/Makefile +datapath/linux-2.6-uml/Makefile +datapath/linux-2.4/Makefile]) + +AC_OUTPUT diff --git a/controller/.gitignore b/controller/.gitignore new file mode 100644 index 00000000..14ba0495 --- /dev/null +++ b/controller/.gitignore @@ -0,0 +1,3 @@ +/Makefile +/Makefile.in +/controller diff --git a/controller/Makefile.am b/controller/Makefile.am new file mode 100644 index 00000000..1a22d525 --- /dev/null +++ b/controller/Makefile.am @@ -0,0 +1,6 @@ +include ../Make.vars + 
+bin_PROGRAMS = controller + +controller_SOURCES = controller.c +controller_LDADD = ../lib/libopenflow.la diff --git a/controller/controller.c b/controller/controller.c new file mode 100644 index 00000000..8e22feab --- /dev/null +++ b/controller/controller.c @@ -0,0 +1,687 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "buffer.h" +#include "command-line.h" +#include "compiler.h" +#include "fault.h" +#include "flow.h" +#include "hash.h" +#include "list.h" +#include "mac.h" +#include "ofp-print.h" +#include "openflow.h" +#include "time.h" +#include "util.h" +#include "vconn.h" +#include "vlog-socket.h" +#include "xtoxll.h" + +#include "vlog.h" +#define THIS_MODULE VLM_controller + +#define MAX_SWITCHES 16 +#define MAX_TXQ 128 + +struct switch_ { + char *name; + struct vconn *vconn; + struct pollfd *pollfd; + + uint64_t datapath_id; + time_t last_control_hello; + + int n_txq; + struct buffer *txq, *tx_tail; +}; + +/* -H, --hub: Use dumb hub instead of learning switch? */ +static bool hub = false; + +/* -n, --noflow: Pass traffic, but don't setup flows in switch */ +static bool noflow = false; + +static void parse_options(int argc, char *argv[]); +static void usage(void) NO_RETURN; + +static struct switch_ *connect_switch(const char *name); +static struct switch_ *new_switch(const char *name, struct vconn *); +static void close_switch(struct switch_ *); + +static void queue_tx(struct switch_ *, struct buffer *); + +static void send_control_hello(struct switch_ *); + +static int do_switch_recv(struct switch_ *this); +static int do_switch_send(struct switch_ *this); + +static void process_packet(struct switch_ *, struct buffer *); +static void process_hub(struct switch_ *, struct ofp_packet_in *); +static void process_noflow(struct switch_ *, struct ofp_packet_in *); + +static void switch_init(void); +static void process_switch(struct switch_ *, struct ofp_packet_in *); + +int +main(int argc, char *argv[]) +{ + struct switch_ *switches[MAX_SWITCHES]; + struct pollfd pollfds[MAX_SWITCHES + 1]; + struct vlog_server *vlog_server; + int n_switches; + int retval; + int i; + + set_program_name(argv[0]); + register_fault_handlers(); + vlog_init(); + parse_options(argc, argv); + + if 
(!hub && !noflow) { + switch_init(); + } + + if (argc - optind < 1) { + fatal(0, "at least one vconn argument required; use --help for usage"); + } + + retval = vlog_server_listen(NULL, &vlog_server); + if (retval) { + fatal(retval, "Could not listen for vlog connections"); + } + + n_switches = 0; + for (i = 0; i < argc - optind; i++) { + struct switch_ *this = connect_switch(argv[optind + i]); + if (this) { + if (n_switches >= MAX_SWITCHES) { + fatal(0, "max %d switch connections", n_switches); + } + switches[n_switches++] = this; + } + } + if (n_switches == 0) { + fatal(0, "could not connect to any switches"); + } + + while (n_switches > 0) { + int retval; + + /* Wait until there's something to do. */ + for (i = 0; i < n_switches; i++) { + struct switch_ *this = switches[i]; + int want; + + if (vconn_is_passive(this->vconn)) { + want = n_switches < MAX_SWITCHES ? WANT_ACCEPT : 0; + } else { + want = WANT_RECV; + if (this->n_txq) { + want |= WANT_SEND; + } + } + + this->pollfd = &pollfds[i]; + this->pollfd->fd = -1; + this->pollfd->events = 0; + vconn_prepoll(this->vconn, want, this->pollfd); + } + if (vlog_server) { + pollfds[n_switches].fd = vlog_server_get_fd(vlog_server); + pollfds[n_switches].events = POLLIN; + } + do { + retval = poll(pollfds, n_switches + (vlog_server != NULL), -1); + } while (retval < 0 && errno == EINTR); + if (retval <= 0) { + fatal(retval < 0 ? errno : 0, "poll"); + } + + /* Let each connection deal with any pending operations. 
*/ + for (i = 0; i < n_switches; i++) { + struct switch_ *this = switches[i]; + vconn_postpoll(this->vconn, &this->pollfd->revents); + if (this->pollfd->revents & POLLERR) { + this->pollfd->revents |= POLLIN | POLLOUT; + } + } + if (vlog_server && pollfds[n_switches].revents) { + vlog_server_poll(vlog_server); + } + + for (i = 0; i < n_switches; ) { + struct switch_ *this = switches[i]; + + if (this->pollfd) { + retval = 0; + if (vconn_is_passive(this->vconn)) { + if (this->pollfd->revents & POLLIN) { + struct vconn *new_vconn; + while (n_switches < MAX_SWITCHES + && (retval = vconn_accept(this->vconn, + &new_vconn)) == 0) { + switches[n_switches++] = new_switch("tcp", + new_vconn); + } + } + } else { + bool may_read = this->pollfd->revents & POLLIN; + bool may_write = this->pollfd->revents & POLLOUT; + if (may_read) { + retval = do_switch_recv(this); + if (!retval || retval == EAGAIN) { + retval = 0; + + /* Enable writing to avoid round trip through poll + * in common case. */ + may_write = true; + } + } + while ((!retval || retval == EAGAIN) && may_write) { + retval = do_switch_send(this); + may_write = !retval; + } + } + + if (retval && retval != EAGAIN) { + close_switch(this); + switches[i] = switches[--n_switches]; + continue; + } + } else { + /* New switch that hasn't been polled yet. 
*/ + } + i++; + } + } + + return 0; +} + +static int +do_switch_recv(struct switch_ *this) +{ + struct buffer *msg; + int retval; + + retval = vconn_recv(this->vconn, &msg); + if (!retval) { + process_packet(this, msg); + buffer_delete(msg); + } + return retval; +} + +static int +do_switch_send(struct switch_ *this) +{ + int retval = 0; + if (this->n_txq) { + struct buffer *next = this->txq->next; + + retval = vconn_send(this->vconn, this->txq); + if (retval) { + return retval; + } + + this->txq = next; + if (this->txq == NULL) { + this->tx_tail = NULL; + } + this->n_txq--; + return 0; + } + return EAGAIN; +} + +struct switch_ * +connect_switch(const char *name) +{ + struct vconn *vconn; + int retval; + + retval = vconn_open(name, &vconn); + if (retval) { + VLOG_ERR("%s: connect: %s", name, strerror(retval)); + return NULL; + } + + return new_switch(name, vconn); +} + +static struct switch_ * +new_switch(const char *name, struct vconn *vconn) +{ + struct switch_ *this = xmalloc(sizeof *this); + memset(this, 0, sizeof *this); + this->name = xstrdup(name); + this->vconn = vconn; + this->pollfd = NULL; + this->n_txq = 0; + this->txq = NULL; + this->tx_tail = NULL; + this->last_control_hello = 0; + if (!vconn_is_passive(vconn)) { + send_control_hello(this); + } + return this; +} + +static void +close_switch(struct switch_ *this) +{ + if (this) { + struct buffer *cur, *next; + + free(this->name); + vconn_close(this->vconn); + for (cur = this->txq; cur != NULL; cur = next) { + next = cur->next; + buffer_delete(cur); + } + free(this); + } +} + +static void +send_control_hello(struct switch_ *this) +{ + time_t now = time(0); + if (now >= this->last_control_hello + 1) { + struct buffer *b; + struct ofp_control_hello *och; + + b = buffer_new(0); + och = buffer_put_uninit(b, sizeof *och); + memset(och, 0, sizeof *och); + och->header.version = OFP_VERSION; + och->header.length = htons(sizeof *och); + + och->version = htonl(OFP_VERSION); + och->flags = 
htons(OFP_CHELLO_SEND_FLOW_EXP); + och->miss_send_len = htons(OFP_DEFAULT_MISS_SEND_LEN); + queue_tx(this, b); + + this->last_control_hello = now; + } +} + +static void +check_txq(struct switch_ *this UNUSED) +{ +#if 0 + struct buffer *iter; + size_t n; + + assert(this->n_txq == 0 + ? this->txq == NULL && this->tx_tail == NULL + : this->txq != NULL && this->tx_tail != NULL); + + n = 0; + for (iter = this->txq; iter != NULL; iter = iter->next) { + n++; + assert((iter->next != NULL) == (iter != this->tx_tail)); + } + assert(n == this->n_txq); +#endif +} + +static void +queue_tx(struct switch_ *this, struct buffer *b) +{ + check_txq(this); + + b->next = NULL; + if (this->n_txq++) { + this->tx_tail->next = b; + } else { + this->txq = b; + } + this->tx_tail = b; + + check_txq(this); +} + +static void +process_packet(struct switch_ *sw, struct buffer *msg) +{ + static const size_t min_size[UINT8_MAX + 1] = { + [0 ... UINT8_MAX] = SIZE_MAX, + [OFPT_CONTROL_HELLO] = sizeof (struct ofp_control_hello), + [OFPT_DATA_HELLO] = sizeof (struct ofp_data_hello), + [OFPT_PACKET_IN] = offsetof (struct ofp_packet_in, data), + [OFPT_PACKET_OUT] = sizeof (struct ofp_packet_out), + [OFPT_FLOW_MOD] = sizeof (struct ofp_flow_mod), + [OFPT_FLOW_EXPIRED] = sizeof (struct ofp_flow_expired), + [OFPT_TABLE] = sizeof (struct ofp_table), + [OFPT_PORT_MOD] = sizeof (struct ofp_port_mod), + [OFPT_PORT_STATUS] = sizeof (struct ofp_port_status), + [OFPT_FLOW_STAT_REQUEST] = sizeof (struct ofp_flow_stat_request), + [OFPT_FLOW_STAT_REPLY] = sizeof (struct ofp_flow_stat_reply), + }; + struct ofp_header *oh; + + oh = msg->data; + if (msg->size < min_size[oh->type]) { + VLOG_WARN("%s: too short (%zu bytes) for type %"PRIu8" (min %zu)", + sw->name, msg->size, oh->type, min_size[oh->type]); + return; + } + + if (oh->type == OFPT_DATA_HELLO) { + struct ofp_data_hello *odh = msg->data; + sw->datapath_id = odh->datapath_id; + } else if (sw->datapath_id == 0) { + send_control_hello(sw); + return; + } + + if 
(oh->type == OFPT_PACKET_IN) { + if (sw->n_txq >= MAX_TXQ) { + VLOG_WARN("%s: tx queue overflow", sw->name); + } else if (noflow) { + process_noflow(sw, msg->data); + } else if (hub) { + process_hub(sw, msg->data); + } else { + process_switch(sw, msg->data); + } + return; + } + + ofp_print(stdout, msg->data, msg->size, 2); +} + +static void +process_hub(struct switch_ *sw, struct ofp_packet_in *opi) +{ + size_t pkt_ofs, pkt_len; + struct buffer pkt; + struct flow flow; + + /* Extract flow data from 'opi' into 'flow'. */ + pkt_ofs = offsetof(struct ofp_packet_in, data); + pkt_len = ntohs(opi->header.length) - pkt_ofs; + pkt.data = opi->data; + pkt.size = pkt_len; + flow_extract(&pkt, ntohs(opi->in_port), &flow); + + /* Add new flow. */ + queue_tx(sw, make_add_simple_flow(&flow, ntohl(opi->buffer_id), + OFPP_FLOOD)); + + /* If the switch didn't buffer the packet, we need to send a copy. */ + if (ntohl(opi->buffer_id) == UINT32_MAX) { + queue_tx(sw, make_unbuffered_packet_out(&pkt, ntohs(flow.in_port), + OFPP_FLOOD)); + } +} + +static void +process_noflow(struct switch_ *sw, struct ofp_packet_in *opi) +{ + /* If the switch didn't buffer the packet, we need to send a copy. */ + if (ntohl(opi->buffer_id) == UINT32_MAX) { + size_t pkt_ofs, pkt_len; + struct buffer pkt; + + /* Extract flow data from 'opi' into 'flow'. 
*/ + pkt_ofs = offsetof(struct ofp_packet_in, data); + pkt_len = ntohs(opi->header.length) - pkt_ofs; + pkt.data = opi->data; + pkt.size = pkt_len; + + queue_tx(sw, make_unbuffered_packet_out(&pkt, ntohs(opi->in_port), + OFPP_FLOOD)); + } else { + queue_tx(sw, make_buffered_packet_out(ntohl(opi->buffer_id), + ntohs(opi->in_port), OFPP_FLOOD)); + } +} + + +#define MAC_HASH_BITS 10 +#define MAC_HASH_MASK (MAC_HASH_SIZE - 1) +#define MAC_HASH_SIZE (1u << MAC_HASH_BITS) + +#define MAC_MAX 1024 + +struct mac_source { + struct list hash_list; + struct list lru_list; + uint64_t datapath_id; + uint8_t mac[ETH_ADDR_LEN]; + uint16_t port; +}; + +static struct list mac_table[MAC_HASH_SIZE]; +static struct list lrus; +static size_t mac_count; + +static void +switch_init(void) +{ + int i; + + list_init(&lrus); + for (i = 0; i < MAC_HASH_SIZE; i++) { + list_init(&mac_table[i]); + } +} + +static struct list * +mac_table_bucket(uint64_t datapath_id, const uint8_t mac[ETH_ADDR_LEN]) +{ + uint32_t hash; + hash = hash_fnv(&datapath_id, sizeof datapath_id, HASH_FNV_BASIS); + hash = hash_fnv(mac, ETH_ADDR_LEN, hash); + return &mac_table[hash & MAC_HASH_MASK]; /* Mask, not the bit count, so every bucket is reachable. */ +} + +static void +process_switch(struct switch_ *sw, struct ofp_packet_in *opi) +{ + size_t pkt_ofs, pkt_len; + struct buffer pkt; + struct flow flow; + + uint16_t out_port; + + /* Extract flow data from 'opi' into 'flow'. */ + pkt_ofs = offsetof(struct ofp_packet_in, data); + pkt_len = ntohs(opi->header.length) - pkt_ofs; + pkt.data = opi->data; + pkt.size = pkt_len; + flow_extract(&pkt, ntohs(opi->in_port), &flow); + + /* Learn the source. 
*/ + if (!mac_is_multicast(flow.dl_src)) { + struct mac_source *src; + struct list *bucket; + bool found; + + bucket = mac_table_bucket(sw->datapath_id, flow.dl_src); + found = false; + LIST_FOR_EACH (src, struct mac_source, hash_list, bucket) { + if (src->datapath_id == sw->datapath_id + && mac_equals(src->mac, flow.dl_src)) { + found = true; + break; + } + } + + if (!found) { + /* Learn a new address. */ + + if (mac_count >= MAC_MAX) { + /* Drop the least recently used mac source. */ + struct mac_source *lru; + lru = CONTAINER_OF(lrus.next, struct mac_source, lru_list); + list_remove(&lru->hash_list); + list_remove(&lru->lru_list); + free(lru); + } else { + mac_count++; + } + + /* Create new mac source */ + src = xmalloc(sizeof *src); + src->datapath_id = sw->datapath_id; + memcpy(src->mac, flow.dl_src, ETH_ADDR_LEN); + src->port = -1; + list_push_front(bucket, &src->hash_list); + list_push_back(&lrus, &src->lru_list); + } else { + /* Make 'src' most-recently-used. */ + list_remove(&src->lru_list); + list_push_back(&lrus, &src->lru_list); + } + + if (ntohs(flow.in_port) != src->port) { + src->port = ntohs(flow.in_port); + VLOG_DBG("learned that "MAC_FMT" is on datapath %"PRIx64" port %d", + MAC_ARGS(src->mac), ntohll(src->datapath_id), + src->port); + } + } else { + VLOG_DBG("multicast packet source "MAC_FMT, MAC_ARGS(flow.dl_src)); + } + + /* Figure out the destination. */ + out_port = OFPP_FLOOD; + if (!mac_is_multicast(flow.dl_dst)) { + struct mac_source *dst; + struct list *bucket; + + bucket = mac_table_bucket(sw->datapath_id, flow.dl_dst); + LIST_FOR_EACH (dst, struct mac_source, hash_list, bucket) { + if (dst->datapath_id == sw->datapath_id + && mac_equals(dst->mac, flow.dl_dst)) { + out_port = dst->port; + break; + } + } + } + + if (out_port != OFPP_FLOOD) { + /* The output port is known, so add a new flow. 
*/ + queue_tx(sw, make_add_simple_flow(&flow, ntohl(opi->buffer_id), + out_port)); + + /* If the switch didn't buffer the packet, we need to send a copy. */ + if (ntohl(opi->buffer_id) == UINT32_MAX) { + queue_tx(sw, make_unbuffered_packet_out(&pkt, ntohs(flow.in_port), + out_port)); + } + } else { + /* We don't know that MAC. Flood the packet. */ + struct buffer *b; + if (ntohl(opi->buffer_id) == UINT32_MAX) { + b = make_unbuffered_packet_out(&pkt, ntohs(flow.in_port), out_port); + } else { + b = make_buffered_packet_out(ntohl(opi->buffer_id), + ntohs(flow.in_port), out_port); + } + queue_tx(sw, b); + } +} + +static void +parse_options(int argc, char *argv[]) +{ + static struct option long_options[] = { + {"hub", no_argument, 0, 'H'}, + {"noflow", no_argument, 0, 'n'}, + {"verbose", optional_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + {0, 0, 0, 0}, + }; + char *short_options = long_options_to_short_options(long_options); + + for (;;) { + int indexptr; + int c; + + c = getopt_long(argc, argv, short_options, long_options, &indexptr); + if (c == -1) { + break; + } + + switch (c) { + case 'H': + hub = true; + break; + + case 'n': + noflow = true; + break; + + case 'h': + usage(); + + case 'V': + printf("%s "VERSION" compiled "__DATE__" "__TIME__"\n", argv[0]); + exit(EXIT_SUCCESS); + + case 'v': + vlog_set_verbosity(optarg); + break; + + case '?': + exit(EXIT_FAILURE); + + default: + abort(); + } + } + free(short_options); +} + +static void +usage(void) +{ + printf("%s: OpenFlow controller\n" + "usage: %s [OPTIONS] VCONN\n" + "where VCONN is one of the following:\n" +#ifdef HAVE_NETLINK + " nl:DP_IDX via netlink to local datapath DP_IDX\n" +#endif + " ptcp:[PORT] listen to TCP PORT (default: %d)\n" + "\nOther options:\n" + " -H, --hub act as hub instead of learning switch\n" + " -n, --noflow pass traffic, but don't add flows\n" + " -v, --verbose set maximum verbosity level\n" + " -h, --help display this help message\n" + " 
-V, --version display version information\n", + program_name, program_name, OFP_TCP_PORT); + exit(EXIT_SUCCESS); +} diff --git a/datapath/.gitignore b/datapath/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null +++ b/datapath/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/datapath/Makefile.am b/datapath/Makefile.am new file mode 100644 index 00000000..99c09af4 --- /dev/null +++ b/datapath/Makefile.am @@ -0,0 +1,35 @@ +SUBDIRS = tests +if L26_ENABLED +SUBDIRS += linux-2.6 +endif +if UML_ENABLED +SUBDIRS += linux-2.6-uml +endif +if L24_ENABLED +SUBDIRS += linux-2.4 +endif + +EXTRA_DIST = linux-2.6 linux-2.4 linux-2.6-uml\ + datapath.c snap.h chain.c crc32.c crc_t.c\ + flow.h forward.h table-hash.c table-mac.c\ + unit.c unit.h datapath.h chain.h crc32.h\ + flow.c forward.c forward_t.c table.h\ + table-linear.c table_t.c unit-exports.c\ + datapath_t.c datapath_t.h compat.h\ + dp_dev.c + +# Do not include header and source files from the top of the linux-* +# directories, as these are just symbolic links to the files in +# "datapath". +dist-hook: + rm -rf `find $(distdir)/linux-* -name Module.symvers` + rm -rf `find $(distdir)/linux-* -name '.*.cmd'` + rm -rf `find $(distdir)/linux-* -name '.*.swp'` + rm -rf `find $(distdir)/linux-* -name '.*.d'` + rm -rf `find $(distdir)/linux-* -name .tmp_versions` + rm -rf `find $(distdir)/linux-* -name '*.o'` + rm -rf `find $(distdir)/linux-* -name '*.ko'` + rm -rf `find $(distdir)/linux-* -name Makefile` + rm -rf `find $(distdir)/linux-* -name .gitignore` + rm -f $(distdir)/linux-*/*.h + rm -f $(distdir)/linux-*/*.c diff --git a/datapath/README b/datapath/README new file mode 100644 index 00000000..e69de29b diff --git a/datapath/chain.c b/datapath/chain.c new file mode 100644 index 00000000..458e9e4f --- /dev/null +++ b/datapath/chain.c @@ -0,0 +1,161 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior University + */ + +#include "chain.h" +#include "flow.h" +#include "table.h" +#include +#include + +/* Attempts to append 'table' to the set of tables in 'chain'. Returns 0 or + * negative error. If 'table' is null it is assumed that table creation failed + * due to out-of-memory. */ +static int add_table(struct sw_chain *chain, struct sw_table *table) +{ + if (table == NULL) + return -ENOMEM; + if (chain->n_tables >= CHAIN_MAX_TABLES) { + printk("too many tables in chain\n"); + table->destroy(table); + return -ENOBUFS; + } + chain->tables[chain->n_tables++] = table; + return 0; +} + +/* Creates and returns a new chain associated with 'dp'. Returns NULL if the + * chain cannot be created. */ +struct sw_chain *chain_create(struct datapath *dp) +{ + struct sw_chain *chain = kzalloc(sizeof *chain, GFP_KERNEL); + if (chain == NULL) + return NULL; + chain->dp = dp; + + if (add_table(chain, table_mac_create(TABLE_MAC_NUM_BUCKETS, + TABLE_MAC_MAX_FLOWS)) + || add_table(chain, table_hash2_create(0x1EDC6F41, TABLE_HASH_MAX_FLOWS, + 0x741B8CD7, TABLE_HASH_MAX_FLOWS)) + || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS))) { + chain_destroy(chain); + return NULL; + } + + return chain; +} + +/* Searches 'chain' for a flow matching 'key', which must not have any wildcard + * fields. Returns the flow if successful, otherwise a null pointer. + * + * Caller must hold rcu_read_lock, and not release it until it is done with the + * returned flow. */ +struct sw_flow *chain_lookup(struct sw_chain *chain, + const struct sw_flow_key *key) +{ + int i; + + BUG_ON(key->wildcards); + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + struct sw_flow *flow = t->lookup(t, key); + if (flow) + return flow; + } + return NULL; +} + +/* Inserts 'flow' into 'chain', replacing any duplicate flow. Returns 0 if + * successful or a negative error. 
+ * + * If successful, 'flow' becomes owned by the chain, otherwise it is retained + * by the caller. + * + * Caller must hold rcu_read_lock. If insertion is successful, it must not + * release rcu_read_lock until it is done with the inserted flow. */ +int chain_insert(struct sw_chain *chain, struct sw_flow *flow) +{ + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + if (t->insert(t, flow)) + return 0; + } + + return -ENOBUFS; +} + +/* Deletes from 'chain' any and all flows that match 'key'. Returns the number + * of flows that were deleted. + * + * Expensive in the general case as currently implemented, since it requires + * iterating through the entire contents of each table for keys that contain + * wildcards. Relatively cheap for fully specified keys. + * + * The caller need not hold any locks. */ +int chain_delete(struct sw_chain *chain, const struct sw_flow_key *key, int strict) +{ + int count = 0; + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + rcu_read_lock(); + count += t->delete(t, key, strict); + rcu_read_unlock(); + } + + return count; + +} + +/* Performs timeout processing on all the tables in 'chain'. Returns the + * number of flow entries deleted through expiration. + * + * Expensive as currently implemented, since it iterates through the entire + * contents of each table. + * + * The caller need not hold any locks. */ +int chain_timeout(struct sw_chain *chain) +{ + int count = 0; + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + rcu_read_lock(); + count += t->timeout(chain->dp, t); + rcu_read_unlock(); + } + return count; +} + +/* Destroys 'chain', which must not have any users. 
*/ +void chain_destroy(struct sw_chain *chain) +{ + int i; + + synchronize_rcu(); + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + t->destroy(t); + } + kfree(chain); +} + +/* Prints statistics for each of the tables in 'chain'. */ +void chain_print_stats(struct sw_chain *chain) +{ + int i; + + printk("\n"); + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + struct sw_table_stats stats; + t->stats(t, &stats); + printk("%s: %lu/%lu flows\n", + stats.name, stats.n_flows, stats.max_flows); + } +} diff --git a/datapath/chain.h b/datapath/chain.h new file mode 100644 index 00000000..fc07f513 --- /dev/null +++ b/datapath/chain.h @@ -0,0 +1,31 @@ +#ifndef CHAIN_H +#define CHAIN_H 1 + +struct sw_flow; +struct sw_flow_key; +struct datapath; + + +#define TABLE_LINEAR_MAX_FLOWS 100 +#define TABLE_HASH_MAX_FLOWS 65536 +#define TABLE_MAC_MAX_FLOWS 1024 +#define TABLE_MAC_NUM_BUCKETS 1024 + +/* Set of tables chained together in sequence from cheap to expensive. 
*/ +#define CHAIN_MAX_TABLES 4 +struct sw_chain { + int n_tables; + struct sw_table *tables[CHAIN_MAX_TABLES]; + + struct datapath *dp; +}; + +struct sw_chain *chain_create(struct datapath *); +struct sw_flow *chain_lookup(struct sw_chain *, const struct sw_flow_key *); +int chain_insert(struct sw_chain *, struct sw_flow *); +int chain_delete(struct sw_chain *, const struct sw_flow_key *, int); +int chain_timeout(struct sw_chain *); +void chain_destroy(struct sw_chain *); +void chain_print_stats(struct sw_chain *); + +#endif /* chain.h */ diff --git a/datapath/compat.h b/datapath/compat.h new file mode 100644 index 00000000..12100ae3 --- /dev/null +++ b/datapath/compat.h @@ -0,0 +1,17 @@ +#ifndef COMPAT_H +#define COMPAT_H 1 + +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + +#include "compat26.h" + +#else + +#include "compat24.h" + +#endif + + +#endif /* compat.h */ diff --git a/datapath/crc32.c b/datapath/crc32.c new file mode 100644 index 00000000..a5210ad4 --- /dev/null +++ b/datapath/crc32.c @@ -0,0 +1,40 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include "crc32.h" + +void crc32_init(struct crc32 *crc, unsigned int polynomial) +{ + int i; + + for (i = 0; i < CRC32_TABLE_SIZE; ++i) { + unsigned int reg = i << 24; + int j; + for (j = 0; j < CRC32_TABLE_BITS; j++) { + int topBit = (reg & 0x80000000) != 0; + reg <<= 1; + if (topBit) + reg ^= polynomial; + } + crc->table[i] = reg; + } +} + +unsigned int crc32_calculate(const struct crc32 *crc, + const void *data_, size_t n_bytes) +{ + // FIXME: this can be optimized by unrolling, see linux-2.6/lib/crc32.c. 
+ const uint8_t *data = data_; + unsigned int result = 0; + size_t i; + + for (i = 0; i < n_bytes; i++) { + unsigned int top = result >> 24; + top ^= data[i]; + result = (result << 8) ^ crc->table[top]; + } + return result; +} diff --git a/datapath/crc32.h b/datapath/crc32.h new file mode 100644 index 00000000..21a350a9 --- /dev/null +++ b/datapath/crc32.h @@ -0,0 +1,22 @@ +#ifndef CRC32_H +#define CRC32_H 1 + +#include +#ifndef __KERNEL__ +#include +#endif +#include + +#define CRC32_TABLE_BITS 8 +#define CRC32_TABLE_SIZE (1u << CRC32_TABLE_BITS) + +struct crc32 { + unsigned int table[CRC32_TABLE_SIZE]; +}; + +void crc32_init(struct crc32 *, unsigned int polynomial); +unsigned int crc32_calculate(const struct crc32 *, + const void *data_, size_t n_bytes); + + +#endif /* crc32.h */ diff --git a/datapath/crc_t.c b/datapath/crc_t.c new file mode 100644 index 00000000..e01768fc --- /dev/null +++ b/datapath/crc_t.c @@ -0,0 +1,47 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include +#include + +#include "crc32.h" +#include "unit.h" + + +static void +print_error(unsigned int poly, char *data, + unsigned int expected, unsigned int calculated) +{ + unit_fail("crc error: poly=%x data=%s expected=%x calculated=%x\n", + poly, data, expected, calculated); +} + +void +run_crc_t(void) +{ + struct crc32 crc; + unsigned int val, i, j; + + char *data[3] = { "h3rei$@neX@mp13da7@sTr117G0fCH@r$", + "1324lkqasdf0-[LKJD0;asd,.cv;/asd0:\"'~`co29", + "6" }; + + unsigned int polys[2] = { 0x04C11DB7, + 0x1EDC6F41 }; + + unsigned int crc_values[2][3] = { + { 0xDE1040C3, 0x65343A0B, 0xCEB42022 }, + { 0x6C149FAE, 0x470A6B73, 0x4D3AA134 } }; + for (i = 0; i < 2; i++) { + crc32_init(&crc, polys[i]); + for (j = 0; j < 3; j++) { + val = crc32_calculate(&crc, data[j], strlen(data[j])); + if (val != crc_values[i][j]) { + print_error(polys[i], data[j], crc_values[i][j], val); + } + } + } +} diff --git a/datapath/datapath.c b/datapath/datapath.c new file mode 100644 index 00000000..1b5a2cd5 --- /dev/null +++ b/datapath/datapath.c @@ -0,0 +1,1624 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +/* Functions for managing the dp interface/device. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "openflow-netlink.h" +#include "datapath.h" +#include "table.h" +#include "chain.h" +#include "forward.h" +#include "flow.h" +#include "datapath_t.h" + +#include "compat.h" + + +/* Number of seconds between runs of the flow expiration code. 
*/ +#define EXPIRE_SECS 1 + +#define BRIDGE_PORT_NO_FLOOD 0x00000001 + +#define UINT32_MAX 4294967295U + +struct net_bridge_port { + u16 port_no; + u32 flags; + struct datapath *dp; + struct net_device *dev; + struct list_head node; /* Element in datapath.ports. */ +}; + +static struct genl_family dp_genl_family; +static struct genl_multicast_group mc_group; + +int dp_dev_setup(struct net_device *dev); + +/* It's hard to imagine wanting more than one datapath, but... */ +#define DP_MAX 32 + +/* datapaths. Protected on the read side by rcu_read_lock, on the write side + * by dp_mutex. + * + * It is safe to access the datapath and net_bridge_port structures with just + * the dp_mutex, but to access the chain you need to take the rcu_read_lock + * also (because dp_mutex doesn't prevent flows from being destroyed). + */ +static struct datapath *dps[DP_MAX]; +static DEFINE_MUTEX(dp_mutex); + +static void dp_timer_handler(unsigned long arg); +static int send_port_status(struct net_bridge_port *p, uint8_t status); + + +/* nla_unreserve - reduce amount of space reserved by nla_reserve + * @skb: socket buffer from which to recover room + * @nla: netlink attribute to adjust + * @len: amount by which to reduce attribute payload + * + * Reduces amount of space reserved by a call to nla_reserve. + * + * No other attributes may be added between calling nla_reserve and this + * function, since it will create a hole in the message. + */ +void nla_unreserve(struct sk_buff *skb, struct nlattr *nla, int len) +{ + skb->tail -= len; + skb->len -= len; + + nla->nla_len -= len; +} + +/* Generates a unique datapath id. It incorporates the datapath index + * and a hardware address, if available. If not, it generates a random + * one. + */ +static +uint64_t gen_datapath_id(uint16_t dp_idx) +{ + uint64_t id; + int i; + struct net_device *dev; + + /* The top 16 bits are used to identify the datapath. The lower 48 bits + * use an interface address. 
*/ + id = (uint64_t)dp_idx << 48; + if ((dev = dev_get_by_name(&init_net, "ctl0")) + || (dev = dev_get_by_name(&init_net, "eth0"))) { + for (i=0; idev_addr[i] << (8*(ETH_ALEN-1 - i)); + } + dev_put(dev); + } else { + /* Randomly choose the lower 48 bits if we cannot find an + * address and mark the most significant bit to indicate that + * this was randomly generated. */ + uint8_t rand[ETH_ALEN]; + get_random_bytes(rand, ETH_ALEN); + id |= (uint64_t)1 << 63; + for (i=0; i= DP_MAX) + return -EINVAL; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&dp_mutex); + dp = rcu_dereference(dps[dp_idx]); + if (dp != NULL) { + err = -EEXIST; + goto err_unlock; + } + + err = -ENOMEM; + dp = kzalloc(sizeof *dp, GFP_KERNEL); + if (dp == NULL) + goto err_unlock; + + dp->dp_idx = dp_idx; + dp->id = gen_datapath_id(dp_idx); + dp->chain = chain_create(dp); + if (dp->chain == NULL) + goto err_free_dp; + INIT_LIST_HEAD(&dp->port_list); + +#if 0 + /* Setup our "of" device */ + dp->dev.priv = dp; + rtnl_lock(); + err = dp_dev_setup(&dp->dev); + rtnl_unlock(); + if (err != 0) + printk("datapath: problem setting up 'of' device\n"); +#endif + + dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN; + + setup_timer(&dp->timer, dp_timer_handler, (unsigned long) dp); + mod_timer(&dp->timer, round_jiffies(jiffies + (EXPIRE_SECS * HZ))); + + rcu_assign_pointer(dps[dp_idx], dp); + mutex_unlock(&dp_mutex); + + return 0; + +err_free_dp: + kfree(dp); +err_unlock: + mutex_unlock(&dp_mutex); + module_put(THIS_MODULE); + return err; +} + +/* Find and return a free port number under 'dp'. Called under dp_mutex. 
+ */
+static int find_portno(struct datapath *dp)
+{
+	int i;
+
+	for (i = 0; i < OFPP_MAX; i++)
+		if (dp->ports[i] == NULL)
+			return i;
+	return -EXFULL;
+}
+
+/* Allocates a port structure binding 'dev' to the first free port number in
+ * 'dp'.  Takes one reference on 'dev', which is held for the lifetime of the
+ * returned port and released by del_switch_port().
+ *
+ * Returns the new port, or an ERR_PTR: -EXFULL if 'dp' has no free port
+ * number, -ENOMEM if allocation fails. */
+static struct net_bridge_port *new_nbp(struct datapath *dp,
+				       struct net_device *dev)
+{
+	struct net_bridge_port *p;
+	int port_no;
+
+	port_no = find_portno(dp);
+	if (port_no < 0)
+		return ERR_PTR(port_no);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	p->dp = dp;
+	dev_hold(dev);
+	p->dev = dev;
+	p->port_no = port_no;
+
+	return p;
+}
+
+/* Attaches 'dev' to 'dp' as a new switch port, puts it into promiscuous
+ * mode, and announces the port to the control path.
+ *
+ * Returns 0 on success, -EINVAL if 'dev' is a loopback or non-Ethernet
+ * device, -EBUSY if 'dev' already has a br_port, or the error from
+ * new_nbp().
+ *
+ * Called with dp_mutex. */
+int add_switch_port(struct datapath *dp, struct net_device *dev)
+{
+	struct net_bridge_port *p;
+
+	if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER)
+		return -EINVAL;
+
+	if (dev->br_port != NULL)
+		return -EBUSY;
+
+	p = new_nbp(dp, dev);
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
+	/* new_nbp() already holds the port's reference on 'dev'.  Taking a
+	 * second one here would never be dropped, because del_switch_port()
+	 * does a single dev_put(). */
+	rcu_assign_pointer(dev->br_port, p);
+	rtnl_lock();
+	dev_set_promiscuity(dev, 1);
+	rtnl_unlock();
+
+	rcu_assign_pointer(dp->ports[p->port_no], p);
+	list_add_rcu(&p->node, &dp->port_list);
+
+	/* Notify the ctlpath that this port has been added */
+	send_port_status(p, OFPPR_ADD);
+
+	return 0;
+}
+
+/* Delete 'p' from switch and release the device reference taken by
+ * new_nbp().  Always returns 0.
+ * Called with dp_mutex. */
+static int del_switch_port(struct net_bridge_port *p)
+{
+	/* First drop references to device. */
+	rtnl_lock();
+	dev_set_promiscuity(p->dev, -1);
+	rtnl_unlock();
+	list_del_rcu(&p->node);
+	rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
+	rcu_assign_pointer(p->dev->br_port, NULL);
+
+	/* Then wait until no one is still using it, and destroy it. */
+	synchronize_rcu();
+
+	/* Notify the ctlpath that this port no longer exists */
+	send_port_status(p, OFPPR_DELETE);
+
+	dev_put(p->dev);
+	kfree(p);
+
+	return 0;
+}
+
+/* Called with dp_mutex. 
*/ +static void del_dp(struct datapath *dp) +{ + struct net_bridge_port *p, *n; + +#if 0 + /* Unregister the "of" device of this dp */ + rtnl_lock(); + unregister_netdevice(&dp->dev); + rtnl_unlock(); +#endif + + /* Drop references to DP. */ + list_for_each_entry_safe (p, n, &dp->port_list, node) + del_switch_port(p); + del_timer_sync(&dp->timer); + rcu_assign_pointer(dps[dp->dp_idx], NULL); + + /* Wait until no longer in use, then destroy it. */ + synchronize_rcu(); + chain_destroy(dp->chain); + kfree(dp); + module_put(THIS_MODULE); +} + +static void dp_timer_handler(unsigned long arg) +{ + struct datapath *dp = (struct datapath *) arg; +#if 1 + chain_timeout(dp->chain); +#else + int count = chain_timeout(dp->chain); + chain_print_stats(dp->chain); + if (count) + printk("%d flows timed out\n", count); +#endif + mod_timer(&dp->timer, round_jiffies(jiffies + (EXPIRE_SECS * HZ))); +} + +/* + * Used as br_handle_frame_hook. (Cannot run bridge at the same time, even on + * different set of devices!) Returns 0 if *pskb should be processed further, + * 1 if *pskb is handled. */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) +/* Called with rcu_read_lock. */ +static struct sk_buff *dp_frame_hook(struct net_bridge_port *p, + struct sk_buff *skb) +{ + struct ethhdr *eh = eth_hdr(skb); + struct sk_buff *skb_local = NULL; + + + if (compare_ether_addr(eh->h_dest, skb->dev->dev_addr) == 0) + return skb; + + if (is_broadcast_ether_addr(eh->h_dest) + || is_multicast_ether_addr(eh->h_dest) + || is_local_ether_addr(eh->h_dest)) + skb_local = skb_clone(skb, GFP_ATOMIC); + + /* Push the Ethernet header back on. */ + if (skb->protocol == htons(ETH_P_8021Q)) + skb_push(skb, VLAN_ETH_HLEN); + else + skb_push(skb, ETH_HLEN); + + fwd_port_input(p->dp->chain, skb, p->port_no); + + return skb_local; +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb) +{ + /* Push the Ethernet header back on. 
*/ + if ((*pskb)->protocol == htons(ETH_P_8021Q)) + skb_push(*pskb, VLAN_ETH_HLEN); + else + skb_push(*pskb, ETH_HLEN); + + fwd_port_input(p->dp->chain, *pskb, p->port_no); + return 1; +} +#else +/* NB: This has only been tested on 2.4.35 */ + +/* Called without any locks (?) */ +static void dp_frame_hook(struct sk_buff *skb) +{ + struct net_bridge_port *p = skb->dev->br_port; + + /* Push the Ethernet header back on. */ + if (skb->protocol == htons(ETH_P_8021Q)) + skb_push(skb, VLAN_ETH_HLEN); + else + skb_push(skb, ETH_HLEN); + + if (p) { + rcu_read_lock(); + fwd_port_input(p->dp->chain, skb, p->port_no); + rcu_read_unlock(); + } else + kfree_skb(skb); +} +#endif + +/* Forwarding output path. + * Based on net/bridge/br_forward.c. */ + +/* Don't forward packets to originating port or with flooding disabled */ +static inline int should_deliver(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + if ((skb->dev == p->dev) || (p->flags & BRIDGE_PORT_NO_FLOOD)) { + return 0; + } + + return 1; +} + +static inline unsigned packet_length(const struct sk_buff *skb) +{ + int length = skb->len - ETH_HLEN; + if (skb->protocol == htons(ETH_P_8021Q)) + length -= VLAN_HLEN; + return length; +} + +static int +flood(struct datapath *dp, struct sk_buff *skb) +{ + struct net_bridge_port *p; + int prev_port; + + prev_port = -1; + list_for_each_entry_rcu (p, &dp->port_list, node) { + if (!should_deliver(p, skb)) + continue; + if (prev_port != -1) { + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) { + kfree_skb(skb); + return -ENOMEM; + } + dp_output_port(dp, clone, prev_port); + } + prev_port = p->port_no; + } + if (prev_port != -1) + dp_output_port(dp, skb, prev_port); + else + kfree_skb(skb); + + return 0; +} + +/* Marks 'skb' as having originated from 'in_port' in 'dp'. + FIXME: how are devices reference counted? 
+ */
+int dp_set_origin(struct datapath *dp, uint16_t in_port,
+		  struct sk_buff *skb)
+{
+	struct net_bridge_port *p;
+
+	if (in_port >= OFPP_MAX)
+		return -ENOENT;
+
+	/* Read the slot once so the NULL test and the use see the same
+	 * port. */
+	p = dp->ports[in_port];
+	if (p == NULL)
+		return -ENOENT;
+
+	skb->dev = p->dev;
+	return 0;
+}
+
+/* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'.
+ *
+ * 'out_port' may be a port number below OFPP_MAX, OFPP_FLOOD, or
+ * OFPP_CONTROLLER.  For the two special values the result of flood() or
+ * dp_output_control() is returned.  Otherwise returns the length of the
+ * packet if it was queued for transmission, -E2BIG if it exceeds the
+ * output device's MTU, or -ENOENT if 'out_port' does not name an existing
+ * port.  'skb' is consumed in every case.
+ */
+int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port)
+{
+	struct net_bridge_port *p;
+	int len;
+
+	/* Check before the first dereference of 'skb'. */
+	BUG_ON(!skb);
+
+	/* Saved now because 'skb' must not be touched after it is queued. */
+	len = skb->len;
+
+	if (out_port == OFPP_FLOOD)
+		return flood(dp, skb);
+	else if (out_port == OFPP_CONTROLLER)
+		return dp_output_control(dp, skb, fwd_save_skb(skb), 0,
+					 OFPR_ACTION);
+	else if (out_port < 0 || out_port >= OFPP_MAX)
+		goto bad_port;
+
+	p = dp->ports[out_port];
+	if (p == NULL)
+		goto bad_port;
+
+	skb->dev = p->dev;
+	if (packet_length(skb) > skb->dev->mtu) {
+		/* Rate-limited like the bad-port message below: this runs
+		 * once per packet. */
+		if (net_ratelimit())
+			printk("dropped over-mtu packet: %d > %d\n",
+			       packet_length(skb), skb->dev->mtu);
+		kfree_skb(skb);
+		return -E2BIG;
+	}
+
+	dev_queue_xmit(skb);
+
+	return len;
+
+bad_port:
+	kfree_skb(skb);
+	if (net_ratelimit())
+		printk("can't forward to bad port %d\n", out_port);
+	return -ENOENT;
+}
+
+/* Takes ownership of 'skb' and transmits it to 'dp''s control path.  If
+ * 'buffer_id' != -1, then only the first 64 bytes of 'skb' are sent;
+ * otherwise, all of 'skb' is sent.  'reason' indicates why 'skb' is being
+ * sent. 'max_len' sets the maximum number of bytes that the caller
+ * wants to be sent; a value of 0 indicates the entire packet should be
+ * sent. */
+int
+dp_output_control(struct datapath *dp, struct sk_buff *skb,
+			   uint32_t buffer_id, size_t max_len, int reason)
+{
+	/* FIXME? packet_rcv_spkt in net/packet/af_packet.c does some stuff
+	   that we should possibly be doing here too. */
+	/* FIXME?  Can we avoid creating a new skbuff in the case where we
+	 * forward the whole packet? 
*/ + struct sk_buff *f_skb; + struct nlattr *attr; + struct ofp_packet_in *opi; + size_t opi_len; + size_t len, fwd_len; + void *data; + int err = -ENOMEM; + + fwd_len = skb->len; + if ((buffer_id != (uint32_t) -1) && max_len) + fwd_len = min(fwd_len, max_len); + + len = nla_total_size(offsetof(struct ofp_packet_in, data) + fwd_len) + + nla_total_size(sizeof(uint32_t)); + + f_skb = genlmsg_new(len, GFP_ATOMIC); + if (!f_skb) + goto error_free_skb; + + data = genlmsg_put(f_skb, 0, 0, &dp_genl_family, 0, + DP_GENL_C_OPENFLOW); + if (data == NULL) + goto error_free_f_skb; + + NLA_PUT_U32(f_skb, DP_GENL_A_DP_IDX, dp->dp_idx); + + opi_len = offsetof(struct ofp_packet_in, data) + fwd_len; + attr = nla_reserve(f_skb, DP_GENL_A_OPENFLOW, opi_len); + if (!attr) + goto error_free_f_skb; + opi = nla_data(attr); + opi->header.version = OFP_VERSION; + opi->header.type = OFPT_PACKET_IN; + opi->header.length = htons(opi_len); + opi->header.xid = htonl(0); + + opi->buffer_id = htonl(buffer_id); + opi->total_len = htons(skb->len); + opi->in_port = htons(skb->dev->br_port->port_no); + opi->reason = reason; + SKB_LINEAR_ASSERT(skb); + memcpy(opi->data, skb_mac_header(skb), fwd_len); + + err = genlmsg_end(f_skb, data); + if (err < 0) + goto error_free_f_skb; + + err = genlmsg_multicast(f_skb, 0, mc_group.id, GFP_ATOMIC); + if (err && net_ratelimit()) + printk(KERN_WARNING "dp_output_control: genlmsg_multicast failed: %d\n", err); + + kfree_skb(skb); + + return err; + +nla_put_failure: +error_free_f_skb: + nlmsg_free(f_skb); +error_free_skb: + kfree_skb(skb); + if (net_ratelimit()) + printk(KERN_ERR "dp_output_control: failed to send: %d\n", err); + return err; +} + +static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc) +{ + desc->port_no = htons(p->port_no); + strncpy(desc->name, p->dev->name, OFP_MAX_PORT_NAME_LEN); + desc->name[OFP_MAX_PORT_NAME_LEN-1] = '\0'; + memcpy(desc->hw_addr, p->dev->dev_addr, ETH_ALEN); + desc->flags = htonl(p->flags); + 
desc->features = 0; + desc->speed = 0; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,24) + if (p->dev->ethtool_ops && p->dev->ethtool_ops->get_settings) { + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; + + if (!p->dev->ethtool_ops->get_settings(p->dev, &ecmd)) { + if (ecmd.supported & SUPPORTED_10baseT_Half) + desc->features |= OFPPF_10MB_HD; + if (ecmd.supported & SUPPORTED_10baseT_Full) + desc->features |= OFPPF_10MB_FD; + if (ecmd.supported & SUPPORTED_100baseT_Half) + desc->features |= OFPPF_100MB_HD; + if (ecmd.supported & SUPPORTED_100baseT_Full) + desc->features |= OFPPF_100MB_FD; + if (ecmd.supported & SUPPORTED_1000baseT_Half) + desc->features |= OFPPF_1GB_HD; + if (ecmd.supported & SUPPORTED_1000baseT_Full) + desc->features |= OFPPF_1GB_FD; + /* 10Gbps half-duplex doesn't exist... */ + if (ecmd.supported & SUPPORTED_10000baseT_Full) + desc->features |= OFPPF_10GB_FD; + + desc->features = htonl(desc->features); + desc->speed = htonl(ecmd.speed); + } + } +#endif +} + +static int +fill_data_hello(struct datapath *dp, struct ofp_data_hello *odh) +{ + struct net_bridge_port *p; + int port_count = 0; + + odh->header.version = OFP_VERSION; + odh->header.type = OFPT_DATA_HELLO; + odh->header.xid = htonl(0); + odh->datapath_id = cpu_to_be64(dp->id); + + odh->n_exact = htonl(2 * TABLE_HASH_MAX_FLOWS); + odh->n_mac_only = htonl(TABLE_MAC_MAX_FLOWS); + odh->n_compression = 0; /* Not supported */ + odh->n_general = htonl(TABLE_LINEAR_MAX_FLOWS); + odh->buffer_mb = htonl(UINT32_MAX); + odh->n_buffers = htonl(N_PKT_BUFFERS); + odh->capabilities = htonl(OFP_SUPPORTED_CAPABILITIES); + odh->actions = htonl(OFP_SUPPORTED_ACTIONS); + odh->miss_send_len = htons(dp->miss_send_len); + + list_for_each_entry_rcu (p, &dp->port_list, node) { + fill_port_desc(p, &odh->ports[port_count]); + port_count++; + } + + return port_count; +} + +int +dp_send_hello(struct datapath *dp) +{ + struct sk_buff *skb; + struct nlattr *attr; + struct ofp_data_hello *odh; + size_t odh_max_len, 
odh_len, port_max_len, len; + void *data; + int err = -ENOMEM; + int port_count; + + + /* Overallocate, since we can't reliably determine the number of + * ports a priori. */ + port_max_len = sizeof(struct ofp_phy_port) * OFPP_MAX; + + len = nla_total_size(sizeof(*odh) + port_max_len) + + nla_total_size(sizeof(uint32_t)); + + skb = genlmsg_new(len, GFP_ATOMIC); + if (!skb) { + if (net_ratelimit()) + printk("dp_send_hello: genlmsg_new failed\n"); + goto error; + } + + data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0, + DP_GENL_C_OPENFLOW); + if (data == NULL) { + if (net_ratelimit()) + printk("dp_send_hello: genlmsg_put failed\n"); + goto error; + } + + NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp->dp_idx); + + odh_max_len = sizeof(*odh) + port_max_len; + attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, odh_max_len); + if (!attr) { + if (net_ratelimit()) + printk("dp_send_hello: nla_reserve failed\n"); + goto error; + } + odh = nla_data(attr); + port_count = fill_data_hello(dp, odh); + + /* Only now that we know how many ports we've added can we say + * say something about the length. 
+ */
+	odh_len = sizeof(*odh) + (sizeof(struct ofp_phy_port) * port_count);
+	odh->header.length = htons(odh_len);
+
+	/* Take back the unused part that was reserved */
+	nla_unreserve(skb, attr, (odh_max_len - odh_len));
+
+	err = genlmsg_end(skb, data);
+	if (err < 0) {
+		if (net_ratelimit())
+			printk("dp_send_hello: genlmsg_end failed\n");
+		goto error;
+	}
+
+	err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
+	if (err && net_ratelimit())
+		printk(KERN_WARNING "dp_send_hello: genlmsg_multicast failed: %d\n", err);
+
+	return err;
+
+nla_put_failure:
+error:
+	kfree_skb(skb);
+	if (net_ratelimit())
+		printk(KERN_ERR "dp_send_hello: failed to send: %d\n", err);
+	return err;
+}
+
+/* Applies the flags carried in 'opp' to the port of 'dp' that 'opp' names.
+ *
+ * The multi-byte fields of 'opp' are in network byte order.  Returns 0 on
+ * success, or -1 if the port number is not below OFPP_MAX, no port is
+ * installed under that number, or the port's hardware address differs from
+ * the one in 'opp'. */
+int
+dp_update_port_flags(struct datapath *dp, const struct ofp_phy_port *opp)
+{
+	struct net_bridge_port *p;
+	uint16_t port_no = ntohs(opp->port_no);
+
+	/* The number comes from the message, so it must be range-checked
+	 * before it is used as an index into dp->ports[]. */
+	if (port_no >= OFPP_MAX)
+		return -1;
+
+	p = dp->ports[port_no];
+
+	/* Make sure the port id hasn't changed since this was sent */
+	if (!p || memcmp(opp->hw_addr, p->dev->dev_addr, ETH_ALEN) != 0)
+		return -1;
+
+	p->flags = ntohl(opp->flags);
+
+	return 0;
+}
+
+
+static int
+send_port_status(struct net_bridge_port *p, uint8_t status)
+{
+	struct sk_buff *skb;
+	struct nlattr *attr;
+	struct ofp_port_status *ops;
+	void *data;
+	int err = -ENOMEM;
+
+
+	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb) {
+		if (net_ratelimit())
+			printk("send_port_status: genlmsg_new failed\n");
+		goto error;
+	}
+
+	data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
+			   DP_GENL_C_OPENFLOW);
+	if (data == NULL) {
+		if (net_ratelimit())
+			printk("send_port_status: genlmsg_put failed\n");
+		goto error;
+	}
+
+	NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, p->dp->dp_idx);
+
+	attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, sizeof(*ops));
+	if (!attr) {
+		if (net_ratelimit())
+			printk("send_port_status: nla_reserve failed\n");
+		goto error;
+	}
+
+	ops = nla_data(attr);
+	ops->header.version = OFP_VERSION;
+	ops->header.type    = OFPT_PORT_STATUS;
+	ops->header.length  = htons(sizeof(*ops));
+	ops->header.xid     = htonl(0);
+
+	
ops->reason = status; + fill_port_desc(p, &ops->desc); + + err = genlmsg_end(skb, data); + if (err < 0) { + if (net_ratelimit()) + printk("send_port_status: genlmsg_end failed\n"); + goto error; + } + + err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC); + if (err && net_ratelimit()) + printk(KERN_WARNING "send_port_status: genlmsg_multicast failed: %d\n", err); + + return err; + +nla_put_failure: +error: + kfree_skb(skb); + if (net_ratelimit()) + printk(KERN_ERR "send_port_status: failed to send: %d\n", err); + return err; +} + +int +dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow) +{ + struct sk_buff *skb; + struct nlattr *attr; + struct ofp_flow_expired *ofe; + void *data; + unsigned long duration_j; + int err = -ENOMEM; + + + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) { + if (net_ratelimit()) + printk("dp_send_flow_expired: genlmsg_new failed\n"); + goto error; + } + + data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0, + DP_GENL_C_OPENFLOW); + if (data == NULL) { + if (net_ratelimit()) + printk("dp_send_flow_expired: genlmsg_put failed\n"); + goto error; + } + + NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp->dp_idx); + + attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, sizeof(*ofe)); + if (!attr) { + if (net_ratelimit()) + printk("dp_send_flow_expired: nla_reserve failed\n"); + goto error; + } + + ofe = nla_data(attr); + ofe->header.version = OFP_VERSION; + ofe->header.type = OFPT_FLOW_EXPIRED; + ofe->header.length = htons(sizeof(*ofe)); + ofe->header.xid = htonl(0); + + flow_fill_match(&ofe->match, &flow->key); + duration_j = (flow->timeout - HZ * flow->max_idle) - flow->init_time; + ofe->duration = htonl(duration_j / HZ); + ofe->packet_count = cpu_to_be64(flow->packet_count); + ofe->byte_count = cpu_to_be64(flow->byte_count); + + err = genlmsg_end(skb, data); + if (err < 0) { + if (net_ratelimit()) + printk("dp_send_flow_expired: genlmsg_end failed\n"); + goto error; + } + + err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC); + if 
(err && net_ratelimit()) + printk(KERN_WARNING "send_flow_expired: genlmsg_multicast failed: %d\n", err); + + return err; + +nla_put_failure: +error: + kfree_skb(skb); + if (net_ratelimit()) + printk(KERN_ERR "send_flow_expired: failed to send: %d\n", err); + return err; +} + +/* Generic Netlink interface. + * + * See netlink(7) for an introduction to netlink. See + * http://linux-net.osdl.org/index.php/Netlink for more information and + * pointers on how to work with netlink and Generic Netlink in the kernel and + * in userspace. */ + +static struct genl_family dp_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = DP_GENL_FAMILY_NAME, + .version = 1, + .maxattr = DP_GENL_A_MAX, +}; + +/* Attribute policy: what each attribute may contain. */ +static struct nla_policy dp_genl_policy[DP_GENL_A_MAX + 1] = { + [DP_GENL_A_DP_IDX] = { .type = NLA_U32 }, + [DP_GENL_A_MC_GROUP] = { .type = NLA_U32 }, + [DP_GENL_A_PORTNAME] = { .type = NLA_STRING } +}; + +static int dp_genl_add(struct sk_buff *skb, struct genl_info *info) +{ + if (!info->attrs[DP_GENL_A_DP_IDX]) + return -EINVAL; + + return new_dp(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); +} + +static struct genl_ops dp_genl_ops_add_dp = { + .cmd = DP_GENL_C_ADD_DP, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
+ */
+	.policy = dp_genl_policy,
+	.doit = dp_genl_add,
+	.dumpit = NULL,
+};
+
+/* Returns the datapath installed at index 'dp_idx', or NULL if 'dp_idx' is
+ * outside [0, DP_MAX) or no datapath is installed there.
+ *
+ * The slot is read with rcu_dereference(); per the locking note on 'dps',
+ * callers hold rcu_read_lock or dp_mutex. */
+struct datapath *dp_get(int dp_idx)
+{
+	/* dps[] has DP_MAX entries, so DP_MAX itself is already out of
+	 * range. */
+	if (dp_idx < 0 || dp_idx >= DP_MAX)
+		return NULL;
+	return rcu_dereference(dps[dp_idx]);
+}
+
+/* Generic Netlink handler for DP_GENL_C_DEL_DP: destroys the datapath named
+ * by the DP_GENL_A_DP_IDX attribute.  Returns 0 on success, -EINVAL if the
+ * attribute is missing, or -ENOENT if no such datapath exists. */
+static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath *dp;
+	int err;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX])
+		return -EINVAL;
+
+	mutex_lock(&dp_mutex);
+	dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
+	if (!dp)
+		err = -ENOENT;
+	else {
+		del_dp(dp);
+		err = 0;
+	}
+	mutex_unlock(&dp_mutex);
+	return err;
+}
+
+static struct genl_ops dp_genl_ops_del_dp = {
+	.cmd = DP_GENL_C_DEL_DP,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	.policy = dp_genl_policy,
+	.doit = dp_genl_del,
+	.dumpit = NULL,
+};
+
+/* Queries a datapath for related information.  Currently the only relevant
+ * information is the datapath's multicast group ID.  Really we want one
+ * multicast group per datapath, but because of locking issues[*] we can't
+ * easily get one.  Thus, every datapath will currently return the same
+ * global multicast group ID, but in the future it would be nice to fix that.
+ *
+ * [*] dp_genl_add, to add a new datapath, is called under the genl_lock
+ *	 mutex, and genl_register_mc_group, called to acquire a new multicast
+ *	 group ID, also acquires genl_lock, thus deadlock. 
+ */
+static int dp_genl_query(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *ans_skb;
+	void *data;
+	int dp_idx;
+	int exists;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX])
+		return -EINVAL;
+	dp_idx = nla_get_u32((info->attrs[DP_GENL_A_DP_IDX]));
+
+	/* Only the existence of the datapath matters: the reply carries the
+	 * index and the global multicast group, nothing read from the
+	 * datapath itself.  So the RCU read side ends before the GFP_KERNEL
+	 * allocation below, which may sleep. */
+	rcu_read_lock();
+	exists = dp_get(dp_idx) != NULL;
+	rcu_read_unlock();
+	if (!exists)
+		return -ENOENT;
+
+	ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!ans_skb)
+		return -ENOMEM;
+
+	data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
+				 0, DP_GENL_C_QUERY_DP);
+	if (data == NULL)
+		goto nla_put_failure;
+	NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp_idx);
+	NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, mc_group.id);
+
+	genlmsg_end(ans_skb, data);
+
+	/* The reply path owns 'ans_skb' from here on, whether or not
+	 * delivery succeeds, so it must not be freed again on error. */
+	return genlmsg_reply(ans_skb, info);
+
+nla_put_failure:
+	kfree_skb(ans_skb);
+	return -ENOMEM;
+}
+
+/*
+ * Fill flow entry for nl flow query.  Called with rcu_lock
+ *
+ */
+static
+int
+dp_fill_flow(struct ofp_flow_mod* ofm, struct swt_iterator* iter)
+{
+	ofm->header.version  = OFP_VERSION;
+	ofm->header.type     = OFPT_FLOW_MOD;
+	ofm->header.length   = htons(sizeof(struct ofp_flow_mod)
+				+ sizeof(ofm->actions[0]));
+	ofm->header.xid      = htonl(0);
+
+	ofm->match.wildcards = htons(iter->flow->key.wildcards);
+	ofm->match.in_port   = iter->flow->key.in_port;
+	ofm->match.dl_vlan   = iter->flow->key.dl_vlan;
+	memcpy(ofm->match.dl_src, iter->flow->key.dl_src, ETH_ALEN);
+	memcpy(ofm->match.dl_dst, iter->flow->key.dl_dst, ETH_ALEN);
+	ofm->match.dl_type   = iter->flow->key.dl_type;
+	ofm->match.nw_src    = iter->flow->key.nw_src;
+	ofm->match.nw_dst    = iter->flow->key.nw_dst;
+	ofm->match.nw_proto  = iter->flow->key.nw_proto;
+	ofm->match.tp_src    = iter->flow->key.tp_src;
+	ofm->match.tp_dst    = iter->flow->key.tp_dst;
+	ofm->group_id        = iter->flow->group_id;
+	ofm->max_idle        = iter->flow->max_idle;
+	/* TODO support multiple actions  */
+	ofm->actions[0]      = 
iter->flow->actions[0];
+
+	return 0;
+}
+
+/* Generic Netlink handler for DP_GENL_C_SHOW_DP: replies with an
+ * ofp_data_hello, in a DP_GENL_A_DP_INFO attribute, describing the datapath
+ * named by the DP_GENL_A_DP_IDX attribute.
+ *
+ * Returns 0 on success, -EINVAL if the attribute is missing, -ENOENT if no
+ * such datapath exists, -ENOMEM if the reply cannot be built, or the error
+ * from genlmsg_reply(). */
+static int dp_genl_show(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath *dp;
+	int err = -ENOMEM;
+	struct sk_buff *ans_skb = NULL;
+	void *data;
+	struct nlattr *attr;
+	struct ofp_data_hello *odh;
+	size_t odh_max_len, odh_len, port_max_len, len;
+	int port_count;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX])
+		return -EINVAL;
+
+	mutex_lock(&dp_mutex);
+	dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
+	if (!dp) {
+		/* Same error as dp_genl_del and dp_genl_query report for a
+		 * missing datapath. */
+		err = -ENOENT;
+		goto error;
+	}
+
+	/* Overallocate, since we can't reliably determine the number of
+	 * ports a priori. */
+	port_max_len = sizeof(struct ofp_phy_port) * OFPP_MAX;
+
+	len = nla_total_size(sizeof(*odh) + port_max_len)
+			+ nla_total_size(sizeof(uint32_t));
+
+	/* genlmsg_new(), as used by dp_send_hello(), also leaves room for the
+	 * Generic Netlink header that genlmsg_put_reply() writes. */
+	ans_skb = genlmsg_new(len, GFP_KERNEL);
+	if (!ans_skb)
+		goto error;
+
+	data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
+				 0, DP_GENL_C_SHOW_DP);
+	if (data == NULL)
+		goto error;
+
+	NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp->dp_idx);
+
+	odh_max_len = sizeof(*odh) + port_max_len;
+	attr = nla_reserve(ans_skb, DP_GENL_A_DP_INFO, odh_max_len);
+	if (!attr)
+		goto error;
+	odh = nla_data(attr);
+	port_count = fill_data_hello(dp, odh);
+
+	/* Only now that we know how many ports we've added can we say
+	 * say something about the length. */
+	odh_len = sizeof(*odh) + (sizeof(struct ofp_phy_port) * port_count);
+	odh->header.length = htons(odh_len);
+
+	/* Take back the unused part that was reserved */
+	nla_unreserve(ans_skb, attr, (odh_max_len - odh_len));
+
+	genlmsg_end(ans_skb, data);
+	err = genlmsg_reply(ans_skb, info);
+	/* The reply path owns 'ans_skb' whether or not delivery succeeded,
+	 * so never free it below (dp_dump_table does the same). */
+	ans_skb = NULL;
+
+error:
+nla_put_failure:
+	if (ans_skb)
+		kfree_skb(ans_skb);
+	mutex_unlock(&dp_mutex);
+	return err;
+}
+
+static struct genl_ops dp_genl_ops_show_dp = {
+	.cmd = DP_GENL_C_SHOW_DP,
+	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ + .policy = dp_genl_policy, + .doit = dp_genl_show, + .dumpit = NULL, +}; + +/* Convenience function */ +static +void* +dp_init_nl_flow_msg(uint32_t dp_idx, uint16_t table_idx, + struct genl_info *info, struct sk_buff* skb) +{ + void* data; + + data = genlmsg_put_reply(skb, info, &dp_genl_family, 0, + DP_GENL_C_QUERY_FLOW); + if (data == NULL) + return NULL; + NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp_idx); + NLA_PUT_U16(skb, DP_GENL_A_TABLEIDX, table_idx); + + return data; + +nla_put_failure: + return NULL; +} + +/* Iterate through the specified table and send all flow entries over + * netlink to userspace. Each flow message has the following format: + * + * 32bit dpix + * 16bit tabletype + * 32bit number of flows + * openflow-flow-entries + * + * The full table may require multiple messages. A message with 0 flows + * signifies end-of message. + */ + +static +int +dp_dump_table(struct datapath *dp, uint16_t table_idx, struct genl_info *info, struct ofp_flow_mod* matchme) +{ + struct sk_buff *skb = 0; + struct sw_table *table = 0; + struct swt_iterator iter; + struct sw_flow_key in_flow; + struct nlattr *attr; + int count = 0, sum_count = 0; + void *data; + uint8_t* ofm_ptr = 0; + struct nlattr *num_attr; + int err = -ENOMEM; + + table = dp->chain->tables[table_idx]; + if ( table == NULL ) { + dprintk("dp::dp_dump_table error, non-existant table at position %d\n", table_idx); + return -EINVAL; + } + + if (!table->iterator(table, &iter)) { + dprintk("dp::dp_dump_table couldn't initialize empty table iterator\n"); + return -ENOMEM; + } + + while (iter.flow) { + + /* verify that we can fit all NL_FLOWS_PER_MESSAGE in a single + * sk_buf */ + if( (sizeof(dp_genl_family) + sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint32_t) + + (NL_FLOWS_PER_MESSAGE * sizeof(struct ofp_flow_mod))) > (8192 - 64)){ + dprintk("dp::dp_dump_table NL_FLOWS_PER_MESSAGE may cause overrun in skbuf\n"); + return -ENOMEM; + } + + skb = nlmsg_new(8192 - 64, GFP_ATOMIC); + if (skb == NULL) { + 
return -ENOMEM; + } + + data = dp_init_nl_flow_msg(dp->dp_idx, table_idx, info, skb); + if (data == NULL){ + err= -ENOMEM; + goto error_free_skb; + } + + /* reserve space to put the number of flows for this message, to + * be filled after the loop*/ + num_attr = nla_reserve(skb, DP_GENL_A_NUMFLOWS, sizeof(uint32_t)); + if(!num_attr){ + err = -ENOMEM; + goto error_free_skb; + } + + /* Only load NL_FLOWS_PER_MESSAGE flows at a time */ + attr = nla_reserve(skb, DP_GENL_A_FLOW, + (sizeof(struct ofp_flow_mod) + sizeof(struct ofp_action)) * NL_FLOWS_PER_MESSAGE); + if (!attr){ + err = -ENOMEM; + goto error_free_skb; + } + + /* internal loop to fill NL_FLOWS_PER_MESSAGE flows */ + ofm_ptr = nla_data(attr); + flow_extract_match(&in_flow, &matchme->match); + while (iter.flow && count < NL_FLOWS_PER_MESSAGE) { + if(flow_matches(&in_flow, &iter.flow->key)){ + if((err = dp_fill_flow((struct ofp_flow_mod*)ofm_ptr, &iter))) + goto error_free_skb; + count++; + /* TODO support multiple actions */ + ofm_ptr += sizeof(struct ofp_flow_mod) + sizeof(struct ofp_action); + } + table->iterator_next(&iter); + } + + *((uint32_t*)nla_data(num_attr)) = count; + genlmsg_end(skb, data); + + sum_count += count; + count = 0; + + err = genlmsg_unicast(skb, info->snd_pid); + skb = 0; + } + + /* send a sentinal message saying we're done */ + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (skb == NULL) { + return -ENOMEM; + } + data = dp_init_nl_flow_msg(dp->dp_idx, table_idx, info, skb); + if (data == NULL){ + err= -ENOMEM; + goto error_free_skb; + } + + NLA_PUT_U32(skb, DP_GENL_A_NUMFLOWS, 0); + /* dummy flow so nl doesn't complain */ + attr = nla_reserve(skb, DP_GENL_A_FLOW, sizeof(struct ofp_flow_mod)); + if (!attr){ + err = -ENOMEM; + goto error_free_skb; + } + genlmsg_end(skb, data); + err = genlmsg_reply(skb, info); skb = 0; + +nla_put_failure: +error_free_skb: + if(skb) + kfree_skb(skb); + return err; +} + +/* Helper function to query_table which creates and sends a message packed with + 
* table stats. Message form is:
+ *
+ * u32 DP_IDX
+ * u32 NUM_TABLES
+ * OFP_TABLE (list of OFP_TABLES)
+ *
+ * Returns 0 on success, -ENOMEM if the message cannot be allocated or
+ * filled, or the error from genlmsg_reply().
+ */
+
+static
+int
+dp_dump_table_stats(struct datapath *dp, int dp_idx, struct genl_info *info)
+{
+	struct sk_buff *skb;
+	struct ofp_table *ot;
+	struct nlattr *attr;
+	struct sw_table_stats stats;
+	void *data;
+	int err = -ENOMEM;
+	int i;
+	int nt = dp->chain->n_tables;
+	size_t tables_len = sizeof(struct ofp_table) * nt;
+
+	/* Generic netlink header plus three attributes (u32 IDX, u32
+	 * NUMTABLES, list-of-tables); each attribute needs room for its
+	 * own header and padding, not just its payload. */
+	skb = nlmsg_new(GENL_HDRLEN + dp_genl_family.hdrsize
+			+ nla_total_size(sizeof(uint32_t))
+			+ nla_total_size(sizeof(uint32_t))
+			+ nla_total_size(tables_len), GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	data = genlmsg_put_reply(skb, info, &dp_genl_family, 0,
+				 DP_GENL_C_QUERY_TABLE);
+	if (data == NULL)
+		goto error_free_skb;	/* 'skb' must not be leaked. */
+
+	NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp_idx);
+	NLA_PUT_U32(skb, DP_GENL_A_NUMTABLES, nt);
+
+	/* ... we assume that all tables can fit in a single message.
+	 * Probably a reasonable assumption seeing that we only have
+	 * 3 atm */
+	attr = nla_reserve(skb, DP_GENL_A_TABLE, tables_len);
+	if (!attr)
+		goto error_free_skb;
+
+	ot = nla_data(attr);
+
+	/* Clear the whole array first so that bytes we do not assign
+	 * below are not sent to userspace uninitialised. */
+	memset(ot, 0, tables_len);
+
+	for (i = 0; i < nt; ++i) {
+		dp->chain->tables[i]->stats(dp->chain->tables[i], &stats);
+		ot->header.version = OFP_VERSION;
+		ot->header.type = OFPT_TABLE;
+		ot->header.length = htons(sizeof(struct ofp_table));
+		ot->header.xid = htonl(0);
+
+		strncpy(ot->name, stats.name, OFP_MAX_TABLE_NAME_LEN);
+		ot->table_id = htons(i);
+		ot->n_flows = htonl(stats.n_flows);
+		ot->max_flows = htonl(stats.max_flows);
+		ot++;
+	}
+
+	genlmsg_end(skb, data);
+	/* genlmsg_reply() takes ownership of 'skb' in every case. */
+	err = genlmsg_reply(skb, info);
+	skb = NULL;
+
+nla_put_failure:
+error_free_skb:
+	if (skb)
+		kfree_skb(skb);
+	return err;
+}
+
+/*
+ * Queries a datapath for flow-table statistics
+ */
+
+
+static int dp_genl_table_query(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath* dp;
+	int err = 0;
+
+	if (!info->attrs[DP_GENL_A_DP_IDX]) {
+		dprintk("dp::dp_genl_table_query received message with missing attributes\n");
+		return -EINVAL;
+	}
+
+
rcu_read_lock(); + dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); + if (!dp) { + err = -ENOENT; + goto err_out; + } + + err = dp_dump_table_stats(dp, nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]), info); + +err_out: + rcu_read_unlock(); + return err; +} + +/* + * Queries a datapath for flow-table entries. + */ + +static int dp_genl_flow_query(struct sk_buff *skb, struct genl_info *info) +{ + struct datapath* dp; + struct ofp_flow_mod* ofm; + u16 table_idx; + int err = 0; + + if (!info->attrs[DP_GENL_A_DP_IDX] + || !info->attrs[DP_GENL_A_TABLEIDX] + || !info->attrs[DP_GENL_A_FLOW]) { + dprintk("dp::dp_genl_flow_query received message with missing attributes\n"); + return -EINVAL; + } + + rcu_read_lock(); + dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); + if (!dp) { + err = -ENOENT; + goto err_out; + } + + table_idx = nla_get_u16(info->attrs[DP_GENL_A_TABLEIDX]); + + if (dp->chain->n_tables <= table_idx){ + printk("table index %d invalid (dp has %d tables)\n", + table_idx, dp->chain->n_tables); + err = -EINVAL; + goto err_out; + } + + ofm = nla_data(info->attrs[DP_GENL_A_FLOW]); + err = dp_dump_table(dp, table_idx, info, ofm); + +err_out: + rcu_read_unlock(); + return err; +} + +static struct nla_policy dp_genl_flow_policy[DP_GENL_A_MAX + 1] = { + [DP_GENL_A_DP_IDX] = { .type = NLA_U32 }, + [DP_GENL_A_TABLEIDX] = { .type = NLA_U16 }, + [DP_GENL_A_NUMFLOWS] = { .type = NLA_U32 }, +}; + +static struct genl_ops dp_genl_ops_query_flow = { + .cmd = DP_GENL_C_QUERY_FLOW, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = dp_genl_flow_policy, + .doit = dp_genl_flow_query, + .dumpit = NULL, +}; + +static struct nla_policy dp_genl_table_policy[DP_GENL_A_MAX + 1] = { + [DP_GENL_A_DP_IDX] = { .type = NLA_U32 }, +}; + +static struct genl_ops dp_genl_ops_query_table = { + .cmd = DP_GENL_C_QUERY_TABLE, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ + .policy = dp_genl_table_policy, + .doit = dp_genl_table_query, + .dumpit = NULL, +}; + + +static struct genl_ops dp_genl_ops_query_dp = { + .cmd = DP_GENL_C_QUERY_DP, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = dp_genl_policy, + .doit = dp_genl_query, + .dumpit = NULL, +}; + +static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info) +{ + struct datapath *dp; + struct net_device *port; + int err; + + if (!info->attrs[DP_GENL_A_DP_IDX] || !info->attrs[DP_GENL_A_PORTNAME]) + return -EINVAL; + + /* Get datapath. */ + mutex_lock(&dp_mutex); + dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); + if (!dp) { + err = -ENOENT; + goto out; + } + + /* Get interface to add/remove. */ + port = dev_get_by_name(&init_net, + nla_data(info->attrs[DP_GENL_A_PORTNAME])); + if (!port) { + err = -ENOENT; + goto out; + } + + /* Execute operation. */ + if (info->genlhdr->cmd == DP_GENL_C_ADD_PORT) + err = add_switch_port(dp, port); + else { + if (port->br_port == NULL || port->br_port->dp != dp) { + err = -ENOENT; + goto out_put; + } + err = del_switch_port(port->br_port); + } + +out_put: + dev_put(port); +out: + mutex_unlock(&dp_mutex); + return err; +} + +static struct genl_ops dp_genl_ops_add_port = { + .cmd = DP_GENL_C_ADD_PORT, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = dp_genl_policy, + .doit = dp_genl_add_del_port, + .dumpit = NULL, +}; + +static struct genl_ops dp_genl_ops_del_port = { + .cmd = DP_GENL_C_DEL_PORT, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ + .policy = dp_genl_policy, + .doit = dp_genl_add_del_port, + .dumpit = NULL, +}; + +static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *va = info->attrs[DP_GENL_A_OPENFLOW]; + struct datapath *dp; + int err; + + if (!info->attrs[DP_GENL_A_DP_IDX] || !va) + return -EINVAL; + + rcu_read_lock(); + dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); + if (!dp) { + err = -ENOENT; + goto out; + } + + va = info->attrs[DP_GENL_A_OPENFLOW]; + + err = fwd_control_input(dp->chain, nla_data(va), nla_len(va)); + +out: + rcu_read_unlock(); + return err; +} + +static struct nla_policy dp_genl_openflow_policy[DP_GENL_A_MAX + 1] = { + [DP_GENL_A_DP_IDX] = { .type = NLA_U32 }, +}; + +static struct genl_ops dp_genl_ops_openflow = { + .cmd = DP_GENL_C_OPENFLOW, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = dp_genl_openflow_policy, + .doit = dp_genl_openflow, + .dumpit = NULL, +}; + +static struct nla_policy dp_genl_benchmark_policy[DP_GENL_A_MAX + 1] = { + [DP_GENL_A_DP_IDX] = { .type = NLA_U32 }, + [DP_GENL_A_NPACKETS] = { .type = NLA_U32 }, + [DP_GENL_A_PSIZE] = { .type = NLA_U32 }, +}; + +static struct genl_ops dp_genl_ops_benchmark_nl = { + .cmd = DP_GENL_C_BENCHMARK_NL, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = dp_genl_benchmark_policy, + .doit = dp_genl_benchmark_nl, + .dumpit = NULL, +}; + +static struct genl_ops *dp_genl_all_ops[] = { + /* Keep this operation first. Generic Netlink dispatching + * looks up operations with linear search, so we want it at the + * front. 
*/ + &dp_genl_ops_openflow, + + &dp_genl_ops_query_flow, + &dp_genl_ops_query_table, + &dp_genl_ops_show_dp, + &dp_genl_ops_add_dp, + &dp_genl_ops_del_dp, + &dp_genl_ops_query_dp, + &dp_genl_ops_add_port, + &dp_genl_ops_del_port, + &dp_genl_ops_benchmark_nl, +}; + +static int dp_init_netlink(void) +{ + int err; + int i; + + err = genl_register_family(&dp_genl_family); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(dp_genl_all_ops); i++) { + err = genl_register_ops(&dp_genl_family, dp_genl_all_ops[i]); + if (err) + goto err_unregister; + } + + strcpy(mc_group.name, "openflow"); + err = genl_register_mc_group(&dp_genl_family, &mc_group); + if (err < 0) + goto err_unregister; + + return 0; + +err_unregister: + genl_unregister_family(&dp_genl_family); + return err; +} + +static void dp_uninit_netlink(void) +{ + genl_unregister_family(&dp_genl_family); +} + +#define DRV_NAME "openflow" +#define DRV_VERSION VERSION +#define DRV_DESCRIPTION "OpenFlow switching datapath implementation" +#define DRV_COPYRIGHT "Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior University" + + +static int __init dp_init(void) +{ + int err; + + printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n"); + printk(KERN_INFO DRV_NAME ": " VERSION" built on "__DATE__" "__TIME__"\n"); + printk(KERN_INFO DRV_NAME ": " DRV_COPYRIGHT "\n"); + + err = flow_init(); + if (err) + goto error; + + err = dp_init_netlink(); + if (err) + goto error_flow_exit; + + /* Hook into callback used by the bridge to intercept packets. + * Parasites we are. 
*/ + if (br_handle_frame_hook) + printk("openflow: hijacking bridge hook\n"); + br_handle_frame_hook = dp_frame_hook; + + return 0; + +error_flow_exit: + flow_exit(); +error: + printk(KERN_EMERG "openflow: failed to install!"); + return err; +} + +static void dp_cleanup(void) +{ + fwd_exit(); + dp_uninit_netlink(); + flow_exit(); + br_handle_frame_hook = NULL; +} + +module_init(dp_init); +module_exit(dp_cleanup); + +MODULE_DESCRIPTION(DRV_DESCRIPTION); +MODULE_AUTHOR(DRV_COPYRIGHT); +MODULE_LICENSE("GPL"); diff --git a/datapath/datapath.h b/datapath/datapath.h new file mode 100644 index 00000000..cba2b793 --- /dev/null +++ b/datapath/datapath.h @@ -0,0 +1,72 @@ +/* Interface exported by OpenFlow module. */ + +#ifndef DATAPATH_H +#define DATAPATH_H 1 + +#include +#include +#include +#include "openflow.h" +#include "flow.h" + + +#define NL_FLOWS_PER_MESSAGE 100 + +#ifdef NDEBUG +#define dprintk(x...) +#else +#define dprintk(x...) printk(x) +#endif + +/* Capabilities supported by this implementation. */ +#define OFP_SUPPORTED_CAPABILITIES (OFPC_MULTI_PHY_TX) + +/* Actions supported by this implementation. */ +#define OFP_SUPPORTED_ACTIONS ( (1 << OFPAT_OUTPUT) \ + | (1 << OFPAT_SET_DL_VLAN) \ + | (1 << OFPAT_SET_DL_SRC) \ + | (1 << OFPAT_SET_DL_DST) \ + | (1 << OFPAT_SET_NW_SRC) \ + | (1 << OFPAT_SET_NW_DST) \ + | (1 << OFPAT_SET_TP_SRC) \ + | (1 << OFPAT_SET_TP_DST) ) + +struct sk_buff; + +struct datapath { + int dp_idx; + + /* Unique identifier for this datapath, incorporates the dp_idx and + * a hardware address */ + uint64_t id; + + struct timer_list timer; /* Expiration timer. */ + struct sw_chain *chain; /* Forwarding rules. */ + + /* Data related to the "of" device of this datapath */ + struct net_device dev; + struct net_device_stats stats; + + /* Flags from the control hello message */ + uint16_t hello_flags; + + /* Maximum number of bytes that should be sent for flow misses */ + uint16_t miss_send_len; + + /* Switch ports. 
*/ + struct net_bridge_port *ports[OFPP_MAX]; + struct list_head port_list; /* List of ports, for flooding. */ +}; + +int dp_output_port(struct datapath *, struct sk_buff *, int out_port); +int dp_output_control(struct datapath *, struct sk_buff *, + uint32_t buffer_id, size_t max_len, int reason); +int dp_set_origin(struct datapath *, uint16_t, struct sk_buff *); +int dp_send_hello(struct datapath *); +int dp_send_flow_expired(struct datapath *, struct sw_flow *); +int dp_update_port_flags(struct datapath *dp, const struct ofp_phy_port *opp); + +/* Should hold at least RCU read lock when calling */ +struct datapath *dp_get(int dp_idx); + +#endif /* datapath.h */ diff --git a/datapath/datapath_t.c b/datapath/datapath_t.c new file mode 100644 index 00000000..33a64a60 --- /dev/null +++ b/datapath/datapath_t.c @@ -0,0 +1,118 @@ +#include "datapath_t.h" +#include +#include +#include +#include +#include +#include + +#include "datapath.h" + +static struct sk_buff * +gen_sk_buff(struct datapath *dp, uint32_t packet_size) +{ + int in_port; + struct sk_buff *skb; + struct ethhdr *eh; + struct iphdr *ih; + struct udphdr *uh; + + for (in_port = 0; in_port < OFPP_MAX; in_port++) { + if (dp->ports[in_port] != NULL) + break; + } + + if (in_port == OFPP_MAX) { + printk("benchmark: no in_port to send packets as\n"); + return NULL; + } + + skb = alloc_skb(packet_size, GFP_ATOMIC); + if (!skb) { + printk("benchmark: cannot allocate skb for benchmark\n"); + return NULL; + } + + skb_put(skb, packet_size); + skb_set_mac_header(skb, 0); + eh = eth_hdr(skb); + memcpy(eh->h_dest, "\x12\x34\x56\x78\x9a\xbc", ETH_ALEN); + memcpy(eh->h_source, "\xab\xcd\xef\x12\x34\x56", ETH_ALEN); + eh->h_proto = htons(ETH_P_IP); + skb_set_network_header(skb, sizeof(*eh)); + ih = ip_hdr(skb); + ih->ihl = 5; + ih->version = IPVERSION; + ih->tos = 0; + ih->tot_len = htons(packet_size - sizeof(*eh)); + ih->id = htons(12345); + ih->frag_off = 0; + ih->ttl = IPDEFTTL; + ih->protocol = IPPROTO_UDP; + ih->check = 
0; /* want this to be right?! */ + ih->saddr = 0x12345678; + ih->daddr = 0x1234abcd; + skb_set_transport_header(skb, sizeof(*eh) + sizeof(*ih)); + uh = udp_hdr(skb); + uh->source = htons(1234); + uh->dest = htons(5678); + uh->len = htons(packet_size - sizeof(*eh) - sizeof(*ih)); + uh->check = 0; + if (dp_set_origin(dp, in_port, skb)) { + printk("benchmark: could not set origin\n"); + kfree_skb(skb); + return NULL; + } + + return skb; +} + +int +dp_genl_benchmark_nl(struct sk_buff *skb, struct genl_info *info) +{ + struct datapath *dp; + uint32_t num_packets = 0; + int i, err = 0; + struct sk_buff *skb2; + + if (!info->attrs[DP_GENL_A_DP_IDX] || !info->attrs[DP_GENL_A_NPACKETS] + || !info->attrs[DP_GENL_A_PSIZE]) + return -EINVAL; + + num_packets = nla_get_u32((info->attrs[DP_GENL_A_NPACKETS])); + + rcu_read_lock(); + dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX]))); + if (!dp) + err = -ENOENT; + else { + if (num_packets == 0) + goto benchmark_unlock; + + skb2 = gen_sk_buff(dp, nla_get_u32((info->attrs[DP_GENL_A_PSIZE]))); + if (skb2 == NULL) { + err = -ENOMEM; + goto benchmark_unlock; + } + + for (i = 0; i < num_packets; i++) { + struct sk_buff *copy = skb_get(skb2); + if (copy == NULL) { + printk("benchmark: skb_get failed\n"); + err = -ENOMEM; + break; + } + if ((err = dp_output_control(dp, copy, -1, + 0, OFPR_ACTION))) + { + printk("benchmark: output control ret %d on iter %d\n", err, i); + break; + } + } + kfree_skb(skb2); + } + +benchmark_unlock: + rcu_read_unlock(); + return err; +} diff --git a/datapath/datapath_t.h b/datapath/datapath_t.h new file mode 100644 index 00000000..868e734d --- /dev/null +++ b/datapath/datapath_t.h @@ -0,0 +1,12 @@ +#ifndef DATAPATH_T_H +#define DATAPATH_T_H 1 + +#include +#include +#include +#include +#include "openflow-netlink.h" + +int dp_genl_benchmark_nl(struct sk_buff *, struct genl_info *); + +#endif diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c new file mode 100644 index 00000000..e2ebf5a4 --- /dev/null +++ 
b/datapath/dp_dev.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include + +#include "datapath.h" +#include "forward.h" + +static int dp_dev_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + printk("xxx_do_ioctl called\n"); + return 0; +} + +static struct net_device_stats *dp_dev_get_stats(struct net_device *dev) +{ + struct datapath *dp = netdev_priv(dev); + return &dp->stats; +} + +int dp_dev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct datapath *dp = netdev_priv(dev); + + printk("xxx dp_dev_xmit not implemented yet!\n"); + return 0; + + printk("xxx_xmit called send to dp_frame_hook\n"); + + rcu_read_lock(); /* xxx Only for 2.4 kernels? */ + fwd_port_input(dp->chain, skb, OFPP_LOCAL); + rcu_read_unlock(); /* xxx Only for 2.4 kernels? */ + + return 0; +} + +static int dp_dev_open(struct net_device *dev) +{ + netif_start_queue(dev); + return 0; +} + +static void dp_dev_set_multicast_list(struct net_device *dev) +{ + printk("xxx_set_multi called\n"); +} + +static int dp_dev_stop(struct net_device *dev) +{ + netif_stop_queue(dev); + return 0; +} + +int dp_dev_setup(struct net_device *dev) +{ + int err; + + strncpy(dev->name, "of%d", IFNAMSIZ); + err = dev_alloc_name(dev, dev->name); + if (err < 0) + return err; + + dev->do_ioctl = dp_dev_do_ioctl; + dev->get_stats = dp_dev_get_stats; + dev->hard_start_xmit = dp_dev_xmit; + dev->open = dp_dev_open; + dev->set_multicast_list = dp_dev_set_multicast_list; + dev->stop = dp_dev_stop; + dev->tx_queue_len = 0; + dev->set_mac_address = NULL; + + dev->flags = IFF_BROADCAST | IFF_NOARP | IFF_MULTICAST; + + random_ether_addr(dev->dev_addr); + + ether_setup(dev); + return register_netdevice(dev); +} diff --git a/datapath/flow.c b/datapath/flow.c new file mode 100644 index 00000000..5aa726d8 --- /dev/null +++ b/datapath/flow.c @@ -0,0 +1,311 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include "flow.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "openflow.h" +#include "compat.h" +#include "snap.h" + +struct kmem_cache *flow_cache; + +/* Internal function used to compare fields in flow. */ +static inline +int flow_fields_match(const struct sw_flow_key *a, const struct sw_flow_key *b, + uint16_t w) +{ + return ((w & OFPFW_IN_PORT || a->in_port == b->in_port) + && (w & OFPFW_DL_VLAN || a->dl_vlan == b->dl_vlan) + && (w & OFPFW_DL_SRC || !memcmp(a->dl_src, b->dl_src, ETH_ALEN)) + && (w & OFPFW_DL_DST || !memcmp(a->dl_dst, b->dl_dst, ETH_ALEN)) + && (w & OFPFW_DL_TYPE || a->dl_type == b->dl_type) + && (w & OFPFW_NW_SRC || a->nw_src == b->nw_src) + && (w & OFPFW_NW_DST || a->nw_dst == b->nw_dst) + && (w & OFPFW_NW_PROTO || a->nw_proto == b->nw_proto) + && (w & OFPFW_TP_SRC || a->tp_src == b->tp_src) + && (w & OFPFW_TP_DST || a->tp_dst == b->tp_dst)); +} + +/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal + * modulo wildcards, zero otherwise. */ +inline +int flow_matches(const struct sw_flow_key *a, const struct sw_flow_key *b) +{ + return flow_fields_match(a, b, (a->wildcards | b->wildcards)); +} + +/* Returns nonzero if 't' (the table entry's key) and 'd' (the key + * describing the deletion) match, that is, if their fields are + * equal modulo wildcards, zero otherwise. If 'strict' is nonzero, the + * wildcards must match in both 't_key' and 'd_key'. Note that the + * table's wildcards are ignored unless 'strict' is set. 
*/ +inline +int flow_del_matches(const struct sw_flow_key *t, const struct sw_flow_key *d, int strict) +{ + if (strict && (t->wildcards != d->wildcards)) + return 0; + + return flow_fields_match(t, d, d->wildcards); +} + +void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from) +{ + to->wildcards = ntohs(from->wildcards) & OFPFW_ALL; + to->in_port = from->in_port; + to->dl_vlan = from->dl_vlan; + memcpy(to->dl_src, from->dl_src, ETH_ALEN); + memcpy(to->dl_dst, from->dl_dst, ETH_ALEN); + to->dl_type = from->dl_type; + to->nw_src = from->nw_src; + to->nw_dst = from->nw_dst; + to->nw_proto = from->nw_proto; + to->tp_src = from->tp_src; + to->tp_dst = from->tp_dst; + memset(to->pad, '\0', sizeof(to->pad)); +} + +void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from) +{ + to->wildcards = htons(from->wildcards); + to->in_port = from->in_port; + to->dl_vlan = from->dl_vlan; + memcpy(to->dl_src, from->dl_src, ETH_ALEN); + memcpy(to->dl_dst, from->dl_dst, ETH_ALEN); + to->dl_type = from->dl_type; + to->nw_src = from->nw_src; + to->nw_dst = from->nw_dst; + to->nw_proto = from->nw_proto; + to->tp_src = from->tp_src; + to->tp_dst = from->tp_dst; + memset(to->pad, '\0', sizeof(to->pad)); +} + +/* Returns true if 'flow' can be deleted and set up for a deferred free, false + * if deletion has already been scheduled (by another thread). + * + * Caller must hold rcu_read_lock. */ +int flow_del(struct sw_flow *flow) +{ + return !atomic_cmpxchg(&flow->deleted, 0, 1); +} + +/* Allocates and returns a new flow with 'n_actions' action, using allocation + * flags 'flags'. Returns the new flow or a null pointer on failure. 
*/ +struct sw_flow *flow_alloc(int n_actions, gfp_t flags) +{ + struct sw_flow *flow = kmem_cache_alloc(flow_cache, flags); + if (unlikely(!flow)) + return NULL; + + flow->n_actions = n_actions; + flow->actions = kmalloc(n_actions * sizeof *flow->actions, + flags); + if (unlikely(!flow->actions) && n_actions > 0) { + kmem_cache_free(flow_cache, flow); + return NULL; + } + return flow; +} + +/* Frees 'flow' immediately. */ +void flow_free(struct sw_flow *flow) +{ + if (unlikely(!flow)) + return; + kfree(flow->actions); + kmem_cache_free(flow_cache, flow); +} + +/* RCU callback used by flow_deferred_free. */ +static void rcu_callback(struct rcu_head *rcu) +{ + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); + flow_free(flow); +} + +/* Schedules 'flow' to be freed after the next RCU grace period. + * The caller must hold rcu_read_lock for this to be sensible. */ +void flow_deferred_free(struct sw_flow *flow) +{ + call_rcu(&flow->rcu, rcu_callback); +} + +/* Prints a representation of 'key' to the kernel log. 
*/ +void print_flow(const struct sw_flow_key *key) +{ + printk("wild%04x port%04x:vlan%04x mac%02x:%02x:%02x:%02x:%02x:%02x" + "->%02x:%02x:%02x:%02x:%02x:%02x " + "proto%04x ip%u.%u.%u.%u->%u.%u.%u.%u port%d->%d\n", + key->wildcards, ntohs(key->in_port), ntohs(key->dl_vlan), + key->dl_src[0], key->dl_src[1], key->dl_src[2], + key->dl_src[3], key->dl_src[4], key->dl_src[5], + key->dl_dst[0], key->dl_dst[1], key->dl_dst[2], + key->dl_dst[3], key->dl_dst[4], key->dl_dst[5], + ntohs(key->dl_type), + ((unsigned char *)&key->nw_src)[0], + ((unsigned char *)&key->nw_src)[1], + ((unsigned char *)&key->nw_src)[2], + ((unsigned char *)&key->nw_src)[3], + ((unsigned char *)&key->nw_dst)[0], + ((unsigned char *)&key->nw_dst)[1], + ((unsigned char *)&key->nw_dst)[2], + ((unsigned char *)&key->nw_dst)[3], + ntohs(key->tp_src), ntohs(key->tp_dst)); +} + +uint32_t hash_in6(const struct in6_addr *in) +{ + return (in->s6_addr32[0] ^ in->s6_addr32[1] + ^ in->s6_addr32[2] ^ in->s6_addr32[3]); +} + +// with inspiration from linux/if_arp.h +struct arp_eth_hdr { + uint16_t ar_hrd; /* format of hardware address */ + uint16_t ar_pro; /* format of protocol address */ + uint8_t ar_hln; /* length of hardware address */ + uint8_t ar_pln; /* length of protocol address */ + uint16_t ar_op; /* ARP opcode (command) */ + + uint8_t ar_sha[ETH_ALEN]; /* source hardware addr */ + uint32_t ar_sip; /* source protocol addr */ + uint8_t ar_tha[ETH_ALEN]; /* dest hardware addr */ + uint32_t ar_tip; /* dest protocol addr */ +} __attribute__((packed)); + +/* Parses the Ethernet frame in 'skb', which was received on 'in_port', + * and initializes 'key' to match. 
*/ +void flow_extract(struct sk_buff *skb, uint16_t in_port, + struct sw_flow_key *key) +{ + struct ethhdr *mac; + struct udphdr *th; + int nh_ofs, th_ofs; + + key->in_port = htons(in_port); + key->wildcards = 0; + memset(key->pad, '\0', sizeof(key->pad)); + + /* This code doesn't check that skb->len is long enough to contain the + * MAC or network header. With a 46-byte minimum length frame this + * assumption is always correct. */ + + /* Doesn't verify checksums. Should it? */ + + /* Data link layer. We only support Ethernet. */ + mac = eth_hdr(skb); + nh_ofs = sizeof(struct ethhdr); + if (likely(ntohs(mac->h_proto) >= OFP_DL_TYPE_ETH2_CUTOFF)) { + /* This is an Ethernet II frame */ + key->dl_type = mac->h_proto; + } else { + /* This is an 802.2 frame */ + if (snap_get_ethertype(skb, &key->dl_type) != -EINVAL) { + nh_ofs += sizeof(struct snap_hdr); + } else { + key->dl_type = OFP_DL_TYPE_NOT_ETH_TYPE; + nh_ofs += sizeof(struct llc_pdu_un); + } + } + + /* Check for a VLAN tag */ + if (likely(key->dl_type != __constant_htons(ETH_P_8021Q))) { + key->dl_vlan = __constant_htons(OFP_VLAN_NONE); + } else { + struct vlan_hdr *vh = (struct vlan_hdr *)(skb_mac_header(skb) + nh_ofs); + key->dl_type = vh->h_vlan_encapsulated_proto; + key->dl_vlan = vh->h_vlan_TCI & __constant_htons(VLAN_VID_MASK); + nh_ofs += sizeof(*vh); + } + memcpy(key->dl_src, mac->h_source, ETH_ALEN); + memcpy(key->dl_dst, mac->h_dest, ETH_ALEN); + skb_set_network_header(skb, nh_ofs); + + /* Network layer. */ + if (likely(key->dl_type == htons(ETH_P_IP))) { + struct iphdr *nh = ip_hdr(skb); + key->nw_src = nh->saddr; + key->nw_dst = nh->daddr; + key->nw_proto = nh->protocol; + th_ofs = nh_ofs + nh->ihl * 4; + skb_set_transport_header(skb, th_ofs); + + /* Transport layer. 
*/ + if ((key->nw_proto != IPPROTO_TCP && key->nw_proto != IPPROTO_UDP) + || skb->len < th_ofs + sizeof(struct udphdr)) { + goto no_th; + } + th = udp_hdr(skb); + key->tp_src = th->source; + key->tp_dst = th->dest; + + return; + } else if (key->dl_type == htons(ETH_P_IPV6)) { + struct ipv6hdr *nh = ipv6_hdr(skb); + key->nw_src = hash_in6(&nh->saddr); + key->nw_dst = hash_in6(&nh->daddr); + /* FIXME: Need to traverse next-headers until we find the + * upper-layer header. */ + key->nw_proto = 0; + goto no_th; + } else if (key->dl_type == htons(ETH_P_ARP)) { + /* just barely within 46-byte minimum packet */ + struct arp_eth_hdr *ah = (struct arp_eth_hdr *)skb_network_header(skb); + if (ah->ar_hrd == htons(ARPHRD_ETHER) + && ah->ar_pro == htons(ETH_P_IP) + && ah->ar_hln == ETH_ALEN + && ah->ar_pln == sizeof(key->nw_src)) + { + /* check if sha/tha match dl_src/dl_dst? */ + key->nw_src = ah->ar_sip; + key->nw_dst = ah->ar_tip; + key->nw_proto = 0; + goto no_th; + } + } else { + /* Fall through. */ + } + + key->nw_src = 0; + key->nw_dst = 0; + key->nw_proto = 0; + +no_th: + key->tp_src = 0; + key->tp_dst = 0; +} + +/* Initializes the flow module. + * Returns zero if successful or a negative error code. */ +int flow_init(void) +{ + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, + 0, NULL); + if (flow_cache == NULL) + return -ENOMEM; + + return 0; +} + +/* Uninitializes the flow module. */ +void flow_exit(void) +{ + kmem_cache_destroy(flow_cache); +} + diff --git a/datapath/flow.h b/datapath/flow.h new file mode 100644 index 00000000..5faeaf9d --- /dev/null +++ b/datapath/flow.h @@ -0,0 +1,131 @@ +#ifndef FLOW_H +#define FLOW_H 1 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "openflow.h" + +struct sk_buff; +struct ofp_flow_mod; + +/* Identification data for a flow. + Network byte order except for the "wildcards" field. 
+ In decreasing order by size, so that sw_flow_key structures can + be hashed or compared bytewise. + It might be useful to reorder members from (expected) greatest to least + inter-flow variability, so that failing bytewise comparisons with memcmp + terminate as quickly as possible on average. */ +struct sw_flow_key { + uint32_t nw_src; /* IP source address. */ + uint32_t nw_dst; /* IP destination address. */ + uint16_t in_port; /* Input switch port */ + uint16_t dl_vlan; /* Input VLAN. */ + uint16_t dl_type; /* Ethernet frame type. */ + uint16_t tp_src; /* TCP/UDP source port. */ + uint16_t tp_dst; /* TCP/UDP destination port. */ + uint16_t wildcards; /* Wildcard fields (host byte order). */ + uint8_t dl_src[6]; /* Ethernet source address. */ + uint8_t dl_dst[6]; /* Ethernet destination address. */ + uint8_t nw_proto; /* IP protocol. */ + uint8_t pad[3]; /* NB: Pad to make 32-bit aligned */ +}; + +/* We need to manually make sure that the structure is 32-bit aligned, + * since we don't want garbage values in compiler-generated pads from + * messing up hash matches. + */ +static inline void check_key_align(void) +{ + BUILD_BUG_ON(sizeof(struct sw_flow_key) != 36); +} + +/* Maximum number of actions in a single flow entry. */ +#define MAX_ACTIONS 16 + +/* Locking: + * + * - Readers must take rcu_read_lock and hold it the entire time that the flow + * must continue to exist. Readers need not take delete_lock. They *may* + * examine 'deleted' *if* it is important not to read stale data. + * + * - Deleters must take rcu_read_lock and call flow_del to verify that another + * thread has not already deleted the flow. If not, do a deferred free of + * the flow with call_rcu, then rcu_assign_pointer or [h]list_del_rcu the + * flow. + * + * - In-place update not yet contemplated. + */ +struct sw_flow { + struct sw_flow_key key; + + uint32_t group_id; /* Flow group ID (for QoS). */ + uint16_t max_idle; /* Idle time before discarding (seconds). 
*/ + unsigned long timeout; /* Expiration time (in jiffies). */ + + /* FIXME? Probably most flows have only a single action. */ + unsigned int n_actions; + struct ofp_action *actions; + + /* For use by table implementation. */ + union { + struct list_head node; + struct hlist_node hnode; + } u; + + spinlock_t lock; /* Lock this entry...mostly for stat updates */ + unsigned long init_time; /* When the flow was created (in jiffies). */ + uint64_t packet_count; /* Number of packets associated with this entry */ + uint64_t byte_count; /* Number of bytes associated with this entry */ + + atomic_t deleted; /* 0 if not deleted, 1 if deleted. */ + struct rcu_head rcu; +}; + +int flow_matches(const struct sw_flow_key *, const struct sw_flow_key *); +int flow_del_matches(const struct sw_flow_key *, const struct sw_flow_key *, + int); +struct sw_flow *flow_alloc(int n_actions, gfp_t flags); +void flow_free(struct sw_flow *); +void flow_deferred_free(struct sw_flow *); +void flow_extract(struct sk_buff *, uint16_t in_port, struct sw_flow_key *); +int flow_del(struct sw_flow *); +void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from); +void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from); + +void print_flow(const struct sw_flow_key *); + +#include +static inline int flow_timeout(struct sw_flow *flow) +{ + if (flow->max_idle == OFP_FLOW_PERMANENT) + return 0; + + return time_after(jiffies, flow->timeout); +} + +static inline void flow_used(struct sw_flow *flow, struct sk_buff *skb) +{ + unsigned long flags; + + if (flow->max_idle != OFP_FLOW_PERMANENT) + flow->timeout = jiffies + HZ * flow->max_idle; + + spin_lock_irqsave(&flow->lock, flags); + flow->packet_count++; + flow->byte_count += skb->len; + spin_unlock_irqrestore(&flow->lock, flags); +} + +extern struct kmem_cache *flow_cache; + +int flow_init(void); +void flow_exit(void); + +#endif /* flow.h */ diff --git a/datapath/forward.c b/datapath/forward.c new file mode 100644 index 
00000000..37dcd689 --- /dev/null +++ b/datapath/forward.c @@ -0,0 +1,585 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "forward.h" +#include "datapath.h" +#include "chain.h" +#include "flow.h" + +/* FIXME: do we need to use GFP_ATOMIC everywhere here? */ + +static void execute_actions(struct datapath *, struct sk_buff *, + const struct sw_flow_key *, + const struct ofp_action *, int n_actions); +static int make_writable(struct sk_buff **); + +static struct sk_buff *retrieve_skb(uint32_t id); +static void discard_skb(uint32_t id); + +/* 'skb' was received on 'in_port', a physical switch port between 0 and + * OFPP_MAX. Process it according to 'chain'. */ +void fwd_port_input(struct sw_chain *chain, struct sk_buff *skb, int in_port) +{ + struct sw_flow_key key; + struct sw_flow *flow; + + flow_extract(skb, in_port, &key); + flow = chain_lookup(chain, &key); + if (likely(flow != NULL)) { + flow_used(flow, skb); + execute_actions(chain->dp, skb, &key, + flow->actions, flow->n_actions); + } else { + dp_output_control(chain->dp, skb, fwd_save_skb(skb), + chain->dp->miss_send_len, OFPR_NO_MATCH); + } +} + +static int do_output(struct datapath *dp, struct sk_buff *skb, size_t max_len, + int out_port) +{ + if (!skb) + return -ENOMEM; + return (likely(out_port != OFPP_CONTROLLER) + ? dp_output_port(dp, skb, out_port) + : dp_output_control(dp, skb, fwd_save_skb(skb), + max_len, OFPR_ACTION)); +} + +static void execute_actions(struct datapath *dp, struct sk_buff *skb, + const struct sw_flow_key *key, + const struct ofp_action *actions, int n_actions) +{ + /* Every output action needs a separate clone of 'skb', but the common + * case is just a single output action, so that doing a clone and + * then freeing the original skbuff is wasteful. 
So the following code + * is slightly obscure just to avoid that. */ + int prev_port; + size_t max_len=0; /* Initialze to make compiler happy */ + uint16_t eth_proto; + int i; + + prev_port = -1; + eth_proto = ntohs(key->dl_type); + + for (i = 0; i < n_actions; i++) { + const struct ofp_action *a = &actions[i]; + + if (prev_port != -1) { + do_output(dp, skb_clone(skb, GFP_ATOMIC), + max_len, prev_port); + prev_port = -1; + } + + if (likely(a->type == ntohs(OFPAT_OUTPUT))) { + prev_port = ntohs(a->arg.output.port); + max_len = ntohs(a->arg.output.max_len); + } else { + if (!make_writable(&skb)) { + printk("make_writable failed\n"); + break; + } + skb = execute_setter(skb, eth_proto, key, a); + } + } + if (prev_port != -1) + do_output(dp, skb, max_len, prev_port); + else + kfree_skb(skb); +} + +/* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field + * covered by the sum has been changed from 'from' to 'to'. If set, + * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header. + * Based on nf_proto_csum_replace4. 
*/ +static void update_csum(__sum16 *sum, struct sk_buff *skb, + __be32 from, __be32 to, int pseudohdr) +{ + __be32 diff[] = { ~from, to }; + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), + ~csum_unfold(*sum))); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); + } else if (pseudohdr) + *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff), + csum_unfold(*sum))); +} + +static void modify_nh(struct sk_buff *skb, uint16_t eth_proto, + uint8_t nw_proto, const struct ofp_action *a) +{ + if (eth_proto == ETH_P_IP) { + struct iphdr *nh = ip_hdr(skb); + uint32_t new, *field; + + new = a->arg.nw_addr; + + if (a->type == OFPAT_SET_NW_SRC) + field = &nh->saddr; + else + field = &nh->daddr; + + if (nw_proto == IPPROTO_TCP) { + struct tcphdr *th = tcp_hdr(skb); + update_csum(&th->check, skb, *field, new, 1); + } else if (nw_proto == IPPROTO_UDP) { + struct udphdr *th = udp_hdr(skb); + update_csum(&th->check, skb, *field, new, 1); + } + update_csum(&nh->check, skb, *field, new, 0); + *field = new; + } +} + +static void modify_th(struct sk_buff *skb, uint16_t eth_proto, + uint8_t nw_proto, const struct ofp_action *a) +{ + if (eth_proto == ETH_P_IP) { + uint16_t new, *field; + + new = a->arg.tp; + + if (nw_proto == IPPROTO_TCP) { + struct tcphdr *th = tcp_hdr(skb); + + if (a->type == OFPAT_SET_TP_SRC) + field = &th->source; + else + field = &th->dest; + + update_csum(&th->check, skb, *field, new, 1); + *field = new; + } else if (nw_proto == IPPROTO_UDP) { + struct udphdr *th = udp_hdr(skb); + + if (a->type == OFPAT_SET_TP_SRC) + field = &th->source; + else + field = &th->dest; + + update_csum(&th->check, skb, *field, new, 1); + *field = new; + } + } +} + +static struct sk_buff *vlan_pull_tag(struct sk_buff *skb) +{ + struct vlan_ethhdr *vh = vlan_eth_hdr(skb); + struct ethhdr *eh; + + + /* Verify we were given a vlan packet */ + if 
(vh->h_vlan_proto != __constant_htons(ETH_P_8021Q))
+		return skb;
+
+	/* Slide the two MAC addresses forward over the 4-byte tag; the
+	 * encapsulated EtherType that followed the tag stays where it is. */
+	memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN);
+
+	eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
+
+	skb->protocol = eh->h_proto;
+	skb->mac_header += VLAN_HLEN;
+
+	return skb;
+}
+
+/* Executes VLAN setter 'a' on 'skb', whose flow is 'key'.
+ *
+ * 'a->arg.vlan_id' is used in host byte order here: it is compared with
+ * OFP_VLAN_NONE directly and converted with htons() before being stored.
+ *   - OFP_VLAN_NONE strips the tag, if there is one.
+ *   - Any other value rewrites the VLAN ID of an already tagged packet
+ *     (keeping the other TCI bits), or adds a new tag when 'key' says that
+ *     the packet is untagged.
+ *
+ * Returns the skb to continue with.  In the "add" case that is whatever
+ * vlan_put_tag() returned.
+ * NOTE(review): vlan_put_tag() presumably returns NULL on failure, so callers
+ * should be prepared for a NULL result -- confirm. */
+static struct sk_buff *modify_vlan(struct sk_buff *skb,
+		const struct sw_flow_key *key, const struct ofp_action *a)
+{
+	uint16_t new_id = a->arg.vlan_id;
+
+	if (new_id != OFP_VLAN_NONE) {
+		if (key->dl_vlan != __constant_htons(OFP_VLAN_NONE)) {
+			/* Modify vlan id, but maintain other TCI values */
+			struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
+			vh->h_vlan_TCI = (vh->h_vlan_TCI
+				& ~(__constant_htons(VLAN_VID_MASK))) | htons(new_id);
+		} else {
+			/* Add vlan header */
+			skb = vlan_put_tag(skb, new_id);
+		}
+	} else {
+		/* Remove an existing vlan header if it exists */
+		vlan_pull_tag(skb);
+	}
+
+	return skb;
+}
+
+/* Applies the single header-modifying action 'a' to 'skb' and returns the skb
+ * to use from then on (modify_vlan() may substitute a different one).
+ * 'eth_proto' is the packet's Ethernet type in host byte order and 'key' is
+ * the flow key extracted from the packet.
+ *
+ * Any action type that is not one of the setters below hits BUG().
+ * NOTE(review): 'a->type' is switched on as-is here, while execute_actions()
+ * compares it against ntohs(OFPAT_OUTPUT) -- confirm which byte order action
+ * types are expected to be in. */
+struct sk_buff *execute_setter(struct sk_buff *skb, uint16_t eth_proto,
+			const struct sw_flow_key *key, const struct ofp_action *a)
+{
+	switch (a->type) {
+	case OFPAT_SET_DL_VLAN:
+		skb = modify_vlan(skb, key, a);
+		break;
+
+	case OFPAT_SET_DL_SRC: {
+		struct ethhdr *eh = eth_hdr(skb);
+		memcpy(eh->h_source, a->arg.dl_addr, sizeof eh->h_source);
+		break;
+	}
+	case OFPAT_SET_DL_DST: {
+		struct ethhdr *eh = eth_hdr(skb);
+		memcpy(eh->h_dest, a->arg.dl_addr, sizeof eh->h_dest);
+		break;
+	}
+
+	case OFPAT_SET_NW_SRC:
+	case OFPAT_SET_NW_DST:
+		modify_nh(skb, eth_proto, key->nw_proto, a);
+		break;
+
+	case OFPAT_SET_TP_SRC:
+	case OFPAT_SET_TP_DST:
+		modify_th(skb, eth_proto, key->nw_proto, a);
+		break;
+
+	default:
+		BUG();
+	}
+
+	return skb;
+}
+
+/* Handles control hello message 'msg': logs the version, optionally updates
+ * the datapath's miss_send_len, records the hello flags, and replies with a
+ * hello of our own.  Always returns 0. */
+static int
+recv_control_hello(struct sw_chain *chain, const void *msg)
+{
+	const struct ofp_control_hello *och = msg;
+
+	printk("control_hello(version=%d)\n", ntohl(och->version));
+
+	/* OFP_MISS_SEND_LEN_UNCHANGED leaves the current setting alone. */
+	if (ntohs(och->miss_send_len) != OFP_MISS_SEND_LEN_UNCHANGED) {
+		chain->dp->miss_send_len = ntohs(och->miss_send_len);
+	}
+
+	chain->dp->hello_flags =
ntohs(och->flags); + + dp_send_hello(chain->dp); + + return 0; +} + +static int +recv_packet_out(struct sw_chain *chain, const void *msg) +{ + const struct ofp_packet_out *opo = msg; + struct sk_buff *skb; + struct vlan_ethhdr *mac; + int nh_ofs; + + if (ntohl(opo->buffer_id) == (uint32_t) -1) { + int data_len = ntohs(opo->header.length) - sizeof *opo; + + /* FIXME: there is likely a way to reuse the data in msg. */ + skb = alloc_skb(data_len, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + /* FIXME? We don't reserve NET_IP_ALIGN or NET_SKB_PAD since + * we're just transmitting this raw without examining anything + * at those layers. */ + memcpy(skb_put(skb, data_len), opo->u.data, data_len); + dp_set_origin(chain->dp, ntohs(opo->in_port), skb); + + skb_set_mac_header(skb, 0); + mac = vlan_eth_hdr(skb); + if (likely(mac->h_vlan_proto != htons(ETH_P_8021Q))) + nh_ofs = sizeof(struct ethhdr); + else + nh_ofs = sizeof(struct vlan_ethhdr); + skb_set_network_header(skb, nh_ofs); + + dp_output_port(chain->dp, skb, ntohs(opo->out_port)); + } else { + struct sw_flow_key key; + int n_acts; + + skb = retrieve_skb(ntohl(opo->buffer_id)); + if (!skb) + return -ESRCH; + dp_set_origin(chain->dp, ntohs(opo->in_port), skb); + + n_acts = (ntohs(opo->header.length) - sizeof *opo) + / sizeof *opo->u.actions; + flow_extract(skb, ntohs(opo->in_port), &key); + execute_actions(chain->dp, skb, &key, opo->u.actions, n_acts); + } + return 0; +} + +static int +recv_port_mod(struct sw_chain *chain, const void *msg) +{ + const struct ofp_port_mod *opm = msg; + + dp_update_port_flags(chain->dp, &opm->desc); + + return 0; +} + +static int +add_flow(struct sw_chain *chain, const struct ofp_flow_mod *ofm) +{ + int error = -ENOMEM; + int n_acts; + struct sw_flow *flow; + + + /* Check number of actions. */ + n_acts = (ntohs(ofm->header.length) - sizeof *ofm) / sizeof *ofm->actions; + if (n_acts > MAX_ACTIONS) { + error = -E2BIG; + goto error; + } + + /* Allocate memory. 
*/ + flow = flow_alloc(n_acts, GFP_ATOMIC); + if (flow == NULL) + goto error; + + /* Fill out flow. */ + flow_extract_match(&flow->key, &ofm->match); + flow->group_id = ntohl(ofm->group_id); + flow->max_idle = ntohs(ofm->max_idle); + flow->timeout = jiffies + flow->max_idle * HZ; + flow->n_actions = n_acts; + flow->init_time = jiffies; + flow->byte_count = 0; + flow->packet_count = 0; + atomic_set(&flow->deleted, 0); + spin_lock_init(&flow->lock); + memcpy(flow->actions, ofm->actions, n_acts * sizeof *flow->actions); + + /* Act. */ + error = chain_insert(chain, flow); + if (error) + goto error_free_flow; + error = 0; + if (ntohl(ofm->buffer_id) != (uint32_t) -1) { + struct sk_buff *skb = retrieve_skb(ntohl(ofm->buffer_id)); + if (skb) { + struct sw_flow_key key; + flow_used(flow, skb); + flow_extract(skb, ntohs(ofm->match.in_port), &key); + execute_actions(chain->dp, skb, &key, + ofm->actions, n_acts); + } + else + error = -ESRCH; + } + return error; + +error_free_flow: + flow_free(flow); +error: + if (ntohl(ofm->buffer_id) != (uint32_t) -1) + discard_skb(ntohl(ofm->buffer_id)); + return error; +} + +static int +recv_flow(struct sw_chain *chain, const void *msg) +{ + const struct ofp_flow_mod *ofm = msg; + uint16_t command = ntohs(ofm->command); + + if (command == OFPFC_ADD) { + return add_flow(chain, ofm); + } else if (command == OFPFC_DELETE) { + struct sw_flow_key key; + flow_extract_match(&key, &ofm->match); + return chain_delete(chain, &key, 0) ? 0 : -ESRCH; + } else if (command == OFPFC_DELETE_STRICT) { + struct sw_flow_key key; + flow_extract_match(&key, &ofm->match); + return chain_delete(chain, &key, 1) ? 0 : -ESRCH; + } else { + return -ENOTSUPP; + } +} + +/* 'msg', which is 'length' bytes long, was received from the control path. + * Apply it to 'chain'. 
*/ +int +fwd_control_input(struct sw_chain *chain, const void *msg, size_t length) +{ + + struct openflow_packet { + size_t min_size; + int (*handler)(struct sw_chain *, const void *); + }; + + static const struct openflow_packet packets[] = { + [OFPT_CONTROL_HELLO] = { + sizeof (struct ofp_control_hello), + recv_control_hello, + }, + [OFPT_PACKET_OUT] = { + sizeof (struct ofp_packet_out), + recv_packet_out, + }, + [OFPT_FLOW_MOD] = { + sizeof (struct ofp_flow_mod), + recv_flow, + }, + [OFPT_PORT_MOD] = { + sizeof (struct ofp_port_mod), + recv_port_mod, + }, + }; + + const struct openflow_packet *pkt; + struct ofp_header *oh; + + if (length < sizeof(struct ofp_header)) + return -EINVAL; + + oh = (struct ofp_header *) msg; + if (oh->version != 1 || oh->type >= ARRAY_SIZE(packets) + || ntohs(oh->length) > length) + return -EINVAL; + + pkt = &packets[oh->type]; + if (!pkt->handler) + return -ENOSYS; + if (length < pkt->min_size) + return -EFAULT; + + return pkt->handler(chain, msg); +} + +/* Packet buffering. */ + +#define OVERWRITE_SECS 1 +#define OVERWRITE_JIFFIES (OVERWRITE_SECS * HZ) + +struct packet_buffer { + struct sk_buff *skb; + uint32_t cookie; + unsigned long exp_jiffies; +}; + +static struct packet_buffer buffers[N_PKT_BUFFERS]; +static unsigned int buffer_idx; +static DEFINE_SPINLOCK(buffer_lock); + +uint32_t fwd_save_skb(struct sk_buff *skb) +{ + struct packet_buffer *p; + unsigned long int flags; + uint32_t id; + + spin_lock_irqsave(&buffer_lock, flags); + buffer_idx = (buffer_idx + 1) & PKT_BUFFER_MASK; + p = &buffers[buffer_idx]; + if (p->skb) { + /* Don't buffer packet if existing entry is less than + * OVERWRITE_SECS old. */ + if (time_before(jiffies, p->exp_jiffies)) { + spin_unlock_irqrestore(&buffer_lock, flags); + return -1; + } else + kfree_skb(p->skb); + } + /* Don't use maximum cookie value since the all-bits-1 id is + * special. 
*/ + if (++p->cookie >= (1u << PKT_COOKIE_BITS) - 1) + p->cookie = 0; + skb_get(skb); + p->skb = skb; + p->exp_jiffies = jiffies + OVERWRITE_JIFFIES; + id = buffer_idx | (p->cookie << PKT_BUFFER_BITS); + spin_unlock_irqrestore(&buffer_lock, flags); + + return id; +} + +static struct sk_buff *retrieve_skb(uint32_t id) +{ + unsigned long int flags; + struct sk_buff *skb = NULL; + struct packet_buffer *p; + + spin_lock_irqsave(&buffer_lock, flags); + p = &buffers[id & PKT_BUFFER_MASK]; + if (p->cookie == id >> PKT_BUFFER_BITS) { + skb = p->skb; + p->skb = NULL; + } else { + printk("cookie mismatch: %x != %x\n", + id >> PKT_BUFFER_BITS, p->cookie); + } + spin_unlock_irqrestore(&buffer_lock, flags); + + return skb; +} + +static void discard_skb(uint32_t id) +{ + unsigned long int flags; + struct packet_buffer *p; + + spin_lock_irqsave(&buffer_lock, flags); + p = &buffers[id & PKT_BUFFER_MASK]; + if (p->cookie == id >> PKT_BUFFER_BITS) { + kfree_skb(p->skb); + p->skb = NULL; + } + spin_unlock_irqrestore(&buffer_lock, flags); +} + +void fwd_exit(void) +{ + int i; + + for (i = 0; i < N_PKT_BUFFERS; i++) + kfree_skb(buffers[i].skb); +} + +/* Utility functions. */ + +/* Makes '*pskb' writable, possibly copying it and setting '*pskb' to point to + * the copy. + * Returns 1 if successful, 0 on failure. */ +static int +make_writable(struct sk_buff **pskb) +{ + /* Based on skb_make_writable() in net/netfilter/core.c. */ + struct sk_buff *nskb; + + /* Not exclusive use of packet? Must copy. */ + if (skb_shared(*pskb) || skb_cloned(*pskb)) + goto copy_skb; + + return pskb_may_pull(*pskb, 64); /* FIXME? 
*/ + +copy_skb: + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return 0; + BUG_ON(skb_is_nonlinear(nskb)); + + /* Rest of kernel will get very unhappy if we pass it a + suddenly-orphaned skbuff */ + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + return 1; +} diff --git a/datapath/forward.h b/datapath/forward.h new file mode 100644 index 00000000..8e92330f --- /dev/null +++ b/datapath/forward.h @@ -0,0 +1,33 @@ +#ifndef FORWARD_H +#define FORWARD_H 1 + +#include +#include "flow.h" + +struct sk_buff; +struct sw_chain; +struct ofp_action; + +/* Buffers are identified to userspace by a 31-bit opaque ID. We divide the ID + * into a buffer number (low bits) and a cookie (high bits). The buffer number + * is an index into an array of buffers. The cookie distinguishes between + * different packets that have occupied a single buffer. Thus, the more + * buffers we have, the lower-quality the cookie... */ +#define PKT_BUFFER_BITS 8 +#define N_PKT_BUFFERS (1 << PKT_BUFFER_BITS) +#define PKT_BUFFER_MASK (N_PKT_BUFFERS - 1) + +#define PKT_COOKIE_BITS (32 - PKT_BUFFER_BITS) + + +void fwd_port_input(struct sw_chain *, struct sk_buff *, int in_port); +int fwd_control_input(struct sw_chain *, const void *, size_t); + +uint32_t fwd_save_skb(struct sk_buff *skb); + +void fwd_exit(void); + +struct sk_buff *execute_setter(struct sk_buff *, uint16_t, + const struct sw_flow_key *, const struct ofp_action *); + +#endif /* forward.h */ diff --git a/datapath/forward_t.c b/datapath/forward_t.c new file mode 100644 index 00000000..ef284621 --- /dev/null +++ b/datapath/forward_t.c @@ -0,0 +1,581 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer
+sity
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "forward.h"
+#include "tests/forward_t.h"
+#include "openflow.h"
+#include "unit.h"
+#include "flow.h"
+
+/*
+ * Tests execute_setter() in forward.c to check that actions are
+ * appropriately taken on packets, meaning:
+ *
+ * 1. Checksums are correct.
+ * 2. Actions are only taken on compatible packets (IP action not taken on
+ *    non-IP packet).
+ * 3. Other packet data remains untouched.
+ *
+ * forward_t.h contains static packet definitions.  forward_t.h should be
+ * generated using gen_forward_t.c.  This test is run on whatever packets are
+ * defined in forward_t.h.
+ *
+ * NOTE: Tests assume packets in forward_t.h are present in full and IP and
+ * transport checksums are correct.  (Can prevent offloading of checksum
+ * computation using ethtool.)
+ */
+
+/*
+ * Sets the argument of action 'a' ('a->arg').  If 'key' != NULL, sets it to
+ * 'key's value for the field specified by 'a->type', or to a random value
+ * when the packet does not carry that field (e.g. an IP address on a non-IP
+ * packet).  If 'key' == NULL, sets it to a random value.
+ */
+
+static void
+set_action_data(struct sk_buff *skb, struct sw_flow_key *key, struct ofp_action *a)
+{
+	if (key != NULL) {
+		/* Restore the packet's own value for the field that 'a'
+		 * targets.  Where 'key' has no meaningful value for it (IP
+		 * address of a non-IP packet, port of a packet that is neither
+		 * TCP nor UDP) a random value is used instead. */
+		switch(a->type) {
+		case(OFPAT_SET_DL_SRC):
+			memcpy(a->arg.dl_addr, key->dl_src, sizeof key->dl_src);
+			break;
+		case(OFPAT_SET_DL_DST):
+			memcpy(a->arg.dl_addr, key->dl_dst, sizeof key->dl_dst);
+			break;
+		case(OFPAT_SET_NW_SRC):
+			if (key->dl_type == htons(ETH_P_IP))
+				a->arg.nw_addr = key->nw_src;
+			else
+				a->arg.nw_addr = random32();
+			break;
+		case(OFPAT_SET_NW_DST):
+			if (key->dl_type == htons(ETH_P_IP))
+				a->arg.nw_addr = key->nw_dst;
+			else
+				a->arg.nw_addr = random32();
+			break;
+		case(OFPAT_SET_TP_SRC):
+			if (key->nw_proto == IPPROTO_TCP || key->nw_proto == IPPROTO_UDP)
+				a->arg.tp = key->tp_src;
+			else
+				a->arg.tp = (uint16_t) random32();
+			break;
+		case(OFPAT_SET_TP_DST):
+			if (key->nw_proto == IPPROTO_TCP || key->nw_proto == IPPROTO_UDP)
+				a->arg.tp = key->tp_dst;
+			else
+				a->arg.tp = (uint16_t) random32();
+			break;
+		default:
+			BUG();
+		}
+	} else {
+		/* Randomize the first six bytes of the argument through its
+		 * 'dl_addr' member (4 bytes + 2 bytes), whatever the action
+		 * type is.
+		 * NOTE(review): presumably 'arg' is a union, so this also
+		 * randomizes nw_addr/tp -- confirm against openflow.h. */
+		((uint32_t*)a->arg.dl_addr)[0] = random32();
+		((uint16_t*)a->arg.dl_addr)[2] = random32();
+	}
+}
+
+
+/*
+ * Checks the IP header checksum of 'ih'.  Recomputes the checksum over the
+ * 'ih->ihl' * 4 header bytes and reports a unit test failure with unit_fail()
+ * if it differs from the value stored in the header.  Returns nothing; use
+ * unit_failed() to learn the outcome.
+ *
+ * Side effect: 'ih->check' is overwritten with the recomputed checksum.
+ */
+
+static void
+check_IP_csum(struct iphdr *ih)
+{
+	uint16_t check, *data;
+	uint32_t n_bytes, sum;
+
+	/* The checksum field itself counts as zero in the computation. */
+	check = ih->check;
+	ih->check = 0;
+	data = (uint16_t*) ih;
+	sum = 0;
+	n_bytes = ih->ihl * 4;
+
+	/* One's-complement sum of 16-bit words, folding the carry back in
+	 * after every addition. */
+	while (n_bytes > 1) {
+		sum += ntohs(*data);
+		sum = (sum >> 16) + (uint16_t)sum;
+		data++;
+		n_bytes -= 2;
+	}
+
+	/* Not reachable as written: 'n_bytes' starts out as a multiple of 4. */
+	if (n_bytes == 1) {
+		sum += *(uint8_t*)data;
+		sum = (sum >> 16) + (uint16_t)sum;
+	}
+
+	ih->check = htons((uint16_t)(~sum));
+	if (ih->check != check) {
+		unit_fail("IP checksum %hu does not match %hu",
+			  ntohs(ih->check), ntohs(check));
+	}
+}
+
+/*
+ * Partially computes TCP checksum over 'n_bytes' pointed to by 'data'.  Can be
+ * called multiple times if data csum is to be computed on is fragmented.
If
+ * 'is_last' == 0, assumes will be called again on more data and returns the
+ * value that should be passed in as 'incr_sum' on the next call.  Else if
+ * 'is_last' == 1, returns the final checksum.  On the first call, 'incr_sum'
+ * should equal 0.  If 'is_last' == 0, 'n_bytes' must be even.  i.e. Should
+ * first be called on pseudo header fields that are multiples of two, and then
+ * on the TCP packet.
+ *
+ * The final checksum is the bitwise complement of the 32-bit accumulator;
+ * callers keep only its low 16 bits.
+ */
+static uint32_t
+compute_transport_checksum(uint16_t *data, uint32_t n_bytes,
+			uint32_t incr_sum, uint8_t is_last)
+{
+	uint8_t arr[2];
+
+	/* Only the last piece may have an odd length. */
+	if (n_bytes % 2 != 0 && is_last == 0)
+		BUG();
+
+	/* One's-complement sum of 16-bit words, folding the carry back in
+	 * after every addition. */
+	while (n_bytes > 1) {
+		incr_sum += ntohs(*data);
+		incr_sum = (incr_sum >> 16) + (uint16_t)incr_sum;
+		data++;
+		n_bytes -= 2;
+	}
+
+	if (is_last == 0)
+		return incr_sum;
+
+	/* A trailing odd byte is summed as if followed by a zero pad byte. */
+	if(n_bytes == 1) {
+		arr[0] = *(uint8_t*)data;
+		arr[1] = 0;
+		incr_sum += ntohs(*((uint16_t*)arr));
+		incr_sum = (incr_sum >> 16) + (uint16_t)incr_sum;
+	}
+
+	return ~incr_sum;
+}
+
+/*
+ * Checks the transport layer's checksum of a packet.  Returns '0' if correct,
+ * else '-1' (after reporting the mismatch with unit_fail()).  'ih' should
+ * point to the IP header of the packet, if TCP, 'th' should point to the TCP
+ * header, and if UDP, 'uh' should point to the UDP
+ * header.
+ */ +static int +check_transport_csum(struct iphdr *ih, struct tcphdr *th, + struct udphdr *uh) +{ + uint32_t tmp; + uint16_t len, check; + uint8_t arr[2]; + + tmp = compute_transport_checksum((uint16_t*)(&ih->saddr), + 2 * sizeof ih->saddr, 0, 0); + arr[0] = 0; + arr[1] = ih->protocol; + tmp = compute_transport_checksum((uint16_t*)arr, 2, tmp, 0); + len = ntohs(ih->tot_len) - (ih->ihl * 4); + *((uint16_t*)arr) = htons(len); + tmp = compute_transport_checksum((uint16_t*)arr, 2, tmp, 0); + + if (th != NULL) { + check = th->check; + th->check = 0; + th->check = htons((uint16_t)compute_transport_checksum((uint16_t*)th, + len, tmp, 1)); + if (th->check != check) { + unit_fail("TCP checksum %hu does not match %hu", + ntohs(th->check), ntohs(check)); + return -1; + } + } else if (uh != NULL) { + check = uh->check; + uh->check = 0; + uh->check = htons((uint16_t)compute_transport_checksum((uint16_t*)uh, + len, tmp, 1)); + if (uh->check != check) { + unit_fail("UDP checksum %hu does not match %hu", + ntohs(uh->check), ntohs(check)); + return -1; + } + } + + return 0; +} + + +/* + * Compares 'pkt_len' bytes of 'data' to 'pkt'. excl_start and excl_end point + * together delineate areas of 'data' that are not supposed to match 'pkt'. + * 'num_excl' specify how many such areas exist. An 'excl_start' entry is + * ignored if it equals NULL. See 'check_packet()' for usage. 
+ */ + +static void +compare(uint8_t *data, uint8_t *pkt, uint32_t pkt_len, + uint8_t **excl_start, uint8_t **excl_end, uint32_t num_excl) +{ + uint32_t i; + uint8_t *d, *p, *end; + int ret; + + end = data + pkt_len; + d = data; + p = pkt; + ret = 0; + + for (i = 0; i < num_excl; i++) { + if(*excl_start != NULL) { + if ((ret = memcmp(d, p, *excl_start - d)) != 0) + break; + p += (*excl_end - d); + d = *excl_end; + } + excl_start++; + excl_end++; + } + + if (ret == 0) + ret = memcmp(d, p, end - d); + + if (ret != 0) { + unit_fail("skb and packet comparison failed:"); + for (i = 0; i < pkt_len; i++) { + if (data[i] != pkt[i]) { + unit_fail("skb[%u] = 0x%x != 0x%x", + i, data[i], pkt[i]); + } + } + } +} + + +/* + * Checks that a packet's data has remained consistent after an action has been + * applied. 'skb' is the modified packet, 'a' is the action that was taken on + * the packet, 'p' is a copy of the packet's data before action 'a' was taken. + * Checks that the action was in fact taken, that the checksums of the packet + * are correct, and that no other data in the packet was altered. 
+ */ + +static void +check_packet(struct sk_buff *skb, struct ofp_action *a, struct pkt *p) +{ + struct ethhdr *eh; + struct iphdr *ih; + struct tcphdr *th; + struct udphdr *uh; + uint8_t *excl_start[5], *excl_end[5]; + + eh = eth_hdr(skb); + ih = NULL; + th = NULL; + uh = NULL; + + memset(excl_start, 0, sizeof excl_start); + memset(excl_end, 0, sizeof excl_end); + + if (eh->h_proto == htons(ETH_P_IP)) { + ih = ip_hdr(skb); + excl_start[1] = (uint8_t*)&ih->check; + excl_end[1] = (uint8_t*)(&ih->check + 1); + if (ih->protocol == IPPROTO_TCP) { + th = tcp_hdr(skb); + excl_start[4] = (uint8_t*)&th->check; + excl_end[4] = (uint8_t*)(&th->check + 1); + } else if (ih->protocol == IPPROTO_UDP) { + uh = udp_hdr(skb); + excl_start[4] = (uint8_t*)&uh->check; + excl_end[4] = (uint8_t*)(&uh->check + 1); + } + } + + if (a != NULL) { + switch(a->type) { + case(OFPAT_SET_DL_SRC): + if (memcmp(a->arg.dl_addr, eh->h_source, sizeof eh->h_source) != 0) { + unit_fail("Source eth addr has not been set"); + return; + } + excl_start[0] = (uint8_t*)(&eh->h_source); + excl_end[0] = (uint8_t*)(&eh->h_proto); + break; + case(OFPAT_SET_DL_DST): + if (memcmp(a->arg.dl_addr, eh->h_dest, sizeof eh->h_dest) != 0) { + unit_fail("Dest eth addr has not been set"); + return; + } + excl_start[0] = (uint8_t*)(&eh->h_dest); + excl_end[0] = (uint8_t*)(&eh->h_source); + break; + case(OFPAT_SET_NW_SRC): + if (ih != NULL) { + if (a->arg.nw_addr != ih->saddr) { + unit_fail("Source IP addr has not been set"); + return; + } + excl_start[2] = (uint8_t*)(&ih->saddr); + excl_end[2] = (uint8_t*)(&ih->saddr + 1); + } + break; + case(OFPAT_SET_NW_DST): + if (ih != NULL) { + if (a->arg.nw_addr != ih->daddr) { + unit_fail("Dest IP addr has not been set"); + return; + } + excl_start[2] = (uint8_t*)(&ih->daddr); + excl_end[2] = (uint8_t*)(&ih->daddr + 1); + } + break; + case(OFPAT_SET_TP_SRC): + if (th != NULL) { + if (a->arg.tp != th->source) { + unit_fail("Source port has not been set"); + return; + } + excl_start[3] 
= (uint8_t*)(&th->source); + excl_end[3] = (uint8_t*)(&th->source + 1); + } else if (uh != NULL) { + if (a->arg.tp != uh->source) { + unit_fail("Source port has not been set"); + return; + } + excl_start[3] = (uint8_t*)(&uh->source); + excl_end[3] = (uint8_t*)(&uh->source + 1); + } + break; + case(OFPAT_SET_TP_DST): + if (th != NULL) { + if (a->arg.tp != th->dest) { + unit_fail("Dest port has not been set"); + return; + } + excl_start[3] = (uint8_t*)(&th->dest); + excl_end[3] = (uint8_t*)(&th->dest + 1); + } else if (uh != NULL) { + if (a->arg.tp != uh->dest) { + unit_fail("Dest port has not been set"); + return; + } + excl_start[3] = (uint8_t*)(&uh->dest); + excl_end[3] = (uint8_t*)(&uh->dest + 1); + } + break; + default: + BUG(); + } + } + + compare(skb->data, p->data, p->len, excl_start, excl_end, 5); + if (unit_failed()) + return; + + if (ih == NULL) + return; + + check_IP_csum(ih); + if (unit_failed()) + return; + + if (th == NULL && uh == NULL) + return; + + check_transport_csum(ih, th, uh); +} + +/* + * Layers 3 & 4 Tests: Given packets in forward_t.h, executes all actions + * with random data, checking for consistency described in check_packet(). 
+ */ + +void +test_l3_l4(void) +{ + struct ofp_action action; + struct sk_buff *skb; + struct sw_flow_key key; + unsigned int i, j; + uint16_t eth_proto; + int ret = 0; + + for (i = 0; i < num_packets; i++) { + skb = alloc_skb(packets[i].len, GFP_KERNEL); + if (!skb) { + unit_fail("Couldn't allocate %uth skb", i); + return; + } + + memcpy(skb_put(skb, packets[i].len), packets[i].data, + packets[i].len); + + skb_set_mac_header(skb, 0); + flow_extract(skb, 0, &key); + eth_proto = ntohs(key.dl_type); + + check_packet(skb, NULL, packets+i); + if (unit_failed()) + return; + + for (action.type = OFPAT_SET_DL_SRC; + action.type <= OFPAT_SET_TP_DST; + action.type++) + { + set_action_data(skb, NULL, &action); + for(j = 0; j < 2; j++) { + skb = execute_setter(skb, eth_proto, &key, &action); + check_packet(skb, &action, packets+i); + if (unit_failed()) { + unit_fail("Packet %u inconsistent " + "after setter on action " + "type %d, iteration %u", + i, action.type, j); + return; + } + set_action_data(skb, &key, &action); + } + } + + kfree_skb(skb); + + if (ret != 0) + break; + } +} + +int +test_vlan(void) +{ + struct ofp_action action; + struct sk_buff *skb; + struct sw_flow_key key; + unsigned int i; + uint16_t eth_proto; + int ret = 0; + struct vlan_ethhdr *vh; + struct ethhdr *eh; + struct net_device dev; + uint16_t new_id, orig_id; + + + memset((char *)&dev, '\0', sizeof(dev)); + + printk("Testing vlan\n"); + for (i = 0; i < num_packets; i++) { + skb = alloc_skb(packets[i].len, GFP_KERNEL); + if (!skb) { + unit_fail("Couldn't allocate %uth skb", i); + return -ENOMEM; + } + + memcpy(skb_put(skb, packets[i].len), packets[i].data, + packets[i].len); + skb->dev = &dev; + + skb_set_mac_header(skb, 0); + flow_extract(skb, 0, &key); + eth_proto = ntohs(key.dl_type); + +#if 0 + if ((ret = check_packet(skb, NULL, packets+i)) < 0) { + unit_fail("Packet %u has incorrect checksum unmodified", + i); + goto free_skb; + } +#endif + + eh = eth_hdr(skb); + orig_id = eh->h_proto; + + 
action.type = OFPAT_SET_DL_VLAN; + + // Add a random vlan tag + new_id = (uint16_t) random32() & VLAN_VID_MASK; + action.arg.vlan_id = new_id; + skb = execute_setter(skb, eth_proto, &key, &action); + vh = vlan_eth_hdr(skb); + if (ntohs(vh->h_vlan_TCI) != new_id) { + unit_fail("add: vlan id doesn't match: %#x != %#x", + ntohs(vh->h_vlan_TCI), new_id); + return -1; + } + flow_extract(skb, 0, &key); +#if 0 + if ((ret = check_packet(skb, NULL, packets+i)) < 0) { + unit_fail("Packet %u has incorrect checksum after adding vlan", + i); + goto free_skb; + } +#endif + + // Modify the tag + new_id = (uint16_t) random32() & VLAN_VID_MASK; + action.arg.vlan_id = new_id; + skb = execute_setter(skb, eth_proto, &key, &action); + vh = vlan_eth_hdr(skb); + if (ntohs(vh->h_vlan_TCI) != new_id) { + unit_fail("mod: vlan id doesn't match: %#x != %#x", + ntohs(vh->h_vlan_TCI), new_id); + return -1; + } + flow_extract(skb, 0, &key); +#if 0 + if ((ret = check_packet(skb, NULL, packets+i)) < 0) { + unit_fail("Packet %u has incorrect checksum after modifying vlan", + i); + goto free_skb; + } +#endif + + // Remove the tag + action.arg.vlan_id = OFP_VLAN_NONE; + skb = execute_setter(skb, eth_proto, &key, &action); + + eh = eth_hdr(skb); + + if (eh->h_proto != orig_id) { + unit_fail("del: vlan id doesn't match: %#x != %#x", + ntohs(eh->h_proto), ntohs(orig_id)); + return -1; + } +#if 0 + if ((ret = check_packet(skb, NULL, packets+i)) < 0) { + unit_fail("Packet %u has incorrect checksum after removing vlan", + i); + goto free_skb; + } + + free_skb: +#endif + + kfree_skb(skb); + + if (ret != 0) + break; + } + + if (ret == 0) + printk("\nVLAN actions test passed.\n"); + + return ret; + + +} + +/* + * Actual test: Given packets in forward_t.h, executes all actions with random + * data, checking for consistency described in check_packet(). 
+ */ + +void +run_forward_t(void) +{ + test_vlan(); + test_l3_l4(); +} diff --git a/datapath/linux-2.4/.gitignore b/datapath/linux-2.4/.gitignore new file mode 100644 index 00000000..c7afe5b2 --- /dev/null +++ b/datapath/linux-2.4/.gitignore @@ -0,0 +1,19 @@ +/Makefile +/datapath.c +/chain.c +/crc32.c +/crc_t.c +/dp_dev.c +/flow.c +/forward.c +/forward_t.c +/datapath_t.c +/kernel-src.inc +/table-hash.c +/table-linear.c +/table-mac.c +/table_t.c +/unit-exports.c +/unit.c +/kernel-src.inc +/tmp diff --git a/datapath/linux-2.4/Makefile.in b/datapath/linux-2.4/Makefile.in new file mode 100644 index 00000000..8e4ba5ba --- /dev/null +++ b/datapath/linux-2.4/Makefile.in @@ -0,0 +1,100 @@ +export builddir = @abs_builddir@ +export srcdir = @abs_srcdir@ +export top_srcdir = @abs_top_srcdir@ +export KSRC = @KSRC24@ +export KVERSION = 2.4 +export VMDIR = @VMDIR@ + +SHAREDFILES = ../chain.c ../crc32.c ../table-hash.c ../table-linear.c \ + ../table-mac.c ../forward.c ../flow.c ../unit-exports.c \ + ../dp_dev.c ../datapath_t.c + +SIMLINKFILES = $(patsubst ../%,%, $(SHAREDFILES)) + +CFILES = datapath.c $(SIMLINKFILES) + +# create local symlinks of shared files +%.c : ../%.c + ln -s $< $@ + +COMPAT24_CFILES = \ + compat-2.4/random32.c \ + compat-2.4/genetlink.c \ + compat-2.4/netlink.c \ + compat-2.4/attr.c \ + compat-2.4/rcupdate.c \ + compat-2.4/string.c \ + compat-2.4/kernel.c \ + compat-2.4/compat24.c + +UNIT_CFILES = \ + ../table_t.c \ + ../unit.c + +SHARED_T_FILES = \ + ../forward_t.c \ + ../table_t.c \ + ../crc_t.c \ + ../unit.c +UNIT_CFILES = $(patsubst ../%,%, $(SHARED_T_FILES)) + +kFILES = ../datapath.h \ + ../chain.h \ + ../crc32.h \ + ../flow.h \ + ../forward.h \ + ../table.h \ + ../datapath_t.h \ + compat-2.4/include/linux/rcupdate.h \ + compat-2.4/include/linux/jiffies.h \ + compat-2.4/include/linux/ipv6.h \ + compat-2.4/include/linux/timer.h \ + compat-2.4/include/linux/if_vlan.h \ + compat-2.4/include/linux/types.h \ + compat-2.4/include/linux/skbuff.h \ + 
compat-2.4/include/linux/etherdevice.h \ + compat-2.4/include/linux/tcp.h \ + compat-2.4/include/linux/genetlink.h \ + compat-2.4/include/linux/sockios.h \ + compat-2.4/include/linux/list.h \ + compat-2.4/include/linux/udp.h \ + compat-2.4/include/linux/slab.h \ + compat-2.4/include/linux/random.h \ + compat-2.4/include/linux/mutex.h \ + compat-2.4/include/linux/ip.h \ + compat-2.4/include/linux/string.h \ + compat-2.4/include/linux/netlink.h \ + compat-2.4/include/linux/compiler.h \ + compat-2.4/include/linux/kernel.h \ + compat-2.4/include/linux/if_ether.h \ + compat-2.4/include/net/checksum.h \ + compat-2.4/include/net/genetlink.h \ + compat-2.4/include/net/netlink.h \ + compat-2.4/include/asm/system.h \ + compat-2.4/compat24.h + +VERSION = @VERSION@ + +CFLAGS_EXTRA += -DVERSION=\"$(VERSION)\" +CFLAGS_EXTRA += -O2 -g +CFLAGS_EXTRA += -I $(srcdir)/.. -I $(top_srcdir)/include + +# Conditional include so that make dist doesn't puke when +# this build isn't turned on by automake +ifneq (,$(KSRC)) +include $(srcdir)/kbuild.inc +else +clean: +endif + +distclean : clean +distdir : clean +install : + + +PWD := $(shell pwd) + +all: default + + +check: all diff --git a/datapath/linux-2.4/README b/datapath/linux-2.4/README new file mode 100644 index 00000000..87f4cdef --- /dev/null +++ b/datapath/linux-2.4/README @@ -0,0 +1,6 @@ +Linux kernel 2.4 specific build: + + - Backports: compat-2.4/genetlink.c is a back-port of the + Linux 2.6 Generic Netlink functionality. It must be loaded + as module genetlink_mod.o before openflow_mod.o may be + loaded. diff --git a/datapath/linux-2.4/compat-2.4/TODO b/datapath/linux-2.4/compat-2.4/TODO new file mode 100644 index 00000000..c3e45611 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/TODO @@ -0,0 +1,2 @@ +* Properly port RCU to Linux 2.4. In particular we will need support + for call_rcu to properly do flow-table updating. 
diff --git a/datapath/linux-2.4/compat-2.4/attr.c b/datapath/linux-2.4/compat-2.4/attr.c new file mode 100644 index 00000000..a00841cf --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/attr.c @@ -0,0 +1,436 @@ +/* + * NETLINK Netlink attributes + * + * Authors: Thomas Graf + * Alexey Kuznetsov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static u16 nla_attr_minlen[NLA_TYPE_MAX+1] = { + [NLA_U8] = sizeof(u8), + [NLA_U16] = sizeof(u16), + [NLA_U32] = sizeof(u32), + [NLA_U64] = sizeof(u64), + [NLA_NESTED] = NLA_HDRLEN, +}; + +static int validate_nla(struct nlattr *nla, int maxtype, + const struct nla_policy *policy) +{ + const struct nla_policy *pt; + int minlen = 0, attrlen = nla_len(nla); + + if (nla->nla_type <= 0 || nla->nla_type > maxtype) + return 0; + + pt = &policy[nla->nla_type]; + + BUG_ON(pt->type > NLA_TYPE_MAX); + + switch (pt->type) { + case NLA_FLAG: + if (attrlen > 0) + return -ERANGE; + break; + + case NLA_NUL_STRING: + if (pt->len) + minlen = min_t(int, attrlen, pt->len + 1); + else + minlen = attrlen; + + if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) + return -EINVAL; + /* fall through */ + + case NLA_STRING: + if (attrlen < 1) + return -ERANGE; + + if (pt->len) { + char *buf = nla_data(nla); + + if (buf[attrlen - 1] == '\0') + attrlen--; + + if (attrlen > pt->len) + return -ERANGE; + } + break; + + case NLA_BINARY: + if (pt->len && attrlen > pt->len) + return -ERANGE; + break; + + default: + if (pt->len) + minlen = pt->len; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (attrlen < minlen) + return -ERANGE; + } + + return 0; +} + +/** + * nla_validate - Validate a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * Validates all attributes in the specified attribute stream against the + * specified policy. 
Attributes with a type exceeding maxtype will be + * ignored. See documentation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. + */ +int nla_validate(struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + nla_for_each_attr(nla, head, len, rem) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + err = 0; +errout: + return err; +} + +/** + * nla_parse - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessible via the attribute type. Attributes with a type + * exceeding maxtype will be silently ignored for backwards compatibility + * reasons. policy may be set to NULL if no validation is required. + * + * Returns 0 on success or a negative error code. + */ +int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + const struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + u16 type = nla->nla_type; + + if (type > 0 && type <= maxtype) { + if (policy) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + tb[type] = nla; + } + } + + if (unlikely(rem > 0)) + printk(KERN_WARNING "netlink: %d bytes leftover after parsing " + "attributes.\n", rem); + + err = 0; +errout: + return err; +} + +/** + * nla_find - Find a specific attribute in a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @attrtype: type of attribute to look for + * + * Returns the first attribute in the stream matching the specified type. 
+ */ +struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) +{ + struct nlattr *nla; + int rem; + + nla_for_each_attr(nla, head, len, rem) + if (nla->nla_type == attrtype) + return nla; + + return NULL; +} + +/** + * nla_strlcpy - Copy string attribute payload into a sized buffer + * @dst: where to copy the string to + * @nla: attribute to copy the string from + * @dstsize: size of destination buffer + * + * Copies at most dstsize - 1 bytes into the destination buffer. + * The result is always a valid NUL-terminated string. Unlike + * strlcpy the destination buffer is always padded out. + * + * Returns the length of the source buffer. + */ +size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) +{ + size_t srclen = nla_len(nla); + char *src = nla_data(nla); + + if (srclen > 0 && src[srclen - 1] == '\0') + srclen--; + + if (dstsize > 0) { + size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; + + memset(dst, 0, dstsize); + memcpy(dst, src, len); + } + + return srclen; +} + +/** + * nla_memcpy - Copy a netlink attribute into another memory area + * @dest: where to copy to + * @src: netlink attribute to copy from + * @count: size of the destination area + * + * Note: The number of bytes copied is limited by the length of + * attribute's payload. + * + * Returns the number of bytes copied. 
+ */ +int nla_memcpy(void *dest, struct nlattr *src, int count) +{ + int minlen = min_t(int, count, nla_len(src)); + + memcpy(dest, nla_data(src), minlen); + + return minlen; +} + +/** + * nla_memcmp - Compare an attribute with sized memory area + * @nla: netlink attribute + * @data: memory area + * @size: size of memory area + */ +int nla_memcmp(const struct nlattr *nla, const void *data, + size_t size) +{ + int d = nla_len(nla) - size; + + if (d == 0) + d = memcmp(nla_data(nla), data, size); + + return d; +} + +/** + * nla_strcmp - Compare a string attribute against a string + * @nla: netlink string attribute + * @str: another string + */ +int nla_strcmp(const struct nlattr *nla, const char *str) +{ + int len = strlen(str) + 1; + int d = nla_len(nla) - len; + + if (d == 0) + d = memcmp(nla_data(nla), str, len); + + return d; +} + +/** + * __nla_reserve - reserve room for attribute on the skb + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. + */ +struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) +{ + struct nlattr *nla; + + nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); + nla->nla_type = attrtype; + nla->nla_len = nla_attr_size(attrlen); + + memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); + + return nla; +} + +/** + * __nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the payload. 
+ */ +void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + void *start; + + start = skb_put(skb, NLA_ALIGN(attrlen)); + memset(start, 0, NLA_ALIGN(attrlen)); + + return start; +} + +/** + * nla_reserve - reserve room for attribute on the skb + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) + return NULL; + + return __nla_reserve(skb, attrtype, attrlen); +} + +/** + * nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return NULL; + + return __nla_reserve_nohdr(skb, attrlen); +} + +/** + * __nla_put - Add a netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. 
+ */ +void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, + const void *data) +{ + struct nlattr *nla; + + nla = __nla_reserve(skb, attrtype, attrlen); + memcpy(nla_data(nla), data, attrlen); +} + +/** + * __nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute payload. + */ +void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + void *start; + + start = __nla_reserve_nohdr(skb, attrlen); + memcpy(start, data, attrlen); +} + +/** + * nla_put - Add a netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -1 if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) + return -1; + + __nla_put(skb, attrtype, attrlen, data); + return 0; +} + +/** + * nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -1 if the tailroom of the skb is insufficient to store + * the attribute payload. 
+ */ +int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -1; + + __nla_put_nohdr(skb, attrlen, data); + return 0; +} + +EXPORT_SYMBOL(nla_validate); +EXPORT_SYMBOL(nla_parse); +EXPORT_SYMBOL(nla_find); +EXPORT_SYMBOL(nla_strlcpy); +EXPORT_SYMBOL(__nla_reserve); +EXPORT_SYMBOL(__nla_reserve_nohdr); +EXPORT_SYMBOL(nla_reserve); +EXPORT_SYMBOL(nla_reserve_nohdr); +EXPORT_SYMBOL(__nla_put); +EXPORT_SYMBOL(__nla_put_nohdr); +EXPORT_SYMBOL(nla_put); +EXPORT_SYMBOL(nla_put_nohdr); +EXPORT_SYMBOL(nla_memcpy); +EXPORT_SYMBOL(nla_memcmp); +EXPORT_SYMBOL(nla_strcmp); diff --git a/datapath/linux-2.4/compat-2.4/compat24.c b/datapath/linux-2.4/compat-2.4/compat24.c new file mode 100644 index 00000000..13641ff1 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/compat24.c @@ -0,0 +1,27 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + */ + +#include +#include "compat24.h" + +int __init compat24_init(void) +{ + int err; + + rcu_init(); + + err = random32_init(); + if (err) + return err; + + return genl_init(); + +} +module_init(compat24_init); + +void __exit compat24_exit(void) +{ + genl_exit(); +} +module_exit(compat24_exit); diff --git a/datapath/linux-2.4/compat-2.4/compat24.h b/datapath/linux-2.4/compat-2.4/compat24.h new file mode 100644 index 00000000..4e7038d8 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/compat24.h @@ -0,0 +1,11 @@ +#ifndef __COMPAT24_H +#define __COMPAT24_H 1 + +int genl_init(void); +void genl_exit(void); + +int random32_init(void); + +void rcu_init(void); + +#endif /* compat24.h */ diff --git a/datapath/linux-2.4/compat-2.4/genetlink.c b/datapath/linux-2.4/compat-2.4/genetlink.c new file mode 100644 index 00000000..c9fc55a6 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/genetlink.c @@ -0,0 +1,810 @@ +/* + * NETLINK Generic Netlink Family + * + * Authors: Jamal Hadi Salim + * Thomas Graf + * Johannes Berg + */ + +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include + +#include "compat24.h" + +struct sock *genl_sock = NULL; + +static DECLARE_MUTEX(genl_mutex); /* serialization of message processing */ + +static void genl_lock(void) +{ + down(&genl_mutex); +} + +static int genl_trylock(void) +{ + return down_trylock(&genl_mutex); +} + +static void genl_unlock(void) +{ + up(&genl_mutex); + + if (genl_sock && genl_sock->receive_queue.qlen) + genl_sock->data_ready(genl_sock, 0); +} + +#define GENL_FAM_TAB_SIZE 16 +#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) + +static struct list_head family_ht[GENL_FAM_TAB_SIZE]; +/* + * Bitmap of multicast groups that are currently in use. + * + * To avoid an allocation at boot of just one unsigned long, + * declare it global instead. + * Bit 0 is marked as already used since group 0 is invalid. + */ +static unsigned long mc_group_start = 0x1; +static unsigned long *mc_groups = &mc_group_start; +static unsigned long mc_groups_longs = 1; + +static int genl_ctrl_event(int event, void *data); + +static inline unsigned int genl_family_hash(unsigned int id) +{ + return id & GENL_FAM_TAB_MASK; +} + +static inline struct list_head *genl_family_chain(unsigned int id) +{ + return &family_ht[genl_family_hash(id)]; +} + +static struct genl_family *genl_family_find_byid(unsigned int id) +{ + struct genl_family *f; + + list_for_each_entry(f, genl_family_chain(id), family_list) + if (f->id == id) + return f; + + return NULL; +} + +static struct genl_family *genl_family_find_byname(char *name) +{ + struct genl_family *f; + int i; + + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) + list_for_each_entry(f, genl_family_chain(i), family_list) + if (strcmp(f->name, name) == 0) + return f; + + return NULL; +} + +static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) +{ + struct genl_ops *ops; + + list_for_each_entry(ops, &family->ops_list, ops_list) + if (ops->cmd == cmd) + return ops; + + return NULL; +} + +/* Of course we are 
going to have problems once we hit + * 2^16 alive types, but that can only happen by year 2K +*/ +static inline u16 genl_generate_id(void) +{ + static u16 id_gen_idx; + int overflowed = 0; + + do { + if (id_gen_idx == 0) + id_gen_idx = GENL_MIN_ID; + + if (++id_gen_idx > GENL_MAX_ID) { + if (!overflowed) { + overflowed = 1; + id_gen_idx = 0; + continue; + } else + return 0; + } + + } while (genl_family_find_byid(id_gen_idx)); + + return id_gen_idx; +} + +static struct genl_multicast_group notify_grp; + +/** + * genl_register_mc_group - register a multicast group + * + * Registers the specified multicast group and notifies userspace + * about the new group. + * + * Returns 0 on success or a negative error code. + * + * @family: The generic netlink family the group shall be registered for. + * @grp: The group to register, must have a name. + */ +int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + int id; + + BUG_ON(grp->name[0] == '\0'); + + genl_lock(); + + /* special-case our own group: the controller's notify group always gets id GENL_ID_CTRL */ + if (grp == &notify_grp) + id = GENL_ID_CTRL; + else + id = find_first_zero_bit(mc_groups, + mc_groups_longs * BITS_PER_LONG); + + + if (id >= mc_groups_longs * BITS_PER_LONG) { + genl_unlock(); + return -ENOMEM; + } + + grp->id = id; + set_bit(id, mc_groups); + list_add_tail(&grp->list, &family->mcast_groups); + grp->family = family; + + genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp); + genl_unlock(); + return 0; +} +EXPORT_SYMBOL(genl_register_mc_group); + +static void __genl_unregister_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + BUG_ON(grp->family != family); + + /* We should clear this multicast group from any subscribers, but 2.4 + * doesn't have the proper interface to do it, and we'd need a patch to + * implement it. 
*/ + /*netlink_clear_multicast_users(genl_sock, grp->id);*/ + clear_bit(grp->id, mc_groups); + list_del(&grp->list); + genl_ctrl_event(CTRL_CMD_DELMCAST_GRP, grp); + grp->id = 0; + grp->family = NULL; +} + +/** + * genl_unregister_mc_group - unregister a multicast group + * + * Unregisters the specified multicast group and notifies userspace + * about it. On Linux 2.4 current listeners on the group are not removed. + * + * Note: It is not necessary to unregister all multicast groups before + * unregistering the family, unregistering the family will cause + * all assigned multicast groups to be unregistered automatically. + * + * @family: Generic netlink family the group belongs to. + * @grp: The group to unregister, must have been registered successfully + * previously. + */ +void genl_unregister_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + genl_lock(); + __genl_unregister_mc_group(family, grp); + genl_unlock(); +} +EXPORT_SYMBOL(genl_unregister_mc_group); + +static void genl_unregister_mc_groups(struct genl_family *family) +{ + struct genl_multicast_group *grp, *tmp; + + genl_lock(); + list_for_each_entry_safe(grp, tmp, &family->mcast_groups, list) + __genl_unregister_mc_group(family, grp); + genl_unlock(); +} + +/** + * genl_register_ops - register generic netlink operations + * @family: generic netlink family + * @ops: operations to be registered + * + * Registers the specified operations and assigns them to the specified + * family. Either a doit or dumpit callback must be specified or the + * operation will fail. Only one operation structure per command + * identifier may be registered. + * + * See include/net/genetlink.h for more documentation on the operations + * structure. + * + * Returns 0 on success or a negative error code. 
+ */ +int genl_register_ops(struct genl_family *family, struct genl_ops *ops) +{ + int err = -EINVAL; + + if (ops->dumpit == NULL && ops->doit == NULL) + goto errout; + + if (genl_get_cmd(ops->cmd, family)) { + err = -EEXIST; + goto errout; + } + + if (ops->dumpit) + ops->flags |= GENL_CMD_CAP_DUMP; + if (ops->doit) + ops->flags |= GENL_CMD_CAP_DO; + if (ops->policy) + ops->flags |= GENL_CMD_CAP_HASPOL; + + genl_lock(); + list_add_tail(&ops->ops_list, &family->ops_list); + genl_unlock(); + + genl_ctrl_event(CTRL_CMD_NEWOPS, ops); + err = 0; +errout: + return err; +} + +/** + * genl_unregister_ops - unregister generic netlink operations + * @family: generic netlink family + * @ops: operations to be unregistered + * + * Unregisters the specified operations and unassigns them from the + * specified family. The operation blocks until the current message + * processing has finished and doesn't start again until the + * unregister process has finished. + * + * Note: It is not necessary to unregister all operations before + * unregistering the family, unregistering the family will cause + * all assigned operations to be unregistered automatically. + * + * Returns 0 on success or a negative error code. + */ +int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) +{ + struct genl_ops *rc; + + genl_lock(); + list_for_each_entry(rc, &family->ops_list, ops_list) { + if (rc == ops) { + list_del(&ops->ops_list); + genl_unlock(); + genl_ctrl_event(CTRL_CMD_DELOPS, ops); + return 0; + } + } + genl_unlock(); + + return -ENOENT; +} + +/** + * genl_register_family - register a generic netlink family + * @family: generic netlink family + * + * Registers the specified family after validating it first. Only one + * family may be registered with the same family name or identifier. + * The family id may equal GENL_ID_GENERATE causing an unique id to + * be automatically generated and assigned. + * + * Return 0 on success or a negative error code. 
+ */ +int genl_register_family(struct genl_family *family) +{ + int err = -EINVAL; + + if (family->id && family->id < GENL_MIN_ID) + goto errout; + + if (family->id > GENL_MAX_ID) + goto errout; + + INIT_LIST_HEAD(&family->ops_list); + INIT_LIST_HEAD(&family->mcast_groups); + + genl_lock(); + + if (genl_family_find_byname(family->name)) { + err = -EEXIST; + goto errout_locked; + } + + if (genl_family_find_byid(family->id)) { + err = -EEXIST; + goto errout_locked; + } + + if (family->id == GENL_ID_GENERATE) { + u16 newid = genl_generate_id(); + + if (!newid) { + err = -ENOMEM; + goto errout_locked; + } + + family->id = newid; + } + + if (family->maxattr) { + family->attrbuf = kmalloc((family->maxattr+1) * + sizeof(struct nlattr *), GFP_KERNEL); + if (family->attrbuf == NULL) { + err = -ENOMEM; + goto errout_locked; + } + } else + family->attrbuf = NULL; + + list_add_tail(&family->family_list, genl_family_chain(family->id)); + MOD_INC_USE_COUNT; + genl_unlock(); + + genl_ctrl_event(CTRL_CMD_NEWFAMILY, family); + + return 0; + +errout_locked: + genl_unlock(); +errout: + return err; +} + +/** + * genl_unregister_family - unregister generic netlink family + * @family: generic netlink family + * + * Unregisters the specified family. + * + * Returns 0 on success or a negative error code. 
+ */ +int genl_unregister_family(struct genl_family *family) +{ + struct genl_family *rc; + + genl_unregister_mc_groups(family); + + genl_lock(); + + list_for_each_entry(rc, genl_family_chain(family->id), family_list) { + if (family->id != rc->id || strcmp(rc->name, family->name)) + continue; + + list_del(&rc->family_list); + INIT_LIST_HEAD(&family->ops_list); + MOD_DEC_USE_COUNT; /* drop the count taken by MOD_INC_USE_COUNT in genl_register_family() */ + genl_unlock(); + + kfree(family->attrbuf); + genl_ctrl_event(CTRL_CMD_DELFAMILY, family); + return 0; + } + + genl_unlock(); /* family was never registered: use count must stay untouched */ + + return -ENOENT; +} + +static int null_done_func(struct netlink_callback *cb) +{ + return 0; +} + +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct genl_ops *ops; + struct genl_family *family; + struct genl_info info; + struct genlmsghdr *hdr = nlmsg_data(nlh); + int hdrlen, err; + + family = genl_family_find_byid(nlh->nlmsg_type); + if (family == NULL) + return -ENOENT; + + hdrlen = GENL_HDRLEN + family->hdrsize; + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + ops = genl_get_cmd(hdr->cmd, family); + if (ops == NULL) + return -EOPNOTSUPP; + + if ((ops->flags & GENL_ADMIN_PERM) && !capable(CAP_NET_ADMIN)) + return -EPERM; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + if (ops->dumpit == NULL) + return -EOPNOTSUPP; + + return netlink_dump_start(genl_sock, skb, nlh, + ops->dumpit, + ops->done ?: null_done_func); + } + + if (ops->doit == NULL) + return -EOPNOTSUPP; + + if (family->attrbuf) { + err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr, + ops->policy); + if (err < 0) + return err; + } + + info.snd_seq = nlh->nlmsg_seq; + info.snd_pid = NETLINK_CB(skb).pid; + info.nlhdr = nlh; + info.genlhdr = nlmsg_data(nlh); + info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; + info.attrs = family->attrbuf; + + return ops->doit(skb, &info); +} + +static void genl_rcv(struct sock *sk, int len) +{ + unsigned int qlen = 0; + + do { + if (genl_trylock()) + return; + netlink_run_queue(sk, &qlen, genl_rcv_msg); + 
genl_unlock(); + } while (qlen && genl_sock && genl_sock->receive_queue.qlen); +} + +/************************************************************************** + * Controller + **************************************************************************/ + +static struct genl_family genl_ctrl = { + .id = GENL_ID_CTRL, + .name = "nlctrl", + .version = 0x2, + .maxattr = CTRL_ATTR_MAX, +}; + +static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, + u32 flags, struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + if (hdr == NULL) + return -1; + + NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); + NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); + NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version); + NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize); + NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr); + + if (!list_empty(&family->ops_list)) { + struct nlattr *nla_ops; + struct genl_ops *ops; + int idx = 1; + + nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); + if (nla_ops == NULL) + goto nla_put_failure; + + list_for_each_entry(ops, &family->ops_list, ops_list) { + struct nlattr *nest; + + nest = nla_nest_start(skb, idx++); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); + NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); + + nla_nest_end(skb, nest); + } + + nla_nest_end(skb, nla_ops); + } + + if (!list_empty(&family->mcast_groups)) { + struct genl_multicast_group *grp; + struct nlattr *nla_grps; + int idx = 1; + + nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + if (nla_grps == NULL) + goto nla_put_failure; + + list_for_each_entry(grp, &family->mcast_groups, list) { + struct nlattr *nest; + + nest = nla_nest_start(skb, idx++); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_MCAST_GRP_ID, grp->id); + NLA_PUT_STRING(skb, CTRL_ATTR_MCAST_GRP_NAME, + grp->name); + + nla_nest_end(skb, nest); + } + 
nla_nest_end(skb, nla_grps); + } + + return genlmsg_end(skb, hdr); + +nla_put_failure: + return genlmsg_cancel(skb, hdr); +} + +static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, + u32 seq, u32 flags, struct sk_buff *skb, + u8 cmd) +{ + void *hdr; + struct nlattr *nla_grps; + struct nlattr *nest; + + hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + if (hdr == NULL) + return -1; + + NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, grp->family->name); + NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, grp->family->id); + + nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + if (nla_grps == NULL) + goto nla_put_failure; + + nest = nla_nest_start(skb, 1); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_MCAST_GRP_ID, grp->id); + NLA_PUT_STRING(skb, CTRL_ATTR_MCAST_GRP_NAME, + grp->name); + + nla_nest_end(skb, nest); + nla_nest_end(skb, nla_grps); + + return genlmsg_end(skb, hdr); + +nla_put_failure: + return genlmsg_cancel(skb, hdr); +} + +static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) +{ + + int i, n = 0; + struct genl_family *rt; + int chains_to_skip = cb->args[0]; + int fams_to_skip = cb->args[1]; + + if (chains_to_skip != 0) + genl_lock(); + + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { + if (i < chains_to_skip) + continue; + n = 0; + list_for_each_entry(rt, genl_family_chain(i), family_list) { + if (++n < fams_to_skip) + continue; + if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, CTRL_CMD_NEWFAMILY) < 0) + goto errout; + } + + fams_to_skip = 0; + } + +errout: + if (chains_to_skip != 0) + genl_unlock(); + + cb->args[0] = i; + cb->args[1] = n; + + return skb->len; +} + +static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, + u32 pid, int seq, u8 cmd) +{ + struct sk_buff *skb; + int err; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb == NULL) + return ERR_PTR(-ENOBUFS); + + err = ctrl_fill_info(family, pid, seq, 0, 
skb, cmd); + if (err < 0) { + nlmsg_free(skb); + return ERR_PTR(err); + } + + return skb; +} + +static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, + u32 pid, int seq, u8 cmd) +{ + struct sk_buff *skb; + int err; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb == NULL) + return ERR_PTR(-ENOBUFS); + + err = ctrl_fill_mcgrp_info(grp, pid, seq, 0, skb, cmd); + if (err < 0) { + nlmsg_free(skb); + return ERR_PTR(err); + } + + return skb; +} + +static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = { + [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, + [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, + .len = GENL_NAMSIZ - 1 }, +}; + +static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + struct genl_family *res = NULL; + int err = -EINVAL; + + if (info->attrs[CTRL_ATTR_FAMILY_ID]) { + u16 id = nla_get_u16(info->attrs[CTRL_ATTR_FAMILY_ID]); + res = genl_family_find_byid(id); + } + + if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { + char *name; + + name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]); + res = genl_family_find_byname(name); + } + + if (res == NULL) { + err = -ENOENT; + goto errout; + } + + msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq, + CTRL_CMD_NEWFAMILY); + if (IS_ERR(msg)) { + err = PTR_ERR(msg); + goto errout; + } + + err = genlmsg_reply(msg, info); +errout: + return err; +} + +static int genl_ctrl_event(int event, void *data) +{ + struct sk_buff *msg; + + if (genl_sock == NULL) + return 0; + + switch (event) { + case CTRL_CMD_NEWFAMILY: + case CTRL_CMD_DELFAMILY: + msg = ctrl_build_family_msg(data, 0, 0, event); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); + break; + case CTRL_CMD_NEWMCAST_GRP: + case CTRL_CMD_DELMCAST_GRP: + msg = ctrl_build_mcgrp_msg(data, 0, 0, event); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); + break; + } + + return 0; +} 
+ +static struct genl_ops genl_ctrl_ops = { + .cmd = CTRL_CMD_GETFAMILY, + .doit = ctrl_getfamily, + .dumpit = ctrl_dumpfamily, + .policy = ctrl_policy, +}; + +static struct genl_multicast_group notify_grp = { + .name = "notify", +}; + +int __init genl_init(void) +{ + int i, err; + + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) + INIT_LIST_HEAD(&family_ht[i]); + + err = genl_register_family(&genl_ctrl); + if (err < 0) + goto errout; + + err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops); + if (err < 0) + goto errout_register; + + netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); + genl_sock = netlink_kernel_create(NETLINK_GENERIC, genl_rcv); + if (genl_sock == NULL) + panic("GENL: Cannot initialize generic netlink\n"); + + err = genl_register_mc_group(&genl_ctrl, &notify_grp); /* controller's own "notify" group */ + if (err < 0) + goto errout_register; + + return 0; + +errout_register: + genl_unregister_family(&genl_ctrl); +errout: + panic("GENL: Cannot register controller: %d\n", err); +} + +void __exit genl_exit(void) +{ + int err; + + err = genl_unregister_ops(&genl_ctrl, &genl_ctrl_ops); + if (err) { + printk("GENL: cannot unregister ops (%d)\n", err); + return; + } + + err = genl_unregister_family(&genl_ctrl); + if (err) { + printk("GENL: cannot unregister family (%d)\n", err); + return; + } + +} + +EXPORT_SYMBOL(genl_sock); +EXPORT_SYMBOL(genl_register_ops); +EXPORT_SYMBOL(genl_unregister_ops); +EXPORT_SYMBOL(genl_register_family); +EXPORT_SYMBOL(genl_unregister_family); + +MODULE_LICENSE("GPL"); diff --git a/datapath/linux-2.4/compat-2.4/include-arm/asm/atomic.h b/datapath/linux-2.4/compat-2.4/include-arm/asm/atomic.h new file mode 100644 index 00000000..1a1bb450 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include-arm/asm/atomic.h @@ -0,0 +1,56 @@ +#ifndef __ASM_ARM_ATOMIC_H_WRAPPER +#define __ASM_ARM_ATOMIC_H_WRAPPER 1 + +#include_next + +#error "Cribbed from linux-2.6/include/asm-arm/atomic.h but untested" + +#ifdef __KERNEL__ + +#if __LINUX_ARM_ARCH__ >= 6 + +static inline int 
atomic_cmpxchg(atomic_t *ptr, int old, int new) +{ + unsigned long oldval, res; + + do { + __asm__ __volatile__("@ atomic_cmpxchg\n" + "ldrex %1, [%2]\n" + "mov %0, #0\n" + "teq %1, %3\n" + "strexeq %0, %4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (&ptr->counter), "Ir" (old), "r" (new) + : "cc"); + } while (res); + + return oldval; +} + +#else /* ARM_ARCH_6 */ + +#include + +#ifdef CONFIG_SMP +#error SMP not supported on pre-ARMv6 CPUs +#endif + +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + int ret; + unsigned long flags; + + raw_local_irq_save(flags); + ret = v->counter; + if (likely(ret == old)) + v->counter = new; + raw_local_irq_restore(flags); + + return ret; +} + +#endif /* __LINUX_ARM_ARCH__ */ + +#endif /* __KERNEL__ */ + +#endif /* asm/atomic.h */ diff --git a/datapath/linux-2.4/compat-2.4/include-i386/asm/atomic.h b/datapath/linux-2.4/compat-2.4/include-i386/asm/atomic.h new file mode 100644 index 00000000..7badb562 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include-i386/asm/atomic.h @@ -0,0 +1,10 @@ +#ifndef __ASM_I386_ATOMIC_WRAPPER_H +#define __ASM_I386_ATOMIC_WRAPPER_H 1 + +#include_next + +#include + +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) + +#endif /* atomic.h */ diff --git a/datapath/linux-2.4/compat-2.4/include-mips/asm/atomic.h b/datapath/linux-2.4/compat-2.4/include-mips/asm/atomic.h new file mode 100644 index 00000000..735c6168 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include-mips/asm/atomic.h @@ -0,0 +1,11 @@ +#ifndef __ASM_MIPS_ATOMIC_H_WRAPPER +#define __ASM_MIPS_ATOMIC_H_WRAPPER 1 + +#include_next +#include + +#error "Cribbed from linux-2.6/include/asm-mips but not tested." 
+ +#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) + +#endif /* asm/atomic.h */ diff --git a/datapath/linux-2.4/compat-2.4/include-mips/asm/barrier.h b/datapath/linux-2.4/compat-2.4/include-mips/asm/barrier.h new file mode 100644 index 00000000..ae1d6460 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include-mips/asm/barrier.h @@ -0,0 +1,29 @@ +#ifndef __ASM_MIPS_BARRIER_H_WRAPPER +#define __ASM_MIPS_BARRIER_H_WRAPPER 1 + +#include + +#error "Cribbed from linux-2.6/include/asm-mips/barrier.h but untested." + +/* Not sure whether these really need to be defined, but the conservative + * choice seems to be to define them. */ +#define CONFIG_WEAK_ORDERING 1 +#define CONFIG_WEAK_REORDERING_BEYOND_LLSC 1 + +#if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP) +#define __WEAK_ORDERING_MB " sync \n" +#else +#define __WEAK_ORDERING_MB " \n" +#endif +#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP) +#define __WEAK_LLSC_MB " sync \n" +#else +#define __WEAK_LLSC_MB " \n" +#endif + +#define smp_mb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory") +#define smp_rmb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory") +#define smp_wmb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory") + + +#endif /* asm/barrier.h */ diff --git a/datapath/linux-2.4/compat-2.4/include-mips/asm/system.h b/datapath/linux-2.4/compat-2.4/include-mips/asm/system.h new file mode 100644 index 00000000..c1b08154 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include-mips/asm/system.h @@ -0,0 +1,268 @@ +#ifndef __ASM_MIPS_SYSTEM_H_WRAPPER +#define __ASM_MIPS_SYSTEM_H_WRAPPER 1 + +#include_next + +#error "Cribbed from linux-2.6/include/asm-mips/system.h but untested." 
+ +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg_u32(volatile int * m, unsigned long old, + unsigned long new) +{ + __u32 retval; + + if (cpu_has_llsc && R10000_LLSC_WAR) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: ll %0, %2 # __cmpxchg_u32 \n" + " bne %0, %z3, 2f \n" + " .set mips0 \n" + " move $1, %z4 \n" + " .set mips3 \n" + " sc $1, %1 \n" + " beqzl $1, 1b \n" + "2: \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else if (cpu_has_llsc) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: ll %0, %2 # __cmpxchg_u32 \n" + " bne %0, %z3, 2f \n" + " .set mips0 \n" + " move $1, %z4 \n" + " .set mips3 \n" + " sc $1, %1 \n" + " beqz $1, 3f \n" + "2: \n" + " .subsection 2 \n" + "3: b 1b \n" + " .previous \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else { + unsigned long flags; + + raw_local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + raw_local_irq_restore(flags); /* implies memory barrier */ + } + + smp_llsc_mb(); + + return retval; +} + +static inline unsigned long __cmpxchg_u32_local(volatile int * m, + unsigned long old, unsigned long new) +{ + __u32 retval; + + if (cpu_has_llsc && R10000_LLSC_WAR) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: ll %0, %2 # __cmpxchg_u32 \n" + " bne %0, %z3, 2f \n" + " .set mips0 \n" + " move $1, %z4 \n" + " .set mips3 \n" + " sc $1, %1 \n" + " beqzl $1, 1b \n" + "2: \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else if (cpu_has_llsc) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: ll %0, %2 # __cmpxchg_u32 \n" + " bne %0, %z3, 2f \n" + " .set mips0 \n" + " move $1, %z4 \n" + " .set mips3 \n" + " sc $1, %1 \n" + " beqz $1, 1b \n" + "2: \n" + " .set pop 
\n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else { + unsigned long flags; + + local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + local_irq_restore(flags); /* implies memory barrier */ + } + + return retval; +} + +#ifdef CONFIG_64BIT +static inline unsigned long __cmpxchg_u64(volatile int * m, unsigned long old, + unsigned long new) +{ + __u64 retval; + + if (cpu_has_llsc && R10000_LLSC_WAR) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: lld %0, %2 # __cmpxchg_u64 \n" + " bne %0, %z3, 2f \n" + " move $1, %z4 \n" + " scd $1, %1 \n" + " beqzl $1, 1b \n" + "2: \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else if (cpu_has_llsc) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: lld %0, %2 # __cmpxchg_u64 \n" + " bne %0, %z3, 2f \n" + " move $1, %z4 \n" + " scd $1, %1 \n" + " beqz $1, 3f \n" + "2: \n" + " .subsection 2 \n" + "3: b 1b \n" + " .previous \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else { + unsigned long flags; + + raw_local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + raw_local_irq_restore(flags); /* implies memory barrier */ + } + + smp_llsc_mb(); + + return retval; +} + +static inline unsigned long __cmpxchg_u64_local(volatile int * m, + unsigned long old, unsigned long new) +{ + __u64 retval; + + if (cpu_has_llsc && R10000_LLSC_WAR) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: lld %0, %2 # __cmpxchg_u64 \n" + " bne %0, %z3, 2f \n" + " move $1, %z4 \n" + " scd $1, %1 \n" + " beqzl $1, 1b \n" + "2: \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else if (cpu_has_llsc) { + __asm__ __volatile__( + " .set push \n" + " .set noat \n" + " .set mips3 \n" + "1: lld %0, %2 # 
__cmpxchg_u64 \n" + " bne %0, %z3, 2f \n" + " move $1, %z4 \n" + " scd $1, %1 \n" + " beqz $1, 1b \n" + "2: \n" + " .set pop \n" + : "=&r" (retval), "=R" (*m) + : "R" (*m), "Jr" (old), "Jr" (new) + : "memory"); + } else { + unsigned long flags; + + local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + local_irq_restore(flags); /* implies memory barrier */ + } + + return retval; +} + +#else +extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels( + volatile int * m, unsigned long old, unsigned long new); +#define __cmpxchg_u64 __cmpxchg_u64_unsupported_on_32bit_kernels +extern unsigned long __cmpxchg_u64_local_unsupported_on_32bit_kernels( + volatile int * m, unsigned long old, unsigned long new); +#define __cmpxchg_u64_local __cmpxchg_u64_local_unsupported_on_32bit_kernels +#endif + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32(ptr, old, new); + case 8: + return __cmpxchg_u64(ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +static inline unsigned long __cmpxchg_local(volatile void * ptr, + unsigned long old, unsigned long new, int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_local(ptr, old, new); + case 8: + return __cmpxchg_u64_local(ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr,old,new) \ + ((__typeof__(*(ptr)))__cmpxchg((ptr), \ + (unsigned long)(old), (unsigned long)(new),sizeof(*(ptr)))) + +#define cmpxchg_local(ptr,old,new) \ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \ + (unsigned long)(old), (unsigned long)(new),sizeof(*(ptr)))) + +#endif /* asm/system.h */ diff --git a/datapath/linux-2.4/compat-2.4/include/asm/system.h 
b/datapath/linux-2.4/compat-2.4/include/asm/system.h new file mode 100644 index 00000000..6a1656b9 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/asm/system.h @@ -0,0 +1,18 @@ +#ifndef __ASM_SYSTEM_WRAPPER_H +#define __ASM_SYSTEM_WRAPPER_H 1 + +#include_next <asm/system.h> + +#ifdef CONFIG_ALPHA /* only this branch emits a real "mb" instruction */ +#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory") +#else +#define read_barrier_depends() do { } while(0) +#endif + +#ifdef CONFIG_SMP +#define smp_read_barrier_depends() read_barrier_depends() +#else +#define smp_read_barrier_depends() do { } while(0) +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/compiler.h b/datapath/linux-2.4/compat-2.4/include/linux/compiler.h new file mode 100644 index 00000000..3ac3ca1d --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/compiler.h @@ -0,0 +1,8 @@ +#ifndef __LINUX_COMPILER_WRAPPER_H +#define __LINUX_COMPILER_WRAPPER_H + +#include_next <linux/compiler.h> + +# define __force + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/delay.h b/datapath/linux-2.4/compat-2.4/include/linux/delay.h new file mode 100644 index 00000000..d6d277c3 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/delay.h @@ -0,0 +1,59 @@ +#ifndef __LINUX_DELAY_WRAPPER_H +#define __LINUX_DELAY_WRAPPER_H 1 + +#include_next <linux/delay.h> + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,29) +#include +#include +/* + * We define MAX_MSEC_OFFSET as the maximal value that can be accepted by + * msecs_to_jiffies() without risking a multiply overflow. This function + * returns MAX_JIFFY_OFFSET for arguments above those values. + */ + +#if HZ <= 1000 && !(1000 % HZ) +# define MAX_MSEC_OFFSET \ + (ULONG_MAX - (1000 / HZ) + 1) +#elif HZ > 1000 && !(HZ % 1000) +# define MAX_MSEC_OFFSET \ + (ULONG_MAX / (HZ / 1000)) +#else +# define MAX_MSEC_OFFSET \ + ((ULONG_MAX - 999) / HZ) +#endif + +/* + * Convert jiffies to milliseconds and back. 
+ * + * Avoid unnecessary multiplications/divisions in the + * two most common HZ cases: + */ +static inline unsigned int jiffies_to_msecs(const unsigned long j) +{ +#if HZ <= 1000 && !(1000 % HZ) + return (1000 / HZ) * j; +#elif HZ > 1000 && !(HZ % 1000) + return (j + (HZ / 1000) - 1)/(HZ / 1000); +#else + return (j * 1000) / HZ; +#endif +} + +static inline unsigned long msecs_to_jiffies(const unsigned int m) +{ + if (MAX_MSEC_OFFSET < UINT_MAX && m > (unsigned int)MAX_MSEC_OFFSET) + return MAX_JIFFY_OFFSET; +#if HZ <= 1000 && !(1000 % HZ) + return ((unsigned long)m + (1000 / HZ) - 1) / (1000 / HZ); +#elif HZ > 1000 && !(HZ % 1000) + return (unsigned long)m * (HZ / 1000); +#else + return ((unsigned long)m * HZ + 999) / 1000; +#endif +} + +#endif /* linux kernel < 2.4.29 */ + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/etherdevice.h b/datapath/linux-2.4/compat-2.4/include/linux/etherdevice.h new file mode 100644 index 00000000..05f9d661 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/etherdevice.h @@ -0,0 +1,71 @@ +#ifndef __LINUX_ETHERDEVICE_WRAPPER_H +#define __LINUX_ETHERDEVICE_WRAPPER_H 1 + +#include_next +#include + +/** + * is_multicast_ether_addr - Determine if the Ethernet address is a multicast. + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Return true if the address is a multicast address. + * By definition the broadcast address is also a multicast address. + */ +static inline int is_multicast_ether_addr(const u8 *addr) +{ + return (0x01 & addr[0]); +} + +/** + * is_local_ether_addr - Determine if the Ethernet address is locally-assigned + * one (IEEE 802). + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Return true if the address is a local address. 
+ */ +static inline int is_local_ether_addr(const u8 *addr) +{ + return (0x02 & addr[0]); +} + +/** + * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Return true if the address is the broadcast address. + */ +static inline int is_broadcast_ether_addr(const u8 *addr) +{ + return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5]) == 0xff; +} + +/** + * random_ether_addr - Generate software assigned random Ethernet address + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Generate a random Ethernet address (MAC) that is not multicast + * and has the local assigned bit set. + */ +static inline void random_ether_addr(u8 *addr) +{ + get_random_bytes (addr, ETH_ALEN); + addr [0] &= 0xfe; /* clear multicast bit */ + addr [0] |= 0x02; /* set local assignment bit (IEEE802) */ +} + +/** + * compare_ether_addr - Compare two Ethernet addresses + * @addr1: Pointer to a six-byte array containing the Ethernet address + * @addr2: Pointer other six-byte array containing the Ethernet address + * + * Compare two ethernet addresses, returns 0 if equal + */ +static inline unsigned compare_ether_addr(const u8 *addr1, const u8 *addr2) +{ + const u16 *a = (const u16 *) addr1; + const u16 *b = (const u16 *) addr2; + + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; +} + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/genetlink.h b/datapath/linux-2.4/compat-2.4/include/linux/genetlink.h new file mode 100644 index 00000000..7da02c93 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/genetlink.h @@ -0,0 +1,82 @@ +#ifndef __LINUX_GENERIC_NETLINK_H +#define __LINUX_GENERIC_NETLINK_H + +#include + +#define GENL_NAMSIZ 16 /* length of family name */ + +#define GENL_MIN_ID NLMSG_MIN_TYPE +#define GENL_MAX_ID 1023 + +struct genlmsghdr { + __u8 cmd; + __u8 version; + __u16 reserved; +}; + +#define GENL_HDRLEN 
NLMSG_ALIGN(sizeof(struct genlmsghdr)) + +#define GENL_ADMIN_PERM 0x01 +#define GENL_CMD_CAP_DO 0x02 +#define GENL_CMD_CAP_DUMP 0x04 +#define GENL_CMD_CAP_HASPOL 0x08 + +/* + * List of reserved static generic netlink identifiers: + */ +#define GENL_ID_GENERATE 0 +#define GENL_ID_CTRL NLMSG_MIN_TYPE + +/************************************************************************** + * Controller + **************************************************************************/ + +enum { + CTRL_CMD_UNSPEC, + CTRL_CMD_NEWFAMILY, + CTRL_CMD_DELFAMILY, + CTRL_CMD_GETFAMILY, + CTRL_CMD_NEWOPS, + CTRL_CMD_DELOPS, + CTRL_CMD_GETOPS, + CTRL_CMD_NEWMCAST_GRP, + CTRL_CMD_DELMCAST_GRP, + CTRL_CMD_GETMCAST_GRP, /* unused */ + __CTRL_CMD_MAX, +}; + +#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1) + +enum { + CTRL_ATTR_UNSPEC, + CTRL_ATTR_FAMILY_ID, + CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, + CTRL_ATTR_MCAST_GROUPS, + __CTRL_ATTR_MAX, +}; + +#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) + +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + +enum { + CTRL_ATTR_MCAST_GRP_UNSPEC, + CTRL_ATTR_MCAST_GRP_NAME, + CTRL_ATTR_MCAST_GRP_ID, + __CTRL_ATTR_MCAST_GRP_MAX, +}; + +#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) + +#endif /* __LINUX_GENERIC_NETLINK_H */ diff --git a/datapath/linux-2.4/compat-2.4/include/linux/gfp.h b/datapath/linux-2.4/compat-2.4/include/linux/gfp.h new file mode 100644 index 00000000..27881d3b --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/gfp.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_GFP_WRAPPER_H +#define __LINUX_GFP_WRAPPER_H 1 + +#include + +#endif /* linux/gfp.h */ diff --git a/datapath/linux-2.4/compat-2.4/include/linux/if_ether.h b/datapath/linux-2.4/compat-2.4/include/linux/if_ether.h new file mode 100644 index 00000000..2b2d3db3 --- /dev/null +++ 
b/datapath/linux-2.4/compat-2.4/include/linux/if_ether.h @@ -0,0 +1,15 @@ +#ifndef __LINUX_IF_ETHER_WRAPPER_H +#define __LINUX_IF_ETHER_WRAPPER_H 1 + +#include_next <linux/if_ether.h> + +#ifdef __KERNEL__ +#include + +static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) +{ + return (struct ethhdr *)skb_mac_header(skb); +} +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/if_vlan.h b/datapath/linux-2.4/compat-2.4/include/linux/if_vlan.h new file mode 100644 index 00000000..21629460 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/if_vlan.h @@ -0,0 +1,72 @@ +#ifndef __LINUX_IF_VLAN_WRAPPER_H +#define __LINUX_IF_VLAN_WRAPPER_H 1 + +#include_next <linux/if_vlan.h> + +#ifdef __KERNEL__ +#include +#include +#include + +static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) +{ + return (struct vlan_ethhdr *)skb_mac_header(skb); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,26) +static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, unsigned short tag) +{ + struct vlan_ethhdr *veth; + + if (skb_headroom(skb) < VLAN_HLEN) { + struct sk_buff *sk_tmp = skb; + skb = skb_realloc_headroom(sk_tmp, VLAN_HLEN); + kfree_skb(sk_tmp); + if (!skb) { + printk(KERN_ERR "vlan: failed to realloc headroom\n"); + return NULL; + } + } else { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) { + printk(KERN_ERR "vlan: failed to unshare skbuff\n"); + return NULL; + } + } + + veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN); + + /* Move the mac addresses to the beginning of the new header. */ + memmove(skb->data, skb->data + VLAN_HLEN, 2 * VLAN_ETH_ALEN); + + /* first, the ethernet type */ + veth->h_vlan_proto = __constant_htons(ETH_P_8021Q); + + /* now, the tag */ + veth->h_vlan_TCI = htons(tag); + + skb_reset_mac_header(skb); + + return skb; +} + +#else + +#define vlan_put_tag(x,y) fix_vlan_put_tag((x),(y)) + +/* For some reason, older versions of vlan_put_tag do not adjust the + * pointer to the beginning of the MAC header. 
We get around that by + * this hack. Ugh. A NULL result from the real call is passed through. */ +static inline struct sk_buff *fix_vlan_put_tag(struct sk_buff *skb, unsigned short tag) +{ + skb = (vlan_put_tag)(skb, tag); + if (skb) + skb_reset_mac_header(skb); + + return skb; +} +#endif + +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/ip.h b/datapath/linux-2.4/compat-2.4/include/linux/ip.h new file mode 100644 index 00000000..b2fbdb93 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/ip.h @@ -0,0 +1,15 @@ +#ifndef __LINUX_IP_WRAPPER_H +#define __LINUX_IP_WRAPPER_H 1 + +#include_next <linux/ip.h> + +#ifdef __KERNEL__ +#include + +static inline struct iphdr *ip_hdr(const struct sk_buff *skb) +{ + return (struct iphdr *)skb_network_header(skb); +} +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/ipv6.h b/datapath/linux-2.4/compat-2.4/include/linux/ipv6.h new file mode 100644 index 00000000..42b5ac0a --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/ipv6.h @@ -0,0 +1,11 @@ +#ifndef __LINUX_IPV6_WRAPPER_H +#define __LINUX_IPV6_WRAPPER_H 1 + +#include_next <linux/ipv6.h> + +static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb) +{ + return (struct ipv6hdr *)skb_network_header(skb); +} + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/jiffies.h b/datapath/linux-2.4/compat-2.4/include/linux/jiffies.h new file mode 100644 index 00000000..718fe91d --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/jiffies.h @@ -0,0 +1,10 @@ +#ifndef __LINUX_JIFFIES_WRAPPER_H +#define __LINUX_JIFFIES_WRAPPER_H 1 + +#include +#include +#include + +extern unsigned long volatile jiffies; + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/kernel.h b/datapath/linux-2.4/compat-2.4/include/linux/kernel.h new file mode 100644 index 00000000..329dab53 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/kernel.h @@ -0,0 +1,43 @@ +#ifndef __LINUX_KERNEL_WRAPPER_H +#define __LINUX_KERNEL_WRAPPER_H 1 + +#include_next <linux/kernel.h> + +/** + * container_of - 
cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +/* + * Check at compile time that something is of a particular type. + * Always evaluates to 1 so you may use it easily in comparisons. + */ +#define typecheck(type,x) \ +({ type __dummy; \ + typeof(x) __dummy2; \ + (void)(&__dummy == &__dummy2); \ + 1; \ +}) + +/* + * Check at compile time that 'function' is a certain type, or is a pointer + * to that type (needs to use typedef for the function type.) + */ +#define typecheck_fn(type,function) \ +({ typeof(type) __tmp = function; \ + (void)__tmp; \ +}) + +int vprintk(const char *msg, ...) + __attribute__((format(printf, 1, 0))); + +/* Force a compilation error if condition is true */ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/list.h b/datapath/linux-2.4/compat-2.4/include/linux/list.h new file mode 100644 index 00000000..af98d8c6 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/list.h @@ -0,0 +1,510 @@ +#ifndef __LINUX_LIST_WRAPPER_H +#define __LINUX_LIST_WRAPPER_H + +#ifdef __KERNEL__ + +#include_next +#include + +#define LIST_POISON1 ((void *) 0x00100100) +#define LIST_POISON2 ((void *) 0x00200200) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! 
+ */ +static inline void __list_add_rcu(struct list_head * new, + struct list_head * prev, struct list_head * next) +{ + new->next = next; + new->prev = prev; + smp_wmb(); + next->prev = new; + prev->next = new; +} + +/** + * list_add_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_rcu(struct list_head *new, struct list_head *head) +{ + __list_add_rcu(new, head, head->next); +} + +/** + * list_add_tail_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_tail_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_tail_rcu(struct list_head *new, + struct list_head *head) +{ + __list_add_rcu(new, head->prev, head); +} + +/** + * list_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * Note: list_empty() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. 
+ * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_del_rcu() + * or list_add_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_del_rcu(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->prev = LIST_POISON2; +} + +/** + * list_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + * Note: @old should not be empty. + */ +static inline void list_replace_rcu(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->prev = old->prev; + smp_wmb(); + new->next->prev = new; + new->prev->next = new; + old->prev = LIST_POISON2; +} +/** + * list_for_each_rcu - iterate over an rcu-protected list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). 
+ */ +#define list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + prefetch(rcu_dereference(pos)->next), pos != (head); \ + pos = pos->next) + +#define __list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + rcu_dereference(pos) != (head); \ + pos = pos->next) + +/** + * list_for_each_safe_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + * + * Iterate over an rcu-protected list, safe against removal of list entry. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_safe_rcu(pos, n, head) \ + for (pos = (head)->next; \ + n = rcu_dereference(pos)->next, pos != (head); \ + pos = n) + +/** + * list_for_each_entry_rcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + prefetch(rcu_dereference(pos)->member.next), \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + + +/** + * list_for_each_continue_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * Iterate over an rcu-protected list, continuing after current point. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). 
+ */ +#define list_for_each_continue_rcu(pos, head) \ + for ((pos) = (pos)->next; \ + prefetch(rcu_dereference((pos))->next), (pos) != (head); \ + (pos) = (pos)->next) + +/* + * Double linked lists with a single pointer list head. + * Mostly useful for hash tables where the two pointer list head is + * too wasteful. + * You lose the ability to access the tail in O(1). + */ + +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +#define HLIST_HEAD_INIT { .first = NULL } +#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } +#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) +static inline void INIT_HLIST_NODE(struct hlist_node *h) +{ + h->next = NULL; + h->pprev = NULL; +} + +static inline int hlist_unhashed(const struct hlist_node *h) +{ + return !h->pprev; +} + +static inline int hlist_empty(const struct hlist_head *h) +{ + return !h->first; +} + +static inline void __hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + *pprev = next; + if (next) + next->pprev = pprev; +} + +static inline void hlist_del(struct hlist_node *n) +{ + __hlist_del(n); + n->next = LIST_POISON1; + n->pprev = LIST_POISON2; +} + +/** + * hlist_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: list_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. 
+ * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry(). + */ +static inline void hlist_del_rcu(struct hlist_node *n) +{ + __hlist_del(n); + n->pprev = LIST_POISON2; +} + +static inline void hlist_del_init(struct hlist_node *n) +{ + if (!hlist_unhashed(n)) { + __hlist_del(n); + INIT_HLIST_NODE(n); + } +} + +/** + * hlist_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + */ +static inline void hlist_replace_rcu(struct hlist_node *old, + struct hlist_node *new) +{ + struct hlist_node *next = old->next; + + new->next = next; + new->pprev = old->pprev; + smp_wmb(); + if (next) + new->next->pprev = &new->next; + *new->pprev = new; + old->pprev = LIST_POISON2; +} + +static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + if (first) + first->pprev = &n->next; + h->first = n; + n->pprev = &h->first; +} + + +/** + * hlist_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). 
+ */ +static inline void hlist_add_head_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + n->pprev = &h->first; + smp_wmb(); + if (first) + first->pprev = &n->next; + h->first = n; +} + +/* next must be != NULL */ +static inline void hlist_add_before(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + next->pprev = &n->next; + *(n->pprev) = n; +} + +static inline void hlist_add_after(struct hlist_node *n, + struct hlist_node *next) +{ + next->next = n->next; + n->next = next; + next->pprev = &n->next; + + if(next->next) + next->next->pprev = &next->next; +} + +/** + * hlist_add_before_rcu + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * Description: + * Adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + smp_wmb(); + next->pprev = &n->next; + *(n->pprev) = n; +} + +/** + * hlist_add_after_rcu + * @prev: the existing element to add the new element after. + * @n: the new element to add to the hash list. + * + * Description: + * Adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. 
+ * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_after_rcu(struct hlist_node *prev, + struct hlist_node *n) +{ + n->next = prev->next; + n->pprev = &prev->next; + smp_wmb(); + prev->next = n; + if (n->next) + n->next->pprev = &n->next; +} + +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_for_each(pos, head) \ + for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ + pos = pos->next) + +#define hlist_for_each_safe(pos, n, head) \ + for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ + pos = n) + +/** + * hlist_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_continue - iterate over a hlist continuing after current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @member: the name of the hlist_node within the struct. 
+ */ +#define hlist_for_each_entry_continue(tpos, pos, member) \ + for (pos = (pos)->next; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_from - iterate over a hlist continuing from current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_from(tpos, pos, member) \ + for (; pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @n: another &struct hlist_node to use as temporary storage + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->first; \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + +/** + * hlist_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). 
+ */ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = (head)->first; \ + rcu_dereference(pos) && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) +/** + * list_for_each_entry_safe - iterate over list of given type safe against + * removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) +#endif /* linux kernel < 2.4.23 */ + + +#else +#warning "don't include kernel headers in userspace" +#endif /* __KERNEL__ */ +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/module.h b/datapath/linux-2.4/compat-2.4/include/linux/module.h new file mode 100644 index 00000000..797a330a --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/module.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_MODULE_WRAPPER_H +#define __LINUX_MODULE_WRAPPER_H 1 + +#include_next <linux/module.h> + +static inline int try_module_get(struct module *module) +{ + BUG_ON(module != THIS_MODULE); + MOD_INC_USE_COUNT; + return 1; +} + +static inline void module_put(struct module *module) +{ + BUG_ON(module != THIS_MODULE); + MOD_DEC_USE_COUNT; +} + +#endif /* module.h */ diff --git a/datapath/linux-2.4/compat-2.4/include/linux/mutex.h b/datapath/linux-2.4/compat-2.4/include/linux/mutex.h new file mode 100644 index 00000000..98cf07a5 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/mutex.h @@ -0,0 +1,58 @@ +#ifndef __LINUX_MUTEX_H +#define __LINUX_MUTEX_H + +#include <asm/semaphore.h> + +struct mutex { + struct semaphore sema; +}; + +#define
mutex_init(mutex) init_MUTEX(&(mutex)->sema) +#define mutex_destroy(mutex) do { } while (0) + +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = { __MUTEX_INITIALIZER(mutexname.sema) } + +/** + * mutex_is_locked - is the mutex locked + * @lock: the mutex to be queried + * + * Returns 1 if the mutex is locked (semaphore count <= 0, i.e. held, possibly with waiters), 0 if unlocked. + */ +static inline int mutex_is_locked(struct mutex *lock) +{ + return sem_getcount(&lock->sema) <= 0; +} + +/* + * See kernel/mutex.c for detailed documentation of these APIs. + * Also see Documentation/mutex-design.txt. + */ +static inline void mutex_lock(struct mutex *lock) +{ + down(&lock->sema); +} + +static inline int mutex_lock_interruptible(struct mutex *lock) +{ + return down_interruptible(&lock->sema); +} + +#define mutex_lock_nested(lock, subclass) mutex_lock(lock) +#define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) + +/* + * NOTE: mutex_trylock() follows the spin_trylock() convention, + * not the down_trylock() convention! + */ +static inline int mutex_trylock(struct mutex *lock) +{ + return !down_trylock(&lock->sema); +} + +static inline void mutex_unlock(struct mutex *lock) +{ + up(&lock->sema); +} + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/netdevice.h b/datapath/linux-2.4/compat-2.4/include/linux/netdevice.h new file mode 100644 index 00000000..6eba6513 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/netdevice.h @@ -0,0 +1,31 @@ +#ifndef __LINUX_NETDEVICE_WRAPPER_H +#define __LINUX_NETDEVICE_WRAPPER_H 1 + +#include_next <linux/netdevice.h> + +/*---------------------------------------------------------------------------- + * In 2.6.24, a namespace argument became required for dev_get_by_name.
+ */ +#define net_init NULL + +#ifdef dev_get_by_name +#undef dev_get_by_name +#define dev_get_by_name(net, name) \ + compat_dev_get_by_name((name)) +static inline struct net_device *compat_dev_get_by_name(const char *name) +{ + return (_set_ver(dev_get_by_name))(name); +} +#else +#define dev_get_by_name(net, name) \ + dev_get_by_name((name)) +#endif /* dev_get_by_name */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) +static inline void *netdev_priv(struct net_device *dev) +{ + return dev->priv; +} +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/netlink.h b/datapath/linux-2.4/compat-2.4/include/linux/netlink.h new file mode 100644 index 00000000..69089e44 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/netlink.h @@ -0,0 +1,78 @@ +#ifndef __LINUX_NETLINK_WRAPPER_H +#define __LINUX_NETLINK_WRAPPER_H 1 + +#include_next + +#define NETLINK_GENERIC 16 + +#undef NLMSG_LENGTH +#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) +#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN)) + +#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ + +enum { + NETLINK_UNCONNECTED = 0, + NETLINK_CONNECTED, +}; + +/* + * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)--> + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (struct nlattr) | ing | | ing | + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * <-------------- nlattr->nla_len --------------> + */ + +struct nlattr +{ + __u16 nla_len; + __u16 nla_type; +}; + +#define NLA_ALIGNTO 4 +#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) +#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) + +#ifdef __KERNEL__ + +#include +#include + +static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) +{ + return (struct nlmsghdr *)skb->data; +} + +#define __nlmsg_put __rpl_nlmsg_put +static __inline__ struct nlmsghdr * +__nlmsg_put(struct sk_buff *skb, u32 
pid, u32 seq, int type, int len, int flags) +{ + struct nlmsghdr *nlh; + int size = NLMSG_LENGTH(len); + + nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); + nlh->nlmsg_type = type; + nlh->nlmsg_len = size; + nlh->nlmsg_flags = flags; + nlh->nlmsg_pid = pid; + nlh->nlmsg_seq = seq; + memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); + return nlh; +} + +#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN) + +#undef NLMSG_NEW +#define NLMSG_NEW(skb, pid, seq, type, len, flags) \ +({ if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) \ + goto nlmsg_failure; \ + __nlmsg_put(skb, pid, seq, type, len, flags); }) +#endif + +#undef NLMSG_PUT +#define NLMSG_PUT(skb, pid, seq, type, len) \ + NLMSG_NEW(skb, pid, seq, type, len, 0) + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/random.h b/datapath/linux-2.4/compat-2.4/include/linux/random.h new file mode 100644 index 00000000..381f955c --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/random.h @@ -0,0 +1,11 @@ +#ifndef __LINUX_RANDOM_WRAPPER_H +#define __LINUX_RANDOM_WRAPPER_H 1 + +#include_next + +#ifdef __KERNEL__ +u32 random32(void); +void srandom32(u32 seed); +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/rcupdate.h b/datapath/linux-2.4/compat-2.4/include/linux/rcupdate.h new file mode 100644 index 00000000..ae197ece --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/rcupdate.h @@ -0,0 +1,205 @@ +/* + * Read-Copy Update mechanism for mutual exclusion + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2001 + * + * Author: Dipankar Sarma + * + * Based on the original work by Paul McKenney + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * Papers: + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) + * + * For detailed explanation of Read-Copy Update mechanism see - + * http://lse.sourceforge.net/locking/rcupdate.html + * + */ + +#ifndef __LINUX_RCUPDATE_H +#define __LINUX_RCUPDATE_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +#ifdef CONFIG_SMP +#error "SMP configurations not supported for RCU backport." +#endif + +/** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list + * @func: actual update function to call after the grace period. + */ +struct rcu_head { + struct rcu_head *next; + void (*func)(struct rcu_head *head); +}; + +#define RCU_HEAD_INIT { } +#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT +#define INIT_RCU_HEAD(ptr) do { } while (0) + + + +/** + * rcu_read_lock - mark the beginning of an RCU read-side critical section. + * + * When synchronize_rcu() is invoked on one CPU while other CPUs + * are within RCU read-side critical sections, then the + * synchronize_rcu() is guaranteed to block until after all the other + * CPUs exit their critical sections. Similarly, if call_rcu() is invoked + * on one CPU while other CPUs are within RCU read-side critical + * sections, invocation of the corresponding RCU callback is deferred + * until after the all the other CPUs exit their critical sections. 
+ * + * Note, however, that RCU callbacks are permitted to run concurrently + * with RCU read-side critical sections. One way that this can happen + * is via the following sequence of events: (1) CPU 0 enters an RCU + * read-side critical section, (2) CPU 1 invokes call_rcu() to register + * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, + * (4) CPU 2 enters a RCU read-side critical section, (5) the RCU + * callback is invoked. This is legal, because the RCU read-side critical + * section that was running concurrently with the call_rcu() (and which + * therefore might be referencing something that the corresponding RCU + * callback would free up) has completed before the corresponding + * RCU callback is invoked. + * + * RCU read-side critical sections may be nested. Any deferred actions + * will be deferred until the outermost RCU read-side critical section + * completes. + * + * It is illegal to block while in an RCU read-side critical section. + */ +#define rcu_read_lock() \ + do { } while(0) + +/** + * rcu_read_unlock - marks the end of an RCU read-side critical section. + * + * See rcu_read_lock() for more information. + */ +#define rcu_read_unlock() \ + do { } while(0) + +/* + * So where is rcu_write_lock()? It does not exist, as there is no + * way for writers to lock out RCU readers. This is a feature, not + * a bug -- this property is what provides RCU's performance benefits. + * Of course, writers must coordinate with each other. The normal + * spinlock primitives work well for this, but any other technique may be + * used as well. RCU does not care how the writers keep out of each + * others' way, as long as they do so. + */ + +/** + * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section + * + * This is equivalent of rcu_read_lock(), but to be used when updates + * are being done using call_rcu_bh(). 
Since call_rcu_bh() callbacks + * consider completion of a softirq handler to be a quiescent state, + * a process in RCU read-side critical section must be protected by + * disabling softirqs. Read-side critical sections in interrupt context + * can use just rcu_read_lock(). + * + */ +#define rcu_read_lock_bh() \ + do { \ + local_bh_disable(); \ + } while(0) + +/* + * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section + * + * See rcu_read_lock_bh() for more information. + */ +#define rcu_read_unlock_bh() \ + do { \ + local_bh_enable(); \ + } while(0) + +/** + * rcu_dereference - fetch an RCU-protected pointer in an + * RCU read-side critical section. This pointer may later + * be safely dereferenced. + * + * Inserts memory barriers on architectures that require them + * (currently only the Alpha), and, more importantly, documents + * exactly which pointers are protected by RCU. + */ + +#define rcu_dereference(p) ({ \ + typeof(p) _________p1 = p; \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) + +/** + * rcu_assign_pointer - assign (publicize) a pointer to a newly + * initialized structure that will be dereferenced by RCU read-side + * critical sections. Returns the value assigned. + * + * Inserts memory barriers on architectures that require them + * (pretty much all of them other than x86), and also prevents + * the compiler from reordering the code that initializes the + * structure after the pointer assignment. More importantly, this + * call documents which pointers will be dereferenced by RCU read-side + * code. + */ + +#define rcu_assign_pointer(p, v) ({ \ + smp_wmb(); \ + (p) = (v); \ + }) + +/** + * synchronize_sched - block until all CPUs have exited any non-preemptive + * kernel code sequences. + * + * This means that all preempt_disable code sequences, including NMI and + * hardware-interrupt handlers, in progress on entry will have completed + * before this primitive returns. 
However, this does not guarantee that + * softirq handlers will have completed, since in some kernels, these + * handlers can run in process context, and can block. + * + * This primitive provides the guarantees made by the (now removed) + * synchronize_kernel() API. In contrast, synchronize_rcu() only + * guarantees that rcu_read_lock() sections will have completed. + * In "classic RCU", these two guarantees happen to be one and + * the same, but can differ in realtime RCU implementations. + */ +#define synchronize_sched() synchronize_rcu() + +/* Exported interfaces */ +void synchronize_rcu(void); +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); + +static inline void call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *head)) +{ + synchronize_rcu(); + func(head); +} +void synchronize_idle(void); +extern void rcu_barrier(void); + +#endif /* __KERNEL__ */ +#endif /* __LINUX_RCUPDATE_H */ diff --git a/datapath/linux-2.4/compat-2.4/include/linux/skbuff.h b/datapath/linux-2.4/compat-2.4/include/linux/skbuff.h new file mode 100644 index 00000000..2758520a --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/skbuff.h @@ -0,0 +1,130 @@ +#ifndef __LINUX_SKBUFF_WRAPPER_H +#define __LINUX_SKBUFF_WRAPPER_H 1 + +#include_next + + +#define mac_header mac.raw +#define network_header nh.raw + +/* Emulate Linux 2.6 behavior, in which kfree_skb silently ignores null pointer + * arguments. */ +#define kfree_skb(skb) kfree_skb_maybe_null(skb) +static inline void kfree_skb_maybe_null(struct sk_buff *skb) +{ + if (likely(skb != NULL)) + (kfree_skb)(skb); +} + +/* Note that CHECKSUM_PARTIAL is not implemented, but this allows us to at + * least test against it: see update_csum() in forward.c. 
*/ +#define CHECKSUM_PARTIAL 3 +#define CHECKSUM_COMPLETE CHECKSUM_HW + +static inline unsigned char *skb_transport_header(const struct sk_buff *skb) +{ + return skb->h.raw; +} + +static inline void skb_reset_transport_header(struct sk_buff *skb) +{ + skb->h.raw = skb->data; +} + +static inline void skb_set_transport_header(struct sk_buff *skb, + const int offset) +{ + skb->h.raw = skb->data + offset; +} + +static inline unsigned char *skb_network_header(const struct sk_buff *skb) +{ + return skb->nh.raw; +} + +static inline void skb_reset_network_header(struct sk_buff *skb) +{ + skb->nh.raw = skb->data; +} + +static inline void skb_set_network_header(struct sk_buff *skb, const int offset) +{ + skb->nh.raw = skb->data + offset; +} + +static inline unsigned char *skb_mac_header(const struct sk_buff *skb) +{ + return skb->mac.raw; +} + +static inline int skb_mac_header_was_set(const struct sk_buff *skb) +{ + return skb->mac.raw != NULL; +} + +static inline void skb_reset_mac_header(struct sk_buff *skb) +{ + skb->mac.raw = skb->data; +} + +static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) +{ + skb->mac.raw = skb->data + offset; +} +static inline int skb_transport_offset(const struct sk_buff *skb) +{ + return skb_transport_header(skb) - skb->data; +} + +static inline u32 skb_network_header_len(const struct sk_buff *skb) +{ + return skb->h.raw - skb->nh.raw; +} + +static inline int skb_network_offset(const struct sk_buff *skb) +{ + return skb_network_header(skb) - skb->data; +} + +static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) +{ + return skb->tail; +} + +static inline void skb_reset_tail_pointer(struct sk_buff *skb) +{ + skb->tail = skb->data; +} + +static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) +{ + skb->tail = skb->data + offset; +} + +/* + * CPUs often take a performance hit when accessing unaligned memory + * locations. 
The actual performance hit varies, it can be small if the + * hardware handles it or large if we have to take an exception and fix it + * in software. + * + * Since an ethernet header is 14 bytes network drivers often end up with + * the IP header at an unaligned offset. The IP header can be aligned by + * shifting the start of the packet by 2 bytes. Drivers should do this + * with: + * + * skb_reserve(NET_IP_ALIGN); + * + * The downside to this alignment of the IP header is that the DMA is now + * unaligned. On some architectures the cost of an unaligned DMA is high + * and this cost outweighs the gains made by aligning the IP header. + * + * Since this trade off varies between architectures, we allow NET_IP_ALIGN + * to be overridden. + */ +#ifndef NET_IP_ALIGN +#define NET_IP_ALIGN 2 +#endif + + + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/slab.h b/datapath/linux-2.4/compat-2.4/include/linux/slab.h new file mode 100644 index 00000000..e9342596 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/slab.h @@ -0,0 +1,44 @@ +#ifndef __LINUX_SLAB_WRAPPER_H +#define __LINUX_SLAB_WRAPPER_H 1 + +/* Kluge to let "struct kmem_cache" work in both 2.4 and 2.6. */ +#define kmem_cache_s kmem_cache + +#include_next + +static inline void *kzalloc(size_t size, gfp_t flags) +{ + void *p = kmalloc(size, flags); + if (p) + memset(p, 0, size); + return p; +} + +/* Mega-kluge to wrap 2.4 kmem_cache_create for compatibility with 2.6. 
*/ +#ifdef kmem_cache_create +#undef kmem_cache_create +#define kmem_cache_create(name, size, align, flags, ctor) \ + compat_kmem_cache_create(name, size, align, flags, ctor) +static inline struct kmem_cache * +compat_kmem_cache_create(const char *name, size_t size, + size_t align, unsigned long flags, + void (*ctor)(void *, struct kmem_cache *, + unsigned long)) +{ + return (_set_ver(kmem_cache_create))(name, size, align, flags, ctor, + NULL); +} +#else +#define kmem_cache_create(name, size, align, flags, ctor) \ + kmem_cache_create(name, size, align, flags, ctor, NULL) +#endif /* kmem_cache_create */ + +static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags) +{ + void *p = kmem_cache_alloc(k, flags); + if (p) + memset(p, 0, kmem_cache_size(k)); + return p; +} + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/sockios.h b/datapath/linux-2.4/compat-2.4/include/linux/sockios.h new file mode 100644 index 00000000..262fb389 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/sockios.h @@ -0,0 +1,12 @@ +#ifndef __LINUX_SOCKIOS_WRAPPER_H +#define __LINUX_SOCKIOS_WRAPPER_H 1 + +#include_next + +/* bridge calls */ +#define SIOCBRADDBR 0x89a0 /* create new bridge device */ +#define SIOCBRDELBR 0x89a1 /* remove bridge device */ +#define SIOCBRADDIF 0x89a2 /* add interface to bridge */ +#define SIOCBRDELIF 0x89a3 /* remove interface from bridge */ + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/spinlock.h b/datapath/linux-2.4/compat-2.4/include/linux/spinlock.h new file mode 100644 index 00000000..c18eb637 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/spinlock.h @@ -0,0 +1,8 @@ +#ifndef __LINUX_SPINLOCK_WRAPPER_H +#define __LINUX_SPINLOCK_WRAPPER_H 1 + +#include_next + +#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED + +#endif /* linux/spinlock.h */ diff --git a/datapath/linux-2.4/compat-2.4/include/linux/string.h b/datapath/linux-2.4/compat-2.4/include/linux/string.h new file mode 
100644 index 00000000..d491226a --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/string.h @@ -0,0 +1,10 @@ +#ifndef __LINUX_STRING_WRAPPER_H +#define __LINUX_STRING_WRAPPER_H 1 + +#include_next + +#ifndef __HAVE_ARCH_STRCSPN +size_t strcspn(const char *s, const char *reject); +#endif + +#endif /* linux/string.h */ diff --git a/datapath/linux-2.4/compat-2.4/include/linux/tcp.h b/datapath/linux-2.4/compat-2.4/include/linux/tcp.h new file mode 100644 index 00000000..7178e6b4 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/tcp.h @@ -0,0 +1,25 @@ +#ifndef __LINUX_TCP_WRAPPER_H +#define __LINUX_TCP_WRAPPER_H 1 + +#include_next + +#ifdef __KERNEL__ +#include + +static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb) +{ + return (struct tcphdr *)skb_transport_header(skb); +} + +static inline unsigned int tcp_hdrlen(const struct sk_buff *skb) +{ + return tcp_hdr(skb)->doff * 4; +} + +static inline unsigned int tcp_optlen(const struct sk_buff *skb) +{ + return (tcp_hdr(skb)->doff - 5) * 4; +} +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/timer.h b/datapath/linux-2.4/compat-2.4/include/linux/timer.h new file mode 100644 index 00000000..5a03721f --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/timer.h @@ -0,0 +1,96 @@ +#ifndef __LINUX_TIMER_WRAPPER_H +#define __LINUX_TIMER_WRAPPER_H 1 + +#include_next +#include +#include + +extern unsigned long volatile jiffies; + +static inline void setup_timer(struct timer_list * timer, + void (*function)(unsigned long), + unsigned long data) +{ + timer->function = function; + timer->data = data; + init_timer(timer); +} + +/** + * __round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. 
This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the @j parameter. + */ +static inline unsigned long __round_jiffies(unsigned long j, int cpu) +{ + int rem; + unsigned long original = j; + + /* + * We don't want all cpus firing their timers at once hitting the + * same lock or cachelines, so we skew each extra cpu with an extra + * 3 jiffies. This 3 jiffies came originally from the mm/ code which + * already did this. + * The skew is done by adding 3*cpunr, then round, then subtract this + * extra offset again. + */ + j += cpu * 3; + + rem = j % HZ; + + /* + * If the target jiffie is just after a whole second (which can happen + * due to delays of the timer irq, long irq off times etc etc) then + * we should round down to the whole second, not up. Use 1/4th second + * as cutoff for this rounding as an extreme upper bound for this. + */ + if (rem < HZ/4) /* round down */ + j = j - rem; + else /* round up */ + j = j - rem + HZ; + + /* now that we have rounded, subtract the extra skew again */ + j -= cpu * 3; + + if (j <= jiffies) /* rounding ate our timeout entirely; */ + return original; + return j; +} + + +/** + * round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. 
This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The return value is the rounded version of the @j parameter. + */ +static inline unsigned long round_jiffies(unsigned long j) +{ + return __round_jiffies(j, 0); // FIXME +} + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/types.h b/datapath/linux-2.4/compat-2.4/include/linux/types.h new file mode 100644 index 00000000..7c048f44 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/types.h @@ -0,0 +1,49 @@ +#ifndef __LINUX_TYPES_WRAPPER_H +#define __LINUX_TYPES_WRAPPER_H 1 + +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +#error These replacement header files are for use with Linux 2.4.x only. +#endif + +#include_next + +/* + * Below are truly Linux-specific types that should never collide with + * any application/library that wants linux/types.h. 
+ */ + +#ifdef __CHECKER__ +#define __bitwise__ __attribute__((bitwise)) +#else +#define __bitwise__ +#endif +#ifdef __CHECK_ENDIAN__ +#define __bitwise __bitwise__ +#else +#define __bitwise +#endif + +typedef __u16 __bitwise __le16; +typedef __u16 __bitwise __be16; +typedef __u32 __bitwise __le32; +typedef __u32 __bitwise __be32; +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __u64 __bitwise __le64; +typedef __u64 __bitwise __be64; +#endif +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + +#ifdef __KERNEL__ +typedef unsigned __bitwise__ gfp_t; + +#ifdef CONFIG_RESOURCES_64BIT +typedef u64 resource_size_t; +#else +typedef u32 resource_size_t; +#endif + +#endif /* __KERNEL__ */ + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/linux/udp.h b/datapath/linux-2.4/compat-2.4/include/linux/udp.h new file mode 100644 index 00000000..7fdf5b9d --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/linux/udp.h @@ -0,0 +1,15 @@ +#ifndef __LINUX_UDP_WRAPPER_H +#define __LINUX_UDP_WRAPPER_H 1 + +#include_next + +#ifdef __KERNEL__ +#include + +static inline struct udphdr *udp_hdr(const struct sk_buff *skb) +{ + return (struct udphdr *)skb_transport_header(skb); +} +#endif + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/net/checksum.h b/datapath/linux-2.4/compat-2.4/include/net/checksum.h new file mode 100644 index 00000000..9868c32c --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/net/checksum.h @@ -0,0 +1,11 @@ +#ifndef __NET_CHECKSUM_WRAPPER_H +#define __NET_CHECKSUM_WRAPPER_H 1 + +#include_next + +static inline __wsum csum_unfold(__sum16 n) +{ + return (__force __wsum)n; +} + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/net/genetlink.h b/datapath/linux-2.4/compat-2.4/include/net/genetlink.h new file mode 100644 index 00000000..decdda54 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/net/genetlink.h @@ -0,0 +1,252 @@ +#ifndef __NET_GENERIC_NETLINK_H +#define __NET_GENERIC_NETLINK_H + 
+#include +#include + +/** + * struct genl_multicast_group - generic netlink multicast group + * @name: name of the multicast group, names are per-family + * @id: multicast group ID, assigned by the core, to use with + * genlmsg_multicast(). + * @list: list entry for linking + * @family: pointer to family, need not be set before registering + */ +struct genl_multicast_group +{ + struct genl_family *family; /* private */ + struct list_head list; /* private */ + char name[GENL_NAMSIZ]; + u32 id; +}; + +/** + * struct genl_family - generic netlink family + * @id: protocol family idenfitier + * @hdrsize: length of user specific header in bytes + * @name: name of family + * @version: protocol version + * @maxattr: maximum number of attributes supported + * @attrbuf: buffer to store parsed attributes + * @ops_list: list of all assigned operations + * @family_list: family list + * @mcast_groups: multicast groups list + */ +struct genl_family +{ + unsigned int id; + unsigned int hdrsize; + char name[GENL_NAMSIZ]; + unsigned int version; + unsigned int maxattr; + struct nlattr ** attrbuf; /* private */ + struct list_head ops_list; /* private */ + struct list_head family_list; /* private */ + struct list_head mcast_groups; /* private */ +}; + +/** + * struct genl_info - receiving information + * @snd_seq: sending sequence number + * @snd_pid: netlink pid of sender + * @nlhdr: netlink message header + * @genlhdr: generic netlink message header + * @userhdr: user specific header + * @attrs: netlink attributes + */ +struct genl_info +{ + u32 snd_seq; + u32 snd_pid; + struct nlmsghdr * nlhdr; + struct genlmsghdr * genlhdr; + void * userhdr; + struct nlattr ** attrs; +}; + +/** + * struct genl_ops - generic netlink operations + * @cmd: command identifier + * @flags: flags + * @policy: attribute validation policy + * @doit: standard command callback + * @dumpit: callback for dumpers + * @done: completion callback for dumps + * @ops_list: operations list + */ +struct genl_ops +{ + 
u8 cmd; + unsigned int flags; + const struct nla_policy *policy; + int (*doit)(struct sk_buff *skb, + struct genl_info *info); + int (*dumpit)(struct sk_buff *skb, + struct netlink_callback *cb); + int (*done)(struct netlink_callback *cb); + struct list_head ops_list; +}; + +extern int genl_register_family(struct genl_family *family); +extern int genl_unregister_family(struct genl_family *family); +extern int genl_register_ops(struct genl_family *, struct genl_ops *ops); +extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops); +extern int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp); +extern void genl_unregister_mc_group(struct genl_family *family, + struct genl_multicast_group *grp); + +extern struct sock *genl_sock; + +/** + * genlmsg_put - Add generic netlink header to netlink message + * @skb: socket buffer holding the message + * @pid: netlink pid the message is addressed to + * @seq: sequence number (usually the one of the sender) + * @family: generic netlink family + * @flags netlink message flags + * @cmd: generic netlink command + * + * Returns pointer to user specific header + */ +static inline void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, + struct genl_family *family, int flags, u8 cmd) +{ + struct nlmsghdr *nlh; + struct genlmsghdr *hdr; + + nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN + + family->hdrsize, flags); + if (nlh == NULL) + return NULL; + + hdr = nlmsg_data(nlh); + hdr->cmd = cmd; + hdr->version = family->version; + hdr->reserved = 0; + + return (char *) hdr + GENL_HDRLEN; +} + +/** + * genlmsg_put_reply - Add generic netlink header to a reply message + * @skb: socket buffer holding the message + * @info: receiver info + * @family: generic netlink family + * @flags: netlink message flags + * @cmd: generic netlink command + * + * Returns pointer to user specific header + */ +static inline void *genlmsg_put_reply(struct sk_buff *skb, + struct genl_info *info, + 
struct genl_family *family, + int flags, u8 cmd) +{ + return genlmsg_put(skb, info->snd_pid, info->snd_seq, family, + flags, cmd); +} + +/** + * genlmsg_end - Finalize a generic netlink message + * @skb: socket buffer the message is stored in + * @hdr: user specific header + */ +static inline int genlmsg_end(struct sk_buff *skb, void *hdr) +{ + return nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); +} + +/** + * genlmsg_cancel - Cancel construction of a generic netlink message + * @skb: socket buffer the message is stored in + * @hdr: user specific header + */ +static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr) +{ + return nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); +} + +/** + * genlmsg_multicast - multicast a netlink message + * @skb: netlink message as socket buffer + * @pid: own netlink pid to avoid sending to yourself + * @group: multicast group id + * @flags: allocation flags + */ +static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid, + unsigned int group, gfp_t flags) +{ + return nlmsg_multicast(genl_sock, skb, pid, group, flags); +} + +/** + * genlmsg_unicast - unicast a netlink message + * @skb: netlink message as socket buffer + * @pid: netlink pid of the destination socket + */ +static inline int genlmsg_unicast(struct sk_buff *skb, u32 pid) +{ + return nlmsg_unicast(genl_sock, skb, pid); +} + +/** + * genlmsg_reply - reply to a request + * @skb: netlink message to be sent back + * @info: receiver information + */ +static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) +{ + return genlmsg_unicast(skb, info->snd_pid); +} + +/** + * genlmsg_data - head of message payload + * @gnlh: genetlink message header + */ +static inline void *genlmsg_data(const struct genlmsghdr *gnlh) +{ + return ((unsigned char *) gnlh + GENL_HDRLEN); +} + +/** + * genlmsg_len - length of message payload + * @gnlh: genetlink message header + */ +static inline int genlmsg_len(const struct genlmsghdr *gnlh) +{ + 
struct nlmsghdr *nlh = (struct nlmsghdr *)((unsigned char *)gnlh - + NLMSG_HDRLEN); + return (nlh->nlmsg_len - GENL_HDRLEN - NLMSG_HDRLEN); +} + +/** + * genlmsg_msg_size - length of genetlink message not including padding + * @payload: length of message payload + */ +static inline int genlmsg_msg_size(int payload) +{ + return GENL_HDRLEN + payload; +} + +/** + * genlmsg_total_size - length of genetlink message including padding + * @payload: length of message payload + */ +static inline int genlmsg_total_size(int payload) +{ + return NLMSG_ALIGN(genlmsg_msg_size(payload)); +} + +/** + * genlmsg_new - Allocate a new generic netlink message + * @payload: size of the message payload + * @flags: the type of memory to allocate. + */ +static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) +{ + return nlmsg_new(genlmsg_total_size(payload), flags); +} + + +#endif /* __NET_GENERIC_NETLINK_H */ diff --git a/datapath/linux-2.4/compat-2.4/include/net/llc_pdu.h b/datapath/linux-2.4/compat-2.4/include/net/llc_pdu.h new file mode 100644 index 00000000..e254dd71 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/net/llc_pdu.h @@ -0,0 +1,11 @@ +#ifndef __NET_LLC_PDU_H +#define __NET_LLC_PDU_H 1 + +/* Un-numbered PDU format (3 bytes in length) */ +struct llc_pdu_un { + u8 dsap; + u8 ssap; + u8 ctrl_1; +}; + +#endif diff --git a/datapath/linux-2.4/compat-2.4/include/net/netlink.h b/datapath/linux-2.4/compat-2.4/include/net/netlink.h new file mode 100644 index 00000000..46cdafd9 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/include/net/netlink.h @@ -0,0 +1,1014 @@ +#ifndef __NET_NETLINK_H +#define __NET_NETLINK_H + +#include +#include +#include + +/* ======================================================================== + * Netlink Messages and Attributes Interface (As Seen On TV) + * ------------------------------------------------------------------------ + * Messages Interface + * ------------------------------------------------------------------------ 
+ * + * Message Format: + * <--- nlmsg_total_size(payload) ---> + * <-- nlmsg_msg_size(payload) -> + * +----------+- - -+-------------+- - -+-------- - - + * | nlmsghdr | Pad | Payload | Pad | nlmsghdr + * +----------+- - -+-------------+- - -+-------- - - + * nlmsg_data(nlh)---^ ^ + * nlmsg_next(nlh)-----------------------+ + * + * Payload Format: + * <---------------------- nlmsg_len(nlh) ---------------------> + * <------ hdrlen ------> <- nlmsg_attrlen(nlh, hdrlen) -> + * +----------------------+- - -+--------------------------------+ + * | Family Header | Pad | Attributes | + * +----------------------+- - -+--------------------------------+ + * nlmsg_attrdata(nlh, hdrlen)---^ + * + * Data Structures: + * struct nlmsghdr netlink message header + * + * Message Construction: + * nlmsg_new() create a new netlink message + * nlmsg_put() add a netlink message to an skb + * nlmsg_put_answer() callback based nlmsg_put() + * nlmsg_end() finalize netlink message + * nlmsg_get_pos() return current position in message + * nlmsg_trim() trim part of message + * nlmsg_cancel() cancel message construction + * nlmsg_free() free a netlink message + * + * Message Sending: + * nlmsg_multicast() multicast message to several groups + * nlmsg_unicast() unicast a message to a single socket + * nlmsg_notify() send notification message + * + * Message Length Calculations: + * nlmsg_msg_size(payload) length of message w/o padding + * nlmsg_total_size(payload) length of message w/ padding + * nlmsg_padlen(payload) length of padding at tail + * + * Message Payload Access: + * nlmsg_data(nlh) head of message payload + * nlmsg_len(nlh) length of message payload + * nlmsg_attrdata(nlh, hdrlen) head of attributes data + * nlmsg_attrlen(nlh, hdrlen) length of attributes data + * + * Message Parsing: + * nlmsg_ok(nlh, remaining) does nlh fit into remaining bytes? 
+ * nlmsg_next(nlh, remaining) get next netlink message + * nlmsg_parse() parse attributes of a message + * nlmsg_find_attr() find an attribute in a message + * nlmsg_for_each_msg() loop over all messages + * nlmsg_validate() validate netlink message incl. attrs + * nlmsg_for_each_attr() loop over all attributes + * + * Misc: + * nlmsg_report() report back to application? + * + * ------------------------------------------------------------------------ + * Attributes Interface + * ------------------------------------------------------------------------ + * + * Attribute Format: + * <------- nla_total_size(payload) -------> + * <---- nla_attr_size(payload) -----> + * +----------+- - -+- - - - - - - - - +- - -+-------- - - + * | Header | Pad | Payload | Pad | Header + * +----------+- - -+- - - - - - - - - +- - -+-------- - - + * <- nla_len(nla) -> ^ + * nla_data(nla)----^ | + * nla_next(nla)-----------------------------' + * + * Data Structures: + * struct nlattr netlink attribute header + * + * Attribute Construction: + * nla_reserve(skb, type, len) reserve room for an attribute + * nla_reserve_nohdr(skb, len) reserve room for an attribute w/o hdr + * nla_put(skb, type, len, data) add attribute to skb + * nla_put_nohdr(skb, len, data) add attribute w/o hdr + * + * Attribute Construction for Basic Types: + * nla_put_u8(skb, type, value) add u8 attribute to skb + * nla_put_u16(skb, type, value) add u16 attribute to skb + * nla_put_u32(skb, type, value) add u32 attribute to skb + * nla_put_u64(skb, type, value) add u64 attribute to skb + * nla_put_string(skb, type, str) add string attribute to skb + * nla_put_flag(skb, type) add flag attribute to skb + * nla_put_msecs(skb, type, jiffies) add msecs attribute to skb + * + * Exceptions Based Attribute Construction: + * NLA_PUT(skb, type, len, data) add attribute to skb + * NLA_PUT_U8(skb, type, value) add u8 attribute to skb + * NLA_PUT_U16(skb, type, value) add u16 attribute to skb + * NLA_PUT_U32(skb, type, value) add 
u32 attribute to skb + * NLA_PUT_U64(skb, type, value) add u64 attribute to skb + * NLA_PUT_STRING(skb, type, str) add string attribute to skb + * NLA_PUT_FLAG(skb, type) add flag attribute to skb + * NLA_PUT_MSECS(skb, type, jiffies) add msecs attribute to skb + * + * The meaning of these functions is equal to their lower case + * variants but they jump to the label nla_put_failure in case + * of a failure. + * + * Nested Attributes Construction: + * nla_nest_start(skb, type) start a nested attribute + * nla_nest_end(skb, nla) finalize a nested attribute + * nla_nest_cancel(skb, nla) cancel nested attribute construction + * + * Attribute Length Calculations: + * nla_attr_size(payload) length of attribute w/o padding + * nla_total_size(payload) length of attribute w/ padding + * nla_padlen(payload) length of padding + * + * Attribute Payload Access: + * nla_data(nla) head of attribute payload + * nla_len(nla) length of attribute payload + * + * Attribute Payload Access for Basic Types: + * nla_get_u8(nla) get payload for a u8 attribute + * nla_get_u16(nla) get payload for a u16 attribute + * nla_get_u32(nla) get payload for a u32 attribute + * nla_get_u64(nla) get payload for a u64 attribute + * nla_get_flag(nla) return 1 if flag is true + * nla_get_msecs(nla) get payload for a msecs attribute + * + * Attribute Misc: + * nla_memcpy(dest, nla, count) copy attribute into memory + * nla_memcmp(nla, data, size) compare attribute with memory area + * nla_strlcpy(dst, nla, size) copy attribute to a sized string + * nla_strcmp(nla, str) compare attribute with string + * + * Attribute Parsing: + * nla_ok(nla, remaining) does nla fit into remaining bytes? 
+ * nla_next(nla, remaining) get next netlink attribute + * nla_validate() validate a stream of attributes + * nla_validate_nested() validate a stream of nested attributes + * nla_find() find attribute in stream of attributes + * nla_find_nested() find attribute in nested attributes + * nla_parse() parse and validate stream of attrs + * nla_parse_nested() parse nested attributes + * nla_for_each_attr() loop over all attributes + * nla_for_each_nested() loop over the nested attributes + *========================================================================= + */ + + /** + * Standard attribute types to specify validation policy + */ +enum { + NLA_UNSPEC, + NLA_U8, + NLA_U16, + NLA_U32, + NLA_U64, + NLA_STRING, + NLA_FLAG, + NLA_MSECS, + NLA_NESTED, + NLA_NESTED_COMPAT, + NLA_NUL_STRING, + NLA_BINARY, + __NLA_TYPE_MAX, +}; + +#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1) + +/** + * struct nla_policy - attribute validation policy + * @type: Type of attribute or NLA_UNSPEC + * @len: Type specific length of payload + * + * Policies are defined as arrays of this struct, the array must be + * accessible by attribute type up to the highest identifier to be expected. 
+ * + * Meaning of `len' field: + * NLA_STRING Maximum length of string + * NLA_NUL_STRING Maximum length of string (excluding NUL) + * NLA_FLAG Unused + * NLA_BINARY Maximum length of attribute payload + * All other Exact length of attribute payload + * + * Example: + * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = { + * [ATTR_FOO] = { .type = NLA_U16 }, + * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, + * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, + * }; + */ +struct nla_policy { + u16 type; + u16 len; +}; + +/** + * struct nl_info - netlink source information + * @nlh: Netlink message header of original request + * @pid: Netlink PID of requesting application + */ +struct nl_info { + struct nlmsghdr *nlh; + u32 pid; +}; + +extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, + int (*cb)(struct sk_buff *, + struct nlmsghdr *)); +extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group, int report, + gfp_t flags); + +extern int nla_validate(struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy); +extern int nla_parse(struct nlattr *tb[], int maxtype, + struct nlattr *head, int len, + const struct nla_policy *policy); +extern struct nlattr * nla_find(struct nlattr *head, int len, int attrtype); +extern size_t nla_strlcpy(char *dst, const struct nlattr *nla, + size_t dstsize); +extern int nla_memcpy(void *dest, struct nlattr *src, int count); +extern int nla_memcmp(const struct nlattr *nla, const void *data, + size_t size); +extern int nla_strcmp(const struct nlattr *nla, const char *str); +extern struct nlattr * __nla_reserve(struct sk_buff *skb, int attrtype, + int attrlen); +extern void * __nla_reserve_nohdr(struct sk_buff *skb, int attrlen); +extern struct nlattr * nla_reserve(struct sk_buff *skb, int attrtype, + int attrlen); +extern void * nla_reserve_nohdr(struct sk_buff *skb, int attrlen); +extern void __nla_put(struct sk_buff *skb, int attrtype, + int 
attrlen, const void *data); +extern void __nla_put_nohdr(struct sk_buff *skb, int attrlen, + const void *data); +extern int nla_put(struct sk_buff *skb, int attrtype, + int attrlen, const void *data); +extern int nla_put_nohdr(struct sk_buff *skb, int attrlen, + const void *data); + +/************************************************************************** + * Netlink Messages + **************************************************************************/ + +/** + * nlmsg_msg_size - length of netlink message not including padding + * @payload: length of message payload + */ +static inline int nlmsg_msg_size(int payload) +{ + return NLMSG_HDRLEN + payload; +} + +/** + * nlmsg_total_size - length of netlink message including padding + * @payload: length of message payload + */ +static inline int nlmsg_total_size(int payload) +{ + return NLMSG_ALIGN(nlmsg_msg_size(payload)); +} + +/** + * nlmsg_padlen - length of padding at the message's tail + * @payload: length of message payload + */ +static inline int nlmsg_padlen(int payload) +{ + return nlmsg_total_size(payload) - nlmsg_msg_size(payload); +} + +/** + * nlmsg_data - head of message payload + * @nlh: netlink message header + */ +static inline void *nlmsg_data(const struct nlmsghdr *nlh) +{ + return (unsigned char *) nlh + NLMSG_HDRLEN; +} + +/** + * nlmsg_len - length of message payload + * @nlh: netlink message header + */ +static inline int nlmsg_len(const struct nlmsghdr *nlh) +{ + return nlh->nlmsg_len - NLMSG_HDRLEN; +} + +/** + * nlmsg_attrdata - head of attributes data + * @nlh: netlink message header + * @hdrlen: length of family specific header + */ +static inline struct nlattr *nlmsg_attrdata(const struct nlmsghdr *nlh, + int hdrlen) +{ + unsigned char *data = nlmsg_data(nlh); + return (struct nlattr *) (data + NLMSG_ALIGN(hdrlen)); +} + +/** + * nlmsg_attrlen - length of attributes data + * @nlh: netlink message header + * @hdrlen: length of family specific header + */ +static inline int 
nlmsg_attrlen(const struct nlmsghdr *nlh, int hdrlen) +{ + return nlmsg_len(nlh) - NLMSG_ALIGN(hdrlen); +} + +/** + * nlmsg_ok - check if the netlink message fits into the remaining bytes + * @nlh: netlink message header + * @remaining: number of bytes remaining in message stream + * + * The header size is cast to int so that a negative @remaining fails the + * first test instead of being converted to a large unsigned value; once + * @remaining is known to be positive the later comparisons are safe. + */ +static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining) +{ + return (remaining >= (int) sizeof(struct nlmsghdr) && + nlh->nlmsg_len >= sizeof(struct nlmsghdr) && + nlh->nlmsg_len <= remaining); +} + +/** + * nlmsg_next - next netlink message in message stream + * @nlh: netlink message header + * @remaining: number of bytes remaining in message stream + * + * Returns the next netlink message in the message stream and + * decrements remaining by the size of the current message. + */ +static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining) +{ + int totlen = NLMSG_ALIGN(nlh->nlmsg_len); + + *remaining -= totlen; + + return (struct nlmsghdr *) ((unsigned char *) nlh + totlen); +} + +/** + * nlmsg_parse - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * See nla_parse() + */ +static inline int nlmsg_parse(struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy) +{ + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), policy); +} + +/** + * nlmsg_find_attr - find a specific attribute in a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @attrtype: type of attribute to look for + * + * Returns the first attribute which matches the specified type. 
+ */ +static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh, + int hdrlen, int attrtype) +{ + return nla_find(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), attrtype); +} + +/** + * nlmsg_validate - validate a netlink message including attributes + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + */ +static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype, + const struct nla_policy *policy) +{ + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + return nla_validate(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), maxtype, policy); +} + +/** + * nlmsg_report - need to report back to application? + * @nlh: netlink message header + * + * Returns 1 if a report back to the application is requested. + */ +static inline int nlmsg_report(struct nlmsghdr *nlh) +{ + return !!(nlh->nlmsg_flags & NLM_F_ECHO); +} + +/** + * nlmsg_for_each_attr - iterate over a stream of attributes + * @pos: loop counter, set to current attribute + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \ + nla_for_each_attr(pos, nlmsg_attrdata(nlh, hdrlen), \ + nlmsg_attrlen(nlh, hdrlen), rem) + +#if 0 +/* FIXME: Enable once all users have been converted */ + +/** + * __nlmsg_put - Add a new netlink message to an skb + * @skb: socket buffer to store message in + * @pid: netlink process id + * @seq: sequence number of message + * @type: message type + * @payload: length of message payload + * @flags: message flags + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for both the netlink header and payload. 
+ */ +static inline struct nlmsghdr *__nlmsg_put(struct sk_buff *skb, u32 pid, + u32 seq, int type, int payload, + int flags) +{ + struct nlmsghdr *nlh; + + nlh = (struct nlmsghdr *) skb_put(skb, nlmsg_total_size(payload)); + nlh->nlmsg_type = type; + nlh->nlmsg_len = nlmsg_msg_size(payload); + nlh->nlmsg_flags = flags; + nlh->nlmsg_pid = pid; + nlh->nlmsg_seq = seq; + + memset((unsigned char *) nlmsg_data(nlh) + payload, 0, + nlmsg_padlen(payload)); + + return nlh; +} +#endif + +/** + * nlmsg_put - Add a new netlink message to an skb + * @skb: socket buffer to store message in + * @pid: netlink process id + * @seq: sequence number of message + * @type: message type + * @payload: length of message payload + * @flags: message flags + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the message header and payload. + */ +static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, + int type, int payload, int flags) +{ + if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload))) + return NULL; + + return __nlmsg_put(skb, pid, seq, type, payload, flags); +} + +/** + * nlmsg_put_answer - Add a new callback based netlink message to an skb + * @skb: socket buffer to store message in + * @cb: netlink callback + * @type: message type + * @payload: length of message payload + * @flags: message flags + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the message header and payload. + */ +static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, + struct netlink_callback *cb, + int type, int payload, + int flags) +{ + return nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + type, payload, flags); +} + +/** + * nlmsg_new - Allocate a new netlink message + * @payload: size of the message payload + * @flags: the type of memory to allocate. + * + * Use NLMSG_DEFAULT_SIZE if the size of the payload isn't known + * and a good default is needed. 
+ */ +static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags) +{ + return alloc_skb(nlmsg_total_size(payload), flags); +} + +/** + * nlmsg_end - Finalize a netlink message + * @skb: socket buffer the message is stored in + * @nlh: netlink message header + * + * Corrects the netlink message header to include the appended + * attributes. Only necessary if attributes have been added to + * the message. + * + * Returns the total data length of the skb. + */ +static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + nlh->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; + + return skb->len; +} + +/** + * nlmsg_get_pos - return current position in netlink message + * @skb: socket buffer the message is stored in + * + * Returns a pointer to the current tail of the message. + */ +static inline void *nlmsg_get_pos(struct sk_buff *skb) +{ + return skb_tail_pointer(skb); +} + +/** + * nlmsg_trim - Trim message to a mark + * @skb: socket buffer the message is stored in + * @mark: mark to trim to + * + * Trims the message to the provided mark. Returns -1. + */ +static inline int nlmsg_trim(struct sk_buff *skb, const void *mark) +{ + if (mark) + skb_trim(skb, (unsigned char *) mark - skb->data); + + return -1; +} + +/** + * nlmsg_cancel - Cancel construction of a netlink message + * @skb: socket buffer the message is stored in + * @nlh: netlink message header + * + * Removes the complete netlink message including all + * attributes from the socket buffer again. Returns -1. 
+ */ +static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + return nlmsg_trim(skb, nlh); +} + +/** + * nlmsg_free - free a netlink message + * @skb: socket buffer of netlink message + */ +static inline void nlmsg_free(struct sk_buff *skb) +{ + kfree_skb(skb); +} + +/** + * nlmsg_multicast - multicast a netlink message + * @sk: netlink socket to spread messages to + * @skb: netlink message as socket buffer + * @pid: own netlink pid to avoid sending to yourself + * @group: multicast group id (*not* bit-mask) + * @flags: allocation flags + */ +static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group, gfp_t flags) +{ + NETLINK_CB(skb).dst_groups = 1UL << (group - 1); + netlink_broadcast(sk, skb, pid, 1UL << (group - 1), flags); + return 0; +} + +/** + * nlmsg_unicast - unicast a netlink message + * @sk: netlink socket to spread message to + * @skb: netlink message as socket buffer + * @pid: netlink pid of the destination socket + */ +static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 pid) +{ + int err; + + err = netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + if (err > 0) + err = 0; + + return err; +} + +/** + * nlmsg_for_each_msg - iterate over a stream of messages + * @pos: loop counter, set to current message + * @head: head of message stream + * @len: length of message stream + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nlmsg_for_each_msg(pos, head, len, rem) \ + for (pos = head, rem = len; \ + nlmsg_ok(pos, rem); \ + pos = nlmsg_next(pos, &(rem))) + +/************************************************************************** + * Netlink Attributes + **************************************************************************/ + +/** + * nla_attr_size - length of attribute not including padding + * @payload: length of payload + */ +static inline int nla_attr_size(int payload) +{ + return NLA_HDRLEN + payload; +} + +/** + * 
nla_total_size - total length of attribute including padding + * @payload: length of payload + */ +static inline int nla_total_size(int payload) +{ + return NLA_ALIGN(nla_attr_size(payload)); +} + +/** + * nla_padlen - length of padding at the tail of attribute + * @payload: length of payload + */ +static inline int nla_padlen(int payload) +{ + return nla_total_size(payload) - nla_attr_size(payload); +} + +/** + * nla_data - head of payload + * @nla: netlink attribute + */ +static inline void *nla_data(const struct nlattr *nla) +{ + return (char *) nla + NLA_HDRLEN; +} + +/** + * nla_len - length of payload + * @nla: netlink attribute + */ +static inline int nla_len(const struct nlattr *nla) +{ + return nla->nla_len - NLA_HDRLEN; +} + +/** + * nla_ok - check if the netlink attribute fits into the remaining bytes + * @nla: netlink attribute + * @remaining: number of bytes remaining in attribute stream + * + * The header size is cast to int so that a negative @remaining fails the + * first test (and @nla is never dereferenced) instead of being converted + * to a large unsigned value. + */ +static inline int nla_ok(const struct nlattr *nla, int remaining) +{ + return remaining >= (int) sizeof(*nla) && + nla->nla_len >= sizeof(*nla) && + nla->nla_len <= remaining; +} + +/** + * nla_next - next netlink attribute in attribute stream + * @nla: netlink attribute + * @remaining: number of bytes remaining in attribute stream + * + * Returns the next netlink attribute in the attribute stream and + * decrements remaining by the size of the current attribute. + */ +static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) +{ + int totlen = NLA_ALIGN(nla->nla_len); + + *remaining -= totlen; + return (struct nlattr *) ((char *) nla + totlen); +} + +/** + * nla_find_nested - find attribute in a set of nested attributes + * @nla: attribute containing the nested attributes + * @attrtype: type of attribute to look for + * + * Returns the first attribute which matches the specified type. 
+ */ +static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype) +{ + return nla_find(nla_data(nla), nla_len(nla), attrtype); +} + +/** + * nla_parse_nested - parse nested attributes + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @nla: attribute containing the nested attributes + * @policy: validation policy + * + * See nla_parse() + */ +static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + const struct nla_policy *policy) +{ + return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); +} +/** + * nla_put_u8 - Add a u8 netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) +{ + return nla_put(skb, attrtype, sizeof(u8), &value); +} + +/** + * nla_put_u16 - Add a u16 netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) +{ + return nla_put(skb, attrtype, sizeof(u16), &value); +} + +/** + * nla_put_u32 - Add a u32 netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) +{ + return nla_put(skb, attrtype, sizeof(u32), &value); +} + +/** + * nla_put_u64 - Add a u64 netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_u64(struct sk_buff *skb, int attrtype, u64 value) +{ + return nla_put(skb, attrtype, sizeof(u64), &value); +} + +/** + * nla_put_string - Add a string netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * 
@attrtype: attribute type + * @str: NUL terminated string + */ +static inline int nla_put_string(struct sk_buff *skb, int attrtype, + const char *str) +{ + return nla_put(skb, attrtype, strlen(str) + 1, str); +} + +/** + * nla_put_flag - Add a flag netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + */ +static inline int nla_put_flag(struct sk_buff *skb, int attrtype) +{ + return nla_put(skb, attrtype, 0, NULL); +} + +/** + * nla_put_msecs - Add a msecs netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @jiffies: number of msecs in jiffies + */ +static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, + unsigned long jiffies) +{ + u64 tmp = jiffies_to_msecs(jiffies); + return nla_put(skb, attrtype, sizeof(u64), &tmp); +} + +#define NLA_PUT(skb, attrtype, attrlen, data) \ + do { \ + if (nla_put(skb, attrtype, attrlen, data) < 0) \ + goto nla_put_failure; \ + } while(0) + +#define NLA_PUT_TYPE(skb, type, attrtype, value) \ + do { \ + type __tmp = value; \ + NLA_PUT(skb, attrtype, sizeof(type), &__tmp); \ + } while(0) + +#define NLA_PUT_U8(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u8, attrtype, value) + +#define NLA_PUT_U16(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u16, attrtype, value) + +#define NLA_PUT_LE16(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, __le16, attrtype, value) + +#define NLA_PUT_U32(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u32, attrtype, value) + +#define NLA_PUT_BE32(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, __be32, attrtype, value) + +#define NLA_PUT_U64(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u64, attrtype, value) + +#define NLA_PUT_STRING(skb, attrtype, value) \ + NLA_PUT(skb, attrtype, strlen(value) + 1, value) + +#define NLA_PUT_FLAG(skb, attrtype) \ + NLA_PUT(skb, attrtype, 0, NULL) + +#define NLA_PUT_MSECS(skb, attrtype, jiffies) \ + NLA_PUT_U64(skb, attrtype, jiffies_to_msecs(jiffies)) + +/** + * 
nla_get_u32 - return payload of u32 attribute + * @nla: u32 netlink attribute + */ +static inline u32 nla_get_u32(struct nlattr *nla) +{ + return *(u32 *) nla_data(nla); +} + +/** + * nla_get_be32 - return payload of __be32 attribute + * @nla: __be32 netlink attribute + */ +static inline __be32 nla_get_be32(struct nlattr *nla) +{ + return *(__be32 *) nla_data(nla); +} + +/** + * nla_get_u16 - return payload of u16 attribute + * @nla: u16 netlink attribute + */ +static inline u16 nla_get_u16(struct nlattr *nla) +{ + return *(u16 *) nla_data(nla); +} + +/** + * nla_get_le16 - return payload of __le16 attribute + * @nla: __le16 netlink attribute + */ +static inline __le16 nla_get_le16(struct nlattr *nla) +{ + return *(__le16 *) nla_data(nla); +} + +/** + * nla_get_u8 - return payload of u8 attribute + * @nla: u8 netlink attribute + */ +static inline u8 nla_get_u8(struct nlattr *nla) +{ + return *(u8 *) nla_data(nla); +} + +/** + * nla_get_u64 - return payload of u64 attribute + * @nla: u64 netlink attribute + */ +static inline u64 nla_get_u64(struct nlattr *nla) +{ + u64 tmp; + + nla_memcpy(&tmp, nla, sizeof(tmp)); + + return tmp; +} + +/** + * nla_get_flag - return payload of flag attribute + * @nla: flag netlink attribute + */ +static inline int nla_get_flag(struct nlattr *nla) +{ + return !!nla; +} + +/** + * nla_get_msecs - return payload of msecs attribute + * @nla: msecs netlink attribute + * + * Returns the number of milliseconds in jiffies. 
+ */ +static inline unsigned long nla_get_msecs(struct nlattr *nla) +{ + u64 msecs = nla_get_u64(nla); + + return msecs_to_jiffies((unsigned long) msecs); +} + +/** + * nla_nest_start - Start a new level of nested attributes + * @skb: socket buffer to add attributes to + * @attrtype: attribute type of container + * + * Returns the container attribute + */ +static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) +{ + struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); + + if (nla_put(skb, attrtype, 0, NULL) < 0) + return NULL; + + return start; +} + +/** + * nla_nest_end - Finalize nesting of attributes + * @skb: socket buffer the attributes are stored in + * @start: container attribute + * + * Corrects the container attribute header to include all + * appended attributes. + * + * Returns the total data length of the skb. + */ +static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) +{ + start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start; + return skb->len; +} + +/** + * nla_nest_cancel - Cancel nesting of attributes + * @skb: socket buffer the message is stored in + * @start: container attribute + * + * Removes the container attribute, including all nested + * attributes. Returns -1. + */ +static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) +{ + return nlmsg_trim(skb, start); +} + +/** + * nla_validate_nested - Validate a stream of nested attributes + * @start: container attribute + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * Validates all attributes in the nested attribute stream against the + * specified policy. Attributes with a type exceeding maxtype will be + * ignored. See documenation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code.
+ */ +static inline int nla_validate_nested(struct nlattr *start, int maxtype, + const struct nla_policy *policy) +{ + return nla_validate(nla_data(start), nla_len(start), maxtype, policy); +} + +/** + * nla_for_each_attr - iterate over a stream of attributes + * @pos: loop counter, set to current attribute + * @head: head of attribute stream + * @len: length of attribute stream + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_attr(pos, head, len, rem) \ + for (pos = head, rem = len; \ + nla_ok(pos, rem); \ + pos = nla_next(pos, &(rem))) + +/** + * nla_for_each_nested - iterate over nested attributes + * @pos: loop counter, set to current attribute + * @nla: attribute containing the nested attributes + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_nested(pos, nla, rem) \ + nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem) + +#endif diff --git a/datapath/linux-2.4/compat-2.4/kernel.c b/datapath/linux-2.4/compat-2.4/kernel.c new file mode 100644 index 00000000..a08bb2d8 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/kernel.c @@ -0,0 +1,27 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + */ + +#include +#include +#include + +int vprintk(const char *msg, ...) 
+{ +#define BUFFER_SIZE 1024 + char *buffer = kmalloc(BUFFER_SIZE, GFP_ATOMIC); + int retval; + if (buffer) { + va_list args; + va_start(args, msg); + vsnprintf(buffer, BUFFER_SIZE, msg, args); + va_end(args); + retval = printk("%s", buffer); + kfree(buffer); + } else { + retval = printk("<> %s", msg); + } + return retval; +} + +EXPORT_SYMBOL(vprintk); diff --git a/datapath/linux-2.4/compat-2.4/netlink.c b/datapath/linux-2.4/compat-2.4/netlink.c new file mode 100644 index 00000000..79aedee2 --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/netlink.c @@ -0,0 +1,116 @@ +/* + * NETLINK Netlink attributes + * + * Authors: Thomas Graf + * Alexey Kuznetsov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * netlink_queue_skip - Skip netlink message while processing queue. + * @nlh: Netlink message to be skipped + * @skb: Socket buffer containing the netlink messages. + * + * Pulls the given netlink message off the socket buffer so the next + * call to netlink_queue_run() will not reconsider the message. 
+ */ +static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) +{ + int msglen = NLMSG_ALIGN(nlh->nlmsg_len); + + if (msglen > skb->len) + msglen = skb->len; + + skb_pull(skb, msglen); +} + +static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, + struct nlmsghdr *)) +{ + struct nlmsghdr *nlh; + int err; + + while (skb->len >= nlmsg_total_size(0)) { + nlh = nlmsg_hdr(skb); + err = 0; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) + return 0; + + /* Only requests are handled by the kernel */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) + goto skip; + + /* Skip control messages */ + if (nlh->nlmsg_type < NLMSG_MIN_TYPE) + goto skip; + + err = cb(skb, nlh); + if (err == -EINTR) { + /* Not an error, but we interrupt processing */ + netlink_queue_skip(nlh, skb); + return err; + } +skip: + if (nlh->nlmsg_flags & NLM_F_ACK || err) + netlink_ack(skb, nlh, err); + + netlink_queue_skip(nlh, skb); + } + + return 0; +} + +/** + * netlink_run_queue - Process netlink receive queue. + * @sk: Netlink socket containing the queue + * @qlen: Place to store queue length upon entry + * @cb: Callback function invoked for each netlink message found + * + * Processes as much as there was in the queue upon entry and invokes + * a callback function for each netlink message found. The callback + * function may refuse a message by returning a negative error code + * but setting the error pointer to 0 in which case this function + * returns with a qlen != 0. + * + * qlen must be initialized to 0 before the initial entry, afterwards + * the function may be called repeatedly until qlen reaches 0. + * + * The callback function may return -EINTR to signal that processing + * of netlink messages shall be interrupted. In this case the message + * currently being processed will NOT be requeued onto the receive + * queue. 
+ */ +void netlink_run_queue(struct sock *sk, unsigned int *qlen, + int (*cb)(struct sk_buff *, struct nlmsghdr *)) +{ + struct sk_buff *skb; + + if (!*qlen || *qlen > skb_queue_len(&sk->receive_queue)) + *qlen = skb_queue_len(&sk->receive_queue); + + for (; *qlen; (*qlen)--) { + skb = skb_dequeue(&sk->receive_queue); + if (netlink_rcv_skb(skb, cb)) { + if (skb->len) + skb_queue_head(&sk->receive_queue, skb); + else { + kfree_skb(skb); + (*qlen)--; + } + break; + } + + kfree_skb(skb); + } +} diff --git a/datapath/linux-2.4/compat-2.4/random32.c b/datapath/linux-2.4/compat-2.4/random32.c new file mode 100644 index 00000000..3a19e73a --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/random32.c @@ -0,0 +1,142 @@ +/* + This is a maximally equidistributed combined Tausworthe generator + based on code from GNU Scientific Library 1.5 (30 Jun 2004) + + x_n = (s1_n ^ s2_n ^ s3_n) + + s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) + s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) + s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) + + The period of this generator is about 2^88. + + From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + Generators", Mathematics of Computation, 65, 213 (1996), 203--213. + + This is available on the net from L'Ecuyer's home page, + + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + + There is an erratum in the paper "Tables of Maximally + Equidistributed Combined LFSR Generators", Mathematics of + Computation, 68, 225 (1999), 261--269: + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + + ... the k_j most significant bits of z_j must be non- + zero, for each j. (Note: this restriction also applies to the + computer code given in [4], but was mistakenly not mentioned in + that paper.) 
+ + This affects the seeding procedure by imposing the requirement + s1 > 1, s2 > 7, s3 > 15. + +*/ + +#include +#include +#include +#include +#include +#include + +#include "compat24.h" + +struct rnd_state { + u32 s1, s2, s3; +}; + +static struct rnd_state net_rand_state[NR_CPUS]; + +static u32 __random32(struct rnd_state *state) +{ +#define TAUSWORTHE(s,a,b,c,d) ((s&c)<>b) + + state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); + state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); + state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); + + return (state->s1 ^ state->s2 ^ state->s3); +} + +static void __set_random32(struct rnd_state *state, unsigned long s) +{ + if (s == 0) + s = 1; /* default seed is 1 */ + +#define LCG(n) (69069 * n) + state->s1 = LCG(s); + state->s2 = LCG(state->s1); + state->s3 = LCG(state->s2); + + /* "warm it up" */ + __random32(state); + __random32(state); + __random32(state); + __random32(state); + __random32(state); + __random32(state); +} + +/** + * random32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. + */ +u32 random32(void) +{ + return __random32(&net_rand_state[smp_processor_id()]); +} +EXPORT_SYMBOL(random32); + +/** + * srandom32 - add entropy to pseudo random number generator + * @seed: seed value + * + * Add some additional seeding to the random32() pool. + * Note: this pool is per cpu so it only affects current CPU. + */ +void srandom32(u32 entropy) +{ + struct rnd_state *state = &net_rand_state[smp_processor_id()]; + __set_random32(state, state->s1 ^ entropy); +} +EXPORT_SYMBOL(srandom32); + +static int __init random32_reseed(void); + +/* + * Generate some initially weak seeding values to allow + * to start the random32() engine. 
+ */ +int __init random32_init(void) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) { + struct rnd_state *state = &net_rand_state[i]; + __set_random32(state, i + jiffies); + } + random32_reseed(); + return 0; +} + +/* + * Generate better values after random number generator + * is fully initalized. + */ +static int __init random32_reseed(void) +{ + int i; + unsigned long seed; + + for (i = 0; i < NR_CPUS; i++) { + struct rnd_state *state = &net_rand_state[i]; + + get_random_bytes(&seed, sizeof(seed)); + __set_random32(state, seed); + } + return 0; +} diff --git a/datapath/linux-2.4/compat-2.4/rcupdate.c b/datapath/linux-2.4/compat-2.4/rcupdate.c new file mode 100644 index 00000000..62066d2f --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/rcupdate.c @@ -0,0 +1,145 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + */ + +#include +#include +#include +#include +#include +#include + +#include "compat24.h" + +#ifdef CONFIG_SMP +#error "SMP configurations not supported for RCU backport." +#endif + +static int default_blimit = 10; +static int blimit; +static int qhimark = 10000; +static int qlowmark = 100; + +static struct rcu_head *head, **tail; +static int qlen = 0; + +static struct tq_struct rcu_task; + +/* + * Invoke the completed RCU callbacks. They are expected to be in + * a per-cpu list. 
+ */
+static void rcu_task_routine(void *unused)
+{
+	struct rcu_head *list, *next;
+	struct rcu_head **list_tail;
+	int count = 0;
+
+	/* Detach the pending list so callbacks run with interrupts enabled. */
+	local_irq_disable();
+	list = head;
+	list_tail = tail;	/* link slot of the last detached callback */
+	head = NULL;
+	tail = &head;
+	local_irq_enable();
+
+	while (list) {
+		next = list->next;
+		prefetch(next);
+		list->func(list);
+		list = next;
+		if (++count >= blimit)
+			break;
+	}
+
+	local_irq_disable();
+	qlen -= count;
+	if (list) {
+		/*
+		 * The batch limit was hit before the detached list was
+		 * drained.  Put the leftovers back in front of whatever
+		 * call_rcu() queued in the meantime; dropping them here
+		 * would mean those callbacks are never invoked.
+		 * list_tail is still valid because the last detached
+		 * callback has not been run.
+		 */
+		*list_tail = head;
+		if (!head)
+			tail = list_tail;
+		head = list;
+	}
+	local_irq_enable();
+	if (blimit == INT_MAX && qlen <= qlowmark)
+		blimit = default_blimit;
+
+	/* Anything requeued or newly queued gets another pass. */
+	if (head)
+		schedule_task(&rcu_task);
+}
+
+
+static inline void force_quiescent_state(void)
+{
+	current->need_resched = 1;
+}
+
+/**
+ * call_rcu - Queue an RCU callback for invocation after a grace period.
+ * @rcu: structure to be used for queueing the RCU updates.
+ * @func: actual update function to be invoked after the grace period
+ *
+ * The update function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ */
+void call_rcu(struct rcu_head *rcu, void (*func)(struct rcu_head *rcu))
+{
+	unsigned long flags;
+
+	/* FIXME?  Following may be mildly expensive, may be worthwhile to
+	   optimize common case.
*/ + schedule_task(&rcu_task); + + rcu->func = func; + rcu->next = NULL; + local_irq_save(flags); + *tail = rcu; + tail = &rcu->next; + if (unlikely(++qlen > qhimark)) { + blimit = INT_MAX; + force_quiescent_state(); + } + local_irq_restore(flags); +} +EXPORT_SYMBOL(call_rcu); + +void rcu_init(void) +{ + head = NULL; + tail = &head; + blimit = default_blimit; + rcu_task.routine = rcu_task_routine; +} + +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; + +/* Because of FASTCALL declaration of complete, we use this wrapper */ +static void wakeme_after_rcu(struct rcu_head *head) +{ + struct rcu_synchronize *rcu; + + rcu = container_of(head, struct rcu_synchronize, head); + complete(&rcu->completion); +} + +/** + * synchronize_rcu - wait until a grace period has elapsed. + * + * Control will return to the caller some time after a full grace + * period has elapsed, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + * + * If your read-side code is not protected by rcu_read_lock(), do -not- + * use synchronize_rcu(). + */ +void synchronize_rcu(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished */ + call_rcu(&rcu.head, wakeme_after_rcu); + + /* Wait for it */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL(synchronize_rcu); diff --git a/datapath/linux-2.4/compat-2.4/string.c b/datapath/linux-2.4/compat-2.4/string.c new file mode 100644 index 00000000..e15c16bd --- /dev/null +++ b/datapath/linux-2.4/compat-2.4/string.c @@ -0,0 +1,30 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ */ + +#include +#include + +#ifndef __HAVE_ARCH_STRCSPN +/** + * strcspn - Calculate the length of the initial substring of @s which does not contain letters in @reject + * @s: The string to be searched + * @reject: The string to avoid + */ +size_t strcspn(const char *s, const char *reject) +{ + const char *p; + const char *r; + size_t count = 0; + + for (p = s; *p != '\0'; ++p) { + for (r = reject; *r != '\0'; ++r) { + if (*p == *r) + return count; + } + ++count; + } + return count; +} +EXPORT_SYMBOL(strcspn); +#endif diff --git a/datapath/linux-2.4/config/config-linux-2.4.35-kvm b/datapath/linux-2.4/config/config-linux-2.4.35-kvm new file mode 100644 index 00000000..d88f754d --- /dev/null +++ b/datapath/linux-2.4/config/config-linux-2.4.35-kvm @@ -0,0 +1,600 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_X86=y +# CONFIG_SBUS is not set +CONFIG_UID16=y + +# +# Code maturity level options +# +# CONFIG_EXPERIMENTAL is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# Processor type and features +# +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +CONFIG_MPENTIUMIII=y +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MELAN is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_X86_TSC is not set +CONFIG_X86_L1_CACHE_SHIFT=5 +CONFIG_X86_HAS_TSC=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_PGE=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_F00F_WORKS_OK=y +# 
CONFIG_X86_MCE is not set +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_MICROCODE is not set +# CONFIG_X86_MSR is not set +# CONFIG_X86_CPUID is not set +CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set +# CONFIG_HIGHMEM is not set +# CONFIG_MATH_EMULATION is not set +# CONFIG_MTRR is not set +# CONFIG_SMP is not set +# CONFIG_X86_UP_APIC is not set +# CONFIG_X86_TSC_DISABLE is not set +CONFIG_X86_TSC=y + +# +# General setup +# +CONFIG_NET=y +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +CONFIG_ISA=y +CONFIG_PCI_NAMES=y +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_HOTPLUG is not set +# CONFIG_PCMCIA is not set +# CONFIG_HOTPLUG_PCI is not set +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +# CONFIG_KCORE_AOUT is not set +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=y +# CONFIG_OOM_KILLER is not set +CONFIG_PM=y +# CONFIG_APM is not set + +# +# ACPI Support +# +# CONFIG_ACPI is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play configuration +# +# CONFIG_PNP is not set + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_BLK_STATS is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +# CONFIG_NETLINK_DEV is not set +# CONFIG_NETFILTER is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set 
+# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_INET_ECN is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_VLAN_8021Q is not set + +# +# +# +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_DECNET is not set +CONFIG_BRIDGE=y + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# ATA/IDE/MFM/RLL support +# +CONFIG_IDE=y + +# +# IDE, ATA and ATAPI Block devices +# +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_HD_IDE is not set +# CONFIG_BLK_DEV_HD is not set +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +# CONFIG_IDEDISK_STROKE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +# CONFIG_BLK_DEV_CMD640 is not set +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_BLK_DEV_GENERIC is not set +CONFIG_IDEPCI_SHARE_IRQ=y +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_BLK_DEV_ADMA100 is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +# CONFIG_BLK_DEV_ATIIXP is not set +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +CONFIG_BLK_DEV_RZ1000=y +# CONFIG_BLK_DEV_SC1200 is not set +# 
CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SIS5513 is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_IDE_CHIPSETS is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_DMA_NONPCI is not set + +# +# SCSI support +# +# CONFIG_SCSI is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION_BOOT is not set +# CONFIG_FUSION_ISENSE is not set +# CONFIG_FUSION_CTL is not set +# CONFIG_FUSION_LAN is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_DEPCA is not set +# CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_APRICOT is not set +# CONFIG_CS89x0 is not set +# CONFIG_TULIP is not set +# CONFIG_DE4X5 is not set +# CONFIG_DGRS is not set +# CONFIG_DM9102 is not set +CONFIG_EEPRO100=y +# CONFIG_EEPRO100_PIO is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +CONFIG_NE2K_PCI=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# CONFIG_8139TOO_8129 is not set +# CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_NET_POCKET is not set + +# +# Ethernet (1000 
Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_R8169 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_SK98LIN is not set +# CONFIG_TIGON3 is not set +# CONFIG_FDDI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set +# CONFIG_NET_FC is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) support +# +# CONFIG_IRDA is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Old CD-ROM drivers (not SCSI, not IDE) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Input core support +# +# CONFIG_INPUT is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +# CONFIG_SERIAL_EXTENDED is not set +# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=256 + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +CONFIG_MOUSE=y +CONFIG_PSMOUSE=y +# CONFIG_82C710_MOUSE is not set +# CONFIG_PC110_PAD is not set +# CONFIG_MK712_MOUSE is not set + +# +# Joysticks +# +# CONFIG_INPUT_GAMEPORT is not set + +# +# Input core support is needed for gameports +# + +# +# Input core support is needed for joysticks +# +# CONFIG_QIC02_TAPE is not set +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_SCx200 is not set +# CONFIG_AMD_RNG is not set +# CONFIG_INTEL_RNG is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_AMD_PM768 is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# 
Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +# CONFIG_AGP is not set + +# +# Direct Rendering Manager (XFree86 DRI support) +# +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# File systems +# +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_REISERFS_FS is not set +CONFIG_EXT3_FS=y +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +# CONFIG_FAT_FS is not set +# CONFIG_CRAMFS is not set +CONFIG_TMPFS=y +CONFIG_RAMFS=y +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +# CONFIG_JFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +CONFIG_DEVPTS_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_EXT2_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UFS_FS is not set +# CONFIG_XFS_FS is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# CONFIG_SUNRPC is not set +# CONFIG_LOCKD is not set +# CONFIG_SMB_FS is not set +# CONFIG_NCP_FS is not set +# CONFIG_ZISOFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_SMB_NLS is not set +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# 
CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Console drivers +# +CONFIG_VGA_CONSOLE=y +# CONFIG_VIDEO_SELECT is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +# CONFIG_USB is not set + +# +# Support for USB gadgets +# +# CONFIG_USB_GADGET is not set + +# +# Bluetooth support +# +# CONFIG_BLUEZ is not set + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_IOVIRT is not set +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_DEBUG_SPINLOCK is not set +CONFIG_FRAME_POINTER=y +CONFIG_LOG_BUF_SHIFT=0 + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set + +# +# Library routines +# +CONFIG_CRC32=y +# CONFIG_ZLIB_INFLATE is not set +# CONFIG_ZLIB_DEFLATE is not set diff --git a/datapath/linux-2.4/kbuild.inc b/datapath/linux-2.4/kbuild.inc new file mode 100644 index 00000000..faa25e6f --- /dev/null +++ b/datapath/linux-2.4/kbuild.inc @@ -0,0 +1,246 @@ +# The remainder of this file is from Intel's e1000 distribution, +# with the following license: + 
+################################################################################ +# +# Intel PRO/1000 Linux driver +# Copyright(c) 1999 - 2007 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Contact Information: +# Linux NICS +# e1000-devel Mailing List +# Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 +# +################################################################################ +ifeq (,$(BUILD_KERNEL)) +BUILD_KERNEL=$(shell uname -r) +endif + +########################################################################### +# Environment tests + +# Kernel Search Path +# All the places we look for kernel source +KSP := /lib/modules/$(BUILD_KERNEL)/build \ + /lib/modules/$(BUILD_KERNEL)/source \ + /usr/src/linux-$(BUILD_KERNEL) \ + /usr/src/linux-$($(BUILD_KERNEL) | sed 's/-.*//') \ + /usr/src/kernel-headers-$(BUILD_KERNEL) \ + /usr/src/kernel-source-$(BUILD_KERNEL) \ + /usr/src/linux-$($(BUILD_KERNEL) | sed 's/\([0-9]*\.[0-9]*\)\..*/\1/') \ + /usr/src/linux + +# prune the list down to only values that exist +# and have an include/linux sub-directory +test_dir = $(shell [ -e $(dir)/include/linux ] && echo $(dir)) +KSP := $(foreach dir, $(KSP), $(test_dir)) + +# we will use this first valid entry in the search path +ifeq (,$(KSRC)) + KSRC := $(firstword $(KSP)) +endif + +CFLAGS += $(CFLAGS_EXTRA) + +ifeq (,$(KSRC)) + $(error Linux kernel source not found) +else +ifeq (/lib/modules/$(shell uname -r)/source, $(KSRC)) + KOBJ := /lib/modules/$(shell uname -r)/build +else + KOBJ := $(KSRC) +endif +endif + +# version 2.4 or 2.6? 
+K_VERSION:=$(shell if grep -q 'PATCHLEVEL = 4' $(KSRC)/Makefile; then echo 2.4; else echo 2.6; fi) + +ifneq ($(K_VERSION),2.4) + $(error Linux kernel source in $(KSRC) not 2.4) +endif + +# check for version.h and autoconf.h for running kernel in /boot (SUSE) +ifneq (,$(wildcard /boot/vmlinuz.version.h)) + VERSION_FILE := /boot/vmlinuz.version.h + CONFIG_FILE := /boot/vmlinuz.autoconf.h + KVER := $(shell $(CC) $(CFLAGS) -E -dM $(VERSION_FILE) | \ + grep UTS_RELEASE | awk '{ print $$3 }' | sed 's/\"//g') + ifeq ($(KVER),$(shell uname -r)) + # set up include path to override headers from kernel source + x:=$(shell rm -rf include) + x:=$(shell mkdir -p include/linux) + x:=$(shell cp /boot/vmlinuz.version.h include/linux/version.h) + x:=$(shell cp /boot/vmlinuz.autoconf.h include/linux/autoconf.h) + CFLAGS += -I./include + else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := $(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h + endif +else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := $(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h +endif + +ifeq (,$(wildcard $(VERSION_FILE))) + $(error Linux kernel source not configured - missing version.h) +endif + +ifeq (,$(wildcard $(CONFIG_FILE))) + $(error Linux kernel source not configured - missing autoconf.h) +endif + +# pick a compiler +ifeq ($(K_VERSION),2.6) + CC := gcc cc +else + # Older kernels require GCC 2.95 + K_SUBLEVEL:=$(shell sed -n 's/SUBLEVEL = // p' $(KSRC)/Makefile) + ifeq ($(K_SUBLEVEL),20) + CC := gcc-2.95 + else + CC := gcc-3.4 gcc-3.3 gcc-2.95 + endif +endif +test_cc = $(shell $(cc) --version > /dev/null 2>&1 && echo $(cc)) +CC := $(foreach cc, $(CC), $(test_cc)) +CC := $(firstword $(CC)) +ifeq (,$(CC)) + $(error Compiler not found) +endif + +# we need 
to know what platform the driver is being built on +# some additional features are only built on Intel platforms +ifeq ($(ARCH),) + # Set the architecture if it hasn't been already set for cross-compilation + ARCH := $(shell uname -m | sed 's/i.86/i386/') +endif +ifeq ($(ARCH),alpha) + CFLAGS += -ffixed-8 -mno-fp-regs +endif +ifeq ($(ARCH),x86_64) + CFLAGS += -mcmodel=kernel -mno-red-zone +endif +ifeq ($(ARCH),ppc) + CFLAGS += -msoft-float +endif +ifeq ($(ARCH),ppc64) + CFLAGS += -m64 -msoft-float + LDFLAGS += -melf64ppc +endif + +# standard flags for module builds +CFLAGS += -DLINUX -D__KERNEL__ -DMODULE -O2 -pipe -Wall +CFLAGS += -I$(KSRC)/include -I. +CFLAGS += -I$(srcdir)/compat-2.4 -I$(srcdir)/compat-2.4/include +CFLAGS += $(shell [ -f $(KSRC)/include/linux/modversions.h ] && \ + echo "-DMODVERSIONS -DEXPORT_SYMTAB \ + -include $(KSRC)/include/linux/modversions.h") + +RHC := $(KSRC)/include/linux/rhconfig.h +ifneq (,$(wildcard $(RHC))) + # 7.3 typo in rhconfig.h + ifneq (,$(shell $(CC) $(CFLAGS) -E -dM $(RHC) | grep __module__bigmem)) + CFLAGS += -D__module_bigmem + endif +endif + +# get the kernel version - we use this to find the correct install path +KVER := $(shell $(CC) $(CFLAGS) -E -dM $(VERSION_FILE) | grep UTS_RELEASE | \ + awk '{ print $$3 }' | sed 's/\"//g') + +# assume source symlink is the same as build, otherwise adjust KOBJ +ifneq (,$(wildcard /lib/modules/$(KVER)/build)) +ifneq ($(KSRC),$(shell cd /lib/modules/$(KVER)/build ; pwd -P)) + KOBJ=/lib/modules/$(KVER)/build +endif +endif + +KKVER := $(shell echo $(KVER) | \ + awk '{ if ($$0 ~ /2\.[4-9]\./) print "1"; else print "0"}') +ifeq ($(KKVER), 0) + $(error *** Aborting the build. 
\ + *** This driver is not supported on kernel versions older than 2.4.0) +endif + +# look for SMP in config.h +SMP := $(shell $(CC) $(CFLAGS) -E -dM $(CONFIG_FILE) | \ + grep -w CONFIG_SMP | awk '{ print $$3 }') +ifneq ($(SMP),1) + SMP := 0 +endif + +#ifneq ($(SMP),$(shell uname -a | grep SMP > /dev/null 2>&1 && echo 1 || echo 0)) +# $(warning ***) +# ifeq ($(SMP),1) +# $(warning *** Warning: kernel source configuration (SMP)) +# $(warning *** does not match running kernel (UP)) +# else +# $(warning *** Warning: kernel source configuration (UP)) +# $(warning *** does not match running kernel (SMP)) +# endif +# $(warning *** Continuing with build,) +# $(warning *** resulting driver may not be what you want) +# $(warning ***) +#endif + +ifeq ($(SMP),1) + CFLAGS += -D__SMP__ +endif + +########################################################################### +# 2.4.x + +# Makefile for 2.4.x kernel +TARGET = openflow_mod.o unit_mod.o compat24_mod.o + +CFLAGS += -Wno-sign-compare -fno-strict-aliasing +CFLAGS := -I $(srcdir)/compat-2.4/include $(CFLAGS) +CFLAGS := -I $(srcdir)/compat-2.4/include-$(ARCH) $(CFLAGS) + +default: $(TARGET) + +openflow_mod.o: $(filter-out $(TARGET), $(CFILES:.c=.o)) + $(LD) $(LDFLAGS) -r $^ -o $@ +unit_mod.o: $(UNIT_CFILES:.c=.o) + $(LD) $(LDFLAGS) -r $^ -o $@ +compat24_mod.o: $(COMPAT24_CFILES:.c=.o) + $(LD) $(LDFLAGS) -r $^ -o $@ + +ALL_CFILES = $(FILES) $(UNIT_CFILES) $(COMPAT24_CFILES) +$(ALL_CFILES:.c=.o): $(HFILES) Makefile + +# depmod version for rpm builds +DEPVER := $(shell /sbin/depmod -V 2>/dev/null | \ + awk 'BEGIN {FS="."} NR==1 {print $$2}') + +.PHONY: clean + +clean: + rm -rf $(TARGET) $(TARGET:.ko=.o) $(TARGET:.ko=.mod.c)\ + $(TARGET:.ko=.mod.o) $(CFILES:.c=.o) $(COMPAT24_CFILES:.c=.o)\ + $(UNIT_CFILES:.c=.o) $(MANFILE).gz .*cmd .tmp_versions\ + compat24_mod.o unit_mod.o tmp/ \ diff --git a/datapath/linux-2.4/kernel-src.inc.in b/datapath/linux-2.4/kernel-src.inc.in new file mode 100644 index 00000000..399cfefe --- /dev/null 
+++ b/datapath/linux-2.4/kernel-src.inc.in @@ -0,0 +1 @@ +KSRC=@KSRC24@ diff --git a/datapath/linux-2.6-uml/.gitignore b/datapath/linux-2.6-uml/.gitignore new file mode 100644 index 00000000..fd7d3c88 --- /dev/null +++ b/datapath/linux-2.6-uml/.gitignore @@ -0,0 +1,15 @@ +/Makefile +/datapath.c +/chain.c +/crc32.c +/crc_t.c +/flow.c +/forward.c +/kernel-src.inc +/table-hash.c +/table-linear.c +/table-mac.c +/table_t.c +/unit-exports.c +/unit.c +/tmp diff --git a/datapath/linux-2.6-uml/Makefile.in b/datapath/linux-2.6-uml/Makefile.in new file mode 100644 index 00000000..f6130517 --- /dev/null +++ b/datapath/linux-2.6-uml/Makefile.in @@ -0,0 +1,51 @@ +export builddir = @abs_builddir@ +export srcdir = @abs_srcdir@ +export top_srcdir = @abs_top_srcdir@ +export KSRC = @KSRCUML@ +export KVERSION = 2.6 +export VMDIR = @VMDIR@ +export VERSION = @VERSION@ + +ARCH = um +export ARCH + +CFLAGS_EXTRA += -DVERSION=\"$(VERSION)\" +CFLAGS_EXTRA += -I $(srcdir)/.. -I $(top_srcdir)/include + +# Files shared between 2.4 and 2.6 builds + +SHAREDFILES = ../chain.c ../crc32.c ../table-hash.c ../table-linear.c \ + ../table-mac.c ../forward.c ../flow.c ../unit-exports.c \ + ../datapath_t.c + +HFILES = ../openflow.h ../chain.h ../crc32.h ../flow.h ../forward.h \ + ../table.h ../datapath_t.h + +SIMLINKFILES = $(patsubst ../%,%, $(SHAREDFILES)) + +CFILES = openflow.c $(SIMLINKFILES) + +# Testing files used for both 2.6 and 2.4 kernels. 
Are symlinked +# locally +SHARED_T_FILES = ../table_t.c ../crc_t.c ../unit.c +UNIT_CFILES = $(patsubst ../%,%, $(SHARED_T_FILES)) + +# General rule to create symlinks of shared files +%.c : ../%.c + ln -s $< $@ + +# Conditional include so that make dist doesn't puke when +# this build isn't turned on by automake +ifneq (,$(KSRC)) +include $(srcdir)/kbuild.inc +else +clean: +endif + +distclean : clean +distdir : clean +install : + +all: default + +check: all diff --git a/datapath/linux-2.6-uml/config/config-linux-2.6.23-rc5-kvm b/datapath/linux-2.6-uml/config/config-linux-2.6.23-rc5-kvm new file mode 100644 index 00000000..687e8841 --- /dev/null +++ b/datapath/linux-2.6-uml/config/config-linux-2.6.23-rc5-kvm @@ -0,0 +1,896 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.23-rc3 +# Wed Sep 26 08:31:01 2007 +# +CONFIG_DEFCONFIG_LIST="arch/$ARCH/defconfig" +CONFIG_GENERIC_HARDIRQS=y +CONFIG_UML=y +CONFIG_MMU=y +CONFIG_NO_IOMEM=y +# CONFIG_TRACE_IRQFLAGS_SUPPORT is not set +CONFIG_LOCKDEP_SUPPORT=y +# CONFIG_STACKTRACE_SUPPORT is not set +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_BUG=y +CONFIG_IRQ_RELEASE_METHOD=y + +# +# UML-specific options +# +# CONFIG_STATIC_LINK is not set +CONFIG_MODE_SKAS=y + +# +# Host processor type and features +# +# CONFIG_M386 is not set +CONFIG_M486=y +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MCORE2 is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set 
+CONFIG_X86_GENERIC=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_XADD=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_X86_PPRO_FENCE=y +CONFIG_X86_F00F_BUG=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_ALIGNMENT_16=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_MINIMUM_CPU_FAMILY=4 +CONFIG_UML_X86=y +# CONFIG_64BIT is not set +CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_HOST_VMSPLIT_3G=y +# CONFIG_HOST_VMSPLIT_3G_OPT is not set +# CONFIG_HOST_VMSPLIT_2G is not set +# CONFIG_HOST_VMSPLIT_1G is not set +CONFIG_TOP_ADDR=0xC0000000 +# CONFIG_3_LEVEL_PGTABLES is not set +CONFIG_STUB_CODE=0xbfffe000 +CONFIG_STUB_DATA=0xbffff000 +CONFIG_STUB_START=0xbfffe000 +CONFIG_ARCH_HAS_SC_SIGNALS=y +CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y +CONFIG_LD_SCRIPT_DYN=y +CONFIG_NET=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_HOSTFS=m +# CONFIG_HPPFS is not set +CONFIG_MCONSOLE=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_NEST_LEVEL=0 +CONFIG_HIGHMEM=y +CONFIG_KERNEL_STACK_ORDER=0 +CONFIG_UML_REAL_TIME_CLOCK=y + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=128 +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +# CONFIG_USER_NS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y 
+CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_SYSFS_DEPRECATED=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_BLOCK=y +CONFIG_LBD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_LSF=y +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_BLK_DEV=y +CONFIG_BLK_DEV_UBD=y +# CONFIG_BLK_DEV_UBD_SYNC is not set +CONFIG_BLK_DEV_COW_COMMON=y +# CONFIG_MMAPPER is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_ATA_OVER_ETH is not set + +# +# Character Devices +# +CONFIG_STDERR_CONSOLE=y +CONFIG_STDIO_CONSOLE=y +CONFIG_SSL=y +CONFIG_NULL_CHAN=y +CONFIG_PORT_CHAN=y +CONFIG_PTY_CHAN=y +CONFIG_TTY_CHAN=y +CONFIG_XTERM_CHAN=y +# CONFIG_NOCONFIG_CHAN is not set +CONFIG_CON_ZERO_CHAN="fd:0,fd:1" +CONFIG_CON_CHAN="xterm" +CONFIG_SSL_CHAN="pty" +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y 
+CONFIG_RAW_DRIVER=m +CONFIG_MAX_RAW_DEVS=256 +CONFIG_LEGACY_PTY_COUNT=256 +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=y +CONFIG_UML_WATCHDOG=m +# CONFIG_UML_SOUND is not set +# CONFIG_SOUND is not set +# CONFIG_HOSTAUDIO is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_UML_RANDOM is not set + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set + +# +# Networking +# + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +CONFIG_XFRM_USER=m +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_MIGRATE=y +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +# CONFIG_IP_ROUTE_VERBOSE is not set +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_TUNNEL=m +CONFIG_INET_TUNNEL=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=m +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_VEGAS=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is 
not set +CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set +CONFIG_IPV6=m +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +# CONFIG_IPV6_OPTIMISTIC_DAD is not set +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +# CONFIG_IPV6_MIP6 is not set +CONFIG_INET6_XFRM_TUNNEL=m +CONFIG_INET6_TUNNEL=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_BRIDGE_NETFILTER=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NF_CONNTRACK_ENABLED=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CT_ACCT=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_GRE=m +CONFIG_NF_CT_PROTO_SCTP=m +# CONFIG_NF_CT_PROTO_UDPLITE is not set +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_TRACE is not set +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +# CONFIG_NETFILTER_XT_MATCH_CONNLIMIT 
is not set +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +# CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +# CONFIG_NETFILTER_XT_MATCH_U32 is not set +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_IPRANGE=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_MATCH_ADDRTYPE=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT=m +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_SAME=m +CONFIG_NF_NAT_SNMP_BASIC=m +CONFIG_NF_NAT_PROTO_GRE=m +CONFIG_NF_NAT_FTP=m +CONFIG_NF_NAT_IRC=m +CONFIG_NF_NAT_TFTP=m +CONFIG_NF_NAT_AMANDA=m +CONFIG_NF_NAT_PPTP=m +CONFIG_NF_NAT_H323=m +CONFIG_NF_NAT_SIP=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m + +# +# IPv6: Netfilter Configuration 
(EXPERIMENTAL) +# +CONFIG_NF_CONNTRACK_IPV6=m +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_RAW=m + +# +# DECnet: Netfilter Configuration +# +# CONFIG_DECNET_NF_GRABULATOR is not set + +# +# Bridge: Netfilter Configuration +# +# CONFIG_BRIDGE_NF_EBTABLES is not set +CONFIG_IP_DCCP=m +CONFIG_INET_DCCP_DIAG=m +CONFIG_IP_DCCP_ACKVEC=y + +# +# DCCP CCIDs Configuration (EXPERIMENTAL) +# +CONFIG_IP_DCCP_CCID2=m +# CONFIG_IP_DCCP_CCID2_DEBUG is not set +CONFIG_IP_DCCP_CCID3=m +CONFIG_IP_DCCP_TFRC_LIB=m +# CONFIG_IP_DCCP_CCID3_DEBUG is not set +CONFIG_IP_DCCP_CCID3_RTO=100 + +# +# DCCP Kernel Hacking +# +# CONFIG_IP_DCCP_DEBUG is not set +CONFIG_IP_SCTP=m +# CONFIG_SCTP_DBG_MSG is not set +# CONFIG_SCTP_DBG_OBJCNT is not set +# CONFIG_SCTP_HMAC_NONE is not set +# CONFIG_SCTP_HMAC_SHA1 is not set +CONFIG_SCTP_HMAC_MD5=y +CONFIG_TIPC=m +CONFIG_TIPC_ADVANCED=y +CONFIG_TIPC_ZONES=3 +CONFIG_TIPC_CLUSTERS=1 +CONFIG_TIPC_NODES=255 +CONFIG_TIPC_SLAVE_NODES=0 +CONFIG_TIPC_PORTS=8191 +CONFIG_TIPC_LOG=0 +# CONFIG_TIPC_DEBUG is not set +CONFIG_ATM=m +CONFIG_ATM_CLIP=m +# CONFIG_ATM_CLIP_NO_ICMP is not set +CONFIG_ATM_LANE=m +# CONFIG_ATM_MPOA is not set +CONFIG_ATM_BR2684=m +CONFIG_ATM_BR2684_IPFILTER=y +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_DECNET=m +# CONFIG_DECNET_ROUTER is not set +CONFIG_LLC=m +CONFIG_LLC2=m +CONFIG_IPX=m +CONFIG_IPX_INTERN=y +CONFIG_ATALK=m +CONFIG_DEV_APPLETALK=m +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y +CONFIG_X25=m +CONFIG_LAPB=m +CONFIG_ECONET=m +CONFIG_ECONET_AUNUDP=y +CONFIG_ECONET_NATIVE=y +CONFIG_WAN_ROUTER=m + +# +# QoS 
and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_FIFO=y + +# +# Queueing/Scheduling +# +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_ATM=m +CONFIG_NET_SCH_PRIO=m +# CONFIG_NET_SCH_RR is not set +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_INGRESS=m + +# +# Classification +# +CONFIG_NET_CLS=y +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +# CONFIG_NET_CLS_POLICE is not set +CONFIG_NET_CLS_IND=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_AF_RXRPC=m +# CONFIG_AF_RXRPC_DEBUG is not set +CONFIG_RXKAD=m +CONFIG_FIB_RULES=y + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# UML Network Devices +# +CONFIG_UML_NET=y +CONFIG_UML_NET_ETHERTAP=y +CONFIG_UML_NET_TUNTAP=y +CONFIG_UML_NET_SLIP=y +CONFIG_UML_NET_DAEMON=y +CONFIG_UML_NET_MCAST=y +CONFIG_UML_NET_PCAP=y +CONFIG_UML_NET_SLIRP=y +CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m + +# +# 
Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set +# CONFIG_WAN is not set +CONFIG_ATM_DRIVERS=y +# CONFIG_ATM_DUMMY is not set +# CONFIG_ATM_TCP is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +CONFIG_CONNECTOR=m + +# +# File systems +# +# CONFIG_EXT2_FS is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +# CONFIG_EXT4DEV_FS is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +CONFIG_ROMFS_FS=m +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set +CONFIG_GENERIC_ACL=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=y +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +CONFIG_CONFIGFS_FS=m + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_CRAMFS=m +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +# 
CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Distributed Lock Manager +# +# CONFIG_DLM is not set + +# +# Security options +# +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set +# CONFIG_SECURITY is not set +CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_MANAGER=m +CONFIG_CRYPTO_HMAC=m +# CONFIG_CRYPTO_XCBC is not set +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_SHA256=m +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set +CONFIG_CRYPTO_GF128MUL=m +# CONFIG_CRYPTO_ECB is not set +CONFIG_CRYPTO_CBC=m +CONFIG_CRYPTO_PCBC=m 
+CONFIG_CRYPTO_LRW=m +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_FCRYPT=m +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_586 is not set +# CONFIG_CRYPTO_SERPENT is not set +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_AES_586 is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_TEA=m +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_DEFLATE=m +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_TEST is not set +CONFIG_CRYPTO_HW=y + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_CRC_CCITT=m +CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y +CONFIG_HAS_DMA=y + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_MD is not set +# CONFIG_INPUT is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_SCHED_DEBUG=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +CONFIG_DEBUG_SLAB=y +CONFIG_DEBUG_SLAB_LEAK=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_PI_LIST=y +# CONFIG_RT_MUTEX_TESTER is not set +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_SPINLOCK_SLEEP=y +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_VM is not set +CONFIG_DEBUG_LIST=y 
+CONFIG_FRAME_POINTER=y +CONFIG_FORCED_INLINING=y +CONFIG_RCU_TORTURE_TEST=m +# CONFIG_FAULT_INJECTION is not set +# CONFIG_GPROF is not set +# CONFIG_GCOV is not set +# CONFIG_DEBUG_STACK_USAGE is not set diff --git a/datapath/linux-2.6-uml/kbuild.inc b/datapath/linux-2.6-uml/kbuild.inc new file mode 100644 index 00000000..07276c78 --- /dev/null +++ b/datapath/linux-2.6-uml/kbuild.inc @@ -0,0 +1,210 @@ +# The remainder of this file is from Intel's e1000 distribution, +# with the following license: + +################################################################################ +# +# Intel PRO/1000 Linux driver +# Copyright(c) 1999 - 2007 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Contact Information: +# Linux NICS +# e1000-devel Mailing List +# Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 +# +################################################################################ +ifeq (,$(BUILD_KERNEL)) +BUILD_KERNEL=$(shell uname -r) +endif + +########################################################################### +# Environment tests + +CC := gcc cc +CFLAGS += $(CFLAGS_EXTRA) + +ifeq (,$(KSRC)) + $(error Linux kernel source not found) +else +ifeq (/lib/modules/$(shell uname -r)/source, $(KSRC)) + KOBJ := /lib/modules/$(shell uname -r)/build +else + KOBJ := $(KSRC) +endif +endif + +# version 2.4 or 2.6? (we assume 2.2 isn't in use anymore) +K_VERSION:=$(shell if grep -q 'PATCHLEVEL = 4' $(KSRC)/Makefile; then echo 2.4; else echo 2.6; fi) + +ifneq ($(K_VERSION),2.6) + $(error Linux kernel source not not 2.6) +endif + +# check for version.h and autoconf.h for running kernel in /boot (SUSE) +ifneq (,$(wildcard /boot/vmlinuz.version.h)) + VERSION_FILE := /boot/vmlinuz.version.h + CONFIG_FILE := /boot/vmlinuz.autoconf.h + KVER := $(shell $(CC) $(CFLAGS) -E -dM $(VERSION_FILE) | \ + grep UTS_RELEASE | awk '{ print $$3 }' | sed 's/\"//g') + ifeq ($(KVER),$(shell uname -r)) + # set up include path to override headers from kernel source + x:=$(shell rm -rf include) + x:=$(shell mkdir -p include/linux) + x:=$(shell cp /boot/vmlinuz.version.h include/linux/version.h) + x:=$(shell cp /boot/vmlinuz.autoconf.h include/linux/autoconf.h) + CFLAGS += -I./include + else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := $(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h + endif +else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := $(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h +endif + +ifeq (,$(wildcard $(VERSION_FILE))) + $(error Linux kernel source not configured - missing 
version.h) +endif + +ifeq (,$(wildcard $(CONFIG_FILE))) + $(error Linux kernel source not configured - missing autoconf.h) +endif + + +test_cc = $(shell $(cc) --version > /dev/null 2>&1 && echo $(cc)) +CC := $(foreach cc, $(CC), $(test_cc)) +CC := $(firstword $(CC)) +ifeq (,$(CC)) + $(error Compiler not found) +endif + +# we need to know what platform the driver is being built on +# some additional features are only built on Intel platforms +ARCH := $(shell uname -m | sed 's/i.86/i386/') +ifeq ($(ARCH),alpha) + CFLAGS += -ffixed-8 -mno-fp-regs +endif +ifeq ($(ARCH),x86_64) + CFLAGS += -mcmodel=kernel -mno-red-zone +endif +ifeq ($(ARCH),ppc) + CFLAGS += -msoft-float +endif +ifeq ($(ARCH),ppc64) + CFLAGS += -m64 -msoft-float + LDFLAGS += -melf64ppc +endif + +# standard flags for module builds +CFLAGS += -DLINUX -D__KERNEL__ -DMODULE -O2 -pipe -Wall +CFLAGS += -I$(KSRC)/include -I. +CFLAGS += $(shell [ -f $(KSRC)/include/linux/modversions.h ] && \ + echo "-DMODVERSIONS -DEXPORT_SYMTAB \ + -include $(KSRC)/include/linux/modversions.h") + +RHC := $(KSRC)/include/linux/rhconfig.h +ifneq (,$(wildcard $(RHC))) + # 7.3 typo in rhconfig.h + ifneq (,$(shell $(CC) $(CFLAGS) -E -dM $(RHC) | grep __module__bigmem)) + CFLAGS += -D__module_bigmem + endif +endif + +# get the kernel version - we use this to find the correct install path +KVER := $(shell $(CC) $(CFLAGS) -E -dM $(VERSION_FILE) | grep UTS_RELEASE | \ + awk '{ print $$3 }' | sed 's/\"//g') + +# assume source symlink is the same as build, otherwise adjust KOBJ +ifneq (,$(wildcard /lib/modules/$(KVER)/build)) +ifneq ($(KSRC),$(shell cd /lib/modules/$(KVER)/build ; pwd -P)) + KOBJ=/lib/modules/$(KVER)/build +endif +endif + +KKVER := $(shell echo $(KVER) | \ + awk '{ if ($$0 ~ /2\.[6]\./) print "1"; else print "0"}') +ifeq ($(KKVER), 0) + $(error *** Aborting the build. 
\ + *** This driver is not supported on kernel versions older than 2.6.0) +endif + +# look for SMP in config.h +SMP := $(shell $(CC) $(CFLAGS) -E -dM $(CONFIG_FILE) | \ + grep -w CONFIG_SMP | awk '{ print $$3 }') +ifneq ($(SMP),1) + SMP := 0 +endif + +#ifneq ($(SMP),$(shell uname -a | grep SMP > /dev/null 2>&1 && echo 1 || echo 0)) +# $(warning ***) +# ifeq ($(SMP),1) +# $(warning *** Warning: kernel source configuration (SMP)) +# $(warning *** does not match running kernel (UP)) +# else +# $(warning *** Warning: kernel source configuration (UP)) +# $(warning *** does not match running kernel (SMP)) +# endif +# $(warning *** Continuing with build,) +# $(warning *** resulting driver may not be what you want) +# $(warning ***) +#endif + +ifeq ($(SMP),1) + CFLAGS += -D__SMP__ +endif + +########################################################################### +# Makefile for 2.6.x kernel +all: $(TARGET) +TARGET = openflow_mod.ko unit_mod.ko + +$(UNIT_CFILES): + $(foreach UNIT_CFILE, $(UNIT_CFILES), $(shell ln -s $(patsubst %,../t/%,$(UNIT_CFILE)) $(UNIT_CFILE))) + +ifneq ($(PATCHLEVEL),) +EXTRA_CFLAGS += $(CFLAGS_EXTRA) +obj-m += openflow_mod.o unit_mod.o +openflow_mod-objs := $(CFILES:.c=.o) +unit_mod-objs := $(UNIT_CFILES:.c=.o) +else +default: +ifeq ($(KOBJ),$(KSRC)) + $(MAKE) -C $(KSRC) SUBDIRS=$(shell pwd) modules +else + $(MAKE) -C $(KSRC) O=$(KOBJ) SUBDIRS=$(shell pwd) modules +endif +endif + +# depmod version for rpm builds +DEPVER := $(shell /sbin/depmod -V 2>/dev/null | \ + awk 'BEGIN {FS="."} NR==1 {print $$2}') + +.PHONY: clean + +clean: + rm -rf $(TARGET) $(TARGET:.ko=.o) $(TARGET:.ko=.mod.c) \ + $(TARGET:.ko=.mod.o) $(CFILES:.c=.o) $(MANFILE).gz .*cmd \ + .tmp_versions t/ tmp/ diff --git a/datapath/linux-2.6/.gitignore b/datapath/linux-2.6/.gitignore new file mode 100644 index 00000000..098a312e --- /dev/null +++ b/datapath/linux-2.6/.gitignore @@ -0,0 +1,18 @@ +/Makefile +/datapath.c +/chain.c +/crc32.c +/crc_t.c +/dp_dev.c +/flow.c +/forward.c 
+/forward_t.c +/datapath_t.c +/kernel-src.inc +/table-hash.c +/table-linear.c +/table-mac.c +/table_t.c +/unit-exports.c +/unit.c +/tmp diff --git a/datapath/linux-2.6/Makefile.in b/datapath/linux-2.6/Makefile.in new file mode 100644 index 00000000..5a7cb55f --- /dev/null +++ b/datapath/linux-2.6/Makefile.in @@ -0,0 +1,65 @@ +export builddir = @abs_builddir@ +export srcdir = @abs_srcdir@ +export top_srcdir = @abs_top_srcdir@ +export KSRC = @KSRC26@ +export KVERSION = 2.6 +export VMDIR = @VMDIR@ +export VERSION = @VERSION@ + +CFLAGS_EXTRA += -DVERSION=\"$(VERSION)\" +CFLAGS_EXTRA += -I $(srcdir)/.. -I $(srcdir)/datapath/ -I $(top_srcdir)/include + +# Files shared between 2.4 and 2.6 builds + +SHAREDFILES = ../chain.c ../crc32.c ../table-hash.c ../table-linear.c \ + ../table-mac.c ../forward.c ../flow.c ../unit-exports.c \ + ../datapath_t.c ../dp_dev.c \ + compat-2.6/genetlink.c \ + compat-2.6/random32.c + +HFILES = ../datapath.h ../chain.h ../crc32.h ../flow.h ../forward.h \ + ../table.h ../datapath_t.h \ + compat-2.6/include/compat26.h \ + compat-2.6/include/linux/ip.h \ + compat-2.6/include/linux/ipv6.h \ + compat-2.6/include/linux/lockdep.h \ + compat-2.6/include/linux/mutex.h \ + compat-2.6/include/linux/netlink.h \ + compat-2.6/include/linux/random.h \ + compat-2.6/include/linux/skbuff.h \ + compat-2.6/include/linux/tcp.h \ + compat-2.6/include/linux/timer.h \ + compat-2.6/include/linux/types.h \ + compat-2.6/include/linux/udp.h \ + compat-2.6/include/net/checksum.h \ + compat-2.6/include/net/genetlink.h + +SIMLINKFILES = $(patsubst ../%,%, $(SHAREDFILES)) + +CFILES = datapath.c $(SIMLINKFILES) + +# Testing files used for both 2.6 and 2.4 kernels. 
Are symlinked +# locally +SHARED_T_FILES = ../table_t.c ../crc_t.c ../forward_t.c ../unit.c + +UNIT_CFILES = $(patsubst ../%,%, $(SHARED_T_FILES)) + +# General rule to create symlinks of shared files +%.c : ../%.c + ln -s $< $@ + +# Conditional include so that make dist doesn't puke when +# this build isn't turned on by automake +ifneq (,$(KSRC)) +include $(srcdir)/kbuild.inc +else +clean: +endif + +distclean : clean +distdir : clean +install : + +all: default + +check: all diff --git a/datapath/linux-2.6/compat-2.6/compat26.h b/datapath/linux-2.6/compat-2.6/compat26.h new file mode 100644 index 00000000..80132324 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/compat26.h @@ -0,0 +1,25 @@ +#ifndef __COMPAT26_H +#define __COMPAT26_H 1 + +#include + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) +/*---------------------------------------------------------------------------- + * In 2.6.24, a namespace argument became required for dev_get_by_name. */ +#define net_init NULL + +#define dev_get_by_name(net, name) \ + dev_get_by_name((name)) + +#endif /* linux kernel <= 2.6.23 */ + + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,22) +/*---------------------------------------------------------------------------- + * In 2.6.23, the last argument was dropped from kmem_cache_create. 
*/ +#define kmem_cache_create(n, s, a, f, c) \ + kmem_cache_create((n), (s), (a), (f), (c), NULL) + +#endif /* linux kernel <= 2.6.22 */ + +#endif /* compat26.h */ diff --git a/datapath/linux-2.6/compat-2.6/genetlink.c b/datapath/linux-2.6/compat-2.6/genetlink.c new file mode 100644 index 00000000..c0e6ae9f --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/genetlink.c @@ -0,0 +1,15 @@ +#include "net/genetlink.h" + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + grp->id = 1; + grp->family = family; + + return 0; +} + +#endif /* kernel < 2.6.23 */ diff --git a/datapath/linux-2.6/compat-2.6/include/linux/ip.h b/datapath/linux-2.6/compat-2.6/include/linux/ip.h new file mode 100644 index 00000000..79158735 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/ip.h @@ -0,0 +1,20 @@ +#ifndef __LINUX_IP_WRAPPER_H +#define __LINUX_IP_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#ifdef __KERNEL__ +#include + +static inline struct iphdr *ip_hdr(const struct sk_buff *skb) +{ + return (struct iphdr *)skb_network_header(skb); +} +#endif /* __KERNEL__ */ + +#endif /* linux kernel < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h b/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h new file mode 100644 index 00000000..e735a780 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h @@ -0,0 +1,20 @@ +#ifndef __LINUX_IPV6_WRAPPER_H +#define __LINUX_IPV6_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#ifdef __KERNEL__ +#include + +static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb) +{ + return (struct ipv6hdr *)skb_network_header(skb); +} +#endif /* __KERNEL__ */ + +#endif /* linux kernel < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h 
b/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h new file mode 100644 index 00000000..1c839423 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h @@ -0,0 +1,450 @@ +/* + * Runtime locking correctness validator + * + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * see Documentation/lockdep-design.txt for more details. + */ +#ifndef __LINUX_LOCKDEP_WRAPPER_H +#define __LINUX_LOCKDEP_WRAPPER_H + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) + +struct task_struct; +struct lockdep_map; + +#ifdef CONFIG_LOCKDEP + +#include +#include +#include +#include + +/* + * Lock-class usage-state bits: + */ +enum lock_usage_bit +{ + LOCK_USED = 0, + LOCK_USED_IN_HARDIRQ, + LOCK_USED_IN_SOFTIRQ, + LOCK_ENABLED_SOFTIRQS, + LOCK_ENABLED_HARDIRQS, + LOCK_USED_IN_HARDIRQ_READ, + LOCK_USED_IN_SOFTIRQ_READ, + LOCK_ENABLED_SOFTIRQS_READ, + LOCK_ENABLED_HARDIRQS_READ, + LOCK_USAGE_STATES +}; + +/* + * Usage-state bitmasks: + */ +#define LOCKF_USED (1 << LOCK_USED) +#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ) +#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ) +#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS) +#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS) + +#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS) +#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) + +#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ) +#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ) +#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ) +#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ) + +#define LOCKF_ENABLED_IRQS_READ \ + (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ) +#define LOCKF_USED_IN_IRQ_READ \ + (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) + +#define MAX_LOCKDEP_SUBCLASSES 8UL + +/* + * 
Lock-classes are keyed via unique addresses, by embedding the + * lockclass-key into the kernel (or module) .data section. (For + * static locks we use the lock address itself as the key.) + */ +struct lockdep_subclass_key { + char __one_byte; +} __attribute__ ((__packed__)); + +struct lock_class_key { + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; +}; + +/* + * The lock-class itself: + */ +struct lock_class { + /* + * class-hash: + */ + struct list_head hash_entry; + + /* + * global list of all lock-classes: + */ + struct list_head lock_entry; + + struct lockdep_subclass_key *key; + unsigned int subclass; + + /* + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; + struct stack_trace usage_traces[LOCK_USAGE_STATES]; + + /* + * These fields represent a directed graph of lock dependencies, + * to every node we attach a list of "forward" and a list of + * "backward" graph nodes. + */ + struct list_head locks_after, locks_before; + + /* + * Generation counter, when doing certain classes of graph walking, + * to ensure that we check one node only once: + */ + unsigned int version; + + /* + * Statistics counter: + */ + unsigned long ops; + + const char *name; + int name_version; + +#ifdef CONFIG_LOCK_STAT + unsigned long contention_point[4]; +#endif +}; + +#ifdef CONFIG_LOCK_STAT +struct lock_time { + s64 min; + s64 max; + s64 total; + unsigned long nr; +}; + +enum bounce_type { + bounce_acquired_write, + bounce_acquired_read, + bounce_contended_write, + bounce_contended_read, + nr_bounce_types, + + bounce_acquired = bounce_acquired_write, + bounce_contended = bounce_contended_write, +}; + +struct lock_class_stats { + unsigned long contention_point[4]; + struct lock_time read_waittime; + struct lock_time write_waittime; + struct lock_time read_holdtime; + struct lock_time write_holdtime; + unsigned long bounces[nr_bounce_types]; +}; + +struct lock_class_stats lock_stats(struct lock_class *class); +void clear_lock_stats(struct lock_class 
*class); +#endif + +/* + * Map the lock object (the lock instance) to the lock-class object. + * This is embedded into specific lock instances: + */ +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class_cache; + const char *name; +#ifdef CONFIG_LOCK_STAT + int cpu; +#endif +}; + +/* + * Every lock has a list of other locks that were taken after it. + * We only grow the list, never remove from it: + */ +struct lock_list { + struct list_head entry; + struct lock_class *class; + struct stack_trace trace; + int distance; +}; + +/* + * We record lock dependency chains, so that we can cache them: + */ +struct lock_chain { + struct list_head entry; + u64 chain_key; +}; + +struct held_lock { + /* + * One-way hash of the dependency chain up to this point. We + * hash the hashes step by step as the dependency chain grows. + * + * We use it for dependency-caching and we skip detection + * passes and dependency-updates if there is a cache-hit, so + * it is absolutely critical for 100% coverage of the validator + * to have a unique key value for every unique dependency path + * that can occur in the system, to make a unique hash value + * as likely as possible - hence the 64-bit width. + * + * The task struct holds the current hash value (initialized + * with zero), here we store the previous hash value: + */ + u64 prev_chain_key; + struct lock_class *class; + unsigned long acquire_ip; + struct lockdep_map *instance; + +#ifdef CONFIG_LOCK_STAT + u64 waittime_stamp; + u64 holdtime_stamp; +#endif + /* + * The lock-stack is unified in that the lock chains of interrupt + * contexts nest ontop of process context chains, but we 'separate' + * the hashes by starting with 0 if we cross into an interrupt + * context, and we also keep do not add cross-context lock + * dependencies - the lock usage graph walking covers that area + * anyway, and we'd just unnecessarily increase the number of + * dependencies otherwise. 
[Note: hardirq and softirq contexts + * are separated from each other too.] + * + * The following field is used to detect when we cross into an + * interrupt context: + */ + int irq_context; + int trylock; + int read; + int check; + int hardirqs_off; +}; + +/* + * Initialization, self-test and debugging-output methods: + */ +extern void lockdep_init(void); +extern void lockdep_info(void); +extern void lockdep_reset(void); +extern void lockdep_reset_lock(struct lockdep_map *lock); +extern void lockdep_free_key_range(void *start, unsigned long size); + +extern void lockdep_off(void); +extern void lockdep_on(void); + +/* + * These methods are used by specific locking variants (spinlocks, + * rwlocks, mutexes and rwsems) to pass init/acquire/release events + * to lockdep: + */ + +extern void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass); + +/* + * Reinitialize a lock key - for cases where there is special locking or + * special initialization of locks so that the validator gets the scope + * of dependencies wrong: they are either too broad (they need a class-split) + * or they are too narrow (they suffer from a false class-split): + */ +#define lockdep_set_class(lock, key) \ + lockdep_init_map(&(lock)->dep_map, #key, key, 0) +#define lockdep_set_class_and_name(lock, key, name) \ + lockdep_init_map(&(lock)->dep_map, name, key, 0) +#define lockdep_set_class_and_subclass(lock, key, sub) \ + lockdep_init_map(&(lock)->dep_map, #key, key, sub) +#define lockdep_set_subclass(lock, sub) \ + lockdep_init_map(&(lock)->dep_map, #lock, \ + (lock)->dep_map.key, sub) + +/* + * Acquire a lock. + * + * Values for "read": + * + * 0: exclusive (write) acquire + * 1: read-acquire (no recursion allowed) + * 2: read-acquire with same-instance recursion allowed + * + * Values for check: + * + * 0: disabled + * 1: simple checks (freeing, held-at-exit-time, etc.) 
+ * 2: full validation + */ +extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip); + +extern void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip); + +# define INIT_LOCKDEP .lockdep_recursion = 0, + +#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) + +#else /* !LOCKDEP */ + +static inline void lockdep_off(void) +{ +} + +static inline void lockdep_on(void) +{ +} + +# define lock_acquire(l, s, t, r, c, i) do { } while (0) +# define lock_release(l, n, i) do { } while (0) +# define lockdep_init() do { } while (0) +# define lockdep_info() do { } while (0) +# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0) +# define lockdep_set_class(lock, key) do { (void)(key); } while (0) +# define lockdep_set_class_and_name(lock, key, name) \ + do { (void)(key); } while (0) +#define lockdep_set_class_and_subclass(lock, key, sub) \ + do { (void)(key); } while (0) +#define lockdep_set_subclass(lock, sub) do { } while (0) + +# define INIT_LOCKDEP +# define lockdep_reset() do { debug_locks = 1; } while (0) +# define lockdep_free_key_range(start, size) do { } while (0) +/* + * The class key takes no space if lockdep is disabled: + */ +struct lock_class_key { }; + +#define lockdep_depth(tsk) (0) + +#endif /* !LOCKDEP */ + +#ifdef CONFIG_LOCK_STAT + +extern void lock_contended(struct lockdep_map *lock, unsigned long ip); +extern void lock_acquired(struct lockdep_map *lock); + +#define LOCK_CONTENDED(_lock, try, lock) \ +do { \ + if (!try(_lock)) { \ + lock_contended(&(_lock)->dep_map, _RET_IP_); \ + lock(_lock); \ + } \ + lock_acquired(&(_lock)->dep_map); \ +} while (0) + +#else /* CONFIG_LOCK_STAT */ + +#define lock_contended(lockdep_map, ip) do {} while (0) +#define lock_acquired(lockdep_map) do {} while (0) + +#define LOCK_CONTENDED(_lock, try, lock) \ + lock(_lock) + +#endif /* CONFIG_LOCK_STAT */ + +#if defined(CONFIG_TRACE_IRQFLAGS) && 
defined(CONFIG_GENERIC_HARDIRQS) +extern void early_init_irq_lock_class(void); +#else +static inline void early_init_irq_lock_class(void) +{ +} +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +extern void early_boot_irqs_off(void); +extern void early_boot_irqs_on(void); +extern void print_irqtrace_events(struct task_struct *curr); +#else +static inline void early_boot_irqs_off(void) +{ +} +static inline void early_boot_irqs_on(void) +{ +} +static inline void print_irqtrace_events(struct task_struct *curr) +{ +} +#endif + +/* + * For trivial one-depth nesting of a lock-class, the following + * global define can be used. (Subsystems with multiple levels + * of nesting should define their own lock-nesting subclasses.) + */ +#define SINGLE_DEPTH_NESTING 1 + +/* + * Map the dependency ops to NOP or to real lockdep ops, depending + * on the per lock-class debug mode: + */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define spin_release(l, n, i) lock_release(l, n, i) +#else +# define spin_acquire(l, s, t, i) do { } while (0) +# define spin_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i) +# else +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i) +# endif +# define rwlock_release(l, n, i) lock_release(l, n, i) +#else +# define rwlock_acquire(l, s, t, i) do { } while (0) +# define rwlock_acquire_read(l, s, t, i) do { } while (0) +# define rwlock_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# 
define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define mutex_release(l, n, i) lock_release(l, n, i) +#else +# define mutex_acquire(l, s, t, i) do { } while (0) +# define mutex_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i) +# else +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i) +# endif +# define rwsem_release(l, n, i) lock_release(l, n, i) +#else +# define rwsem_acquire(l, s, t, i) do { } while (0) +# define rwsem_acquire_read(l, s, t, i) do { } while (0) +# define rwsem_release(l, n, i) do { } while (0) +#endif + +#endif /* linux kernel < 2.6.18 */ + +#endif /* __LINUX_LOCKDEP_WRAPPER_H */ diff --git a/datapath/linux-2.6/compat-2.6/include/linux/mutex.h b/datapath/linux-2.6/compat-2.6/include/linux/mutex.h new file mode 100644 index 00000000..cb5b2738 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/mutex.h @@ -0,0 +1,59 @@ +#ifndef __LINUX_MUTEX_WRAPPER_H +#define __LINUX_MUTEX_WRAPPER_H + + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) + +#include + +struct mutex { + struct semaphore sema; +}; + +#define mutex_init(mutex) init_MUTEX(&(mutex)->sema) /* (mutex) parenthesized so an argument such as &m expands correctly */ +#define mutex_destroy(mutex) do { } while (0) + +#define __MUTEX_INITIALIZER(name) \ + __SEMAPHORE_INITIALIZER(name,1) + +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = { __MUTEX_INITIALIZER(mutexname.sema) } + +/* + * See kernel/mutex.c for detailed documentation of these APIs. + * Also see Documentation/mutex-design.txt. 
+ */ +static inline void mutex_lock(struct mutex *lock) +{ + down(&lock->sema); +} + +static inline int mutex_lock_interruptible(struct mutex *lock) +{ + return down_interruptible(&lock->sema); +} + +#define mutex_lock_nested(lock, subclass) mutex_lock(lock) +#define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) + +/* + * NOTE: mutex_trylock() follows the spin_trylock() convention, + * not the down_trylock() convention! + */ +static inline int mutex_trylock(struct mutex *lock) +{ + return !down_trylock(&lock->sema); +} + +static inline void mutex_unlock(struct mutex *lock) +{ + up(&lock->sema); +} +#else + +#include_next + +#endif /* linux version < 2.6.16 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netlink.h b/datapath/linux-2.6/compat-2.6/include/linux/netlink.h new file mode 100644 index 00000000..f1588af0 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/netlink.h @@ -0,0 +1,22 @@ +#ifndef __LINUX_NETLINK_WRAPPER_H +#define __LINUX_NETLINK_WRAPPER_H 1 + +#include +#include_next +#include + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN) + +#define nlmsg_new(s, f) nlmsg_new_proper((s), (f)) +static inline struct sk_buff *nlmsg_new_proper(int size, gfp_t flags) +{ + return alloc_skb(size, flags); +} + +#endif /* linux kernel < 2.6.19 */ + + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/random.h b/datapath/linux-2.6/compat-2.6/include/linux/random.h new file mode 100644 index 00000000..4e4932c9 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/random.h @@ -0,0 +1,17 @@ +#ifndef __LINUX_RANDOM_WRAPPER_H +#define __LINUX_RANDOM_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +#ifdef __KERNEL__ +u32 random32(void); +void srandom32(u32 seed); +#endif /* __KERNEL__ */ + +#endif /* linux kernel < 2.6.19 */ + + +#endif diff --git 
a/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h b/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h new file mode 100644 index 00000000..67726747 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h @@ -0,0 +1,63 @@ +#ifndef __LINUX_SKBUFF_WRAPPER_H +#define __LINUX_SKBUFF_WRAPPER_H 1 + +#include_next + +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +/* Emulate Linux 2.6.17 and later behavior, in which kfree_skb silently ignores + * null pointer arguments. */ +#define kfree_skb(skb) kfree_skb_maybe_null(skb) +static inline void kfree_skb_maybe_null(struct sk_buff *skb) +{ + if (likely(skb != NULL)) + (kfree_skb)(skb); +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#define mac_header mac.raw +#define network_header nh.raw + + +/* Note that CHECKSUM_PARTIAL is not implemented, but this allows us to at + * least test against it: see update_csum() in forward.c. */ +#define CHECKSUM_PARTIAL 3 +#define CHECKSUM_COMPLETE CHECKSUM_HW + +static inline unsigned char *skb_transport_header(const struct sk_buff *skb) +{ + return skb->h.raw; +} + +static inline void skb_set_transport_header(struct sk_buff *skb, + const int offset) +{ + skb->h.raw = skb->data + offset; +} + +static inline unsigned char *skb_network_header(const struct sk_buff *skb) +{ + return skb->nh.raw; +} + +static inline void skb_set_network_header(struct sk_buff *skb, const int offset) +{ + skb->nh.raw = skb->data + offset; +} + +static inline unsigned char *skb_mac_header(const struct sk_buff *skb) +{ + return skb->mac.raw; +} + +static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) +{ + skb->mac.raw = skb->data + offset; +} + +#endif /* linux kernel < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/tcp.h b/datapath/linux-2.6/compat-2.6/include/linux/tcp.h new file mode 100644 index 00000000..528f16af --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/tcp.h @@ -0,0 +1,18 @@ 
+#ifndef __LINUX_TCP_WRAPPER_H +#define __LINUX_TCP_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#ifdef __KERNEL__ +static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb) +{ + return (struct tcphdr *)skb_transport_header(skb); +} +#endif /* __KERNEL__ */ + +#endif /* linux kernel < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/timer.h b/datapath/linux-2.6/compat-2.6/include/linux/timer.h new file mode 100644 index 00000000..d37fcadd --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/timer.h @@ -0,0 +1,90 @@ +#ifndef __LINUX_TIMER_WRAPPER_H +#define __LINUX_TIMER_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +extern unsigned long volatile jiffies; + +/** + * __round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the @j parameter. 
+ */ +static inline unsigned long __round_jiffies(unsigned long j, int cpu) +{ + int rem; + unsigned long original = j; + + /* + * We don't want all cpus firing their timers at once hitting the + * same lock or cachelines, so we skew each extra cpu with an extra + * 3 jiffies. This 3 jiffies came originally from the mm/ code which + * already did this. + * The skew is done by adding 3*cpunr, then round, then subtract this + * extra offset again. + */ + j += cpu * 3; + + rem = j % HZ; + + /* + * If the target jiffie is just after a whole second (which can happen + * due to delays of the timer irq, long irq off times etc etc) then + * we should round down to the whole second, not up. Use 1/4th second + * as cutoff for this rounding as an extreme upper bound for this. + */ + if (rem < HZ/4) /* round down */ + j = j - rem; + else /* round up */ + j = j - rem + HZ; + + /* now that we have rounded, subtract the extra skew again */ + j -= cpu * 3; + + if (j <= jiffies) /* rounding ate our timeout entirely; */ + return original; + return j; +} + + +/** + * round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The return value is the rounded version of the @j parameter. 
+ */ +static inline unsigned long round_jiffies(unsigned long j) +{ + return __round_jiffies(j, 0); // FIXME: cpu is hard-coded to 0, so __round_jiffies() never applies its per-CPU skew +} + +#endif /* linux kernel < 2.6.20 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/types.h b/datapath/linux-2.6/compat-2.6/include/linux/types.h new file mode 100644 index 00000000..c1f375eb --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/types.h @@ -0,0 +1,14 @@ +#ifndef __LINUX_TYPES_WRAPPER_H +#define __LINUX_TYPES_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + +#endif /* linux kernel < 2.6.20 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/udp.h b/datapath/linux-2.6/compat-2.6/include/linux/udp.h new file mode 100644 index 00000000..ffab1873 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/udp.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_UDP_WRAPPER_H +#define __LINUX_UDP_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#ifdef __KERNEL__ +static inline struct udphdr *udp_hdr(const struct sk_buff *skb) +{ + return (struct udphdr *)skb_transport_header(skb); +} +#endif /* __KERNEL__ */ + + +#endif /* linux kernel < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/net/checksum.h b/datapath/linux-2.6/compat-2.6/include/net/checksum.h new file mode 100644 index 00000000..c64c6bd0 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/net/checksum.h @@ -0,0 +1,16 @@ +#ifndef __NET_CHECKSUM_WRAPPER_H +#define __NET_CHECKSUM_WRAPPER_H 1 + +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +static inline __wsum csum_unfold(__sum16 n) +{ + return (__force __wsum)n; +} + +#endif /* linux kernel < 2.6.20 */ + +#endif /* checksum.h */ diff --git a/datapath/linux-2.6/compat-2.6/include/net/genetlink.h b/datapath/linux-2.6/compat-2.6/include/net/genetlink.h new file mode 100644 index 
00000000..57a47316 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/net/genetlink.h @@ -0,0 +1,123 @@ +#ifndef __NET_GENERIC_NETLINK_WRAPPER_H +#define __NET_GENERIC_NETLINK_WRAPPER_H 1 + + +#include +#include_next + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +#include + +/*---------------------------------------------------------------------------- + * In 2.6.23, registering of multicast groups was added. Our compatibility + * layer just supports registering a single group, since that's all we + * need. + */ + +/** + * struct genl_multicast_group - generic netlink multicast group + * @name: name of the multicast group, names are per-family + * @id: multicast group ID, assigned by the core, to use with + * genlmsg_multicast(). + * @list: list entry for linking + * @family: pointer to family, need not be set before registering + */ +struct genl_multicast_group +{ + struct genl_family *family; /* private */ + struct list_head list; /* private */ + char name[GENL_NAMSIZ]; + u32 id; +}; + +int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp); +#endif /* linux kernel < 2.6.23 */ + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +/** + * genlmsg_msg_size - length of genetlink message not including padding + * @payload: length of message payload + */ +static inline int genlmsg_msg_size(int payload) +{ + return GENL_HDRLEN + payload; +} + +/** + * genlmsg_total_size - length of genetlink message including padding + * @payload: length of message payload + */ +static inline int genlmsg_total_size(int payload) +{ + return NLMSG_ALIGN(genlmsg_msg_size(payload)); +} + +#define genlmsg_multicast(s, p, g, f) \ + genlmsg_multicast_flags((s), (p), (g), (f)) + +static inline int genlmsg_multicast_flags(struct sk_buff *skb, u32 pid, + unsigned int group, gfp_t flags) +{ + int err; + + NETLINK_CB(skb).dst_group = group; + + err = netlink_broadcast(genl_sock, skb, pid, group, flags); + if (err > 0) + err = 0; + + 
return err; +} +#endif /* linux kernel < 2.6.19 */ + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +#define genlmsg_put(skb, p, seq, fam, flg, c) \ + genlmsg_put((skb), (p), (seq), (fam)->id, (fam)->hdrsize, \ + (flg), (c), (fam)->version) /* expands @fam into the id, hdrsize and version arguments */ + +/** + * genlmsg_put_reply - Add generic netlink header to a reply message + * @skb: socket buffer holding the message + * @info: receiver info; its snd_pid and snd_seq go into the header + * @family: generic netlink family + * @flags: netlink message flags + * @cmd: generic netlink command + * + * Returns pointer to user specific header + */ +static inline void *genlmsg_put_reply(struct sk_buff *skb, + struct genl_info *info, struct genl_family *family, + int flags, u8 cmd) +{ + return genlmsg_put(skb, info->snd_pid, info->snd_seq, family, + flags, cmd); +} + +/** + * genlmsg_reply - reply to a request + * @skb: netlink message to be sent back + * @info: receiver information; the reply is unicast to its snd_pid + */ +static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) +{ + return genlmsg_unicast(skb, info->snd_pid); +} + +/** + * genlmsg_new - Allocate a new generic netlink message + * @payload: size of the message payload + * @flags: the type of memory to allocate. 
+ */ +static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) +{ + return nlmsg_new(genlmsg_total_size(payload), flags); /* size covers the genetlink header plus padding */ +} +#endif /* linux kernel < 2.6.20 */ + +#endif /* genetlink.h */ diff --git a/datapath/linux-2.6/compat-2.6/random32.c b/datapath/linux-2.6/compat-2.6/random32.c new file mode 100644 index 00000000..981b55c1 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/random32.c @@ -0,0 +1,146 @@ +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +/* + This is a maximally equidistributed combined Tausworthe generator + based on code from GNU Scientific Library 1.5 (30 Jun 2004) + + x_n = (s1_n ^ s2_n ^ s3_n) + + s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) + s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) + s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) + + The period of this generator is about 2^88. + + From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + Generators", Mathematics of Computation, 65, 213 (1996), 203--213. + + This is available on the net from L'Ecuyer's home page, + + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + + There is an erratum in the paper "Tables of Maximally + Equidistributed Combined LFSR Generators", Mathematics of + Computation, 68, 225 (1999), 261--269: + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + + ... the k_j most significant bits of z_j must be non- + zero, for each j. (Note: this restriction also applies to the + computer code given in [4], but was mistakenly not mentioned in + that paper.) + + This affects the seeding procedure by imposing the requirement + s1 > 1, s2 > 7, s3 > 15. 
+ +*/ + +#include /* NOTE(review): the names of these five headers were lost in extraction; restore from the original commit */ +#include +#include +#include +#include + +#include "compat26.h" + +struct rnd_state { + u32 s1, s2, s3; +}; + +static struct rnd_state net_rand_state[NR_CPUS]; + +static u32 __random32(struct rnd_state *state) +{ +#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) + + state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); + state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); + state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); + + return (state->s1 ^ state->s2 ^ state->s3); /* x_n = s1_n ^ s2_n ^ s3_n */ +} + +static void __set_random32(struct rnd_state *state, unsigned long s) +{ + if (s == 0) + s = 1; /* default seed is 1 */ + +#define LCG(n) (69069 * n) + state->s1 = LCG(s); /* NOTE(review): nothing here enforces s1 > 1, s2 > 7, s3 > 15 as this file's header comment requires */ + state->s2 = LCG(state->s1); + state->s3 = LCG(state->s2); + + /* "warm it up" */ + __random32(state); + __random32(state); + __random32(state); + __random32(state); + __random32(state); + __random32(state); +} + +/** + * random32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. + */ +u32 random32(void) +{ + return __random32(&net_rand_state[smp_processor_id()]); +} +EXPORT_SYMBOL(random32); + +/** + * srandom32 - add entropy to pseudo random number generator + * @entropy: value XORed with the current CPU's s1 to form the new seed + * + * Add some additional seeding to the random32() pool. + * Note: this pool is per cpu so it only affects current CPU. + */ +void srandom32(u32 entropy) +{ + struct rnd_state *state = &net_rand_state[smp_processor_id()]; + __set_random32(state, state->s1 ^ entropy); +} +EXPORT_SYMBOL(srandom32); + +static int __init random32_reseed(void); + +/* + * Generate some initially weak seeding values to allow + * to start the random32() engine. 
+ */ +int __init random32_init(void) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) { + struct rnd_state *state = &net_rand_state[i]; + __set_random32(state, i + jiffies); /* weak seed: CPU index plus current jiffies */ + } + random32_reseed(); /* then overwrite every seed with get_random_bytes() output */ + return 0; +} + +/* + * Generate better values after random number generator + * is fully initialized. + */ +static int __init random32_reseed(void) +{ + int i; + unsigned long seed; + + for (i = 0; i < NR_CPUS; i++) { + struct rnd_state *state = &net_rand_state[i]; + + get_random_bytes(&seed, sizeof(seed)); + __set_random32(state, seed); + } + return 0; +} + +#endif /* kernel < 2.6.19 */ diff --git a/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm b/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm new file mode 100644 index 00000000..f287cf72 --- /dev/null +++ b/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm @@ -0,0 +1,1408 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.23-rc9 +# Fri Oct 19 15:08:37 2007 +# +CONFIG_X86_32=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y +CONFIG_CLOCKSOURCE_WATCHDOG=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_X86=y +CONFIG_MMU=y +CONFIG_ZONE_DMA=y +CONFIG_QUICKLIST=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_DMI=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +# CONFIG_USER_NS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y 
+CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CPUSETS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +CONFIG_LBD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_LSF=y +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" + +# +# Processor type and features +# +# CONFIG_TICK_ONESHOT is not set +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_SMP=y +CONFIG_X86_PC=y +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +# CONFIG_X86_GENERICARCH is not set +# CONFIG_X86_ES7000 is not set +# CONFIG_PARAVIRT is not set +# CONFIG_M386 is not set +CONFIG_M486=y +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# 
CONFIG_MPENTIUMM is not set +# CONFIG_MCORE2 is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set +CONFIG_X86_GENERIC=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_XADD=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_X86_PPRO_FENCE=y +CONFIG_X86_F00F_BUG=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_ALIGNMENT_16=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_MINIMUM_CPU_FAMILY=4 +# CONFIG_HPET_TIMER is not set +CONFIG_NR_CPUS=8 +# CONFIG_SCHED_SMT is not set +CONFIG_SCHED_MC=y +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_PREEMPT_BKL=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +# CONFIG_X86_MCE is not set +CONFIG_VM86=y +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_X86_REBOOTFIXUPS is not set +# CONFIG_MICROCODE is not set +# CONFIG_X86_MSR is not set +# CONFIG_X86_CPUID is not set + +# +# Firmware Drivers +# +# CONFIG_EDD is not set +# CONFIG_DELL_RBU is not set +# CONFIG_DCDBAS is not set +CONFIG_DMIID=y +# CONFIG_NOHIGHMEM is not set +CONFIG_HIGHMEM4G=y +# CONFIG_HIGHMEM64G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_HIGHMEM=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y 
+CONFIG_SPARSEMEM_STATIC=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_NR_QUICK=1 +CONFIG_VIRT_TO_BUS=y +# CONFIG_HIGHPTE is not set +# CONFIG_MATH_EMULATION is not set +# CONFIG_MTRR is not set +CONFIG_IRQBALANCE=y +CONFIG_SECCOMP=y +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +# CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set +CONFIG_PHYSICAL_START=0x100000 +# CONFIG_RELOCATABLE is not set +CONFIG_PHYSICAL_ALIGN=0x100000 +CONFIG_HOTPLUG_CPU=y +CONFIG_COMPAT_VDSO=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y + +# +# Power management options (ACPI, APM) +# +CONFIG_PM=y +# CONFIG_PM_LEGACY is not set +# CONFIG_PM_DEBUG is not set +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND_SMP_POSSIBLE=y +CONFIG_SUSPEND=y +CONFIG_HIBERNATION_SMP_POSSIBLE=y +# CONFIG_HIBERNATION is not set +# CONFIG_ACPI is not set +CONFIG_APM=y +# CONFIG_APM_IGNORE_USER_SUSPEND is not set +# CONFIG_APM_DO_ENABLE is not set +# CONFIG_APM_CPU_IDLE is not set +# CONFIG_APM_DISPLAY_BLANK is not set +# CONFIG_APM_ALLOW_INTS is not set +# CONFIG_APM_REAL_MODE_POWER_OFF is not set + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set + +# +# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GOMMCONFIG is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +# CONFIG_PCIEPORTBUS is not set +CONFIG_ARCH_SUPPORTS_MSI=y +# CONFIG_PCI_MSI is not set +# CONFIG_PCI_DEBUG is not set +CONFIG_HT_IRQ=y +CONFIG_ISA_DMA_API=y +CONFIG_ISA=y +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_SCx200 is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_AOUT is not set +CONFIG_BINFMT_MISC=m + +# +# Networking +# 
+CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +CONFIG_XFRM_USER=m +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_MIGRATE=y +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +# CONFIG_IP_ROUTE_VERBOSE is not set +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_TUNNEL=m +CONFIG_INET_TUNNEL=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=m +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_VEGAS=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set +CONFIG_IPV6=m +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +# CONFIG_IPV6_OPTIMISTIC_DAD is not set +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +# CONFIG_IPV6_MIP6 is not set +CONFIG_INET6_XFRM_TUNNEL=m +CONFIG_INET6_TUNNEL=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=m 
+CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_BRIDGE_NETFILTER=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NF_CONNTRACK_ENABLED=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CT_ACCT=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_GRE=m +CONFIG_NF_CT_PROTO_SCTP=m +# CONFIG_NF_CT_PROTO_UDPLITE is not set +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_TRACE is not set +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +# CONFIG_NETFILTER_XT_MATCH_CONNLIMIT is not set +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +# CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m 
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +# CONFIG_NETFILTER_XT_MATCH_U32 is not set +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_IPRANGE=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_MATCH_ADDRTYPE=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT=m +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_SAME=m +CONFIG_NF_NAT_SNMP_BASIC=m +CONFIG_NF_NAT_PROTO_GRE=m +CONFIG_NF_NAT_FTP=m +CONFIG_NF_NAT_IRC=m +CONFIG_NF_NAT_TFTP=m +CONFIG_NF_NAT_AMANDA=m +CONFIG_NF_NAT_PPTP=m +CONFIG_NF_NAT_H323=m +CONFIG_NF_NAT_SIP=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m + +# +# IPv6: Netfilter Configuration (EXPERIMENTAL) +# +CONFIG_NF_CONNTRACK_IPV6=m +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_RAW=m + +# +# DECnet: 
Netfilter Configuration +# +# CONFIG_DECNET_NF_GRABULATOR is not set + +# +# Bridge: Netfilter Configuration +# +# CONFIG_BRIDGE_NF_EBTABLES is not set +CONFIG_IP_DCCP=m +CONFIG_INET_DCCP_DIAG=m +CONFIG_IP_DCCP_ACKVEC=y + +# +# DCCP CCIDs Configuration (EXPERIMENTAL) +# +CONFIG_IP_DCCP_CCID2=m +# CONFIG_IP_DCCP_CCID2_DEBUG is not set +CONFIG_IP_DCCP_CCID3=m +CONFIG_IP_DCCP_TFRC_LIB=m +# CONFIG_IP_DCCP_CCID3_DEBUG is not set +CONFIG_IP_DCCP_CCID3_RTO=100 + +# +# DCCP Kernel Hacking +# +# CONFIG_IP_DCCP_DEBUG is not set +CONFIG_IP_SCTP=m +# CONFIG_SCTP_DBG_MSG is not set +# CONFIG_SCTP_DBG_OBJCNT is not set +# CONFIG_SCTP_HMAC_NONE is not set +# CONFIG_SCTP_HMAC_SHA1 is not set +CONFIG_SCTP_HMAC_MD5=y +CONFIG_TIPC=m +CONFIG_TIPC_ADVANCED=y +CONFIG_TIPC_ZONES=3 +CONFIG_TIPC_CLUSTERS=1 +CONFIG_TIPC_NODES=255 +CONFIG_TIPC_SLAVE_NODES=0 +CONFIG_TIPC_PORTS=8191 +CONFIG_TIPC_LOG=0 +# CONFIG_TIPC_DEBUG is not set +CONFIG_ATM=m +CONFIG_ATM_CLIP=m +# CONFIG_ATM_CLIP_NO_ICMP is not set +CONFIG_ATM_LANE=m +# CONFIG_ATM_MPOA is not set +CONFIG_ATM_BR2684=m +CONFIG_ATM_BR2684_IPFILTER=y +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_DECNET=m +# CONFIG_DECNET_ROUTER is not set +CONFIG_LLC=m +CONFIG_LLC2=m +CONFIG_IPX=m +CONFIG_IPX_INTERN=y +CONFIG_ATALK=m +CONFIG_DEV_APPLETALK=m +# CONFIG_LTPC is not set +# CONFIG_COPS is not set +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y +CONFIG_X25=m +CONFIG_LAPB=m +CONFIG_ECONET=m +CONFIG_ECONET_AUNUDP=y +CONFIG_ECONET_NATIVE=y +CONFIG_WAN_ROUTER=m + +# +# QoS and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_FIFO=y + +# +# Queueing/Scheduling +# +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_ATM=m +CONFIG_NET_SCH_PRIO=m +# CONFIG_NET_SCH_RR is not set +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_INGRESS=m + +# +# Classification +# +CONFIG_NET_CLS=y 
+CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +# CONFIG_NET_CLS_POLICE is not set +CONFIG_NET_CLS_IND=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_AF_RXRPC=m +# CONFIG_AF_RXRPC_DEBUG is not set +CONFIG_RXKAD=m +CONFIG_FIB_RULES=y + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +CONFIG_CONNECTOR=m +# CONFIG_MTD is not set +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +# CONFIG_PARPORT_SERIAL is not set +# CONFIG_PARPORT_PC_FIFO is not set +# CONFIG_PARPORT_PC_SUPERIO is not set +# CONFIG_PARPORT_GSC is not set +# CONFIG_PARPORT_AX88796 is not set +# CONFIG_PARPORT_1284 is not set +# CONFIG_PNP is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +# CONFIG_PARIDE is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=m 
+CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_SX8 is not set +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +CONFIG_MISC_DEVICES=y +# CONFIG_IBM_ASM is not set +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +# CONFIG_BLK_DEV_HD_IDE is not set +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +# CONFIG_BLK_DEV_CMD640 is not set +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +CONFIG_IDEPCI_PCIBUS_ORDER=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_GENERIC is not set +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_RZ1000 is not set +# CONFIG_BLK_DEV_IDEDMA_PCI is not set +# CONFIG_IDE_ARM is not set +# CONFIG_IDE_CHIPSETS is not set +# CONFIG_BLK_DEV_IDEDMA is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +# CONFIG_IEEE1394 is not set +# CONFIG_I2O is not set +# CONFIG_MACINTOSH_DRIVERS is not set +CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m 
+# CONFIG_ARCNET is not set +# CONFIG_PHYLIB is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_NET_TULIP is not set +# CONFIG_AT1700 is not set +# CONFIG_DEPCA is not set +# CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +CONFIG_PCNET32=y +# CONFIG_PCNET32_NAPI is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_CS89x0 is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +CONFIG_NE2K_PCI=y +CONFIG_8139CP=y +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_SC92031 is not set +# CONFIG_NET_POCKET is not set +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set +# CONFIG_WAN is not set +CONFIG_ATM_DRIVERS=y +# CONFIG_ATM_DUMMY is not set +# CONFIG_ATM_TCP is not set +# CONFIG_ATM_LANAI is not set +# CONFIG_ATM_ENI is not set +# CONFIG_ATM_FIRESTREAM is not set +# CONFIG_ATM_ZATM is not set +# CONFIG_ATM_NICSTAR is not set +# CONFIG_ATM_IDT77252 is not set +# CONFIG_ATM_AMBASSADOR is not set +# CONFIG_ATM_HORIZON is not set +# CONFIG_ATM_IA is not set +# CONFIG_ATM_FORE200E_MAYBE is not set +# CONFIG_ATM_HE is not set +# CONFIG_FDDI is not set +CONFIG_HIPPI=y +# CONFIG_ROADRUNNER is not set +# CONFIG_PLIP is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE 
is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_INPORT is not set +# CONFIG_MOUSE_LOGIBM is not set +# CONFIG_MOUSE_PC110PAD is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PARKBD is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_FIX_EARLYCON_MEM=y +CONFIG_SERIAL_8250_PCI=y 
+CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_PRINTER is not set +# CONFIG_PPDEV is not set +# CONFIG_TIPAR is not set +# CONFIG_IPMI_HANDLER is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y + +# +# Watchdog Device Drivers +# +CONFIG_SOFT_WATCHDOG=y +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_SC520_WDT is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_ITCO_WDT is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_PC87413_WDT is not set +# CONFIG_60XX_WDT is not set +# CONFIG_SBC8360_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_SMSC37B787_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83697HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set + +# +# ISA-based Watchdog Cards +# +# CONFIG_PCWATCHDOG is not set +# CONFIG_MIXCOMWD is not set +# CONFIG_WDT is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_SONYPI is not set +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set +# CONFIG_PC8736x_GPIO is not set +# CONFIG_NSC_GPIO is not set +# CONFIG_CS5535_GPIO is not set +CONFIG_RAW_DRIVER=m +CONFIG_MAX_RAW_DEVS=256 +# CONFIG_HANGCHECK_TIMER is not set +# 
CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set + +# +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_ABITUGURU is not set +# CONFIG_SENSORS_ABITUGURU3 is not set +# CONFIG_SENSORS_K8TEMP is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_CORETEMP is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_SENSORS_HDAPS is not set +# CONFIG_SENSORS_APPLESMC is not set +# CONFIG_HWMON_DEBUG_CHIP is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_VGASTATE is not set +CONFIG_VIDEO_OUTPUT_CONTROL=m +# CONFIG_FB is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +# CONFIG_VGACON_SOFT_SCROLLBACK is not set +# CONFIG_VIDEO_SELECT is not set +# CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +# CONFIG_SOUND is not set +CONFIG_HID_SUPPORT=y +# CONFIG_HID is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_USB is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set +# 
CONFIG_NEW_LEDS is not set +# CONFIG_INFINIBAND is not set +# CONFIG_EDAC is not set +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# +# CONFIG_AUXDISPLAY is not set +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set + +# +# Userspace I/O +# +# CONFIG_UIO is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +# CONFIG_EXT4DEV_FS is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +CONFIG_ROMFS_FS=m +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set +CONFIG_GENERIC_ACL=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=y +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_HUGETLBFS is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +CONFIG_CONFIGFS_FS=m + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_CRAMFS=m +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# 
+CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +# CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_SUNRPC_BIND34 is not set +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Distributed Lock Manager +# +# CONFIG_DLM is not set +CONFIG_INSTRUMENTATION=y +# CONFIG_PROFILING is not set +# CONFIG_KPROBES is not set + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +# CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is 
not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_SCHED_DEBUG=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +CONFIG_DEBUG_SLAB=y +CONFIG_DEBUG_SLAB_LEAK=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_PI_LIST=y +# CONFIG_RT_MUTEX_TESTER is not set +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +CONFIG_LOCK_STAT=y +# CONFIG_DEBUG_LOCKDEP is not set +CONFIG_TRACE_IRQFLAGS=y +CONFIG_DEBUG_SPINLOCK_SLEEP=y +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y +CONFIG_DEBUG_KOBJECT=y +CONFIG_DEBUG_HIGHMEM=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_LIST=y +CONFIG_FRAME_POINTER=y +CONFIG_FORCED_INLINING=y +CONFIG_RCU_TORTURE_TEST=m +# CONFIG_FAULT_INJECTION is not set +CONFIG_EARLY_PRINTK=y +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACK_USAGE is not set +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_RODATA=y +CONFIG_4KSTACKS=y +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y +CONFIG_DOUBLEFAULT=y + +# +# Security options +# +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set +# CONFIG_SECURITY is not set +CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_MANAGER=m +CONFIG_CRYPTO_HMAC=m +# CONFIG_CRYPTO_XCBC is not set +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_SHA256=m +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set +CONFIG_CRYPTO_GF128MUL=m +# CONFIG_CRYPTO_ECB is not set +CONFIG_CRYPTO_CBC=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_LRW=m +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_FCRYPT=m +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_586 is not set +# CONFIG_CRYPTO_SERPENT is not set +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_AES_586 is not 
set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_TEA=m +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_DEFLATE=m +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_TEST is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_PADLOCK is not set +# CONFIG_CRYPTO_DEV_GEODE is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_CRC_CCITT=m +CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_KTIME_SCALAR=y diff --git a/datapath/linux-2.6/kbuild.inc b/datapath/linux-2.6/kbuild.inc new file mode 100644 index 00000000..c3102d61 --- /dev/null +++ b/datapath/linux-2.6/kbuild.inc @@ -0,0 +1,214 @@ +# The remainder of this file is from Intel's e1000 distribution, +# with the following license: + +################################################################################ +# +# Intel PRO/1000 Linux driver +# Copyright(c) 1999 - 2007 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. 
+# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Contact Information: +# Linux NICS +# e1000-devel Mailing List +# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 +# +################################################################################ +ifeq (,$(BUILD_KERNEL)) +BUILD_KERNEL=$(shell uname -r) +endif + +########################################################################### +# Environment tests + +ifeq (,$(CC)) + CC := gcc cc +endif + +ifeq (,$(KSRC)) + $(error Linux kernel source not found) +else +ifeq (/lib/modules/$(shell uname -r)/source, $(KSRC)) + KOBJ := /lib/modules/$(shell uname -r)/build +else + KOBJ := $(KSRC) +endif +endif + +# version 2.4 or 2.6? (we assume 2.2 isn't in use anymore) +K_VERSION:=$(shell if grep -q 'PATCHLEVEL = 4' $(KSRC)/Makefile; then echo 2.4; else echo 2.6; fi) + +ifneq ($(K_VERSION),2.6) + $(error Linux kernel source is not 2.6) +endif + +# check for version.h and autoconf.h for running kernel in /boot (SUSE) +ifneq (,$(wildcard /boot/vmlinuz.version.h)) + VERSION_FILE := /boot/vmlinuz.version.h + CONFIG_FILE := /boot/vmlinuz.autoconf.h + KVER := $(shell $(CC) $(EXTRA_CFLAGS) -E -dM $(VERSION_FILE) | \ + grep UTS_RELEASE | awk '{ print $$3 }' | sed 's/\"//g') + ifeq ($(KVER),$(shell uname -r)) + # set up include path to override headers from kernel source + x:=$(shell rm -rf include) + x:=$(shell mkdir -p include/linux) + x:=$(shell cp /boot/vmlinuz.version.h include/linux/version.h) + x:=$(shell cp /boot/vmlinuz.autoconf.h include/linux/autoconf.h) + EXTRA_CFLAGS += -I./include + else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := 
$(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h + endif +else + ifneq (,$(wildcard $(KOBJ)/include/linux/utsrelease.h)) + VERSION_FILE := $(KOBJ)/include/linux/utsrelease.h + else + VERSION_FILE := $(KOBJ)/include/linux/version.h + endif + CONFIG_FILE := $(KSRC)/include/linux/autoconf.h +endif + +ifeq (,$(wildcard $(VERSION_FILE))) + $(error Linux kernel source not configured - missing version.h) +endif + +ifeq (,$(wildcard $(CONFIG_FILE))) + $(error Linux kernel source not configured - missing autoconf.h) +endif + + +test_cc = $(shell $(cc) --version > /dev/null 2>&1 && echo $(cc)) +CC := $(foreach cc, $(CC), $(test_cc)) +CC := $(firstword $(CC)) +ifeq (,$(CC)) + $(error Compiler not found) +endif + +# we need to know what platform the driver is being built on +# some additional features are only built on Intel platforms +ifeq ($(ARCH),) + # Set the architecture if it hasn't been already set for cross-compilation + ARCH := $(shell uname -m | sed 's/i.86/i386/') +endif +ifeq ($(ARCH),alpha) + EXTRA_CFLAGS += -ffixed-8 -mno-fp-regs +endif +ifeq ($(ARCH),x86_64) + EXTRA_CFLAGS += -mcmodel=kernel -mno-red-zone +endif +ifeq ($(ARCH),ppc) + EXTRA_CFLAGS += -msoft-float +endif +ifeq ($(ARCH),ppc64) + EXTRA_CFLAGS += -m64 -msoft-float + LDFLAGS += -melf64ppc +endif + +# standard flags for module builds +EXTRA_CFLAGS += -DLINUX -D__KERNEL__ -DMODULE -O2 -pipe -Wall +NOSTDINC_FLAGS += -I$(srcdir)/compat-2.6 -I$(srcdir)/compat-2.6/include +EXTRA_CFLAGS += $(shell [ -f $(KSRC)/include/linux/modversions.h ] && \ + echo "-DMODVERSIONS -DEXPORT_SYMTAB \ + -include $(KSRC)/include/linux/modversions.h") + +RHC := $(KSRC)/include/linux/rhconfig.h +ifneq (,$(wildcard $(RHC))) + # 7.3 typo in rhconfig.h + ifneq (,$(shell $(CC) $(EXTRA_CFLAGS) -E -dM $(RHC) | grep __module__bigmem)) + EXTRA_CFLAGS += -D__module_bigmem + endif +endif + +# get the kernel version - we use this to find the correct install path +KVER := $(shell $(CC) 
$(EXTRA_CFLAGS) -E -dM $(VERSION_FILE) | grep UTS_RELEASE | \ + awk '{ print $$3 }' | sed 's/\"//g') + +# assume source symlink is the same as build, otherwise adjust KOBJ +ifneq (,$(wildcard /lib/modules/$(KVER)/build)) +ifneq ($(KSRC),$(shell cd /lib/modules/$(KVER)/build ; pwd -P)) + KOBJ=/lib/modules/$(KVER)/build +endif +endif + +KKVER := $(shell echo $(KVER) | \ + awk '{ if ($$0 ~ /2\.[6]\./) print "1"; else print "0"}') +ifeq ($(KKVER), 0) + $(error *** Aborting the build. \ + *** This driver is not supported on kernel versions older than 2.6.0) +endif + +# look for SMP in config.h +SMP := $(shell $(CC) $(EXTRA_CFLAGS) -E -dM $(CONFIG_FILE) | \ + grep -w CONFIG_SMP | awk '{ print $$3 }') +ifneq ($(SMP),1) + SMP := 0 +endif + +#ifneq ($(SMP),$(shell uname -a | grep SMP > /dev/null 2>&1 && echo 1 || echo 0)) +# $(warning ***) +# ifeq ($(SMP),1) +# $(warning *** Warning: kernel source configuration (SMP)) +# $(warning *** does not match running kernel (UP)) +# else +# $(warning *** Warning: kernel source configuration (UP)) +# $(warning *** does not match running kernel (SMP)) +# endif +# $(warning *** Continuing with build,) +# $(warning *** resulting driver may not be what you want) +# $(warning ***) +#endif + +ifeq ($(SMP),1) + EXTRA_CFLAGS += -D__SMP__ +endif + +########################################################################### +# Makefile for 2.6.x kernel +all: $(TARGET) +TARGET = openflow_mod.ko unit_mod.ko + +$(UNIT_CFILES): + $(foreach UNIT_CFILE, $(UNIT_CFILES), $(shell ln -s $(patsubst %,../t/%,$(UNIT_CFILE)) $(UNIT_CFILE))) + +ifneq ($(PATCHLEVEL),) +EXTRA_CFLAGS += $(CFLAGS_EXTRA) +obj-m += openflow_mod.o unit_mod.o +openflow_mod-objs := $(CFILES:.c=.o) +unit_mod-objs := $(UNIT_CFILES:.c=.o) +else +default: +ifeq ($(KOBJ),$(KSRC)) + $(MAKE) -C $(KSRC) SUBDIRS=$(shell pwd) modules +else + $(MAKE) -C $(KSRC) O=$(KOBJ) SUBDIRS=$(shell pwd) modules +endif +endif + +# depmod version for rpm builds +DEPVER := $(shell /sbin/depmod -V 2>/dev/null | 
\ + awk 'BEGIN {FS="."} NR==1 {print $$2}') + +.PHONY: clean + +clean: + rm -rf $(TARGET) $(TARGET:.ko=.o) $(TARGET:.ko=.mod.c) \ + $(TARGET:.ko=.mod.o) $(CFILES:.c=.o) $(MANFILE).gz .*cmd \ + .tmp_versions t/ *.o tmp/ diff --git a/datapath/linux-2.6/kernel-src.inc.in b/datapath/linux-2.6/kernel-src.inc.in new file mode 100644 index 00000000..531f7bc4 --- /dev/null +++ b/datapath/linux-2.6/kernel-src.inc.in @@ -0,0 +1 @@ +KSRC=@KSRC26@ diff --git a/datapath/run-unit-tests b/datapath/run-unit-tests new file mode 100755 index 00000000..85257fcb --- /dev/null +++ b/datapath/run-unit-tests @@ -0,0 +1,64 @@ +#! /bin/sh -ex + +fail () { + echo "$@" + exit 1 +} + +test -n "$VMDIR" || fail "must pass --with-vm to configure to run unit tests" + +rm -rf tmp +mkdir tmp +cd tmp + +ln -s $KSRC/arch/i386/boot/bzImage kernel.bin +ln -s $VMDIR/hda.dsk hda.dsk + +cat > unit.conf < unit.cd/runme < + +#define SNAP_OUI_LEN 3 + + +struct snap_hdr +{ + uint8_t dsap; /* Always 0xAA */ + uint8_t ssap; /* Always 0xAA */ + uint8_t ctrl; + uint8_t oui[SNAP_OUI_LEN]; + uint16_t ethertype; +} __attribute__ ((packed)); + +/* Reads the SNAP header that follows the Ethernet header in 'skb'. + * + * On success, copies the header's ethertype field unmodified into + * '*ethertype' and returns 0. Returns -EINVAL, leaving '*ethertype' + * untouched, unless DSAP and SSAP both equal LLC_SAP_SNAP (low bit ignored) + * and the OUI is all zeros: only OUI 00-00-00 means that the protocol ID + * is an ethertype. + * + * No length check is done here; skb->data is read at a fixed offset. */ +static inline int snap_get_ethertype(struct sk_buff *skb, uint16_t *ethertype) +{ + struct snap_hdr *sh = (struct snap_hdr *)(skb->data + + sizeof(struct ethhdr)); + if (((sh->dsap & 0xFE) != LLC_SAP_SNAP) + || ((sh->ssap & 0xFE) != LLC_SAP_SNAP) + || (memcmp(sh->oui, "\0\0\0", SNAP_OUI_LEN) != 0)) + return -EINVAL; + + *ethertype = sh->ethertype; + + return 0; +} + +#endif /* snap.h */ diff --git a/datapath/t/.gitignore b/datapath/t/.gitignore new file mode 100644 index 00000000..35e75b7a --- /dev/null +++ b/datapath/t/.gitignore @@ -0,0 +1,4 @@ +/Makefile +/Makefile.in +/forward_t.h +/fwdhgen diff --git a/datapath/table-hash.c b/datapath/table-hash.c new file mode 100644 index 00000000..57a9f1c0 --- /dev/null +++ b/datapath/table-hash.c @@ -0,0 +1,466 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include "table.h" +#include "crc32.h" +#include "flow.h" +#include "datapath.h" + +#include +#include +#include +#include +#include + +static void *kmem_alloc(size_t); +static void *kmem_zalloc(size_t); +static void kmem_free(void *, size_t); + +struct sw_table_hash { + struct sw_table swt; + spinlock_t lock; + struct crc32 crc32; + atomic_t n_flows; + unsigned int bucket_mask; /* Number of buckets minus 1. */ + struct sw_flow **buckets; +}; + +static struct sw_flow **find_bucket(struct sw_table *swt, + const struct sw_flow_key *key) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + unsigned int crc = crc32_calculate(&th->crc32, key, sizeof *key); + return &th->buckets[crc & th->bucket_mask]; +} + +static struct sw_flow *table_hash_lookup(struct sw_table *swt, + const struct sw_flow_key *key) +{ + struct sw_flow *flow = *find_bucket(swt, key); + return flow && !memcmp(&flow->key, key, sizeof *key) ? flow : NULL; +} + +static int table_hash_insert(struct sw_table *swt, struct sw_flow *flow) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + struct sw_flow **bucket; + unsigned long int flags; + int retval; + + if (flow->key.wildcards != 0) + return 0; + + spin_lock_irqsave(&th->lock, flags); + bucket = find_bucket(swt, &flow->key); + if (*bucket == NULL) { + atomic_inc(&th->n_flows); + rcu_assign_pointer(*bucket, flow); + retval = 1; + } else { + struct sw_flow *old_flow = *bucket; + if (!memcmp(&old_flow->key, &flow->key, sizeof flow->key) + && flow_del(old_flow)) { + rcu_assign_pointer(*bucket, flow); + flow_deferred_free(old_flow); + retval = 1; + } else { + retval = 0; + } + } + spin_unlock_irqrestore(&th->lock, flags); + return retval; +} + +/* Caller must update n_flows. 
*/ +static int do_delete(struct sw_flow **bucket, struct sw_flow *flow) +{ + if (flow_del(flow)) { + rcu_assign_pointer(*bucket, NULL); + flow_deferred_free(flow); + return 1; + } + return 0; +} + +/* Returns number of deleted flows. */ +static int table_hash_delete(struct sw_table *swt, + const struct sw_flow_key *key, int strict) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + unsigned int count = 0; + + if (key->wildcards == 0) { + struct sw_flow **bucket = find_bucket(swt, key); + struct sw_flow *flow = *bucket; + if (flow && !memcmp(&flow->key, key, sizeof *key)) + count = do_delete(bucket, flow); + } else { + unsigned int i; + + for (i = 0; i <= th->bucket_mask; i++) { + struct sw_flow **bucket = &th->buckets[i]; + struct sw_flow *flow = *bucket; + if (flow && flow_del_matches(&flow->key, key, strict)) + count += do_delete(bucket, flow); + } + } + if (count) + atomic_sub(count, &th->n_flows); + return count; +} + +static int table_hash_timeout(struct datapath *dp, struct sw_table *swt) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + unsigned int i; + int count = 0; + + for (i = 0; i <= th->bucket_mask; i++) { + struct sw_flow **bucket = &th->buckets[i]; + struct sw_flow *flow = *bucket; + if (flow && flow_timeout(flow)) { + count += do_delete(bucket, flow); + if (dp->hello_flags & OFP_CHELLO_SEND_FLOW_EXP) + dp_send_flow_expired(dp, flow); + } + } + + if (count) + atomic_sub(count, &th->n_flows); + return count; +} + +static void table_hash_destroy(struct sw_table *swt) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + unsigned int i; + for (i = 0; i <= th->bucket_mask; i++) + if (th->buckets[i]) + flow_free(th->buckets[i]); + kmem_free(th->buckets, (th->bucket_mask + 1) * sizeof *th->buckets); + kfree(th); +} + +struct swt_iterator_hash { + struct sw_table_hash *th; + unsigned int bucket_i; +}; + +static struct sw_flow *next_flow(struct swt_iterator_hash *ih) +{ + for (;ih->bucket_i <= ih->th->bucket_mask; 
ih->bucket_i++) { + struct sw_flow *f = ih->th->buckets[ih->bucket_i]; + if (f != NULL) + return f; + } + + return NULL; +} + +static int table_hash_iterator(struct sw_table *swt, + struct swt_iterator *swt_iter) +{ + struct swt_iterator_hash *ih; + + swt_iter->private = ih = kmalloc(sizeof *ih, GFP_KERNEL); + + if (ih == NULL) + return 0; + + ih->th = (struct sw_table_hash *) swt; + + ih->bucket_i = 0; + swt_iter->flow = next_flow(ih); + + return 1; +} + +static void table_hash_next(struct swt_iterator *swt_iter) +{ + struct swt_iterator_hash *ih; + + if (swt_iter->flow == NULL) + return; + + ih = (struct swt_iterator_hash *) swt_iter->private; + + ih->bucket_i++; + swt_iter->flow = next_flow(ih); +} + +static void table_hash_iterator_destroy(struct swt_iterator *swt_iter) +{ + kfree(swt_iter->private); +} + +static void table_hash_stats(struct sw_table *swt, + struct sw_table_stats *stats) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + stats->name = "hash"; + stats->n_flows = atomic_read(&th->n_flows); + stats->max_flows = th->bucket_mask + 1; +} + +struct sw_table *table_hash_create(unsigned int polynomial, + unsigned int n_buckets) +{ + struct sw_table_hash *th; + struct sw_table *swt; + + th = kmalloc(sizeof *th, GFP_KERNEL); + if (th == NULL) + return NULL; + + BUG_ON(n_buckets & (n_buckets - 1)); + th->buckets = kmem_zalloc(n_buckets * sizeof *th->buckets); + if (th->buckets == NULL) { + printk("failed to allocate %u buckets\n", n_buckets); + kfree(th); + return NULL; + } + th->bucket_mask = n_buckets - 1; + + swt = &th->swt; + swt->lookup = table_hash_lookup; + swt->insert = table_hash_insert; + swt->delete = table_hash_delete; + swt->timeout = table_hash_timeout; + swt->destroy = table_hash_destroy; + swt->iterator = table_hash_iterator; + swt->iterator_next = table_hash_next; + swt->iterator_destroy = table_hash_iterator_destroy; + swt->stats = table_hash_stats; + + spin_lock_init(&th->lock); + crc32_init(&th->crc32, polynomial); + 
atomic_set(&th->n_flows, 0); + + return swt; +} + +/* Double-hashing table. */ + +struct sw_table_hash2 { + struct sw_table swt; + struct sw_table *subtable[2]; +}; + +static struct sw_flow *table_hash2_lookup(struct sw_table *swt, + const struct sw_flow_key *key) +{ + struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; + int i; + + for (i = 0; i < 2; i++) { + struct sw_flow *flow = *find_bucket(t2->subtable[i], key); + if (flow && !memcmp(&flow->key, key, sizeof *key)) + return flow; + } + return NULL; +} + +static int table_hash2_insert(struct sw_table *swt, struct sw_flow *flow) +{ + struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; + + if (table_hash_insert(t2->subtable[0], flow)) + return 1; + return table_hash_insert(t2->subtable[1], flow); +} + +static int table_hash2_delete(struct sw_table *swt, + const struct sw_flow_key *key, int strict) +{ + struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; + return (table_hash_delete(t2->subtable[0], key, strict) + + table_hash_delete(t2->subtable[1], key, strict)); +} + +static int table_hash2_timeout(struct datapath *dp, struct sw_table *swt) +{ + struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; + return (table_hash_timeout(dp, t2->subtable[0]) + + table_hash_timeout(dp, t2->subtable[1])); +} + +static void table_hash2_destroy(struct sw_table *swt) +{ + struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; + table_hash_destroy(t2->subtable[0]); + table_hash_destroy(t2->subtable[1]); + kfree(t2); +} + +struct swt_iterator_hash2 { + struct sw_table_hash2 *th2; + struct swt_iterator ih; + uint8_t table_i; +}; + +static int table_hash2_iterator(struct sw_table *swt, + struct swt_iterator *swt_iter) +{ + struct swt_iterator_hash2 *ih2; + + swt_iter->private = ih2 = kmalloc(sizeof *ih2, GFP_KERNEL); + if (ih2 == NULL) + return 0; + + ih2->th2 = (struct sw_table_hash2 *) swt; + if (!table_hash_iterator(ih2->th2->subtable[0], &ih2->ih)) { + kfree(ih2); + return 0; + } + + if 
(ih2->ih.flow != NULL) { + swt_iter->flow = ih2->ih.flow; + ih2->table_i = 0; + } else { + table_hash_iterator_destroy(&ih2->ih); + ih2->table_i = 1; + if (!table_hash_iterator(ih2->th2->subtable[1], &ih2->ih)) { + kfree(ih2); + return 0; + } + swt_iter->flow = ih2->ih.flow; + } + + return 1; +} + +static void table_hash2_next(struct swt_iterator *swt_iter) +{ + struct swt_iterator_hash2 *ih2; + + if (swt_iter->flow == NULL) + return; + + ih2 = (struct swt_iterator_hash2 *) swt_iter->private; + table_hash_next(&ih2->ih); + + if (ih2->ih.flow != NULL) { + swt_iter->flow = ih2->ih.flow; + } else { + if (ih2->table_i == 0) { + table_hash_iterator_destroy(&ih2->ih); + ih2->table_i = 1; + if (!table_hash_iterator(ih2->th2->subtable[1], &ih2->ih)) { + ih2->ih.private = NULL; + swt_iter->flow = NULL; + } else { + swt_iter->flow = ih2->ih.flow; + } + } else { + swt_iter->flow = NULL; + } + } +} + +static void table_hash2_iterator_destroy(struct swt_iterator *swt_iter) +{ + struct swt_iterator_hash2 *ih2; + + ih2 = (struct swt_iterator_hash2 *) swt_iter->private; + if (ih2->ih.private != NULL) + table_hash_iterator_destroy(&ih2->ih); + kfree(ih2); +} + +static void table_hash2_stats(struct sw_table *swt, + struct sw_table_stats *stats) +{ + struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt; + struct sw_table_stats substats[2]; + int i; + + for (i = 0; i < 2; i++) + table_hash_stats(t2->subtable[i], &substats[i]); + stats->name = "hash2"; + stats->n_flows = substats[0].n_flows + substats[1].n_flows; + stats->max_flows = substats[0].max_flows + substats[1].max_flows; +} + +struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0, + unsigned int poly1, unsigned int buckets1) + +{ + struct sw_table_hash2 *t2; + struct sw_table *swt; + + t2 = kmalloc(sizeof *t2, GFP_KERNEL); + if (t2 == NULL) + return NULL; + + t2->subtable[0] = table_hash_create(poly0, buckets0); + if (t2->subtable[0] == NULL) + goto out_free_t2; + + t2->subtable[1] = 
table_hash_create(poly1, buckets1); + if (t2->subtable[1] == NULL) + goto out_free_subtable0; + + swt = &t2->swt; + swt->lookup = table_hash2_lookup; + swt->insert = table_hash2_insert; + swt->delete = table_hash2_delete; + swt->timeout = table_hash2_timeout; + swt->destroy = table_hash2_destroy; + swt->stats = table_hash2_stats; + + swt->iterator = table_hash2_iterator; + swt->iterator_next = table_hash2_next; + swt->iterator_destroy = table_hash2_iterator_destroy; + + return swt; + +out_free_subtable0: + table_hash_destroy(t2->subtable[0]); +out_free_t2: + kfree(t2); + return NULL; +} + +/* From fs/xfs/linux-2.4/kmem.c. */ + +static void * +kmem_alloc(size_t size) +{ + void *ptr; + +#ifdef KMALLOC_MAX_SIZE + if (size > KMALLOC_MAX_SIZE) + return NULL; +#endif + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + ptr = vmalloc(size); + if (ptr) + printk("openflow: used vmalloc for %lu bytes\n", + (unsigned long)size); + } + return ptr; +} + +static void * +kmem_zalloc(size_t size) +{ + void *ptr = kmem_alloc(size); + if (ptr) + memset(ptr, 0, size); + return ptr; +} + +static void +kmem_free(void *ptr, size_t size) +{ + if (((unsigned long)ptr < VMALLOC_START) || + ((unsigned long)ptr >= VMALLOC_END)) { + kfree(ptr); + } else { + vfree(ptr); + } +} diff --git a/datapath/table-linear.c b/datapath/table-linear.c new file mode 100644 index 00000000..3baede66 --- /dev/null +++ b/datapath/table-linear.c @@ -0,0 +1,202 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include "table.h" +#include "flow.h" +#include "datapath.h" + +#include +#include +#include + +struct sw_table_linear { + struct sw_table swt; + + spinlock_t lock; + unsigned int max_flows; + atomic_t n_flows; + struct list_head flows; +}; + +static struct sw_flow *table_linear_lookup(struct sw_table *swt, + const struct sw_flow_key *key) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + struct sw_flow *flow; + list_for_each_entry_rcu (flow, &tl->flows, u.node) { + if (flow_matches(&flow->key, key)) + return flow; + } + return NULL; +} + +static int table_linear_insert(struct sw_table *swt, struct sw_flow *flow) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + unsigned long int flags; + struct sw_flow *f; + + /* Replace flows that match exactly. */ + spin_lock_irqsave(&tl->lock, flags); + list_for_each_entry_rcu (f, &tl->flows, u.node) { + if (f->key.wildcards == flow->key.wildcards + && flow_matches(&f->key, &flow->key) + && flow_del(f)) { + list_replace_rcu(&f->u.node, &flow->u.node); + spin_unlock_irqrestore(&tl->lock, flags); + flow_deferred_free(f); + return 1; + } + } + + /* Table overflow? */ + if (atomic_read(&tl->n_flows) >= tl->max_flows) { + spin_unlock_irqrestore(&tl->lock, flags); + return 0; + } + atomic_inc(&tl->n_flows); + + /* FIXME: need to order rules from most to least specific. 
*/ + list_add_rcu(&flow->u.node, &tl->flows); + spin_unlock_irqrestore(&tl->lock, flags); + return 1; +} + +static int do_delete(struct sw_table *swt, struct sw_flow *flow) +{ + if (flow_del(flow)) { + list_del_rcu(&flow->u.node); + flow_deferred_free(flow); + return 1; + } + return 0; +} + +static int table_linear_delete(struct sw_table *swt, + const struct sw_flow_key *key, int strict) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + struct list_head *pos, *n; + unsigned int count = 0; + + list_for_each_safe_rcu (pos, n, &tl->flows) { + struct sw_flow *flow = list_entry(pos, struct sw_flow, u.node); + if (flow_del_matches(&flow->key, key, strict)) + count += do_delete(swt, flow); + } + if (count) + atomic_sub(count, &tl->n_flows); + return count; +} + +static int table_linear_timeout(struct datapath *dp, struct sw_table *swt) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + struct list_head *pos, *n; + int count = 0; + + list_for_each_safe_rcu (pos, n, &tl->flows) { + struct sw_flow *flow = list_entry(pos, struct sw_flow, u.node); + if (flow_timeout(flow)) { + count += do_delete(swt, flow); + if (dp->hello_flags & OFP_CHELLO_SEND_FLOW_EXP) + dp_send_flow_expired(dp, flow); + } + } + if (count) + atomic_sub(count, &tl->n_flows); + return count; +} + +static void table_linear_destroy(struct sw_table *swt) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + + while (!list_empty(&tl->flows)) { + struct sw_flow *flow = list_entry(tl->flows.next, + struct sw_flow, u.node); + list_del(&flow->u.node); + flow_free(flow); + } + kfree(tl); +} + +/* Linear table's private data is just a pointer to the table */ + +/* Starts an iteration over 'swt': stores the table in swt_iter->private and + * the first flow on the list, or NULL if the list is empty, in + * swt_iter->flow. Always returns 1. + * + * Emptiness is tested on the list itself rather than on n_flows: the counter + * is updated separately from the list (incremented before list_add_rcu, + * decremented after list_del_rcu), so it can be nonzero while the list is + * empty, and list_entry() on an empty list would yield a bogus flow. */ +static int table_linear_iterator(struct sw_table *swt, + struct swt_iterator *swt_iter) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + + swt_iter->private = tl; + + if (list_empty(&tl->flows)) + swt_iter->flow = NULL; + else + swt_iter->flow = list_entry(tl->flows.next, + struct 
sw_flow, u.node); + + return 1; +} + +static void table_linear_next(struct swt_iterator *swt_iter) +{ + struct sw_table_linear *tl; + struct list_head *next; + + if (swt_iter->flow == NULL) + return; + + tl = (struct sw_table_linear *) swt_iter->private; + + next = swt_iter->flow->u.node.next; + if (next == &tl->flows) + swt_iter->flow = NULL; + else + swt_iter->flow = list_entry(next, struct sw_flow, u.node); +} + +static void table_linear_iterator_destroy(struct swt_iterator *swt_iter) +{} + +static void table_linear_stats(struct sw_table *swt, + struct sw_table_stats *stats) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + stats->name = "linear"; + stats->n_flows = atomic_read(&tl->n_flows); + stats->max_flows = tl->max_flows; +} + + +struct sw_table *table_linear_create(unsigned int max_flows) +{ + struct sw_table_linear *tl; + struct sw_table *swt; + + tl = kzalloc(sizeof *tl, GFP_KERNEL); + if (tl == NULL) + return NULL; + + swt = &tl->swt; + swt->lookup = table_linear_lookup; + swt->insert = table_linear_insert; + swt->delete = table_linear_delete; + swt->timeout = table_linear_timeout; + swt->destroy = table_linear_destroy; + swt->stats = table_linear_stats; + + swt->iterator = table_linear_iterator; + swt->iterator_next = table_linear_next; + swt->iterator_destroy = table_linear_iterator_destroy; + + tl->max_flows = max_flows; + atomic_set(&tl->n_flows, 0); + INIT_LIST_HEAD(&tl->flows); + spin_lock_init(&tl->lock); + + return swt; +} diff --git a/datapath/table-mac.c b/datapath/table-mac.c new file mode 100644 index 00000000..06f68a36 --- /dev/null +++ b/datapath/table-mac.c @@ -0,0 +1,272 @@ +/* + * Distributed under the terms of the GNU GPL version 2. 
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include "table.h" +#include "crc32.h" +#include "flow.h" +#include "openflow.h" +#include "datapath.h" + +#include + +struct sw_table_mac { + struct sw_table swt; + spinlock_t lock; + struct crc32 crc32; + atomic_t n_flows; + unsigned int max_flows; + unsigned int bucket_mask; /* Number of buckets minus 1. */ + struct hlist_head *buckets; +}; + +static struct hlist_head *find_bucket(struct sw_table *swt, + const struct sw_flow_key *key) +{ + struct sw_table_mac *tm = (struct sw_table_mac *) swt; + unsigned int crc = crc32_calculate(&tm->crc32, key, sizeof *key); + return &tm->buckets[crc & tm->bucket_mask]; +} + +static struct sw_flow *table_mac_lookup(struct sw_table *swt, + const struct sw_flow_key *key) +{ + struct hlist_head *bucket = find_bucket(swt, key); + struct hlist_node *pos; + struct sw_flow *flow; + hlist_for_each_entry_rcu (flow, pos, bucket, u.hnode) + if (!memcmp(key->dl_src, flow->key.dl_src, 6)) + return flow; + return NULL; +} + +static int table_mac_insert(struct sw_table *swt, struct sw_flow *flow) +{ + struct sw_table_mac *tm = (struct sw_table_mac *) swt; + struct hlist_head *bucket; + struct hlist_node *pos; + unsigned long int flags; + struct sw_flow *f; + + /* MAC table only handles flows that match on Ethernet + source address and wildcard everything else. */ + if (likely(flow->key.wildcards != (OFPFW_ALL & ~OFPFW_DL_SRC))) + return 0; + bucket = find_bucket(swt, &flow->key); + + spin_lock_irqsave(&tm->lock, flags); + hlist_for_each_entry_rcu (f, pos, bucket, u.hnode) { + if (!memcmp(f->key.dl_src, flow->key.dl_src, 6) + && flow_del(f)) { + hlist_replace_rcu(&f->u.hnode, &flow->u.hnode); + spin_unlock_irqrestore(&tm->lock, flags); + flow_deferred_free(f); + return 1; + } + } + + /* Table overflow? 
*/ + if (atomic_read(&tm->n_flows) >= tm->max_flows) { + spin_unlock_irqrestore(&tm->lock, flags); + return 0; + } + atomic_inc(&tm->n_flows); + + hlist_add_head_rcu(&flow->u.hnode, bucket); + spin_unlock_irqrestore(&tm->lock, flags); + return 1; +} + +static int do_delete(struct sw_table *swt, struct sw_flow *flow) +{ + if (flow_del(flow)) { + hlist_del_rcu(&flow->u.hnode); + flow_deferred_free(flow); + return 1; + } + return 0; +} + +/* Returns number of deleted flows. */ +static int table_mac_delete(struct sw_table *swt, + const struct sw_flow_key *key, int strict) +{ + struct sw_table_mac *tm = (struct sw_table_mac *) swt; + + if (key->wildcards == (OFPFW_ALL & ~OFPFW_DL_SRC)) { + struct sw_flow *flow = table_mac_lookup(swt, key); + if (flow && do_delete(swt, flow)) { + atomic_dec(&tm->n_flows); + return 1; + } + return 0; + } else { + unsigned int i; + int count = 0; + for (i = 0; i <= tm->bucket_mask; i++) { + struct hlist_head *bucket = &tm->buckets[i]; + struct hlist_node *pos; + struct sw_flow *flow; + hlist_for_each_entry_rcu (flow, pos, bucket, u.hnode) + if (flow_del_matches(&flow->key, key, strict)) + count += do_delete(swt, flow); + } + if (count) + atomic_sub(count, &tm->n_flows); + return count; + } +} + +static int table_mac_timeout(struct datapath *dp, struct sw_table *swt) +{ + struct sw_table_mac *tm = (struct sw_table_mac *) swt; + unsigned int i; + int count = 0; + + for (i = 0; i <= tm->bucket_mask; i++) { + struct hlist_head *bucket = &tm->buckets[i]; + struct hlist_node *pos; + struct sw_flow *flow; + hlist_for_each_entry_rcu (flow, pos, bucket, u.hnode) { + if (flow_timeout(flow)) { + count += do_delete(swt, flow); + if (dp->hello_flags & OFP_CHELLO_SEND_FLOW_EXP) + dp_send_flow_expired(dp, flow); + } + } + } + if (count) + atomic_sub(count, &tm->n_flows); + return count; +} + +static void table_mac_destroy(struct sw_table *swt) +{ + struct sw_table_mac *tm = (struct sw_table_mac *) swt; + unsigned int i; + for (i = 0; i <= 
tm->bucket_mask; i++) { + struct hlist_head *hlist = &tm->buckets[i]; + while (!hlist_empty(hlist)) { + struct sw_flow *flow = hlist_entry(hlist->first, + struct sw_flow, u.hnode); + hlist_del(&flow->u.hnode); + flow_free(flow); + } + } + kfree(tm->buckets); + kfree(tm); +} + +struct swt_iterator_mac { + struct sw_table_mac *tm; + unsigned int bucket_i; +}; + +static struct sw_flow *next_head_flow(struct swt_iterator_mac *im) +{ + for (; im->bucket_i <= im->tm->bucket_mask; im->bucket_i++) { + struct hlist_node *first = im->tm->buckets[im->bucket_i].first; + if (first != NULL) { + struct sw_flow *f = hlist_entry(first, + struct sw_flow, + u.hnode); + return f; + } + } + return NULL; +} + +static int table_mac_iterator(struct sw_table *swt, + struct swt_iterator *swt_iter) +{ + struct swt_iterator_mac *im; + + swt_iter->private = im = kmalloc(sizeof *im, GFP_KERNEL); + if (im == NULL) + return 0; + + im->tm = (struct sw_table_mac *) swt; + + if (atomic_read(&im->tm->n_flows) == 0) + swt_iter->flow = NULL; + else { + im->bucket_i = 0; + swt_iter->flow = next_head_flow(im); + } + + return 1; +} + +static void table_mac_next(struct swt_iterator *swt_iter) +{ + struct swt_iterator_mac *im; + struct hlist_node *next; + + if (swt_iter->flow == NULL) + return; + + im = (struct swt_iterator_mac *) swt_iter->private; + + next = swt_iter->flow->u.hnode.next; + if (next != NULL) { + swt_iter->flow = hlist_entry(next, struct sw_flow, u.hnode); + } else { + im->bucket_i++; + swt_iter->flow = next_head_flow(im); + } +} + +static void table_mac_iterator_destroy(struct swt_iterator *swt_iter) +{ + kfree(swt_iter->private); +} + +static void table_mac_stats(struct sw_table *swt, struct sw_table_stats *stats) +{ + struct sw_table_mac *tm = (struct sw_table_mac *) swt; + stats->name = "mac"; + stats->n_flows = atomic_read(&tm->n_flows); + stats->max_flows = tm->max_flows; +} + +struct sw_table *table_mac_create(unsigned int n_buckets, + unsigned int max_flows) +{ + struct sw_table_mac 
*tm; + struct sw_table *swt; + + tm = kzalloc(sizeof *tm, GFP_KERNEL); + if (tm == NULL) + return NULL; + + BUG_ON(n_buckets & (n_buckets - 1)); + + tm->buckets = kzalloc(n_buckets * sizeof *tm->buckets, GFP_KERNEL); + if (tm->buckets == NULL) { + printk("failed to allocate %u buckets\n", n_buckets); + kfree(tm); + return NULL; + } + tm->bucket_mask = n_buckets - 1; + + swt = &tm->swt; + swt->lookup = table_mac_lookup; + swt->insert = table_mac_insert; + swt->delete = table_mac_delete; + swt->timeout = table_mac_timeout; + swt->destroy = table_mac_destroy; + swt->stats = table_mac_stats; + + swt->iterator = table_mac_iterator; + swt->iterator_next = table_mac_next; + swt->iterator_destroy = table_mac_iterator_destroy; + + crc32_init(&tm->crc32, 0x04C11DB7); /* Ethernet CRC. */ + atomic_set(&tm->n_flows, 0); + tm->max_flows = max_flows; + spin_lock_init(&tm->lock); + + return swt; +} diff --git a/datapath/table.h b/datapath/table.h new file mode 100644 index 00000000..9a303670 --- /dev/null +++ b/datapath/table.h @@ -0,0 +1,74 @@ +/* Individual switching tables. Generally grouped together in a chain (see + * chain.h). */ + +#ifndef TABLE_H +#define TABLE_H 1 + +struct sw_flow; +struct sw_flow_key; +struct datapath; + +/* Iterator through the flows stored in a table. */ +struct swt_iterator { + struct sw_flow *flow; /* Current flow, for use by client. */ + void *private; +}; + +/* Table statistics. */ +struct sw_table_stats { + const char *name; /* Human-readable name. */ + unsigned long int n_flows; /* Number of active flows. */ + unsigned long int max_flows; /* Flow capacity. */ +}; + +/* A single table of flows. + * + * All functions, except destroy, must be called holding the + * rcu_read_lock. destroy must be fully serialized. + */ +struct sw_table { + /* Searches 'table' for a flow matching 'key', which must not have any + * wildcard fields. Returns the flow if successful, a null pointer + * otherwise. 
*/ + struct sw_flow *(*lookup)(struct sw_table *table, + const struct sw_flow_key *key); + + /* Inserts 'flow' into 'table', replacing any duplicate flow. Returns + * nonzero if 'flow' was inserted, zero if it was not. Failure can be + * due to an over-capacity table or because the flow is not one of the + * kind that the table accepts. + * + * If successful, 'flow' becomes owned by 'table', otherwise it is + * retained by the caller. */ + int (*insert)(struct sw_table *table, struct sw_flow *flow); + + /* Deletes from 'table' any and all flows that match 'key'. If + * 'strict' is set, wildcards must match. Returns the number of flows + * that were deleted. */ + int (*delete)(struct sw_table *table, const struct sw_flow_key *key, + int strict); + + /* Performs timeout processing on all the flow entries in 'table'. + * Returns the number of flow entries deleted through expiration. */ + int (*timeout)(struct datapath *dp, struct sw_table *table); + + /* Destroys 'table', which must not have any users. */ + void (*destroy)(struct sw_table *table); + /* Flow iteration. 'iterator' prepares an iterator over the table and + * returns nonzero on success, zero on failure; on success the + * iterator's 'flow' member is the first flow, or null if the table is + * empty. 'iterator_next' advances 'flow' to the following flow, or + * sets it to null after the last one. 'iterator_destroy' releases + * any state held by the iterator. */ + int (*iterator)(struct sw_table *, struct swt_iterator *); + void (*iterator_next)(struct swt_iterator *); + void (*iterator_destroy)(struct swt_iterator *); + + /* Dumps statistics for 'table' into 'stats'. */ + void (*stats)(struct sw_table *table, struct sw_table_stats *stats); +}; + +struct sw_table *table_mac_create(unsigned int n_buckets, + unsigned int max_flows); +struct sw_table *table_hash_create(unsigned int polynomial, + unsigned int n_buckets); +struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0, + unsigned int poly1, unsigned int buckets1); +struct sw_table *table_linear_create(unsigned int max_flows); + +#endif /* table.h */ diff --git a/datapath/table_t.c b/datapath/table_t.c new file mode 100644 index 00000000..3f92a118 --- /dev/null +++ b/datapath/table_t.c @@ -0,0 +1,879 @@ +/* + * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer
+sity
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "flow.h"
+#include "table.h"
+#include "openflow.h"
+#include "unit.h"
+
+/* Returns the human-readable name that 'table' reports in its statistics. */
+static const char *
+table_name(struct sw_table *table)
+{
+	struct sw_table_stats stats;
+	table->stats(table, &stats);
+	return stats.name;
+}
+
+/* Returns the flow capacity that 'table' reports in its statistics. */
+static unsigned long int
+table_max_flows(struct sw_table *table)
+{
+	struct sw_table_stats stats;
+	table->stats(table, &stats);
+	return stats.max_flows;
+}
+
+/* Like flow_alloc(), but zeroes the returned flow while preserving the
+ * 'actions' pointer that flow_alloc() set up.  Returns a null pointer if
+ * flow_alloc() fails. */
+static struct sw_flow *flow_zalloc(int n_actions, gfp_t flags)
+{
+	struct sw_flow *flow = flow_alloc(n_actions, flags);
+	if (flow) {
+		struct ofp_action *actions = flow->actions;
+		memset(flow, 0, sizeof *flow);
+		flow->actions = actions;
+	}
+	return flow;
+}
+
+/* Exercises the most basic operations on table 'swt': inserts one flow,
+ * checks that it (and not a second, uninserted flow) can be looked up,
+ * deletes it, checks that it is gone, and finally destroys the table.
+ * 'wildcards' is stored in the key of both test flows.  Problems are
+ * reported through unit_fail().
+ *
+ * A null 'swt' is reported as a table creation failure.  Otherwise 'swt' is
+ * always destroyed before returning. */
+static void
+simple_insert_delete(struct sw_table *swt, uint16_t wildcards)
+{
+	struct sw_flow *a_flow, *b_flow;
+	struct sw_flow *found;
+	struct sw_flow_key a_key;
+	int a_inserted = 0;
+
+	if (!swt) {
+		unit_fail("table creation failed");
+		return;
+	}
+
+	/* Allocate only once the table is known to exist, so that the early
+	 * return above cannot leak the flows. */
+	a_flow = flow_zalloc(0, GFP_KERNEL);
+	b_flow = flow_zalloc(0, GFP_KERNEL);
+	if (!a_flow || !b_flow) {
+		unit_fail("flow allocation failed");
+		goto out;
+	}
+
+	printk("simple_insert_delete: testing %s table\n", table_name(swt));
+	*((uint32_t*)a_flow->key.dl_src) = 0x12345678;
+	*((uint32_t*)b_flow->key.dl_src) = 0x87654321;
+
+	a_flow->key.nw_src = 0xdeadbeef;
+	b_flow->key.nw_src = 0x001dd0d0;
+
+	a_flow->key.wildcards = wildcards;
+	b_flow->key.wildcards = wildcards;
+
+	/* Work from a private copy of the key.  Deleting a flow passes it to
+	 * flow_deferred_free() (see do_delete() in the linear and MAC
+	 * tables), so a_flow->key must not be read once the flow has been
+	 * deleted. */
+	a_key = a_flow->key;
+
+	a_inserted = swt->insert(swt, a_flow);
+	if (!a_inserted)
+		unit_fail("insert failed");
+	found = swt->lookup(swt, &a_key);
+	if (found != a_flow)
+		unit_fail("%p != %p", found, a_flow);
+	if (swt->lookup(swt, &b_flow->key))
+		unit_fail("lookup should not succeed (1)");
+
+	swt->delete(swt, &a_key, 0);
+	if (swt->lookup(swt, &a_key))
+		unit_fail("lookup should not succeed (3)");
+
+out:
+	/* 'b_flow' is never inserted, and 'a_flow' remains ours when the
+	 * insert was refused (or never attempted), so release them here. */
+	if (b_flow)
+		flow_free(b_flow);
+	if (a_flow && !a_inserted)
+		flow_free(a_flow);
+	swt->destroy(swt);
+}
+
+static void
+multiple_insert_destroy(struct sw_table *swt, int inserts, uint16_t wildcards,
+			int min_collisions, int max_collisions)
+{
+	int i;
+	int col = 0;
+
+ if (!swt) { + unit_fail("table creation failed"); + return; + } + + printk("inserting %d flows into %s table with max %lu flows: ", + inserts, table_name(swt), table_max_flows(swt)); + for(i = 0; i < inserts; ++i){ + struct sw_flow *a_flow = flow_zalloc(0, GFP_KERNEL); + *((uint32_t*)&(a_flow->key.dl_src[0])) = random32(); + a_flow->key.nw_src = random32(); + a_flow->key.wildcards = wildcards; + + if(!swt->insert(swt, a_flow)) { + col++; + flow_free(a_flow); + } + } + printk("%d failures\n", col); + if (min_collisions <= col && col <= max_collisions) + printk("\tmin = %d <= %d <= %d = max, OK.\n", + min_collisions, col, max_collisions); + else { + if (col < min_collisions) + unit_fail("too few collisions (%d < %d)", + col, min_collisions); + else if (col > max_collisions) + unit_fail("too many collisions (%d > %d)", + col, max_collisions); + printk("(This is statistically possible " + "but should not occur often.)\n"); + } + + swt->destroy(swt); +} + +static void +set_random_key(struct sw_flow_key *key, uint16_t wildcards) +{ + key->nw_src = random32(); + key->nw_dst = random32(); + key->in_port = (uint16_t) random32(); + key->dl_vlan = (uint16_t) random32(); + key->dl_type = (uint16_t) random32(); + key->tp_src = (uint16_t) random32(); + key->tp_dst = (uint16_t) random32(); + key->wildcards = wildcards; + *((uint32_t*)key->dl_src) = random32(); + *(((uint32_t*)key->dl_src) + 1) = random32(); + *((uint32_t*)key->dl_dst) = random32(); + *(((uint32_t*)key->dl_dst) + 1) = random32(); + key->nw_proto = (uint8_t) random32(); +} + +struct flow_key_entry { + struct sw_flow_key key; + struct list_head node; +}; + +/* + * Allocates memory for 'n_keys' flow_key_entrys. Initializes the allocated + * keys with random values, setting their wildcard values to 'wildcards', and + * places them all in a list. Returns a pointer to a flow_key_entry that + * serves solely as the list's head (its key has not been set). If allocation + * fails, returns NULL. 
Returned pointer should be freed with vfree (which + * frees the memory associated with the keys as well.) + */ + +static struct flow_key_entry * +allocate_random_keys(int n_keys, uint16_t wildcards) +{ + struct flow_key_entry *entries, *pos; + struct list_head *keys; + + if (n_keys < 0) + return NULL; + + entries = vmalloc((n_keys+1) * sizeof *entries); + if (entries == NULL) { + unit_fail("cannot allocate memory for %u keys", + n_keys); + return NULL; + } + + keys = &entries->node; + INIT_LIST_HEAD(keys); + + for(pos = entries+1; pos < (entries + n_keys + 1); pos++) { + set_random_key(&pos->key, wildcards); + list_add(&pos->node, keys); + } + + return entries; +} + +/* + * Attempts to insert the first 'n_flows' flow keys in list 'keys' into table + * 'swt', where 'keys' is a list of flow_key_entrys. key_entrys that are + * inserted into the table are removed from the 'keys' list and placed in + * 'added' list. Returns -1 if flow memory allocation fails, else returns the + * number of flows that were actually inserted (some attempts might fail due to + * collisions). + */ + +static int +insert_flows(struct sw_table *swt, struct list_head *keys, struct list_head *added, int n_flows) +{ + struct flow_key_entry *pos, *next; + int cnt; + + cnt = 0; + + + list_for_each_entry_safe (pos, next, keys, node) { + struct sw_flow *flow = flow_zalloc(0, GFP_KERNEL); + if (flow == NULL) { + unit_fail("Could only allocate %u flows", cnt); + return -1; + } + + flow->key = pos->key; + + if (!swt->insert(swt, flow)) { + flow_free(flow); + list_del(&pos->node); + } else { + list_del(&pos->node); + list_add(&pos->node, added); + cnt++; + if (n_flows != -1 && cnt == n_flows) + break; + } + } + + return cnt; +} + +/* + * Finds and returns the flow_key_entry in list 'keys' matching the passed in + * flow's key. If not found, returns NULL. 
+ */ + +static struct flow_key_entry * +find_flow(struct list_head *keys, struct sw_flow *flow) +{ + struct flow_key_entry *pos; + + list_for_each_entry(pos, keys, node) { + if(!memcmp(&pos->key, &flow->key, sizeof(struct sw_flow_key))) + return pos; + } + + return NULL; +} + +/* + * Checks that all flow_key_entrys in list 'keys' return successful lookups on + * the table 'swt'. + */ + +static int +check_lookup(struct sw_table *swt, struct list_head *keys) +{ + struct flow_key_entry *pos; + + list_for_each_entry(pos, keys, node) { + if(swt->lookup(swt, &pos->key) == NULL) + return -1; + } + + return 0; +} + +/* + * Checks that all flow_key_entrys in list 'keys' DO NOT return successful + * lookups in the 'swt' table. + */ + +static int +check_no_lookup(struct sw_table *swt, struct list_head *keys) +{ + struct flow_key_entry *pos; + + list_for_each_entry(pos, keys, node) { + if(swt->lookup(swt, &pos->key) != NULL) + return -1; + } + + return 0; +} + + +/* + * Compares an iterator's view of the 'swt' table to the list of + * flow_key_entrys in 'to_find'. flow_key_entrys that are matched are removed + * from the 'to_find' list and placed in the 'found' list. Returns -1 if the + * iterator cannot be initialized or it encounters a flow with a key not in + * 'to_find'. Else returns the number of flows found by the iterator + * (i.e. there might still be flow keys in the 'to_find' list that were not + * encountered by the iterator. 
it is up to the caller to determine if that is + * acceptable behavior) + */ + +static int +check_iteration(struct sw_table *swt, struct list_head *to_find, struct list_head *found) +{ + struct swt_iterator iter; + struct flow_key_entry *entry; + int n_found = 0; + + rcu_read_lock(); + if (!swt->iterator(swt, &iter)) { + rcu_read_unlock(); + unit_fail("Could not initialize iterator"); + return -1; + } + + while (iter.flow != NULL) { + entry = find_flow(to_find, iter.flow); + if (entry == NULL) { + unit_fail("UNKNOWN ITERATOR FLOW %p", + iter.flow); + swt->iterator_destroy(&iter); + rcu_read_unlock(); + return -1; + } + n_found++; + list_del(&entry->node); + list_add(&entry->node, found); + swt->iterator_next(&iter); + } + + swt->iterator_destroy(&iter); + rcu_read_unlock(); + + return n_found; +} + +/* + * Deletes from table 'swt' keys from the list of flow_key_entrys 'keys'. + * Removes flow_key_entrys of deleted flows from 'keys' and places them in the + * 'deleted' list. If 'del_all' == 1, all flows in 'keys' will be deleted, + * else only every third key will be deleted. Returns the number flows deleted + * from the table. + */ + +static int +delete_flows(struct sw_table *swt, struct list_head *keys, + struct list_head *deleted, uint8_t del_all) +{ + struct flow_key_entry *pos, *next; + int i, n_del, total_del; + + total_del = 0; + i = 0; + + list_for_each_entry_safe (pos, next, keys, node) { + if (del_all == 1 || i % 3 == 0) { + n_del = swt->delete(swt, &pos->key, 0); + if (n_del > 1) { + unit_fail("%d flows deleted for one entry", n_del); + unit_fail("\tfuture 'errors' could just be product duplicate flow_key_entries"); + unit_fail("THIS IS VERY UNLIKELY...SHOULDN'T HAPPEN OFTEN"); + } + total_del += n_del; + list_del(&pos->node); + list_add(&pos->node, deleted); + } + i++; + } + + return total_del; +} + +/* + * Checks that both iteration and lookups are consistent with the caller's view + * of the table. 
In particular, checks that all keys in flow_key_entry list + * 'deleted' do not show up in lookup or iteration, and keys in flow_key_entry + * list 'added' do show up. 'tmp' should be an empty list that can be used for + * iteration. References to list_head pointers are needed for 'added' and 'tmp' + * because iteration will cause the list_heads to change. Function thus + * switches 'added' to point to the list of added keys after the iteration. + */ + +static int +check_lookup_and_iter(struct sw_table *swt, struct list_head *deleted, + struct list_head **added, struct list_head **tmp) +{ + struct list_head *tmp2; + int ret; + + if (check_no_lookup(swt, deleted) < 0) { + unit_fail("Uninserted flows returning lookup"); + return -1; + } + + if (check_lookup(swt, *added) < 0) { + unit_fail("Inserted flows not returning lookup"); + return -1; + } + + ret = check_iteration(swt, *added, *tmp); + + tmp2 = *added; + *added = *tmp; + *tmp = tmp2; + + if ((*tmp)->next != *tmp) { + unit_fail("WARNING: not all flows in 'added' found by iterator"); + unit_fail("\tcould be a product of duplicate flow_key_entrys, though should be VERY rare."); + /* To avoid reoccurence */ + (*tmp)->next = (*tmp)->prev = *tmp; + } + + return ret; +} + +/* + * Verifies iteration and lookup after inserting 'n_flows', then after deleting + * some flows, and once again after deleting all flows in table 'swt'. 
+ */ + +static int +iterator_test(struct sw_table *swt, int n_flows, uint16_t wildcards) +{ + struct flow_key_entry *allocated, h1, h2; + struct list_head *added, *deleted, *tmp; + int ret, n_del, success; + + INIT_LIST_HEAD(&h1.node); + INIT_LIST_HEAD(&h2.node); + + success = -1; + + allocated = allocate_random_keys(n_flows, wildcards); + if(allocated == NULL) + return success; + + deleted = &allocated->node; + added = &h1.node; + tmp = &h2.node; + + ret = insert_flows(swt, deleted, added, -1); + if (ret < 0) + goto iterator_test_destr; + + n_flows = ret; + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup after insertion"); + goto iterator_test_destr; + } else if (ret != n_flows) { + unit_fail("Iterator only found %d of %d flows", + ret, n_flows); + goto iterator_test_destr; + } + + n_del = delete_flows(swt, added, deleted, 0); + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup after some deletion"); + goto iterator_test_destr; + } else if (ret + n_del != n_flows) { + unit_fail("iterator after deletion inconsistent"); + unit_fail("\tn_del = %d, n_found = %d, n_flows = %d", + n_del, ret, n_flows); + goto iterator_test_destr; + } + + n_flows -= n_del; + + n_del = delete_flows(swt, added, deleted, 1); + if (n_del != n_flows) { + unit_fail("Not all flows deleted - only %d of %d", + n_del, n_flows); + goto iterator_test_destr; + } + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup after all deletion"); + goto iterator_test_destr; + } else if (ret != 0) { + unit_fail("Empty table iterator failed. 
%d flows found", + ret); + goto iterator_test_destr; + } + + success = 0; + +iterator_test_destr: + allocated->key.wildcards = OFPFW_ALL; + swt->delete(swt, &allocated->key, 0); + vfree(allocated); + return success; +} + + +/* + * Checks lookup and iteration consistency after adding one flow, adding the + * flow again, and then deleting the flow from table 'swt'. + */ + +static int +add_test(struct sw_table *swt, uint16_t wildcards) +{ + struct flow_key_entry *allocated, h1, h2; + struct list_head *added, *deleted, *tmp, *tmp2; + int ret, success = -1; + + INIT_LIST_HEAD(&h1.node); + INIT_LIST_HEAD(&h2.node); + + allocated = allocate_random_keys(2, wildcards); + if (allocated == NULL) + return success; + + deleted = &allocated->node; + added = &h1.node; + tmp = &h2.node; + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup before table modification"); + goto add_test_destr; + } else if (ret != 0) { + unit_fail("Iterator on empty table found %d flows", + ret); + goto add_test_destr; + } + + if (insert_flows(swt, deleted, added, 1) != 1) { + unit_fail("Cannot add one flow to table"); + goto add_test_destr; + } + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup after single add"); + goto add_test_destr; + } else if (ret != 1) { + unit_fail("Iterator on single add found %d flows", + ret); + goto add_test_destr; + } + + /* Re-adding flow */ + if (insert_flows(swt, added, tmp, 1) != 1) { + unit_fail("Cannot insert same flow twice"); + goto add_test_destr; + } + + tmp2 = added; + added = tmp; + tmp = tmp2; + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup after double add"); + goto add_test_destr; + } else if (ret != 1) { + unit_fail("Iterator on double add found %d flows", + ret); + goto add_test_destr; + } + + ret = delete_flows(swt, added, deleted, 1); + if (ret != 1) { + unit_fail("Unexpected %d flows deleted", ret); + 
goto add_test_destr; + } + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup after delete."); + goto add_test_destr; + } else if (ret != 0) { + unit_fail("unexpected %d flows found delete", ret); + goto add_test_destr; + } + + success = 0; + +add_test_destr: + allocated->key.wildcards = OFPFW_ALL; + swt->delete(swt, &allocated->key, 0); + vfree(allocated); + return success; +} + +/* + * Checks lookup and iteration consistency after each deleting a non-existent + * flow, adding and then deleting a flow, adding the flow again, and then + * deleting the flow twice in table 'swt'. + */ + +static int +delete_test(struct sw_table *swt, uint16_t wildcards) +{ + struct flow_key_entry *allocated, h1, h2; + struct list_head *added, *deleted, *tmp, *tmp2; + int i, ret, success = -1; + + INIT_LIST_HEAD(&h1.node); + INIT_LIST_HEAD(&h2.node); + + allocated = allocate_random_keys(2, wildcards); + if (allocated == NULL) + return success; + + /* Not really added...*/ + + added = &allocated->node; + deleted = &h1.node; + tmp = &h2.node; + + ret = delete_flows(swt, added, deleted, 1); + if (ret != 0) { + unit_fail("Deleting non-existent keys from table returned unexpected value %d", + ret); + goto delete_test_destr; + } + + for (i = 0; i < 3; i++) { + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + if (i == 0) + unit_fail("Loop %d. Bad lookup before modification.", i); + else + unit_fail("Loop %d. Bad lookup after delete.", i); + goto delete_test_destr; + } else if (ret != 0) { + if(i == 0) + unit_fail("Loop %d. Unexpected %d flows found before modification", + i, ret); + else + unit_fail("Loop %d. 
Unexpected %d flows found after delete", + i, ret); + goto delete_test_destr; + } + + if(i == 2) + break; + + if (insert_flows(swt, deleted, added, 1) != 1) { + unit_fail("loop %d: cannot add flow to table", i); + goto delete_test_destr; + } + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("loop %d: bad lookup after single add.", i); + goto delete_test_destr; + } else if (ret != 1) { + unit_fail("loop %d: unexpected %d flows found after single add", + i, ret); + goto delete_test_destr; + } + + ret = delete_flows(swt, added, deleted, 1); + if (ret != 1) { + unit_fail("loop %d: deleting inserted key from table returned unexpected value %d", + i, ret); + goto delete_test_destr; + } + } + + + ret = delete_flows(swt, deleted, tmp, 1); + + tmp2 = deleted; + deleted = tmp2; + tmp = tmp2; + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup after double delete."); + goto delete_test_destr; + } else if (ret != 0) { + unit_fail("Unexpected %d flows found after double delete", ret); + goto delete_test_destr; + } + + success = 0; + +delete_test_destr: + allocated->key.wildcards = OFPFW_ALL; + swt->delete(swt, &allocated->key, 0); + vfree(allocated); + return success; +} + +/* + * Randomly adds and deletes from a set of size 'n_flows', looping for 'i' + * iterations. 
+ */ + +static int +complex_add_delete_test(struct sw_table *swt, int n_flows, int i, uint16_t wildcards) +{ + struct flow_key_entry *allocated, h1, h2; + struct list_head *added, *deleted, *tmp; + int cnt, ret, n_added, n_deleted, success = -1; + uint8_t del_all; + + INIT_LIST_HEAD(&h1.node); + INIT_LIST_HEAD(&h2.node); + + allocated = allocate_random_keys(n_flows, wildcards); + if (allocated == NULL) + return success; + + deleted = &allocated->node; + added = &h1.node; + tmp = &h2.node; + + n_deleted = n_flows; + n_added = 0; + + for (;i > 0; i--) { + if (n_deleted != 0 && random32() % 2 == 0) { + cnt = random32() % n_deleted; + cnt = insert_flows(swt, deleted, added, cnt); + if (cnt < 0) + goto complex_test_destr; + n_deleted -= cnt; + n_added += cnt; + } else { + if (random32() % 7 == 0) + del_all = 1; + else + del_all = 0; + cnt = delete_flows(swt, added, deleted, del_all); + n_deleted += cnt; + n_added -= cnt; + } + + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup on iteration %d.", i); + goto complex_test_destr; + } + } + + delete_flows(swt, added, deleted, 1); + ret = check_lookup_and_iter(swt, deleted, &added, &tmp); + if (ret < 0) { + unit_fail("Bad lookup on end deletion."); + goto complex_test_destr; + } else if (ret != 0) { + unit_fail("Unexpected %d flows found on end deletion", ret); + goto complex_test_destr; + } + + success = 0; + +complex_test_destr: + allocated->key.wildcards = OFPFW_ALL; + swt->delete(swt, &allocated->key, 0); + vfree(allocated); + return success; + +} + +void run_table_t(void) +{ + int mac_buckets, mac_max, linear_max, hash_buckets, hash2_buckets1; + int hash2_buckets2, num_flows, num_iterations; + int i; + + struct sw_table *swt; + + /* Most basic operations. 
*/ + simple_insert_delete(table_mac_create(2048, 65536), + OFPFW_ALL & ~OFPFW_DL_SRC); + simple_insert_delete(table_linear_create(2048), 0); + simple_insert_delete(table_hash_create(0x04C11DB7, 2048), 0); + simple_insert_delete(table_hash2_create(0x04C11DB7, 2048, + 0x1EDC6F41, 2048), 0); + + /* MAC table operations. */ + multiple_insert_destroy(table_mac_create(2048, 65536), 1024, + OFPFW_ALL & ~OFPFW_DL_SRC, 0, 0); + multiple_insert_destroy(table_mac_create(2048, 65536), 2048, + OFPFW_ALL & ~OFPFW_DL_SRC, 0, 0); + multiple_insert_destroy(table_mac_create(2048, 65536), 65535, + OFPFW_ALL & ~OFPFW_DL_SRC, 0, 0); + multiple_insert_destroy(table_mac_create(2048, 65536), + 131072, OFPFW_ALL & ~OFPFW_DL_SRC, 65536, 65536); + + /* Linear table operations. */ + multiple_insert_destroy(table_linear_create(2048), 1024, 0, 0, 0); + multiple_insert_destroy(table_linear_create(2048), 2048, 0, 0, 0); + multiple_insert_destroy(table_linear_create(2048), 8192, 0, + 8192 - 2048, 8192 - 2048); + + /* Hash table operations. */ + multiple_insert_destroy(table_hash_create(0x04C11DB7, 2048), 1024, 0, + 100, 300); + multiple_insert_destroy(table_hash_create(0x04C11DB7, 2048), 2048, 0, + 500, 1000); + multiple_insert_destroy(table_hash_create(0x04C11DB7, 1 << 20), 8192, 0, + 0, 50); + multiple_insert_destroy(table_hash_create(0x04C11DB7, 1 << 20), 65536, 0, + 1500, 3000); + + /* Hash table 2, two hash functions. */ + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 2048, + 0x1EDC6F41, 2048), 1024, 0, 0, 20); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 2048, + 0x1EDC6F41, 2048), 2048, 0, 50, 200); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 1<<20, + 0x1EDC6F41, 1<<20), 8192, 0, 0, 20); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 1<<20, + 0x1EDC6F41, 1<<20), 65536, 0, 0, 20); + + /* Hash table 2, one hash function. 
*/ + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 2048, + 0x04C11DB7, 2048), 1024, 0, 0, 50); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 2048, + 0x04C11DB7, 2048), 2048, 0, 100, 300); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 1<<20, + 0x04C11DB7, 1<<20), 8192, 0, 0, 20); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 1<<20, + 0x04C11DB7, 1<<20), 65536, 0, 0, 100); + multiple_insert_destroy(table_hash2_create(0x04C11DB7, 1<<20, + 0x04C11DB7, 1<<20), 1<<16, 0, 0, 100); + + mac_buckets = 1024; + mac_max = 2048; + linear_max = 2048; + hash_buckets = 2048; + hash2_buckets1 = 1024; + hash2_buckets2 = 1024; + + num_flows = 2300; + num_iterations = 100; + + printk("\nTesting on each table type:\n"); + printk(" iteration_test on 0 flows\n"); + printk(" iteration_test on %d flows\n", num_flows); + printk(" add_test\n"); + printk(" delete_test\n"); + printk(" complex_add_delete_test with %d flows and %d iterations\n\n", + num_flows, num_iterations); + + for (i = 0; i < 4; i++) { + unsigned int mask = i == 0 ? 
: 0; + + if (unit_failed()) + return; + + mask = 0; + switch (i) { + case 0: + swt = table_mac_create(mac_buckets, mac_max); + mask = OFPFW_ALL & ~OFPFW_DL_SRC; + break; + case 1: + swt = table_linear_create(linear_max); + break; + case 2: + swt = table_hash_create (0x04C11DB7, hash_buckets); + break; + case 3: + swt = table_hash2_create(0x04C11DB7, hash2_buckets1, + 0x1EDC6F41, hash2_buckets2); + break; + default: + BUG(); + return; + } + + if (swt == NULL) { + unit_fail("failed to allocate table %d", i); + return; + } + printk("Testing %s table with %d buckets and %d max flows...\n", + table_name(swt), mac_buckets, mac_max); + iterator_test(swt, 0, mask); + iterator_test(swt, num_flows, mask); + add_test(swt, mask); + delete_test(swt, mask); + complex_add_delete_test(swt, num_flows, num_iterations, mask); + swt->destroy(swt); + } +} + diff --git a/datapath/tests/.gitignore b/datapath/tests/.gitignore new file mode 100644 index 00000000..35e75b7a --- /dev/null +++ b/datapath/tests/.gitignore @@ -0,0 +1,4 @@ +/Makefile +/Makefile.in +/forward_t.h +/fwdhgen diff --git a/datapath/tests/Makefile.am b/datapath/tests/Makefile.am new file mode 100644 index 00000000..fc5fd8f8 --- /dev/null +++ b/datapath/tests/Makefile.am @@ -0,0 +1,7 @@ +noinst_HEADERS = forward_t.h + +forward_t.h: gen_forward_t.pl example.pcap + perl $(srcdir)/gen_forward_t.pl $(srcdir)/example.pcap > forward_t.h.tmp + mv forward_t.h.tmp forward_t.h + +EXTRA_DIST = gen_forward_t.pl example.pcap diff --git a/datapath/tests/example.pcap b/datapath/tests/example.pcap new file mode 100644 index 00000000..7bda4589 Binary files /dev/null and b/datapath/tests/example.pcap differ diff --git a/datapath/tests/gen_forward_t.pl b/datapath/tests/gen_forward_t.pl new file mode 100755 index 00000000..6e12e2c9 --- /dev/null +++ b/datapath/tests/gen_forward_t.pl @@ -0,0 +1,80 @@ +#! 
/usr/bin/perl + +use warnings; +use strict; + +if (@ARGV != 1) { + print STDERR "usage: $0 input.pcap > output.h\n"; # stdout is the generated header + print STDERR "where input.pcap is a packet capture in pcap format\n"; + print STDERR "and output.h is a C header file containing the packets\n"; + exit(1); +} +my ($in_file_name) = $ARGV[0]; +open(INPUT, '<:raw', $in_file_name) or die "$in_file_name: open: $!\n"; # pcap is binary data + +my ($file_header); +if (read(INPUT, $file_header, 24) != 24) { + die "$in_file_name: could not read pcap header\n"; +} + +my ($s, $l); +if (substr($file_header, 0, 4) eq pack('V', 0xa1b2c3d4)) { + ($s, $l) = ('v', 'V'); +} elsif (substr($file_header, 0, 4) eq pack('N', 0xa1b2c3d4)) { + ($s, $l) = ('n', 'N'); +} else { + die "$in_file_name: not a pcap file\n"; +} + +print <<'EOF'; +#ifndef DP_TEST_PACKETS_H +#define DP_TEST_PACKETS_H 1 + +struct pkt { + unsigned char *data; + unsigned int len; +}; +EOF + +my ($n_packets) = 0; +for (;;) { + my ($pkt_hdr) = must_read(16); + last if $pkt_hdr eq ''; + + my ($ts_sec, $ts_usec, $incl_len, $orig_len) = unpack("${l}4", $pkt_hdr); + print STDERR "warning: captured only $incl_len of $orig_len bytes\n" + if $incl_len < $orig_len; + + my ($pkt) = must_read($incl_len); + die "$in_file_name: unexpected end of file\n" if length($pkt) != $incl_len; # "0" and "" are false in Perl, so compare lengths + + print "\nstatic unsigned char p${n_packets}[] = {"; + my ($line_bytes) = 0; + for my $c (map(ord($_), split(//, $pkt))) { + if ($line_bytes++ % 13 == 0) { + print "\n"; + } + printf " 0x%02x,", $c; + } + print "\n};\n"; + $n_packets++; +} + +print "\nstatic int num_packets = $n_packets;\n"; +print "\nstatic struct pkt packets[] = {\n"; +for my $i (0..$n_packets - 1) { + print " { p$i, sizeof p$i },\n"; +} +print "};\n"; + +print "\n#endif\n"; + +sub must_read { + my ($rq_bytes) = @_; + my ($data); + my ($nbytes) = read(INPUT, $data, $rq_bytes); + die "$in_file_name: read: $!\n" if !defined $nbytes; + die "$in_file_name: unexpected end of file\n" + if $nbytes && $nbytes != $rq_bytes; + return $data; +} diff --git a/datapath/tests/ofp_pcap.c 
b/datapath/tests/ofp_pcap.c new file mode 100644 index 00000000..e1b0d226 --- /dev/null +++ b/datapath/tests/ofp_pcap.c @@ -0,0 +1,106 @@ +/* A cheap knock-off of the pcap library to remove that dependency. */ + +#include +#include +#include +#include "ofp_pcap.h" + +/* Opens the pcap dump file 'fname' and initializes 'p' for reading it. + * Returns 0 on success. On failure, writes a message into 'errbuf', releases + * the stream and the packet buffer, leaves 'p' zeroed, and returns 1. */ +int +ofp_pcap_open(struct ofp_pcap *p, const char *fname, char *errbuf) +{ + FILE *fp; + struct pcap_file_header hdr; + size_t amt_read; + + memset((char *)p, 0, sizeof(*p)); + + fp = fopen(fname, "rb"); + if (!fp) { + snprintf(errbuf, OFP_PCAP_ERRBUF_SIZE, "error opening dump file"); + return 1; + } + + amt_read = fread((char *)&hdr, 1, sizeof(hdr), fp); + if (amt_read != sizeof(hdr)) { + snprintf(errbuf, OFP_PCAP_ERRBUF_SIZE, "error reading dump file"); + goto error; + } + + if (hdr.magic != TCPDUMP_MAGIC) { + hdr.magic = SWAPLONG(hdr.magic); + hdr.version_major = SWAPSHORT(hdr.version_major); + hdr.version_minor = SWAPSHORT(hdr.version_minor); + hdr.thiszone = SWAPLONG(hdr.thiszone); + hdr.sigfigs = SWAPLONG(hdr.sigfigs); + hdr.snaplen = SWAPLONG(hdr.snaplen); + hdr.linktype = SWAPLONG(hdr.linktype); + + p->swapped = 1; + } + + p->fp = fp; + p->errbuf = errbuf; + p->bufsize = hdr.snaplen+sizeof(struct pcap_pkthdr); + p->buf = malloc(p->bufsize); + if (!p->buf) { + snprintf(errbuf, OFP_PCAP_ERRBUF_SIZE, "error allocating buffer"); + goto error; + } + + if (hdr.version_major < OFP_PCAP_VERSION_MAJOR) { + snprintf(errbuf, OFP_PCAP_ERRBUF_SIZE, "archaic file format"); + goto error; + } + + return 0; + +error: + /* Undo everything acquired above so a failed open leaks nothing. */ + fclose(fp); + free(p->buf); + memset((char *)p, 0, sizeof(*p)); + return 1; +} + +char * +ofp_pcap_next(struct ofp_pcap *p, struct pcap_pkthdr *hdr) +{ + size_t amt_read; + + amt_read = fread(hdr, 1, sizeof(*hdr), p->fp); + if (amt_read != sizeof(*hdr)) { + snprintf(p->errbuf, OFP_PCAP_ERRBUF_SIZE, "error reading dump file"); + return NULL; + } + + if (p->swapped) { + hdr->caplen = SWAPLONG(hdr->caplen); + hdr->len = SWAPLONG(hdr->len); + hdr->ts.tv_sec = SWAPLONG(hdr->ts.tv_sec); + hdr->ts.tv_usec = SWAPLONG(hdr->ts.tv_usec); + } + + if (hdr->caplen > p->bufsize) { + snprintf(p->errbuf, OFP_PCAP_ERRBUF_SIZE, "error reading dump 
file"); + return NULL; + } + + amt_read = fread((char *)p->buf, 1, hdr->caplen, p->fp); + if (amt_read != hdr->caplen){ + snprintf(p->errbuf, OFP_PCAP_ERRBUF_SIZE, "error reading dump file"); + return NULL; + } + + return p->buf; +} + +void +ofp_pcap_close(struct ofp_pcap *p) +{ + fclose(p->fp); + free(p->buf); +} + diff --git a/datapath/tests/ofp_pcap.h b/datapath/tests/ofp_pcap.h new file mode 100644 index 00000000..6bd2dcb3 --- /dev/null +++ b/datapath/tests/ofp_pcap.h @@ -0,0 +1,64 @@ +#ifndef OFP_PCAP_H +#define OFP_PCAP_H + +#include +#include +#include + +#define OFP_PCAP_VERSION_MAJOR 2 +#define OFP_PCAP_VERSION_MINOR 4 + +#define TCPDUMP_MAGIC 0xa1b2c3d4 + +#define OFP_LINKTYPE_ETHERNET 1 + +#define OFP_PCAP_ERRBUF_SIZE 256 + +/* Swap the byte order regardless of the architecture */ +#define SWAPLONG(x) \ + ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24)) +#define SWAPSHORT(x) \ + ((((x)&0xff)<<8) | (((x)&0xff00)>>8)) + +struct ofp_pcap { + FILE *fp; /* File pointer to currently processed file */ + int swapped; /* Indicate whether endian-ness needs to change */ + char *buf; /* Buffer to hold packet data */ + size_t bufsize; /* Size of buffer */ + char *errbuf; /* Pointer to buffer to hold error message */ +}; + +struct pcap_file_header { + uint32_t magic; /* Magic number */ + uint16_t version_major; /* Version number major */ + uint16_t version_minor; /* Version number minor */ + int32_t thiszone; /* Gmt to local correction */ + uint32_t sigfigs; /* Accuracy of timestamps */ + uint32_t snaplen; /* Max length saved portion of each pkt */ + uint32_t linktype; /* Data link type (LINKTYPE_*) */ +}; + +/* + * This is a timeval as stored in disk in a dumpfile. + * It has to use the same types everywhere, independent of the actual + * `struct timeval' + */ +struct pcap_timeval { + int32_t tv_sec; /* Seconds */ + int32_t tv_usec; /* Microseconds */ +}; + +/* + * How a `pcap_pkthdr' is actually stored in the dumpfile. 
+ */ +struct pcap_pkthdr { + struct pcap_timeval ts; /* Time stamp */ + uint32_t caplen; /* Length of portion present */ + uint32_t len; /* Length this packet (off wire) */ +}; + +int ofp_pcap_open(struct ofp_pcap *p, const char *fname, char *errbuf); +char *ofp_pcap_next(struct ofp_pcap *p, struct pcap_pkthdr *hdr); +void ofp_pcap_close(struct ofp_pcap *p); + +#endif /* ofp_pcap.h */ diff --git a/datapath/unit-exports.c b/datapath/unit-exports.c new file mode 100644 index 00000000..275f01ad --- /dev/null +++ b/datapath/unit-exports.c @@ -0,0 +1,26 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include "table.h" +#include "flow.h" +#include "crc32.h" +#include "forward.h" +#include + +EXPORT_SYMBOL(flow_alloc); +EXPORT_SYMBOL(flow_free); +EXPORT_SYMBOL(flow_cache); + +EXPORT_SYMBOL(table_mac_create); +EXPORT_SYMBOL(table_hash_create); +EXPORT_SYMBOL(table_hash2_create); +EXPORT_SYMBOL(table_linear_create); + +EXPORT_SYMBOL(crc32_init); +EXPORT_SYMBOL(crc32_calculate); + +EXPORT_SYMBOL(flow_extract); +EXPORT_SYMBOL(execute_setter); diff --git a/datapath/unit.c b/datapath/unit.c new file mode 100644 index 00000000..dfc12797 --- /dev/null +++ b/datapath/unit.c @@ -0,0 +1,100 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007 The Board of Trustees of The Leland Stanford Junior Univer +sity + */ + +#include +#include +#include +#include +#include + +#include "unit.h" + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +static char run[1024]; +module_param_string(run, run, sizeof run, 0); +MODULE_PARM_DESC(run, "run=\"test1,[test2,...]\"\n"); /* Must name the parameter declared above. */ +#else +static char *run; +MODULE_PARM(run, "s"); +#endif + +static int test_failed; +static const char *test_name; + +void unit_fail_function(const char *function, const char *msg, ...) 
+{ + va_list args; + + printk("%s: FAIL: %s: ", test_name, function); + va_start(args, msg); + vprintk(msg, args); + va_end(args); + printk("\n"); + test_failed = 1; +} + +int unit_failed(void) +{ + return test_failed; +} + +static int run_test(const char *name, size_t len) +{ + static const struct test { + const char *name; + void (*func)(void); + } tests[] = { +#define UNIT_TEST(NAME) {#NAME, run_##NAME}, + UNIT_TESTS +#undef UNIT_TEST + }; + + const struct test *p; + + for (p = tests; p < &tests[ARRAY_SIZE(tests)]; p++) + if (len == strlen(p->name) + && !memcmp(name, p->name, len)) { + test_name = p->name; + test_failed = 0; + p->func(); + printk("%s: %s\n", test_name, + test_failed ? "FAIL" : "PASS"); + return !test_failed; + } + printk("unknown unit test %.*s\n", (int) len, name); + return 0; +} + +int unit_init(void) +{ + int n_pass = 0, n_fail = 0; + char *p = run ?: ""; + for (;;) { + static const char white_space[] = " \t\r\n\v,"; + int len; + + p += strspn(p, white_space); + if (!*p) + break; + + len = strcspn(p, white_space); + if (run_test(p, len)) + n_pass++; + else + n_fail++; + p += len; + } + + if (n_pass + n_fail == 0) + printk("no tests specified (use run=\"test1 [test2...]\")\n"); + else + printk("%d tests passed, %d failed\n", n_pass, n_fail); + + return -ENODEV; +} + +module_init(unit_init); +MODULE_LICENSE("GPL"); diff --git a/datapath/unit.h b/datapath/unit.h new file mode 100644 index 00000000..6d180a8b --- /dev/null +++ b/datapath/unit.h @@ -0,0 +1,21 @@ +#ifndef UNIT_H +#define UNIT_H 1 + +/* List of unit tests. */ +#define UNIT_TESTS \ + UNIT_TEST(table_t) \ + UNIT_TEST(crc_t) \ + UNIT_TEST(forward_t) + +/* Prototype a function run_ for each of the unit tests. */ +#define UNIT_TEST(NAME) void run_##NAME(void); +UNIT_TESTS +#undef UNIT_TEST + +void unit_fail_function(const char *function, const char *msg, ...) + __attribute__((format(printf, 2, 3))); +#define unit_fail(...) 
unit_fail_function(__func__, __VA_ARGS__) + +int unit_failed(void); + +#endif /* unit.h */ diff --git a/include/.gitignore b/include/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null +++ b/include/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/include/Makefile.am b/include/Makefile.am new file mode 100644 index 00000000..0406acb1 --- /dev/null +++ b/include/Makefile.am @@ -0,0 +1,25 @@ +noinst_HEADERS = \ + buffer.h \ + command-line.h \ + compiler.h \ + dynamic-string.h \ + dpif.h \ + fatal-signal.h \ + fault.h \ + flow.h \ + hash.h \ + ip.h \ + list.h \ + mac.h \ + Makefile.am \ + netlink.h \ + ofp-print.h \ + openflow.h \ + openflow-netlink.h \ + packets.h \ + socket-util.h \ + util.h \ + vconn.h \ + vlog-socket.h \ + vlog.h \ + xtoxll.h diff --git a/include/buffer.h b/include/buffer.h new file mode 100644 index 00000000..98750244 --- /dev/null +++ b/include/buffer.h @@ -0,0 +1,63 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BUFFER_H +#define BUFFER_H 1 + +#include + +/* Buffer for holding arbitrary data. A buffer is automatically reallocated as + * necessary if it grows too large for the available memory. */ +struct buffer { + void *base; /* First byte of malloc()'d area. */ + size_t allocated; /* Number of bytes allocated. */ + + void *data; /* First byte actually in use. */ + size_t size; /* Number of bytes in use. */ + + struct buffer *next; /* Next in a list of buffers. */ +}; + +void buffer_use(struct buffer *, void *, size_t); + +void buffer_init(struct buffer *, size_t); +void buffer_uninit(struct buffer *); +void buffer_reinit(struct buffer *, size_t); + +struct buffer *buffer_new(size_t); +void buffer_delete(struct buffer *); + +void *buffer_at(const struct buffer *, size_t offset, size_t size); +void *buffer_at_assert(const struct buffer *, size_t offset, size_t size); +void *buffer_tail(const struct buffer *); +void *buffer_end(const struct buffer *); + +void *buffer_put_uninit(struct buffer *, size_t); +void buffer_put(struct buffer *, const void *, size_t); + +size_t buffer_headroom(struct buffer *); +size_t buffer_tailroom(struct buffer *); +void buffer_reserve_tailroom(struct buffer *, size_t); + +void buffer_clear(struct buffer *); +void buffer_pull(struct buffer *, size_t); + +#endif /* buffer.h */ diff --git a/include/command-line.h b/include/command-line.h new file mode 100644 index 00000000..57d3e9bd --- /dev/null +++ b/include/command-line.h @@ -0,0 +1,30 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef COMMAND_LINE_H +#define COMMAND_LINE_H 1 + +/* Utilities for command-line parsing. */ + +struct option; +char *long_options_to_short_options(const struct option *options); + +#endif /* command-line.h */ diff --git a/include/compiler.h b/include/compiler.h new file mode 100644 index 00000000..bfd3f167 --- /dev/null +++ b/include/compiler.h @@ -0,0 +1,32 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef COMPILER_H +#define COMPILER_H 1 + +#define NO_RETURN __attribute__((__noreturn__)) +#define UNUSED __attribute__((__unused__)) +#define PACKED __attribute__((__packed__)) +#define PRINTF_FORMAT(FMT, ARG1) __attribute__((__format__(printf, FMT, ARG1))) +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + +#endif /* compiler.h */ diff --git a/include/dpif.h b/include/dpif.h new file mode 100644 index 00000000..795a5007 --- /dev/null +++ b/include/dpif.h @@ -0,0 +1,55 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef DPIF_H +#define DPIF_H 1 + +/* Operations for the datapath running in the local kernel. The interface can + * generalize to multiple types of local datapaths, but the implementation only + * supports the openflow kernel module via netlink. */ + +#include +#include + +struct buffer; +struct ofp_match; + +/* A datapath interface. Opaque. 
*/ +struct dpif +{ + int dp_idx; + struct nl_sock *sock; +}; + +int dpif_open(int dp_idx, bool subscribe, struct dpif *); +void dpif_close(struct dpif *); +int dpif_recv_openflow(struct dpif *, struct buffer **, bool wait); +int dpif_send_openflow(struct dpif *, struct buffer *, bool wait); +int dpif_add_dp(struct dpif *); +int dpif_del_dp(struct dpif *); +int dpif_add_port(struct dpif *, const char *netdev); +int dpif_del_port(struct dpif *, const char *netdev); +int dpif_show(struct dpif *); +int dpif_dump_tables(struct dpif *); +int dpif_dump_flows(struct dpif *, int table, struct ofp_match *); +int dpif_benchmark_nl(struct dpif *, uint32_t, uint32_t); + +#endif /* dpif.h */ diff --git a/include/dynamic-string.h b/include/dynamic-string.h new file mode 100644 index 00000000..5d21c092 --- /dev/null +++ b/include/dynamic-string.h @@ -0,0 +1,45 @@ +/* Copyright (C) 2008 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef DYNAMIC_STRING_H +#define DYNAMIC_STRING_H 1 + +#include +#include +#include "compiler.h" + +struct ds { + char *string; /* Null-terminated string. */ + size_t length; /* Bytes used, not including null terminator. */ + size_t allocated; /* Bytes allocated, not including null terminator. */ +}; + +#define DS_EMPTY_INITIALIZER { NULL, 0, 0 } + +void ds_init(struct ds *); +void ds_reserve(struct ds *, size_t min_length); +void ds_put_format(struct ds *, const char *, ...) PRINTF_FORMAT(2, 3); +void ds_put_format_valist(struct ds *, const char *, va_list) + PRINTF_FORMAT(2, 0); +char *ds_cstr(struct ds *); +void ds_destroy(struct ds *); + +#endif /* dynamic-string.h */ diff --git a/include/fatal-signal.h b/include/fatal-signal.h new file mode 100644 index 00000000..7d716da4 --- /dev/null +++ b/include/fatal-signal.h @@ -0,0 +1,28 @@ +/* Utility functions for hooking process termination signals. + * + * Hooks registered with this module are called by handlers for signals that + * terminate the process normally (e.g. SIGTERM, SIGINT). They are not called + * for signals that indicate program errors (e.g. SIGFPE, SIGSEGV). They are + * useful for cleanup, such as deleting temporary files. + * + * The hooks are not called upon normal process termination via exit(). Use + * atexit() to hook normal process termination. + * + * These functions will only work properly for single-threaded processes. */ + +#ifndef FATAL_SIGNAL_H +#define FATAL_SIGNAL_H 1 + +/* Basic interface. */ +void fatal_signal_add_hook(void (*)(void *aux), void *aux); +void fatal_signal_block(void); +void fatal_signal_unblock(void); + +/* Convenience functions for unlinking files upon termination. 
+ * + * These functions also unlink the files upon normal process termination via + * exit(). */ +void fatal_signal_add_file_to_unlink(const char *); +void fatal_signal_remove_file_to_unlink(const char *); + +#endif /* fatal-signal.h */ diff --git a/include/fault.h b/include/fault.h new file mode 100644 index 00000000..c1e8ff18 --- /dev/null +++ b/include/fault.h @@ -0,0 +1,28 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef FAULT_H +#define FAULT_H 1 + +void register_fault_handlers(void); +void log_backtrace(void); + +#endif /* fault.h */ diff --git a/include/flow.h b/include/flow.h new file mode 100644 index 00000000..63db5679 --- /dev/null +++ b/include/flow.h @@ -0,0 +1,33 @@ +#ifndef FLOW_H +#define FLOW_H 1 + +#include +#include "util.h" + +struct buffer; + +/* Identification data for a flow. + All fields are in network byte order. 
+ In decreasing order by size, so that flow structures can be hashed or + compared bytewise. */ +struct flow { + uint32_t nw_src; /* IP source address. */ + uint32_t nw_dst; /* IP destination address. */ + uint16_t in_port; /* Input switch port. */ + uint16_t dl_vlan; /* Input VLAN. */ + uint16_t dl_type; /* Ethernet frame type. */ + uint16_t tp_src; /* TCP/UDP source port. */ + uint16_t tp_dst; /* TCP/UDP destination port. */ + uint8_t dl_src[6]; /* Ethernet source address. */ + uint8_t dl_dst[6]; /* Ethernet destination address. */ + uint8_t nw_proto; /* IP protocol. */ + uint8_t reserved; /* One byte of padding. */ +}; +BUILD_ASSERT_DECL(sizeof (struct flow) == 32); + +void flow_extract(const struct buffer *, uint16_t in_port, struct flow *); +void flow_print(FILE *, const struct flow *); +int flow_compare(const struct flow *, const struct flow *); +unsigned long int flow_hash(const struct flow *, uint32_t basis); + +#endif /* flow.h */ diff --git a/include/hash.h b/include/hash.h new file mode 100644 index 00000000..2a17c0e1 --- /dev/null +++ b/include/hash.h @@ -0,0 +1,12 @@ +#ifndef HASH_H +#define HASH_H 1 + +#include +#include + +#define HASH_FNV_BASIS UINT32_C(2166136261) +#define HASH_FNV_PRIME UINT32_C(16777619) + +uint32_t hash_fnv(const void *, size_t, uint32_t basis); + +#endif /* hash.h */ diff --git a/include/ip.h b/include/ip.h new file mode 100644 index 00000000..2fa8aa98 --- /dev/null +++ b/include/ip.h @@ -0,0 +1,11 @@ +#ifndef IP_H +#define IP_H 1 + +#define IP_FMT "%"PRIu8".%"PRIu8".%"PRIu8".%"PRIu8 +#define IP_ARGS(ip) \ + ((uint8_t *) ip)[0], \ + ((uint8_t *) ip)[1], \ + ((uint8_t *) ip)[2], \ + ((uint8_t *) ip)[3] + +#endif /* ip.h */ diff --git a/include/list.h b/include/list.h new file mode 100644 index 00000000..6bf934bd --- /dev/null +++ b/include/list.h @@ -0,0 +1,53 @@ +#ifndef LIST_H +#define LIST_H 1 + +/* Doubly linked list. */ + +#include +#include +#include "util.h" + +/* Doubly linked list head or element. 
*/ +struct list + { + struct list *prev; /* Previous list element. */ + struct list *next; /* Next list element. */ + }; + +#define LIST_INITIALIZER(LIST) { LIST, LIST } + +void list_init(struct list *); + +/* List insertion. */ +void list_insert(struct list *, struct list *); +void list_splice(struct list *before, struct list *first, struct list *last); +void list_push_front(struct list *, struct list *); +void list_push_back(struct list *, struct list *); + +/* List removal. */ +struct list *list_remove(struct list *); +struct list *list_pop_front(struct list *); +struct list *list_pop_back(struct list *); + +/* List elements. */ +struct list *list_front(struct list *); +struct list *list_back(struct list *); + +/* List properties. */ +size_t list_size(const struct list *); +bool list_is_empty(const struct list *); + +#define LIST_ELEM__(ELEM, STRUCT, MEMBER, LIST) \ + (ELEM != LIST ? CONTAINER_OF(ELEM, STRUCT, MEMBER) : NULL) +#define LIST_FOR_EACH(ITER, STRUCT, MEMBER, LIST) \ + for (ITER = LIST_ELEM__((LIST)->next, STRUCT, MEMBER, LIST); \ + ITER != NULL; \ + ITER = LIST_ELEM__((ITER)->MEMBER.next, STRUCT, MEMBER, LIST)) +#define LIST_FOR_EACH_SAFE(ITER, NEXT, STRUCT, MEMBER, LIST) \ + for (ITER = LIST_ELEM__((LIST)->next, STRUCT, MEMBER, LIST); \ + (ITER != NULL \ + ? 
(NEXT = LIST_ELEM__((ITER)->MEMBER.next, STRUCT, MEMBER, LIST), 1) \ + : 0); \ + ITER = NEXT) + +#endif /* list.h */ diff --git a/include/mac.h b/include/mac.h new file mode 100644 index 00000000..a8516df7 --- /dev/null +++ b/include/mac.h @@ -0,0 +1,41 @@ +#ifndef MAC_H +#define MAC_H 1 + +#include +#include +#include +#include "packets.h" + +static inline bool mac_is_multicast(const uint8_t mac[ETH_ADDR_LEN]) +{ + return mac[0] & 0x01; /* I/G bit: least-significant bit of the first octet. */ +} + +static inline bool mac_is_private(const uint8_t mac[ETH_ADDR_LEN]) +{ + return mac[0] & 0x02; /* U/L bit: set for locally administered addresses. */ +} + +static inline bool mac_is_broadcast(const uint8_t mac[ETH_ADDR_LEN]) +{ + return (mac[0] & mac[1] & mac[2] & mac[3] & mac[4] & mac[5]) == 0xff; +} + +static inline bool mac_is_zero(const uint8_t mac[ETH_ADDR_LEN]) +{ + return (mac[0] | mac[1] | mac[2] | mac[3] | mac[4] | mac[5]) == 0; +} + +static inline bool mac_equals(const uint8_t a[ETH_ADDR_LEN], + const uint8_t b[ETH_ADDR_LEN]) +{ + return !memcmp(a, b, ETH_ADDR_LEN); +} + +#define MAC_FMT \ + "%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8 +#define MAC_ARGS(mac) \ + (mac)[0], (mac)[1], (mac)[2], (mac)[3], (mac)[4], (mac)[5] + + +#endif /* mac.h */ diff --git a/include/netlink.h b/include/netlink.h new file mode 100644 index 00000000..e5e66c28 --- /dev/null +++ b/include/netlink.h @@ -0,0 +1,148 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef NETLINK_H +#define NETLINK_H 1 + +/* Netlink interface. + * + * Netlink is a datagram-based network protocol primarily for communication + * between user processes and the kernel, and mainly on Linux. Netlink is + * specified in RFC 3549, "Linux Netlink as an IP Services Protocol". + * + * Netlink is not suitable for use in physical networks of heterogeneous + * machines because host byte order is used throughout. */ + +#include +#include +#include +#include +#include +#include + +#ifndef NLA_ALIGNTO +struct nlattr +{ + __u16 nla_len; + __u16 nla_type; +}; + +#define NLA_ALIGNTO 4 +#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) +#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) + +#endif + +struct buffer; +struct nl_sock; + +/* Netlink sockets. 
*/ + +int nl_sock_create(int protocol, int multicast_group, + size_t so_sndbuf, size_t so_rcvbuf, + struct nl_sock **); +void nl_sock_destroy(struct nl_sock *); + +int nl_sock_send(struct nl_sock *, const struct buffer *, bool wait); +int nl_sock_sendv(struct nl_sock *sock, const struct iovec iov[], size_t n_iov, + bool wait); +int nl_sock_recv(struct nl_sock *, struct buffer **, bool wait); +int nl_sock_transact(struct nl_sock *, const struct buffer *request, + struct buffer **reply); + +int nl_sock_fd(const struct nl_sock *); + +/* Netlink messages. */ + +/* Accessing headers and data. */ +struct nlmsghdr *nl_msg_nlmsghdr(const struct buffer *); +struct genlmsghdr *nl_msg_genlmsghdr(const struct buffer *); +bool nl_msg_nlmsgerr(const struct buffer *, int *error); +void nl_msg_reserve(struct buffer *, size_t); + +/* Appending headers and raw data. */ +void nl_msg_put_nlmsghdr(struct buffer *, struct nl_sock *, + size_t expected_payload, + uint32_t type, uint32_t flags); +void nl_msg_put_genlmsghdr(struct buffer *, struct nl_sock *, + size_t expected_payload, int family, uint32_t flags, + uint8_t cmd, uint8_t version); +void nl_msg_put(struct buffer *, const void *, size_t); +void *nl_msg_put_uninit(struct buffer *, size_t); + +/* Appending attributes. */ +void *nl_msg_put_unspec_uninit(struct buffer *, uint16_t type, size_t); +void nl_msg_put_unspec(struct buffer *, uint16_t type, const void *, size_t); +void nl_msg_put_flag(struct buffer *, uint16_t type); +void nl_msg_put_u8(struct buffer *, uint16_t type, uint8_t value); +void nl_msg_put_u16(struct buffer *, uint16_t type, uint16_t value); +void nl_msg_put_u32(struct buffer *, uint16_t type, uint32_t value); +void nl_msg_put_u64(struct buffer *, uint16_t type, uint64_t value); +void nl_msg_put_string(struct buffer *, uint16_t type, const char *value); +void nl_msg_put_nested(struct buffer *, uint16_t type, struct buffer *); + +/* Netlink attribute types. 
*/ +enum nl_attr_type +{ + NL_A_NO_ATTR = 0, + NL_A_UNSPEC, + NL_A_U8, + NL_A_U16, + NL_A_U32, + NL_A_U64, + NL_A_STRING, + NL_A_FLAG, + NL_A_NESTED, + N_NL_ATTR_TYPES +}; + +/* Netlink attribute parsing. */ +const void* nl_attr_get(const struct nlattr *); +size_t nl_attr_get_size(const struct nlattr *); +const void* nl_attr_get_unspec(const struct nlattr *, size_t size); +bool nl_attr_get_flag(const struct nlattr *); +uint8_t nl_attr_get_u8(const struct nlattr *); +uint16_t nl_attr_get_u16(const struct nlattr *); +uint32_t nl_attr_get_u32(const struct nlattr *); +uint64_t nl_attr_get_u64(const struct nlattr *); +const char *nl_attr_get_string(const struct nlattr *); + +/* Netlink attribute policy. + * + * Specifies how to parse a single attribute from a Netlink message payload. + * + * See Nl_policy for example. + */ +struct nl_policy +{ + enum nl_attr_type type; + size_t min_len, max_len; + bool optional; +}; + +bool nl_policy_parse(const struct buffer *, const struct nl_policy[], + struct nlattr *[], size_t n_attrs); + +/* Miscellaneous. */ + +int nl_lookup_genl_family(const char *name, int *number); + +#endif /* netlink.h */ diff --git a/include/ofp-print.h b/include/ofp-print.h new file mode 100644 index 00000000..ad383a43 --- /dev/null +++ b/include/ofp-print.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* OpenFlow protocol pretty-printer. */ + +#ifndef __OFP_PRINT_H__ +#define __OFP_PRINT_H__ 1 /* Must name the same macro as the #ifndef above. */ + +#include <stdio.h> /* FILE, size_t */ + +struct ofp_flow_mod; +struct ofp_table; + +#ifdef __cplusplus +extern "C" { +#endif + +void ofp_print(FILE *, const void *, size_t, int verbosity); +void ofp_print_table(FILE *stream, const struct ofp_table* ot); +void ofp_print_flow_mod(FILE *stream, const void *data, size_t len, int verbosity); +void ofp_print_flow_expired(FILE *stream, const void *data, size_t len, int verbosity); +void ofp_print_data_hello(FILE *stream, const void *data, size_t len, int verbosity); +void ofp_print_packet(FILE *stream, const void *data, size_t len, size_t total_len); +void ofp_print_port_status(FILE *stream, const void *oh, size_t len, int verbosity); + +#ifdef __cplusplus +} +#endif + +#endif /* ofp-print.h */ diff --git a/include/openflow-netlink.h b/include/openflow-netlink.h new file mode 100644 index 00000000..31bd71eb --- /dev/null +++ b/include/openflow-netlink.h @@ -0,0 +1,83 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef OPENFLOW_NETLINK_H +#define OPENFLOW_NETLINK_H 1 + +#include + +#define DP_GENL_FAMILY_NAME "OpenFlow" + +/* Attributes that can be attached to the datapath's netlink messages. */ +enum { + DP_GENL_A_UNSPEC, + DP_GENL_A_OFPHEADER, /* OFP header information */ + DP_GENL_A_DP_IDX, /* Datapath Ethernet device name. */ + DP_GENL_A_PORTNAME, /* Device name for datapath port. */ + DP_GENL_A_MC_GROUP, /* Generic netlink multicast group. */ + DP_GENL_A_OPENFLOW, /* OpenFlow packet. 
*/ + + DP_GENL_A_DP_INFO, /* OpenFlow datapath information */ + + DP_GENL_A_FLOW, /* OpenFlow flow entry */ + DP_GENL_A_NUMFLOWS, /* Number of flows */ + DP_GENL_A_TABLEIDX, /* Flow table index */ + + DP_GENL_A_TABLE, /* OpenFlow table entry */ + DP_GENL_A_NUMTABLES, /* Number of tables in a table query */ + + DP_GENL_A_NPACKETS, /* Number of packets to send up netlink */ + DP_GENL_A_PSIZE, /* Size of packets to send up netlink */ + + __DP_GENL_A_MAX, + DP_GENL_A_MAX = __DP_GENL_A_MAX - 1 +}; + +/* Commands that can be executed on the datapath's netlink interface. */ +enum dp_genl_command { + DP_GENL_C_UNSPEC, + DP_GENL_C_ADD_DP, /* Create datapath. */ + DP_GENL_C_DEL_DP, /* Destroy datapath. */ + DP_GENL_C_QUERY_DP, /* Get multicast group for datapath. */ + DP_GENL_C_SHOW_DP, /* Show information about datapath. */ + DP_GENL_C_ADD_PORT, /* Add port to datapath. */ + DP_GENL_C_DEL_PORT, /* Remove port from datapath. */ + DP_GENL_C_OPENFLOW, /* Encapsulated OpenFlow protocol. */ + + DP_GENL_C_QUERY_FLOW, /* Request flow entries. */ + DP_GENL_C_QUERY_TABLE, /* Request table entries. */ + + DP_GENL_C_BENCHMARK_NL, /* Benchmark netlink connection */ + + __DP_GENL_C_MAX, + DP_GENL_C_MAX = __DP_GENL_C_MAX - 1 +}; + +/* Table */ +enum { + TBL_MACONLY, + TBL_HASH, + TBL_LINEAR, + __TBL_MAX, + TBL_MAX = __TBL_MAX - 1 +}; + +#endif /* openflow_netlink_h */ diff --git a/include/openflow.h b/include/openflow.h new file mode 100644 index 00000000..5f76e931 --- /dev/null +++ b/include/openflow.h @@ -0,0 +1,388 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* OpenFlow: protocol between controller and datapath. */ + +#ifndef OPENFLOW_H +#define OPENFLOW_H 1 + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +/* Maximum length of a OpenFlow packet. */ +#define OFP_MAXLEN (sizeof(struct ofp_data_hello) \ + + (sizeof(struct ofp_phy_port) * OFPP_MAX) + 200) + +#define OFP_VERSION 1 +#define OFP_MAX_TABLE_NAME_LEN 32 +#define OFP_MAX_PORT_NAME_LEN 16 + +#define OFP_TCP_PORT 975 +#define OFP_SSL_PORT 976 + +#define OFP_ETH_ALEN 6 /* Bytes in an Ethernet address. */ + +/* Port numbering. Physical ports are numbered starting from 0. */ +enum ofp_port { + /* Maximum number of physical switch ports. */ + OFPP_MAX = 0x100, + + /* Fake output "ports". */ + OFPP_NORMAL = 0xfffa, /* Process with normal L2/L3 switching */ + OFPP_FLOOD = 0xfffb, /* All physical ports except input port and + those disabled by STP. 
 */ + OFPP_ALL = 0xfffc, /* All physical ports except input port. */ + OFPP_CONTROLLER = 0xfffd, /* Send to controller. */ + OFPP_LOCAL = 0xfffe, /* Local openflow "port". */ /* xxx Want?! */ + OFPP_NONE = 0xffff /* Not associated with a physical port. */ +}; + +enum ofp_type { + OFPT_CONTROL_HELLO, /* 0 Controller/switch message */ + OFPT_DATA_HELLO, /* 1 Controller/switch message */ + OFPT_PACKET_IN, /* 2 Async message */ + OFPT_PACKET_OUT, /* 3 Controller/switch message */ + OFPT_FLOW_MOD, /* 4 Controller/switch message */ + OFPT_FLOW_EXPIRED, /* 5 Async message */ + OFPT_TABLE, /* 6 Controller/switch message */ + OFPT_PORT_MOD, /* 7 Controller/switch message */ + OFPT_PORT_STATUS, /* 8 Async message */ + OFPT_FLOW_STAT_REQUEST, /* 9 Controller/switch message */ + OFPT_FLOW_STAT_REPLY, /* 10 Controller/switch message */ + OFPT_TABLE_STAT_REQUEST, /* 11 Controller/switch message */ + OFPT_TABLE_STAT_REPLY, /* 12 Controller/switch message */ + OFPT_PORT_STAT_REQUEST, /* 13 Controller/switch message */ + OFPT_PORT_STAT_REPLY /* 14 Controller/switch message */ +}; + +/* Header on all OpenFlow packets. */ +struct ofp_header { + uint8_t version; /* Always 1. */ + uint8_t type; /* One of the OFPT_ constants. */ + uint16_t length; /* Length including this ofp_header. */ + uint32_t xid; /* Transaction id associated with this packet. + Replies use the same id as was in the request + to facilitate pairing. */ +}; + +#define OFP_DEFAULT_MISS_SEND_LEN 128 +#define OFP_MISS_SEND_LEN_UNCHANGED 0xffff + +/* Flag to indicate that datapath should notify the controller of + * expired flow entries. + */ +#define OFP_CHELLO_SEND_FLOW_EXP 0x0001 + +/* Controller hello (controller -> datapath). */ +struct ofp_control_hello { + struct ofp_header header; + uint32_t version; /* Max supported protocol version (?) */ + uint16_t flags; + uint16_t miss_send_len; /* Max bytes of new flow that datapath should + send to the controller.
A value of + OFP_MISS_SEND_LEN_UNCHANGED leaves the + currently configured value unchanged. */ +}; + +/* Capabilities supported by the datapath. */ +enum ofp_capabilities { + OFPC_FLOW_STATS = 1 << 0, /* Flow statistics. */ + OFPC_TABLE_STATS = 1 << 1, /* Table statistics. */ + OFPC_PORT_STATS = 1 << 2, /* Port statistics. */ + OFPC_STP = 1 << 3, /* 802.1D spanning tree. */ + OFPC_MULTI_PHY_TX = 1 << 4 /* Supports transmitting through multiple + physical interfaces */ +}; + +/* Flags to indicate behavior of the physical port */ +enum ofp_port_flags { + OFPPFL_NO_FLOOD = 1 << 0, /* Do not include this port when flooding */ +}; + +/* Features of physical ports available in a datapath. */ +enum ofp_port_features { + OFPPF_10MB_HD = 1 << 0, /* 10 Mb half-duplex rate support. */ + OFPPF_10MB_FD = 1 << 1, /* 10 Mb full-duplex rate support. */ + OFPPF_100MB_HD = 1 << 2, /* 100 Mb half-duplex rate support. */ + OFPPF_100MB_FD = 1 << 3, /* 100 Mb full-duplex rate support. */ + OFPPF_1GB_HD = 1 << 4, /* 1 Gb half-duplex rate support. */ + OFPPF_1GB_FD = 1 << 5, /* 1 Gb full-duplex rate support. */ + OFPPF_10GB_FD = 1 << 6, /* 10 Gb full-duplex rate support. */ +}; + + +/* Description of a physical port */ +struct ofp_phy_port { + uint16_t port_no; + uint8_t hw_addr[OFP_ETH_ALEN]; + uint8_t name[OFP_MAX_PORT_NAME_LEN]; /* Null-terminated */ + uint32_t flags; /* Bitmap of "ofp_port_flags". */ + uint32_t speed; /* Current speed in Mbps */ + uint32_t features; /* Bitmap of supported "ofp_port_features"s. */ +}; + +/* Datapath hello (datapath -> controller). */ +struct ofp_data_hello { + struct ofp_header header; + uint64_t datapath_id; /* Datapath unique ID */ + + /* Table info. */ + uint32_t n_exact; /* Max exact-match table entries. */ + uint32_t n_mac_only; /* Max mac-only table entries. */ + uint32_t n_compression; /* Max entries compressed on service port. */ + uint32_t n_general; /* Max entries of arbitrary form. */ + + /* Buffer limits.
A datapath that cannot buffer reports 0.*/ + uint32_t buffer_mb; /* Space for buffering packets, in MB. */ + uint32_t n_buffers; /* Max packets buffered at once. */ + + /* Features. */ + uint32_t capabilities; /* Bitmap of supported "ofp_capabilities". */ + uint32_t actions; /* Bitmap of supported "ofp_action_type"s. */ + + /* Miscellany */ + uint16_t miss_send_len; /* Currently configured value for max bytes + of new flow that datapath will send to the + controller. */ + uint8_t pad[2]; /* Align to 32-bits */ + + /* Port info.*/ + struct ofp_phy_port ports[0]; /* Port definitions. The number of ports + is inferred from the length field in + the header. */ +}; + +/* What changed about the physical port */ +enum ofp_port_reason { + OFPPR_ADD, /* The port was added */ + OFPPR_DELETE, /* The port was removed */ + OFPPR_MOD /* Some attribute of the port has changed */ +}; + +/* A physical port has changed in the datapath */ +struct ofp_port_status { + struct ofp_header header; + uint8_t reason; /* One of OFPPR_* */ + uint8_t pad[3]; /* Align to 32-bits */ + struct ofp_phy_port desc; +}; + +/* Modify behavior of the physical port */ +struct ofp_port_mod { + struct ofp_header header; + struct ofp_phy_port desc; +}; + +/* Why is this packet being sent to the controller? */ +enum ofp_reason { + OFPR_NO_MATCH, /* No matching flow. */ + OFPR_ACTION /* Action explicitly output to controller. */ +}; + +/* Packet received on port (datapath -> controller). */ +struct ofp_packet_in { + struct ofp_header header; + uint32_t buffer_id; /* ID assigned by datapath. */ + uint16_t total_len; /* Full length of frame. */ + uint16_t in_port; /* Port on which frame was received. */ + uint8_t reason; /* Reason packet is being sent (one of OFPR_*) */ + uint8_t pad; + uint8_t data[0]; /* Ethernet frame, halfway through 32-bit word, + so the IP header is 32-bit aligned. The + amount of data is inferred from the length + field in the header.
Because of padding, + offsetof(struct ofp_packet_in, data) == + sizeof(struct ofp_packet_in) - 2. */ +}; + +enum ofp_action_type { + OFPAT_OUTPUT, /* Output to switch port. */ + OFPAT_SET_DL_VLAN, /* VLAN. */ + OFPAT_SET_DL_SRC, /* Ethernet source address. */ + OFPAT_SET_DL_DST, /* Ethernet destination address. */ + OFPAT_SET_NW_SRC, /* IP source address. */ + OFPAT_SET_NW_DST, /* IP destination address. */ + OFPAT_SET_TP_SRC, /* TCP/UDP source port. */ + OFPAT_SET_TP_DST /* TCP/UDP destination port. */ +}; + +/* An output action sends packets out 'port'. When the 'port' is the + * OFPP_CONTROLLER, 'max_len' indicates the max number of bytes to + * send. A 'max_len' of zero means the entire packet should be sent. */ +struct ofp_action_output { + uint16_t max_len; + uint16_t port; +}; + +/* The VLAN id is 12-bits, so we'll use the entire 16 bits to indicate + * special conditions. All ones is used to indicate that no VLAN id was + * set, or if used as an action, that the VLAN header should be + * stripped. + */ +#define OFP_VLAN_NONE 0xffff + +struct ofp_action { + uint16_t type; /* One of OFPAT_* */ + union { + struct ofp_action_output output; /* OFPAT_OUTPUT: output struct. */ + uint16_t vlan_id; /* OFPAT_SET_DL_VLAN: VLAN id. */ + uint8_t dl_addr[OFP_ETH_ALEN]; /* OFPAT_SET_DL_SRC/DST */ + uint32_t nw_addr; /* OFPAT_SET_NW_SRC/DST */ + uint16_t tp; /* OFPAT_SET_TP_SRC/DST */ + } arg; +}; + +/* Send packet (controller -> datapath). */ +struct ofp_packet_out { + struct ofp_header header; + uint32_t buffer_id; /* ID assigned by datapath (-1 if none). */ + uint16_t in_port; /* Packet's input port (OFPP_NONE if none). */ + uint16_t out_port; /* Output port (if buffer_id == -1). */ + union { + struct ofp_action actions[0]; /* buffer_id != -1 */ + uint8_t data[0]; /* buffer_id == -1 */ + } u; +}; + +enum ofp_flow_mod_command { + OFPFC_ADD, /* New flow. */ + OFPFC_DELETE, /* Delete all matching flows. */ + OFPFC_DELETE_STRICT /* Strictly match wildcards. 
*/ +}; + +/* Flow wildcards. */ +enum ofp_flow_wildcards { + OFPFW_IN_PORT = 1 << 0, /* Switch input port. */ + OFPFW_DL_VLAN = 1 << 1, /* VLAN. */ + OFPFW_DL_SRC = 1 << 2, /* Ethernet source address. */ + OFPFW_DL_DST = 1 << 3, /* Ethernet destination address. */ + OFPFW_DL_TYPE = 1 << 4, /* Ethernet frame type. */ + OFPFW_NW_SRC = 1 << 5, /* IP source address. */ + OFPFW_NW_DST = 1 << 6, /* IP destination address. */ + OFPFW_NW_PROTO = 1 << 7, /* IP protocol. */ + OFPFW_TP_SRC = 1 << 8, /* TCP/UDP source port. */ + OFPFW_TP_DST = 1 << 9, /* TCP/UDP destination port. */ + OFPFW_ALL = (1 << 10) - 1 +}; + +/* Values below this cutoff are 802.3 packets and the two bytes + * following MAC addresses are used as a frame length. Otherwise, the + * two bytes are used as the Ethernet type. + */ +#define OFP_DL_TYPE_ETH2_CUTOFF 0x0600 + +/* Value of dl_type to indicate that the frame does not include an + * Ethernet type. + */ +#define OFP_DL_TYPE_NOT_ETH_TYPE 0x05ff + +/* Fields to match against flows */ +struct ofp_match { + uint16_t wildcards; /* Wildcard fields. */ + uint16_t in_port; /* Input switch port. */ + uint8_t dl_src[OFP_ETH_ALEN]; /* Ethernet source address. */ + uint8_t dl_dst[OFP_ETH_ALEN]; /* Ethernet destination address. */ + uint16_t dl_vlan; /* Input VLAN. */ + uint16_t dl_type; /* Ethernet frame type. */ + uint32_t nw_src; /* IP source address. */ + uint32_t nw_dst; /* IP destination address. */ + uint8_t nw_proto; /* IP protocol. */ + uint8_t pad[3]; /* Align to 32-bits */ + uint16_t tp_src; /* TCP/UDP source port. */ + uint16_t tp_dst; /* TCP/UDP destination port. */ +}; + +/* Value used in "max_idle" to indicate that the entry is permanent */ +#define OFP_FLOW_PERMANENT 0 + +/* Flow setup and teardown (controller -> datapath). */ +struct ofp_flow_mod { + struct ofp_header header; + struct ofp_match match; /* Fields to match */ + + /* Flow actions. */ + uint16_t command; /* One of OFPFC_*. 
*/ + uint16_t max_idle; /* Idle time before discarding (seconds). */ + uint32_t buffer_id; /* Buffered packet to apply to (or -1). */ + uint32_t group_id; /* Flow group ID (for QoS). */ + struct ofp_action actions[0]; /* The number of actions is inferred from + the length field in the header. */ +}; + +/* Flow expiration (datapath -> controller). */ +struct ofp_flow_expired { + struct ofp_header header; + struct ofp_match match; /* Description of fields */ + + uint32_t duration; /* Time flow was alive in seconds. */ + uint64_t packet_count; + uint64_t byte_count; +}; + +/* Statistics about flows that match the "match" field */ +struct ofp_flow_stats { + struct ofp_match match; /* Description of fields */ + uint32_t duration; /* Time flow has been alive in seconds. Only + used for non-aggregated results. */ + uint64_t packet_count; + uint64_t byte_count; +}; + +enum { + OFPFS_INDIV, /* Send an entry for each matching flow */ + OFPFS_AGGREGATE /* Aggregate matching flows */ +}; + +/* Current flow statistics request */ +struct ofp_flow_stat_request { + struct ofp_header header; + struct ofp_match match; /* Fields to match */ + uint8_t type; /* One of OFPFS_ */ + uint8_t pad[3]; /* Align to 32-bits */ +}; + +/* Current flow statistics reply */ +struct ofp_flow_stat_reply { + struct ofp_header header; + + /* If request was of type OFPFS_INDIV, this will contain an array of + * flow statistic entries. The number of matching flows is likely + * much larger than can fit in a single OpenFlow message, so a + * a response with no flows included is sent to indicate the end. + * If it was a OFPFS_AGGREGATE request, only a single flow stats + * entry will be contained in the response. 
+ */ + struct ofp_flow_stats flows[0]; +}; + +/* Table attributes collected at runtime */ +struct ofp_table { + struct ofp_header header; + char name[OFP_MAX_TABLE_NAME_LEN]; + uint16_t table_id; + unsigned long int n_flows; /* NOTE(review): 'unsigned long int' is not a fixed-width type, so the size and layout of this struct differ between ABIs -- confirm that is acceptable for a struct that begins with an ofp_header. */ + unsigned long int max_flows; /* NOTE(review): same ABI-dependent width as n_flows. */ +}; + +#endif /* openflow.h */ diff --git a/include/packets.h b/include/packets.h new file mode 100644 index 00000000..52a7a380 --- /dev/null +++ b/include/packets.h @@ -0,0 +1,136 @@ +#ifndef PACKETS_H +#define PACKETS_H 1 + +#include +#include "util.h" + +/* Ethernet frames. */ +#define ETH_ADDR_LEN 6 + +#define ETH_TYPE_IP 0x0800 +#define ETH_TYPE_ARP 0x0806 +#define ETH_TYPE_VLAN 0x8100 + +#define ETH_HEADER_LEN 14 +#define ETH_PAYLOAD_MIN 46 +#define ETH_TOTAL_MIN (ETH_HEADER_LEN + ETH_PAYLOAD_MIN) +struct eth_header { + uint8_t eth_dst[ETH_ADDR_LEN]; + uint8_t eth_src[ETH_ADDR_LEN]; + uint16_t eth_type; +}; +BUILD_ASSERT_DECL(ETH_HEADER_LEN == sizeof(struct eth_header)); + +#define LLC_DSAP_SNAP 0xaa +#define LLC_SSAP_SNAP 0xaa +#define LLC_CNTL_SNAP 3 + +#define LLC_HEADER_LEN 3 +struct llc_header { + uint8_t llc_dsap; + uint8_t llc_ssap; + uint8_t llc_cntl; +}; +BUILD_ASSERT_DECL(LLC_HEADER_LEN == sizeof(struct llc_header)); + +#define SNAP_ORG_ETHERNET "\0\0" /* The compiler adds a null byte, so + sizeof(SNAP_ORG_ETHERNET) == 3. */ +#define SNAP_HEADER_LEN 5 +struct snap_header { + uint8_t snap_org[3]; + uint16_t snap_type; +} __attribute__((packed)); +BUILD_ASSERT_DECL(SNAP_HEADER_LEN == sizeof(struct snap_header)); + +#define LLC_SNAP_HEADER_LEN (LLC_HEADER_LEN + SNAP_HEADER_LEN) +struct llc_snap_header { + struct llc_header llc; + struct snap_header snap; +}; +BUILD_ASSERT_DECL(LLC_SNAP_HEADER_LEN == sizeof(struct llc_snap_header)); + +#define VLAN_VID 0x0fff + +#define VLAN_HEADER_LEN 4 +struct vlan_header { + uint16_t vlan_tci; /* Lowest 12 bits are VLAN ID.
*/ + uint16_t vlan_next_type; +}; +BUILD_ASSERT_DECL(VLAN_HEADER_LEN == sizeof(struct vlan_header)); + +#define IP_VER(ip_ihl_ver) ((ip_ihl_ver) >> 4) +#define IP_IHL(ip_ihl_ver) ((ip_ihl_ver) & 15) + +#define IP_TYPE_TCP 6 +#define IP_TYPE_UDP 17 + +#define IP_HEADER_LEN 20 +struct ip_header { + uint8_t ip_ihl_ver; + uint8_t ip_tos; + uint16_t ip_tot_len; + uint16_t ip_id; + uint16_t ip_frag_off; + uint8_t ip_ttl; + uint8_t ip_proto; + uint16_t ip_csum; + uint32_t ip_src; + uint32_t ip_dst; +}; +BUILD_ASSERT_DECL(IP_HEADER_LEN == sizeof(struct ip_header)); + +#define UDP_HEADER_LEN 8 +struct udp_header { + uint16_t udp_src; + uint16_t udp_dst; + uint16_t udp_len; + uint16_t udp_csum; +}; +BUILD_ASSERT_DECL(UDP_HEADER_LEN == sizeof(struct udp_header)); + +#define TCP_FIN 0x01 +#define TCP_SYN 0x02 +#define TCP_RST 0x04 +#define TCP_PSH 0x08 +#define TCP_ACK 0x10 +#define TCP_URG 0x20 + +#define TCP_FLAGS(tcp_ctl) (htons(tcp_ctl) & 0x003f) +#define TCP_OFFSET(tcp_ctl) (htons(tcp_ctl) >> 12) + +#define TCP_HEADER_LEN 20 +struct tcp_header { + uint16_t tcp_src; + uint16_t tcp_dst; + uint32_t tcp_seq; + uint32_t tcp_ack; + uint16_t tcp_ctl; + uint16_t tcp_winsz; + uint16_t tcp_csum; + uint16_t tcp_urg; +}; +BUILD_ASSERT_DECL(TCP_HEADER_LEN == sizeof(struct tcp_header)); + +#define ARP_HRD_ETHERNET 1 +#define ARP_PRO_IP 0x0800 +#define ARP_OP_REQUEST 1 +#define ARP_OP_REPLY 2 + +#define ARP_ETH_HEADER_LEN 28 +struct arp_eth_header { + /* Generic members. */ + uint16_t ar_hrd; /* Hardware type. */ + uint16_t ar_pro; /* Protocol type. */ + uint8_t ar_hln; /* Hardware address length. */ + uint8_t ar_pln; /* Protocol address length. */ + uint16_t ar_op; /* Opcode. */ + + /* Ethernet+IPv4 specific members. */ + uint8_t ar_sha[ETH_ADDR_LEN]; /* Sender hardware address. */ + uint32_t ar_spa; /* Sender protocol address. */ + uint8_t ar_tha[ETH_ADDR_LEN]; /* Target hardware address. */ + uint32_t ar_tpa; /* Target protocol address. 
*/ +} __attribute__((packed)); +BUILD_ASSERT_DECL(ARP_ETH_HEADER_LEN == sizeof(struct arp_eth_header)); + +#endif /* packets.h */ diff --git a/include/socket-util.h b/include/socket-util.h new file mode 100644 index 00000000..2d167f94 --- /dev/null +++ b/include/socket-util.h @@ -0,0 +1,31 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef SOCKET_UTIL_H +#define SOCKET_UTIL_H 1 + +#include +#include + +int set_nonblocking(int fd); +int lookup_ip(const char *host_name, struct in_addr *address); + +#endif /* socket-util.h */ diff --git a/include/util.h b/include/util.h new file mode 100644 index 00000000..5a12a091 --- /dev/null +++ b/include/util.h @@ -0,0 +1,84 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef UTIL_H +#define UTIL_H 1 + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include "compiler.h" + +#ifndef __cplusplus +/* Build-time assertion for use in a statement context. */ +#define BUILD_ASSERT(EXPR) \ + sizeof(struct { unsigned int build_assert_failed : (EXPR) ? 1 : -1; }) + +/* Build-time assertion for use in a declaration context. */ +#define BUILD_ASSERT_DECL(EXPR) \ + extern int (*build_assert(void))[BUILD_ASSERT(EXPR)] +#else /* __cplusplus */ +#include <boost/static_assert.hpp> +#define BUILD_ASSERT BOOST_STATIC_ASSERT +#define BUILD_ASSERT_DECL BOOST_STATIC_ASSERT +#endif /* __cplusplus */ + +extern const char *program_name; + +#define ARRAY_SIZE(ARRAY) (sizeof (ARRAY) / sizeof *(ARRAY)) /* ARRAY must be an array, not a pointer. */ +#define ROUND_UP(X, Y) (((X) + ((Y) - 1)) / (Y) * (Y)) +#define ROUND_DOWN(X, Y) ((X) / (Y) * (Y)) +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#define MAX(X, Y) ((X) > (Y) ?
(X) : (Y)) + +#define NOT_REACHED() abort() +#define NOT_IMPLEMENTED() abort() +#define NOT_TESTED() ((void) 0) /* XXX should print a message. */ + +/* Given POINTER, the address of the given MEMBER in a STRUCT object, returns + the STRUCT object. */ +#define CONTAINER_OF(POINTER, STRUCT, MEMBER) \ + ((STRUCT *) ((char *) (POINTER) - offsetof (STRUCT, MEMBER))) + +#ifdef __cplusplus +extern "C" { +#endif + +void set_program_name(const char *); + +void *xmalloc(size_t); +void *xcalloc(size_t, size_t); +void *xrealloc(void *, size_t); +char *xstrdup(const char *); +char *xasprintf(const char *format, ...) PRINTF_FORMAT(1, 2); + +void fatal(int err_no, const char *format, ...) PRINTF_FORMAT(2, 3) NO_RETURN; +void error(int err_no, const char *format, ...) PRINTF_FORMAT(2, 3); +void debug(int err_no, const char *format, ...) PRINTF_FORMAT(2, 3); +void debug_msg(int err_no, const char *format, ...) PRINTF_FORMAT(2, 3); +void hex_dump(FILE *, const void *, size_t, uintptr_t offset, bool ascii); + +#ifdef __cplusplus +} +#endif + +#endif /* util.h */ diff --git a/include/vconn.h b/include/vconn.h new file mode 100644 index 00000000..f6da0ad4 --- /dev/null +++ b/include/vconn.h @@ -0,0 +1,154 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VCONN_H +#define VCONN_H 1 + +#include +#include + +struct buffer; +struct flow; +struct pollfd; + +/* Client interface. */ + +/* Virtual connection to an OpenFlow device. */ +struct vconn { + struct vconn_class *class; +}; + +/* What kind of operation do we want to perform? */ +enum { + WANT_ACCEPT = 1 << 0, /* Want to accept a new connection. */ + WANT_RECV = 1 << 1, /* Want to receive a message. */ + WANT_SEND = 1 << 2 /* Want to send a message. */ +}; + +int vconn_open(const char *name, struct vconn **); +void vconn_close(struct vconn *); +bool vconn_is_passive(const struct vconn *); +void vconn_prepoll(struct vconn *, int want, struct pollfd *); +void vconn_postpoll(struct vconn *, short int *revents); +int vconn_accept(struct vconn *, struct vconn **); +int vconn_recv(struct vconn *, struct buffer **); +int vconn_send(struct vconn *, struct buffer *); +int vconn_send_wait(struct vconn *, struct buffer *); + +struct buffer *make_add_simple_flow(const struct flow *, + uint32_t buffer_id, uint16_t out_port); +struct buffer *make_buffered_packet_out(uint32_t buffer_id, + uint16_t in_port, uint16_t out_port); +struct buffer *make_unbuffered_packet_out(const struct buffer *packet, + uint16_t in_port, uint16_t out_port); + +/* Provider interface. */ + +struct vconn_class { + /* Prefix for connection names, e.g. "nl", "tcp". */ + const char *name; + + /* Attempts to connect to an OpenFlow device. 'name' is the full + * connection name provided by the user, e.g. 
"nl:0", "tcp:1.2.3.4". This + * name is useful for error messages but must not be modified. + * + * 'suffix' is a copy of 'name' following the colon and may be modified. + * + * Returns 0 if successful, otherwise a positive errno value. If + * successful, stores a pointer to the new connection in '*vconnp'. */ + int (*open)(const char *name, char *suffix, struct vconn **vconnp); + + /* Closes 'vconn' and frees associated memory. */ + void (*close)(struct vconn *vconn); + + /* Called by the main loop before calling poll(), this function must + * initialize 'pfd->fd' and 'pfd->events' appropriately so that poll() will + * wake up when the connection becomes available for the operations + * specified in 'want'. The prepoll function may also set bits in 'pfd' to + * allow for internal processing. */ + void (*prepoll)(struct vconn *, int want, struct pollfd *pfd); + + /* Called by the main loop after calling poll(), this function may perform + * any internal processing needed by the connection. It is provided with + * the vconn file descriptor's status in '*revents', as reported by poll(). + * + * The postpoll function should adjust '*revents' to reflect the status of + * the connection from the caller's point of view: that is, upon return + * '*revents & POLLIN' should indicate that a packet is (potentially) ready + * to be read (for an active vconn) or a new connection is ready to be + * accepted (for a passive vconn) and '*revents & POLLOUT' should indicate + * that a packet is (potentially) ready to be written. + * + * This function may be a null pointer in a vconn class that has no use for + * it, that is, if the vconn does not need to do any internal processing + * and poll's revents out properly reflects the vconn's status. */ + void (*postpoll)(struct vconn *, short int *revents); + + /* Tries to accept a new connection on 'vconn', which must be a passive + * vconn. If successful, stores the new connection in '*new_vconnp' and + * returns 0. 
Otherwise, returns a positive errno value. + * + * The accept function must not block waiting for a connection. If no + * connection is ready to be accepted, it should return EAGAIN. + * + * Nonnull iff this is a passive vconn (one that accepts connection and + * does not transfer data). */ + int (*accept)(struct vconn *vconn, struct vconn **new_vconnp); + + /* Tries to receive an OpenFlow message from 'vconn', which must be an + * active vconn. If successful, stores the received message into '*msgp' + * and returns 0. The caller is responsible for destroying the message + * with buffer_delete(). On failure, returns a positive errno value and + * stores a null pointer into '*msgp'. + * + * If the connection has been closed in the normal fashion, returns EOF. + * + * The recv function must not block waiting for a packet to arrive. If no + * packets have been received, it should return EAGAIN. + * + * Nonnull iff this is an active vconn (one that transfers data and does + * not accept connections). */ + int (*recv)(struct vconn *vconn, struct buffer **msgp); + + /* Tries to queue 'msg' for transmission on 'vconn', which must be an + * active vconn. If successful, returns 0, in which case ownership of + * 'msg' is transferred to the vconn. Success does not guarantee that + * 'msg' has been or ever will be delivered to the peer, only that it has + * been queued for transmission. + * + * Returns a positive errno value on failure, in which case the caller + * retains ownership of 'msg'. + * + * The send function must not block. If 'msg' cannot be immediately + * accepted for transmission, it should return EAGAIN. + * + * Nonnull iff this is an active vconn (one that transfers data and does + * not accept connections). 
*/ + int (*send)(struct vconn *vconn, struct buffer *msg); +}; + +extern struct vconn_class tcp_vconn_class; +extern struct vconn_class ptcp_vconn_class; +#ifdef HAVE_NETLINK +extern struct vconn_class netlink_vconn_class; +#endif + +#endif /* vconn.h */ diff --git a/include/vlog-socket.h b/include/vlog-socket.h new file mode 100644 index 00000000..90ec97cb --- /dev/null +++ b/include/vlog-socket.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2008 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VLOG_SOCKET_H +#define VLOG_SOCKET_H 1 + +/* Server for Vlog control connection. */ +struct vlog_server; +int vlog_server_listen(const char *path, struct vlog_server **); +void vlog_server_close(struct vlog_server *); +int vlog_server_get_fd(const struct vlog_server *); +void vlog_server_poll(struct vlog_server *); + +/* Client for Vlog control connection. 
*/ +struct vlog_client; +int vlog_client_connect(const char *path, struct vlog_client **); +void vlog_client_close(struct vlog_client *); +int vlog_client_send(struct vlog_client *, const char *request); +int vlog_client_recv(struct vlog_client *, char **reply); +int vlog_client_transact(struct vlog_client *, + const char *request, char **reply); +const char *vlog_client_target(const struct vlog_client *); + +#endif /* vlog-socket.h */ diff --git a/include/vlog.h b/include/vlog.h new file mode 100644 index 00000000..f47324b8 --- /dev/null +++ b/include/vlog.h @@ -0,0 +1,100 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VLOG_H +#define VLOG_H 1 + +#include + +/* Logging importance levels. 
*/ +enum vlog_level { + VLL_EMER, + VLL_ERR, + VLL_WARN, + VLL_DBG, + VLL_N_LEVELS +}; + +const char *vlog_get_level_name(enum vlog_level); +enum vlog_level vlog_get_level_val(const char *name); + +/* Facilities that we can log to. */ +enum vlog_facility { + VLF_SYSLOG, + VLF_CONSOLE, + VLF_N_FACILITIES, + VLF_ANY_FACILITY = -1 +}; + +const char *vlog_get_facility_name(enum vlog_facility); +enum vlog_facility vlog_get_facility_val(const char *name); + +/* Modules that can emit log messages. */ +#define VLOG_MODULES \ + VLOG_MODULE(controller) \ + VLOG_MODULE(ctlpath) \ + VLOG_MODULE(dpif) \ + VLOG_MODULE(dpctl) \ + VLOG_MODULE(fault) \ + VLOG_MODULE(flow) \ + VLOG_MODULE(netlink) \ + VLOG_MODULE(secchan) \ + VLOG_MODULE(socket_util) \ + VLOG_MODULE(vconn_netlink) \ + VLOG_MODULE(vconn_tcp) \ + VLOG_MODULE(vconn) \ + +/* VLM_ constant for each vlog module. */ +enum vlog_module { +#define VLOG_MODULE(NAME) VLM_##NAME, + VLOG_MODULES +#undef VLOG_MODULE + VLM_N_MODULES, + VLM_ANY_MODULE = -1 +}; + +const char *vlog_get_module_name(enum vlog_module); +enum vlog_module vlog_get_module_val(const char *name); + +/* Configuring how each module logs messages. */ +enum vlog_level vlog_get_level(enum vlog_module, enum vlog_facility); +void vlog_set_levels(enum vlog_module, enum vlog_facility, enum vlog_level); +char *vlog_set_levels_from_string(const char *); +char *vlog_get_levels(void); +void vlog_set_verbosity(const char *arg); + +/* Function for actual logging. */ +void vlog_init(void); +void vlog_exit(void); +void vlog(enum vlog_module, enum vlog_level, const char *format, ...) + __attribute__((format(printf, 3, 4))); + +/* Convenience macros. To use these, define THIS_MODULE as a macro that + * expands to the module used by the current source file, e.g. + * #include "vlog.h" + * #define THIS_MODULE VLM_NETLINK + */ +#define VLOG_EMER(...) vlog(THIS_MODULE, VLL_EMER, __VA_ARGS__) +#define VLOG_ERR(...) vlog(THIS_MODULE, VLL_ERR, __VA_ARGS__) +#define VLOG_WARN(...) 
vlog(THIS_MODULE, VLL_WARN, __VA_ARGS__) +#define VLOG_DBG(...) vlog(THIS_MODULE, VLL_DBG, __VA_ARGS__) + +#endif /* vlog.h */ diff --git a/include/xtoxll.h b/include/xtoxll.h new file mode 100644 index 00000000..22070941 --- /dev/null +++ b/include/xtoxll.h @@ -0,0 +1,19 @@ +#ifndef XTOXLL_H +#define XTOXLL_H 1 + +#include +#include + +static inline uint64_t +htonll(uint64_t n) +{ + return htonl(1) == 1 ? n : ((uint64_t) htonl(n) << 32) | htonl(n >> 32); +} + +static inline uint64_t +ntohll(uint64_t n) +{ + return htonl(1) == 1 ? n : ((uint64_t) ntohl(n) << 32) | ntohl(n >> 32); +} + +#endif /* xtonll.h */ diff --git a/lib/.gitignore b/lib/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null +++ b/lib/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/lib/Makefile.am b/lib/Makefile.am new file mode 100644 index 00000000..d9a490c5 --- /dev/null +++ b/lib/Makefile.am @@ -0,0 +1,27 @@ +include ../Make.vars + +noinst_LTLIBRARIES = libopenflow.la + +libopenflow_la_SOURCES = \ + buffer.c \ + command-line.c \ + dynamic-string.c \ + fatal-signal.c \ + fault.c \ + flow.c \ + hash.c \ + list.c \ + ofp-print.c \ + socket-util.c \ + util.c \ + vconn-tcp.c \ + vconn.c \ + vlog-socket.c \ + vlog.c + +if HAVE_NETLINK +libopenflow_la_SOURCES += \ + dpif.c \ + netlink.c \ + vconn-netlink.c +endif diff --git a/lib/buffer.c b/lib/buffer.c new file mode 100644 index 00000000..0ce1045e --- /dev/null +++ b/lib/buffer.c @@ -0,0 +1,192 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "buffer.h" +#include +#include +#include +#include "util.h" + +/* Initializes 'b' as an empty buffer that contains the 'allocated' bytes of + * memory starting at 'base'. + * + * 'base' should ordinarily be the first byte of a region obtained from + * malloc(), but in circumstances where it can be guaranteed that 'b' will + * never need to be expanded or freed, it can be a pointer into arbitrary + * memory. */ +void +buffer_use(struct buffer *b, void *base, size_t allocated) +{ + b->base = b->data = base; + b->allocated = allocated; + b->size = 0; + b->next = NULL; +} + +/* Initializes 'b' as a buffer with an initial capacity of 'size' bytes. */ +void +buffer_init(struct buffer *b, size_t size) +{ + buffer_use(b, size ? xmalloc(size) : NULL, size); +} + +/* Frees memory that 'b' points to. 
*/ +void +buffer_uninit(struct buffer *b) +{ + if (b) { + free(b->base); + } +} + +/* Frees memory that 'b' points to and allocates a new buffer */ +void +buffer_reinit(struct buffer *b, size_t size) +{ + buffer_uninit(b); + buffer_init(b, size); +} + +/* Creates and returns a new buffer with an initial capacity of 'size' + * bytes. */ +struct buffer * +buffer_new(size_t size) +{ + struct buffer *b = xmalloc(sizeof *b); + buffer_init(b, size); + return b; +} + +/* Frees memory that 'b' points to, as well as 'b' itself. */ +void +buffer_delete(struct buffer *b) +{ + if (b) { + buffer_uninit(b); + free(b); + } +} + +/* Returns the number of bytes of headroom in 'b', that is, the number of bytes + * of unused space in buffer 'b' before the data that is in use. (Most + * commonly, the data in a buffer is at its beginning, and thus the buffer's + * headroom is 0.) */ +size_t +buffer_headroom(struct buffer *b) +{ + return b->data - b->base; +} + +/* Returns the number of bytes that may be appended to the tail end of buffer + * 'b' before the buffer must be reallocated. */ +size_t +buffer_tailroom(struct buffer *b) +{ + return buffer_end(b) - buffer_tail(b); +} + +/* Ensures that 'b' has room for at least 'size' bytes at its tail end, + * reallocating and copying its data if necessary. */ +void +buffer_reserve_tailroom(struct buffer *b, size_t size) +{ + if (size > buffer_tailroom(b)) { + size_t headroom = buffer_headroom(b); + size_t new_allocated = b->allocated + MAX(size, 64); + void *new_base = xmalloc(new_allocated); + memcpy(new_base, b->base, b->allocated); + free(b->base); + b->base = new_base; + b->allocated = new_allocated; + b->data = new_base + headroom; + } +} + +/* Appends 'size' bytes of data to the tail end of 'b', reallocating and + * copying its data if necessary. Returns a pointer to the first byte of the + * new data, which is left uninitialized. 
*/ +void * +buffer_put_uninit(struct buffer *b, size_t size) +{ + void *p; + buffer_reserve_tailroom(b, size); + p = buffer_tail(b); + b->size += size; + return p; +} + +/* Appends the 'size' bytes of data in 'p' to the tail end of 'b'. Data in 'b' + * is reallocated and copied if necessary. */ +void +buffer_put(struct buffer *b, const void *p, size_t size) +{ + memcpy(buffer_put_uninit(b, size), p, size); +} + +/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to + * byte 'offset'. Otherwise, returns a null pointers. */ +void * +buffer_at(const struct buffer *b, size_t offset, size_t size) +{ + return offset + size <= b->size ? (char *) b->data + offset : NULL; +} + +/* Returns a pointer to byte 'offset' in 'b', which must contain at least + * 'offset + size' bytes of data. */ +void * +buffer_at_assert(const struct buffer *b, size_t offset, size_t size) +{ + assert(offset + size <= b->size); + return ((char *) b->data) + offset; +} + +/* Returns the byte following the last byte of data in use in 'b'. */ +void * +buffer_tail(const struct buffer *b) +{ + return (char *) b->data + b->size; +} + +/* Returns the byte following the last byte allocated for use (but not + * necessarily in use) by 'b'. */ +void * +buffer_end(const struct buffer *b) +{ + return (char *) b->base + b->allocated; +} + +/* Clears any data from 'b'. */ +void +buffer_clear(struct buffer *b) +{ + b->data = b->base; + b->size = 0; +} + +/* Removes 'size' bytes from the head end of 'b', which must contain at least + * 'size' bytes of data. */ +void +buffer_pull(struct buffer *b, size_t size) +{ + assert(b->size >= size); + b->data += size; + b->size -= size; +} diff --git a/lib/command-line.c b/lib/command-line.c new file mode 100644 index 00000000..fbd8e8eb --- /dev/null +++ b/lib/command-line.c @@ -0,0 +1,53 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "command-line.h" +#include +#include +#include "util.h" +#include "vlog.h" + +/* Given the GNU-style long options in 'options', returns a string that may be + * passed to getopt() with the corresponding short options. The caller is + * responsible for freeing the string. 
*/ +char * +long_options_to_short_options(const struct option options[]) +{ + char short_options[UCHAR_MAX * 3 + 1]; + char *p = short_options; + + for (; options->name; options++) { + const struct option *o = options; + if (o->flag == NULL && o->val > 0 && o->val <= UCHAR_MAX) { + *p++ = o->val; + if (o->has_arg == required_argument) { + *p++ = ':'; + } else if (o->has_arg == optional_argument) { + *p++ = ':'; + *p++ = ':'; + } + } + } + *p = '\0'; + + return xstrdup(short_options); +} + diff --git a/lib/dpif.c b/lib/dpif.c new file mode 100644 index 00000000..6bd6fef7 --- /dev/null +++ b/lib/dpif.c @@ -0,0 +1,519 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "dpif.h" + +#include +#include +#include +#include +#include +#include + +#include "buffer.h" +#include "mac.h" +#include "netlink.h" +#include "ofp-print.h" +#include "openflow-netlink.h" +#include "openflow.h" +#include "util.h" +#include "xtoxll.h" + +#include "vlog.h" +#define THIS_MODULE VLM_dpif + +/* The Generic Netlink family number used for OpenFlow. */ +static int openflow_family; + +static int lookup_openflow_multicast_group(int dp_idx, int *multicast_group); +static int send_mgmt_command(struct dpif *, int command, + const char *netdev); + +/* Opens the local datapath numbered 'dp_idx', initializing 'dp'. If + * 'subscribe' is true, listens for asynchronous messages (packet-in, etc.) + * from the datapath; otherwise, 'dp' will receive only replies to explicitly + * initiated requests. */ +int +dpif_open(int dp_idx, bool subscribe, struct dpif *dp) +{ + struct nl_sock *sock; + int multicast_group = 0; + int retval; + + retval = nl_lookup_genl_family(DP_GENL_FAMILY_NAME, &openflow_family); + if (retval) { + return retval; + } + + if (subscribe) { + retval = lookup_openflow_multicast_group(dp_idx, &multicast_group); + if (retval) { + return retval; + } + } + + /* Specify a large so_rcvbuf size because we occasionally need to be able + * to retrieve large collections of flow records. */ + retval = nl_sock_create(NETLINK_GENERIC, multicast_group, 0, + 4 * 1024u * 1024, &sock); + if (retval) { + return retval; + } + + dp->dp_idx = dp_idx; + dp->sock = sock; + return 0; +} + +/* Closes 'dp'. */ +void +dpif_close(struct dpif *dp) +{ + nl_sock_destroy(dp->sock); +} + +static const struct nl_policy openflow_policy[] = { + [DP_GENL_A_DP_IDX] = { .type = NL_A_U32 }, + [DP_GENL_A_OPENFLOW] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct ofp_header), + .max_len = OFP_MAXLEN }, +}; + +/* Tries to receive an openflow message from the kernel on 'sock'. If + * successful, stores the received message into '*msgp' and returns 0. 
The + * caller is responsible for destroying the message with buffer_delete(). On + * failure, returns a positive errno value and stores a null pointer into + * '*msgp'. + * + * Only Netlink messages with embedded OpenFlow messages are accepted. Other + * Netlink messages provoke errors. + * + * If 'wait' is true, dpif_recv_openflow waits for a message to be ready; + * otherwise, returns EAGAIN if the 'sock' receive buffer is empty. */ +int +dpif_recv_openflow(struct dpif *dp, struct buffer **bufferp, + bool wait) +{ + struct nlattr *attrs[ARRAY_SIZE(openflow_policy)]; + struct buffer *buffer; + struct ofp_header *oh; + size_t ofp_len; + int retval; + + *bufferp = NULL; + do { + retval = nl_sock_recv(dp->sock, &buffer, wait); + } while (retval == ENOBUFS || (!retval && nl_msg_nlmsgerr(buffer, NULL))); + if (retval) { + if (retval != EAGAIN) { + VLOG_WARN("dpif_recv_openflow: %s", strerror(retval)); + } + return retval; + } + + if (nl_msg_genlmsghdr(buffer) == NULL) { + VLOG_DBG("received packet too short for Generic Netlink"); + goto error; + } + if (nl_msg_nlmsghdr(buffer)->nlmsg_type != openflow_family) { + VLOG_DBG("received type (%"PRIu16") != openflow family (%d)", + nl_msg_nlmsghdr(buffer)->nlmsg_type, openflow_family); + goto error; + } + + if (!nl_policy_parse(buffer, openflow_policy, attrs, + ARRAY_SIZE(openflow_policy))) { + goto error; + } + if (nl_attr_get_u32(attrs[DP_GENL_A_DP_IDX]) != dp->dp_idx) { + VLOG_WARN("received dp_idx (%"PRIu32") differs from expected (%d)", + nl_attr_get_u32(attrs[DP_GENL_A_DP_IDX]), dp->dp_idx); + goto error; + } + + oh = buffer->data = (void *) nl_attr_get(attrs[DP_GENL_A_OPENFLOW]); + buffer->size = nl_attr_get_size(attrs[DP_GENL_A_OPENFLOW]); + ofp_len = ntohs(oh->length); + if (ofp_len != buffer->size) { + VLOG_WARN("ofp_header.length %"PRIu16" != attribute length %zu\n", + ofp_len, buffer->size); + buffer->size = MIN(ofp_len, buffer->size); + } + *bufferp = buffer; + return 0; + +error: + buffer_delete(buffer); + 
return EPROTO; +} + +/* Encapsulates 'buffer', which must contain an OpenFlow message, in a Netlink + * message, and sends it to the OpenFlow kernel module via 'sock'. + * + * Returns 0 if successful, otherwise a positive errno value. If + * 'wait' is true, then the send will wait until buffer space is ready; + * otherwise, returns EAGAIN if the 'sock' send buffer is full. + * + * If the send is successful, then the kernel module will receive it, but there + * is no guarantee that any reply will not be dropped (see nl_sock_transact() + * for details). + */ +int +dpif_send_openflow(struct dpif *dp, struct buffer *buffer, bool wait) +{ + struct buffer hdr; + struct nlattr *nla; + uint32_t fixed_buffer[64 / 4]; + struct iovec iov[3]; + int pad_bytes; + int n_iov; + int retval; + + buffer_use(&hdr, fixed_buffer, sizeof fixed_buffer); + nl_msg_put_genlmsghdr(&hdr, dp->sock, 32, openflow_family, + NLM_F_REQUEST, DP_GENL_C_OPENFLOW, 1); + nl_msg_put_u32(&hdr, DP_GENL_A_DP_IDX, dp->dp_idx); + /* Reserve the attribute header itself: 'sizeof *nla', not the size of the + * pointer, which differs from it on 64-bit platforms. */ + nla = buffer_put_uninit(&hdr, sizeof *nla); + nla->nla_len = sizeof *nla + buffer->size; + nla->nla_type = DP_GENL_A_OPENFLOW; + pad_bytes = NLA_ALIGN(nla->nla_len) - nla->nla_len; + nl_msg_nlmsghdr(&hdr)->nlmsg_len = hdr.size + buffer->size + pad_bytes; + /* Send header, payload and (if needed) padding without copying. */ + n_iov = 2; + iov[0].iov_base = hdr.data; + iov[0].iov_len = hdr.size; + iov[1].iov_base = buffer->data; + iov[1].iov_len = buffer->size; + if (pad_bytes) { + static char zeros[NLA_ALIGNTO]; + n_iov++; + iov[2].iov_base = zeros; + iov[2].iov_len = pad_bytes; + } + retval = nl_sock_sendv(dp->sock, iov, n_iov, wait); + if (retval && retval != EAGAIN) { + VLOG_WARN("dpif_send_openflow: %s", strerror(retval)); + } + return retval; +} + +/* Creates the datapath represented by 'dp'. Returns 0 if successful, + * otherwise a positive errno value. */ +int +dpif_add_dp(struct dpif *dp) +{ + return send_mgmt_command(dp, DP_GENL_C_ADD_DP, NULL); +} + +/* Destroys the datapath represented by 'dp'. 
Returns 0 if successful, + * otherwise a positive errno value. */ +int +dpif_del_dp(struct dpif *dp) +{ + return send_mgmt_command(dp, DP_GENL_C_DEL_DP, NULL); +} + +/* Adds the Ethernet device named 'netdev' to this datapath. Returns 0 if + * successful, otherwise a positive errno value. */ +int +dpif_add_port(struct dpif *dp, const char *netdev) +{ + return send_mgmt_command(dp, DP_GENL_C_ADD_PORT, netdev); +} + +/* Removes the Ethernet device named 'netdev' from this datapath. Returns 0 + * if successful, otherwise a positive errno value. */ +int +dpif_del_port(struct dpif *dp, const char *netdev) +{ + return send_mgmt_command(dp, DP_GENL_C_DEL_PORT, netdev); +} + +/* Prints a description of 'dp' to stdout. Returns 0 if successful, otherwise + * a positive errno value (EPROTO if the reply cannot be parsed or does not + * carry an OFPT_DATA_HELLO message of version OFP_VERSION). */ +int +dpif_show(struct dpif *dp) +{ + static const struct nl_policy show_policy[] = { + [DP_GENL_A_DP_INFO] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct ofp_data_hello), + .max_len = SIZE_MAX }, + }; + + struct buffer request, *reply; + struct nlattr *attrs[ARRAY_SIZE(show_policy)]; + struct ofp_data_hello *odh; + int retval; + size_t len; + + buffer_init(&request, 0); + nl_msg_put_genlmsghdr(&request, dp->sock, 0, openflow_family, + NLM_F_REQUEST, DP_GENL_C_SHOW_DP, 1); + nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp->dp_idx); + retval = nl_sock_transact(dp->sock, &request, &reply); + buffer_uninit(&request); + if (retval) { + return retval; + } + if (!nl_policy_parse(reply, show_policy, attrs, + ARRAY_SIZE(show_policy))) { + buffer_delete(reply); + return EPROTO; + } + + odh = (void *) nl_attr_get(attrs[DP_GENL_A_DP_INFO]); + if (odh->header.version != OFP_VERSION + || odh->header.type != OFPT_DATA_HELLO) { + VLOG_ERR("bad show query response (%"PRIu8",%"PRIu8")", + odh->header.version, odh->header.type); + buffer_delete(reply); + return EPROTO; + } + + len = nl_attr_get_size(attrs[DP_GENL_A_DP_INFO]); + ofp_print_data_hello(stdout, odh, len, 1); + /* 'odh' points into 'reply', so free it only after printing. */ + buffer_delete(reply); + + return retval; +} + +static const 
struct nl_policy table_policy[] = { + [DP_GENL_A_NUMTABLES] = { .type = NL_A_U32 }, + [DP_GENL_A_TABLE] = { .type = NL_A_UNSPEC }, +}; + +/* Writes a description of 'dp''s tables to stdout. Returns 0 if successful, + * otherwise a positive errno value. */ +int +dpif_dump_tables(struct dpif *dp) +{ + struct buffer request, *reply; + struct nlattr *attrs[ARRAY_SIZE(table_policy)]; + const struct ofp_table *tables; + int n_tables; + int i; + int retval; + + buffer_init(&request, 0); + nl_msg_put_genlmsghdr(&request, dp->sock, 0, openflow_family, + NLM_F_REQUEST, DP_GENL_C_QUERY_TABLE, 1); + nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp->dp_idx); + retval = nl_sock_transact(dp->sock, &request, &reply); + buffer_uninit(&request); + if (retval) { + return retval; + } + if (!nl_policy_parse(reply, table_policy, attrs, + ARRAY_SIZE(table_policy))) { + buffer_delete(reply); + return EPROTO; + } + + tables = nl_attr_get(attrs[DP_GENL_A_TABLE]); + n_tables = (nl_attr_get_size(attrs[DP_GENL_A_TABLE]) + / sizeof(struct ofp_table)); + n_tables = MIN(n_tables, nl_attr_get_u32(attrs[DP_GENL_A_NUMTABLES])); + for (i = 0; i < n_tables; i++) { + const struct ofp_table *ot = &tables[i]; + if (ot->header.version != 1 || ot->header.type != OFPT_TABLE) { + VLOG_DBG("bad table query response (%"PRIu8",%"PRIu8")", + ot->header.version, ot->header.type); + retval = EPROTO; + break; + } + + ofp_print_table(stdout, ot); + fprintf(stdout,"\n"); + } + buffer_delete(reply); + + return retval; +} + +static const struct nl_policy flow_policy[] = { + [DP_GENL_A_TABLEIDX] = { .type = NL_A_U16 }, + [DP_GENL_A_NUMFLOWS] = { .type = NL_A_U32 }, + [DP_GENL_A_FLOW] = { .type = NL_A_UNSPEC }, +}; + +struct _dump_ofp_flow_mod +{ + struct ofp_flow_mod ofm; + struct ofp_action oa; +}; + +/* Writes a description of flows in the given 'table' in 'dp' to stdout. If + * 'match' is null, all flows in the table are written; otherwise, only + * matching flows are written. 
Returns 0 if successful, otherwise a positive + * errno value. */ +int +dpif_dump_flows(struct dpif *dp, int table, struct ofp_match *match) +{ + struct buffer request, *reply; + struct ofp_flow_mod *ofm; + int retval; + + buffer_init(&request, 0); + nl_msg_put_genlmsghdr(&request, dp->sock, 0, openflow_family, NLM_F_REQUEST, + DP_GENL_C_QUERY_FLOW, 1); + nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp->dp_idx); + nl_msg_put_u16(&request, DP_GENL_A_TABLEIDX, table); + ofm = nl_msg_put_unspec_uninit(&request, DP_GENL_A_FLOW, sizeof *ofm); + memset(ofm, 0, sizeof *ofm); + ofm->header.version = 1; + ofm->header.type = OFPT_FLOW_MOD; + /* Length of the message, not of the pointer to it. */ + ofm->header.length = htons(sizeof *ofm); + if (match) { + ofm->match = *match; + } else { + ofm->match.wildcards = htons(OFPFW_ALL); + } + retval = nl_sock_transact(dp->sock, &request, &reply); + buffer_uninit(&request); + if (retval) { + return retval; + } + + /* Each reply carries a batch of flows; a batch with no flows ends the + * dump. 'reply' is owned here and freed on every exit path. */ + for (;;) { + struct nlattr *attrs[ARRAY_SIZE(flow_policy)]; + const struct _dump_ofp_flow_mod *flows, *ofm; + int n_flows; + + if (!nl_policy_parse(reply, flow_policy, attrs, + ARRAY_SIZE(flow_policy))) { + buffer_delete(reply); + return EPROTO; + } + /* Count whole records of the type iterated over below, so that the + * loop never reads past the end of the attribute. */ + n_flows = (nl_attr_get_size(attrs[DP_GENL_A_FLOW]) + / sizeof *flows); + n_flows = MIN(n_flows, nl_attr_get_u32(attrs[DP_GENL_A_NUMFLOWS])); + if (n_flows <= 0) { + buffer_delete(reply); + break; + } + + flows = nl_attr_get(attrs[DP_GENL_A_FLOW]); + for (ofm = flows; ofm < &flows[n_flows]; ofm++) { + if (ofm->ofm.header.version != 1){ + VLOG_DBG("recv_dp_flow incorrect version"); + buffer_delete(reply); + return EPROTO; + } else if (ofm->ofm.header.type != OFPT_FLOW_MOD) { + VLOG_DBG("recv_fp_flow bad return message type"); + buffer_delete(reply); + return EPROTO; + } + + ofp_print_flow_mod(stdout, &ofm->ofm, + sizeof(struct ofp_flow_mod), 1); + putc('\n', stdout); + } + + buffer_delete(reply); + retval = nl_sock_recv(dp->sock, &reply, true); + if (retval) { + return retval; + } + } + return 0; +} + +/* Tells dp to send num_packets up through 
netlink for benchmarking. + * + * 'num_packets' and 'packet_size' are sent as the DP_GENL_A_NPACKETS and + * DP_GENL_A_PSIZE attributes. Returns the value returned by nl_sock_send(). */ +int +dpif_benchmark_nl(struct dpif *dp, uint32_t num_packets, uint32_t packet_size) +{ + struct buffer request; + int retval; + + buffer_init(&request, 0); + nl_msg_put_genlmsghdr(&request, dp->sock, 0, openflow_family, + NLM_F_REQUEST, DP_GENL_C_BENCHMARK_NL, 1); + nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp->dp_idx); + nl_msg_put_u32(&request, DP_GENL_A_NPACKETS, num_packets); + nl_msg_put_u32(&request, DP_GENL_A_PSIZE, packet_size); + retval = nl_sock_send(dp->sock, &request, true); + buffer_uninit(&request); + + return retval; +} + +/* Attributes parsed from the reply to a DP_GENL_C_QUERY_DP request. */ +static const struct nl_policy openflow_multicast_policy[] = { + [DP_GENL_A_DP_IDX] = { .type = NL_A_U32 }, + [DP_GENL_A_MC_GROUP] = { .type = NL_A_U32 }, +}; + +/* Looks up the Netlink multicast group used by datapath 'dp_idx'. If + * successful, stores the multicast group in '*multicast_group' and returns 0. + * Otherwise, returns a positive errno value. + * + * The query runs over a temporary socket that is destroyed before + * returning. */ +static int +lookup_openflow_multicast_group(int dp_idx, int *multicast_group) +{ + struct nl_sock *sock; + struct buffer request, *reply; + struct nlattr *attrs[ARRAY_SIZE(openflow_multicast_policy)]; + int retval; + + retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock); + if (retval) { + return retval; + } + buffer_init(&request, 0); + nl_msg_put_genlmsghdr(&request, sock, 0, openflow_family, NLM_F_REQUEST, + DP_GENL_C_QUERY_DP, 1); + nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp_idx); + retval = nl_sock_transact(sock, &request, &reply); + buffer_uninit(&request); + if (retval) { + nl_sock_destroy(sock); + return retval; + } + if (!nl_policy_parse(reply, openflow_multicast_policy, attrs, + ARRAY_SIZE(openflow_multicast_policy))) { + nl_sock_destroy(sock); + buffer_delete(reply); + return EPROTO; + } + *multicast_group = nl_attr_get_u32(attrs[DP_GENL_A_MC_GROUP]); + nl_sock_destroy(sock); + buffer_delete(reply); + + return 0; +} + +/* Sends the given 'command' to datapath 'dp'. 
If 'netdev' is nonnull, adds it + * to the command as the port name attribute. Returns 0 if successful, + * otherwise a positive errno value. */ +static int +send_mgmt_command(struct dpif *dp, int command, const char *netdev) +{ + struct buffer request, *reply; + int retval; + + buffer_init(&request, 0); + nl_msg_put_genlmsghdr(&request, dp->sock, 32, openflow_family, + NLM_F_REQUEST | NLM_F_ACK, command, 1); + nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp->dp_idx); + if (netdev) { + nl_msg_put_string(&request, DP_GENL_A_PORTNAME, netdev); + } + retval = nl_sock_transact(dp->sock, &request, &reply); + buffer_uninit(&request); + if (!retval) { + /* 'reply' is only known to be valid on success. */ + buffer_delete(reply); + } + + return retval; +} diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c new file mode 100644 index 00000000..610bb7c8 --- /dev/null +++ b/lib/dynamic-string.c @@ -0,0 +1,98 @@ +/* Copyright (C) 2008 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dynamic-string.h" +#include +#include +#include "util.h" + +void +ds_init(struct ds *ds) +{ + ds->string = NULL; + ds->length = 0; + ds->allocated = 0; +} + +void +ds_reserve(struct ds *ds, size_t min_length) +{ + if (min_length > ds->allocated || !ds->string) { + ds->allocated += MAX(min_length, ds->allocated); + ds->allocated = MAX(8, ds->allocated); + ds->string = xrealloc(ds->string, ds->allocated + 1); + } +} + +void +ds_put_format(struct ds *ds, const char *format, ...) +{ + va_list args; + + va_start(args, format); + ds_put_format_valist(ds, format, args); + va_end(args); +} + +void +ds_put_format_valist(struct ds *ds, const char *format, va_list args_) +{ + va_list args; + size_t available; + int needed; + + va_copy(args, args_); + available = ds->string ? 
ds->allocated - ds->length + 1 : 0; + needed = vsnprintf(&ds->string[ds->length], available, format, args); + va_end(args); + + if (needed < available) { + ds->length += needed; + } else { + size_t available; + + ds_reserve(ds, ds->length + needed); + + va_copy(args, args_); + available = ds->allocated - ds->length + 1; + needed = vsnprintf(&ds->string[ds->length], available, format, args); + va_end(args); + + assert(needed < available); + ds->length += needed; + } +} + +char * +ds_cstr(struct ds *ds) +{ + if (!ds->string) { + ds_reserve(ds, 0); + ds->string[0] = '\0'; + } + return ds->string; +} + +void +ds_destroy(struct ds *ds) +{ + free(ds->string); +} diff --git a/lib/fatal-signal.c b/lib/fatal-signal.c new file mode 100644 index 00000000..5ab62848 --- /dev/null +++ b/lib/fatal-signal.c @@ -0,0 +1,181 @@ +#include "fatal-signal.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "util.h" + +/* Signals to catch. */ +static const int fatal_signals[] = { SIGTERM, SIGINT, SIGHUP }; + +/* Signals to catch as a sigset_t. */ +static sigset_t fatal_signal_set; + +/* Hooks to call upon catching a signal */ +struct hook { + void (*func)(void *aux); + void *aux; +}; +#define MAX_HOOKS 32 +static struct hook hooks[MAX_HOOKS]; +static size_t n_hooks; + +/* Number of nesting signal blockers. */ +static int block_level = 0; + +/* Signal mask saved by outermost signal blocker. */ +static sigset_t saved_signal_mask; + +static void call_sigprocmask(int how, sigset_t* new_set, sigset_t* old_set); +static void signal_handler(int sig_nr); + +/* Registers 'hook' to be called when a process termination signal is + * raised. */ +void +fatal_signal_add_hook(void (*func)(void *aux), void *aux) +{ + fatal_signal_block(); + assert(n_hooks < MAX_HOOKS); + hooks[n_hooks].func = func; + hooks[n_hooks].aux = aux; + n_hooks++; + fatal_signal_unblock(); +} + +/* Blocks program termination signals until fatal_signal_unblock() is called. 
+ * May be called multiple times with nesting; if so, fatal_signal_unblock() + * must be called the same number of times to unblock signals. + * + * This is needed while adjusting a data structure that will be accessed by a + * fatal signal hook, so that the hook is not invoked while the data structure + * is in an inconsistent state. */ +void +fatal_signal_block() +{ + static bool inited = false; + if (!inited) { + size_t i; + + inited = true; + sigemptyset(&fatal_signal_set); + for (i = 0; i < ARRAY_SIZE(fatal_signals); i++) { + int sig_nr = fatal_signals[i]; + sigaddset(&fatal_signal_set, sig_nr); + if (signal(sig_nr, signal_handler) == SIG_IGN) { + signal(sig_nr, SIG_IGN); + } + } + } + + if (++block_level == 1) { + call_sigprocmask(SIG_BLOCK, &fatal_signal_set, &saved_signal_mask); + } +} + +/* Unblocks program termination signals blocked by fatal_signal_block() is + * called. If multiple calls to fatal_signal_block() are nested, + * fatal_signal_unblock() must be called the same number of times to unblock + * signals. */ +void +fatal_signal_unblock() +{ + assert(block_level > 0); + if (--block_level == 0) { + call_sigprocmask(SIG_SETMASK, &saved_signal_mask, NULL); + } +} + +static char **files; +static size_t n_files, max_files; + +static void unlink_files(void *aux); +static void do_unlink_files(void); + +/* Registers 'file' to be unlinked when the program terminates via exit() or a + * fatal signal. */ +void +fatal_signal_add_file_to_unlink(const char *file) +{ + static bool added_hook = false; + if (!added_hook) { + added_hook = true; + fatal_signal_add_hook(unlink_files, NULL); + atexit(do_unlink_files); + } + + fatal_signal_block(); + if (n_files >= max_files) { + max_files = max_files * 2 + 1; + files = xrealloc(files, sizeof *files * max_files); + } + files[n_files++] = xstrdup(file); + fatal_signal_unblock(); +} + +/* Unregisters 'file' from being unlinked when the program terminates via + * exit() or a fatal signal. 
*/ +void +fatal_signal_remove_file_to_unlink(const char *file) +{ + size_t i; + + fatal_signal_block(); + for (i = 0; i < n_files; i++) { + if (!strcmp(files[i], file)) { + free(files[i]); + files[i] = files[--n_files]; + break; + } + } + fatal_signal_unblock(); +} + +static void +unlink_files(void *aux UNUSED) +{ + do_unlink_files(); +} + +static void +do_unlink_files(void) +{ + size_t i; + + for (i = 0; i < n_files; i++) { + unlink(files[i]); + } +} + +static void +call_sigprocmask(int how, sigset_t* new_set, sigset_t* old_set) +{ + int error = sigprocmask(how, new_set, old_set); + if (error) { + fprintf(stderr, "sigprocmask: %s\n", strerror(errno)); + } +} + +static void +signal_handler(int sig_nr) +{ + volatile sig_atomic_t recurse = 0; + if (!recurse) { + size_t i; + + recurse = 1; + + /* Call all the hooks. */ + for (i = 0; i < n_hooks; i++) { + hooks[i].func(hooks[i].aux); + } + } + + /* Re-raise the signal with the default handling so that the program + * termination status reflects that we were killed by this signal */ + signal(sig_nr, SIG_DFL); + raise(sig_nr); +} diff --git a/lib/fault.c b/lib/fault.c new file mode 100644 index 00000000..c1de3d81 --- /dev/null +++ b/lib/fault.c @@ -0,0 +1,77 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
/* Writes a backtrace of the current call stack to stderr, one return address
 * per line.  Where dladdr() can name the enclosing symbol, the line also
 * shows "symbol+offset".
 *
 * NOTE(review): the walk assumes each frame starts with a saved frame pointer
 * followed by the return address (code built with frame pointers) -- confirm
 * for every target architecture. */
void
log_backtrace(void)
{
    /* During the loop:

       frame[0] points to the next frame.
       frame[1] points to the return address. */
    void **frame;
    for (frame = __builtin_frame_address(0);
         frame != NULL && frame[0] != NULL;
         frame = frame[0]) {
        Dl_info addrinfo;
        if (!dladdr(frame[1], &addrinfo) || !addrinfo.dli_sname) {
            fprintf(stderr, " 0x%08"PRIxPTR"\n", (uintptr_t) frame[1]);
        } else {
            /* The offset is a pointer difference, so it is printed through
             * uintptr_t/PRIxPTR; "%x" expects an unsigned int and mismatches
             * wherever pointers are wider than int. */
            uintptr_t offset = (uintptr_t) ((char *) frame[1]
                                            - (char *) addrinfo.dli_saddr);
            fprintf(stderr, " 0x%08"PRIxPTR" (%s+0x%"PRIxPTR")\n",
                    (uintptr_t) frame[1], addrinfo.dli_sname, offset);
        }
    }
    fflush(stderr);
}
+flow_extract(const struct buffer *packet, uint16_t in_port, struct flow *flow) +{ + struct buffer b = *packet; + struct eth_header *eth; + + if (b.size < ETH_TOTAL_MIN) { + VLOG_WARN("packet length %d less than minimum size %d", + b.size, ETH_TOTAL_MIN); + } + + memset(flow, 0, sizeof *flow); + flow->in_port = htons(in_port); + + eth = buffer_at(&b, 0, sizeof *eth); + if (eth) { + buffer_pull(&b, ETH_HEADER_LEN); + if (ntohs(eth->eth_type) >= OFP_DL_TYPE_ETH2_CUTOFF) { + /* This is an Ethernet II frame */ + flow->dl_type = eth->eth_type; + } else { + /* This is an 802.2 frame */ + struct llc_snap_header *h = buffer_at(&b, 0, sizeof *h); + if (h == NULL) { + return; + } + if (h->llc.llc_dsap == LLC_DSAP_SNAP + && h->llc.llc_ssap == LLC_SSAP_SNAP + && h->llc.llc_cntl == LLC_CNTL_SNAP + && !memcmp(h->snap.snap_org, SNAP_ORG_ETHERNET, + sizeof h->snap.snap_org)) { + flow->dl_type = h->snap.snap_type; + buffer_pull(&b, sizeof *h); + } else { + flow->dl_type = OFP_DL_TYPE_NOT_ETH_TYPE; + buffer_pull(&b, sizeof(struct llc_header)); + } + } + + /* Check for a VLAN tag */ + if (flow->dl_type != htons(ETH_TYPE_VLAN)) { + flow->dl_vlan = htons(OFP_VLAN_NONE); + } else { + struct vlan_header *vh = buffer_at(&b, 0, sizeof *vh); + flow->dl_type = vh->vlan_next_type; + flow->dl_vlan = vh->vlan_tci & htons(VLAN_VID); + buffer_pull(&b, sizeof *vh); + } + memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN); + memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN); + + if (flow->dl_type == htons(ETH_TYPE_IP)) { + const struct ip_header *nh = buffer_at(&b, 0, sizeof *nh); + if (nh) { + flow->nw_src = nh->ip_src; + flow->nw_dst = nh->ip_dst; + flow->nw_proto = nh->ip_proto; + if (flow->nw_proto == IP_TYPE_TCP + || flow->nw_proto == IP_TYPE_UDP) { + int udp_ofs = IP_IHL(nh->ip_ihl_ver) * 4; + const struct udp_header *th + = buffer_at(&b, udp_ofs, sizeof *th); + if (th) { + flow->tp_src = th->udp_src; + flow->tp_dst = th->udp_dst; + } + } + } + } else if (flow->dl_type == htons(ETH_TYPE_ARP)) { 
+ const struct arp_eth_header *ah = buffer_at(&b, 0, sizeof *ah); + if (ah && ah->ar_hrd == htons(ARP_HRD_ETHERNET) + && ah->ar_pro == htons(ARP_PRO_IP) + && ah->ar_hln == ETH_ADDR_LEN + && ah->ar_pln == sizeof flow->nw_src) + { + /* check if sha/tha match dl_src/dl_dst? */ + flow->nw_src = ah->ar_spa; + flow->nw_dst = ah->ar_tpa; + } + } + } +} + +void +flow_print(FILE *stream, const struct flow *flow) +{ + fprintf(stream, + "port%04x:vlan%04x mac"MAC_FMT"->"MAC_FMT" " + "proto%04x ip"IP_FMT"->"IP_FMT" port%d->%d", + ntohs(flow->in_port), ntohs(flow->dl_vlan), + MAC_ARGS(flow->dl_src), MAC_ARGS(flow->dl_dst), + ntohs(flow->dl_type), + IP_ARGS(&flow->nw_src), IP_ARGS(&flow->nw_dst), + ntohs(flow->tp_src), ntohs(flow->tp_dst)); +} + +int +flow_compare(const struct flow *a, const struct flow *b) +{ + return memcmp(a, b, sizeof *a); +} + +unsigned long int +flow_hash(const struct flow *flow, uint32_t basis) +{ + return hash_fnv(flow, sizeof *flow, basis); +} diff --git a/lib/hash.c b/lib/hash.c new file mode 100644 index 00000000..784daa7f --- /dev/null +++ b/lib/hash.c @@ -0,0 +1,13 @@ +#include "hash.h" + +uint32_t +hash_fnv(const void *p_, size_t n, uint32_t basis) +{ + const uint8_t *p = p_; + uint32_t hash = basis; + while (n--) { + hash *= HASH_FNV_PRIME; + hash ^= *p++; + } + return hash; +} diff --git a/lib/list.c b/lib/list.c new file mode 100644 index 00000000..379e8f8a --- /dev/null +++ b/lib/list.c @@ -0,0 +1,123 @@ +#include "list.h" +#include + +/* Initializes 'list' as an empty list. */ +void +list_init(struct list *list) +{ + list->next = list->prev = list; +} + +/* Inserts 'elem' just before 'before'. */ +void +list_insert(struct list *before, struct list *elem) +{ + elem->prev = before->prev; + elem->next = before; + before->prev->next = elem; + before->prev = elem; +} + +/* Removes elements 'first' though 'last' (exclusive) from their current list, + then inserts them just before 'before'. 
*/ +void +list_splice(struct list *before, struct list *first, struct list *last) +{ + if (first == last) + return; + last = last->prev; + + /* Cleanly remove 'first'...'last' from its current list. */ + first->prev->next = last->next; + last->next->prev = first->prev; + + /* Splice 'first'...'last' into new list. */ + first->prev = before->prev; + last->next = before; + before->prev->next = first; + before->prev = last; +} + +/* Inserts 'elem' at the beginning of 'list', so that it becomes the front in + 'list'. */ +void +list_push_front(struct list *list, struct list *elem) +{ + list_insert(list->next, elem); +} + +/* Inserts 'elem' at the end of 'list', so that it becomes the back in + * 'list'. */ +void +list_push_back(struct list *list, struct list *elem) +{ + list_insert(list, elem); +} + +/* Removes 'elem' from its list and returns the element that followed it. + Undefined behavior if 'elem' is not in a list. */ +struct list * +list_remove(struct list *elem) +{ + elem->prev->next = elem->next; + elem->next->prev = elem->prev; + return elem->next; +} + +/* Removes the front element from 'list' and returns it. Undefined behavior if + 'list' is empty before removal. */ +struct list * +list_pop_front(struct list *list) +{ + struct list *front = list->next; + list_remove(front); + return front; +} + +/* Removes the back element from 'list' and returns it. + Undefined behavior if 'list' is empty before removal. */ +struct list * +list_pop_back(struct list *list) +{ + struct list *back = list->prev; + list_remove(back); + return back; +} + +/* Returns the front element in 'list'. + Undefined behavior if 'list' is empty. */ +struct list * +list_front(struct list *list) +{ + assert(!list_is_empty(list)); + return list->next; +} + +/* Returns the back element in 'list'. + Undefined behavior if 'list' is empty. */ +struct list * +list_back(struct list *list) +{ + assert(!list_is_empty(list)); + return list->prev; +} + +/* Returns the number of elements in 'list'. 
+ Runs in O(n) in the number of elements. */ +size_t +list_size(const struct list *list) +{ + const struct list *e; + size_t cnt = 0; + + for (e = list->next; e != list; e = e->next) + cnt++; + return cnt; +} + +/* Returns true if 'list' is empty, false otherwise. */ +bool +list_is_empty(const struct list *list) +{ + return list->next == list; +} diff --git a/lib/netlink.c b/lib/netlink.c new file mode 100644 index 00000000..f625f94b --- /dev/null +++ b/lib/netlink.c @@ -0,0 +1,908 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "netlink.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "buffer.h" +#include "util.h" + +#include "vlog.h" +#define THIS_MODULE VLM_netlink + +/* Linux header file confusion causes this to be undefined. 
*/ +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +/* Netlink sockets. */ + +struct nl_sock +{ + int fd; + uint32_t pid; +}; + +/* Next nlmsghdr sequence number. + * + * This implementation uses sequence numbers that are unique process-wide, to + * avoid a hypothetical race: send request, close socket, open new socket that + * reuses the old socket's PID value, send request on new socket, receive reply + * from kernel to old socket but with same PID and sequence number. (This race + * could be avoided other ways, e.g. by preventing PIDs from being quickly + * reused). */ +static uint32_t next_seq; + +static int alloc_pid(uint32_t *); +static void free_pid(uint32_t); + +/* Creates a new netlink socket for the given netlink 'protocol' + * (NETLINK_ROUTE, NETLINK_GENERIC, ...). Returns 0 and sets '*sockp' to the + * new socket if successful, otherwise returns a positive errno value. + * + * If 'multicast_group' is nonzero, the new socket subscribes to the specified + * netlink multicast group. (A netlink socket may listen to an arbitrary + * number of multicast groups, but so far we only need one at a time.) + * + * Nonzero 'so_sndbuf' or 'so_rcvbuf' override the kernel default send or + * receive buffer size, respectively. + */ +int +nl_sock_create(int protocol, int multicast_group, + size_t so_sndbuf, size_t so_rcvbuf, struct nl_sock **sockp) +{ + struct nl_sock *sock; + struct sockaddr_nl local, remote; + int retval = 0; + + if (next_seq == 0) { + /* Pick initial sequence number. 
*/ + next_seq = getpid() ^ time(0); + } + + *sockp = NULL; + sock = malloc(sizeof *sock); + if (sock == NULL) { + return ENOMEM; + } + + sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol); + if (sock->fd < 0) { + VLOG_ERR("fcntl: %s", strerror(errno)); + goto error; + } + + retval = alloc_pid(&sock->pid); + if (retval) { + goto error; + } + + if (so_sndbuf != 0 + && setsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, + &so_sndbuf, sizeof so_sndbuf) < 0) { + VLOG_ERR("setsockopt(SO_SNDBUF,%zu): %s", so_sndbuf, strerror(errno)); + goto error_free_pid; + } + + if (so_rcvbuf != 0 + && setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, + &so_rcvbuf, sizeof so_rcvbuf) < 0) { + VLOG_ERR("setsockopt(SO_RCVBUF,%zu): %s", so_rcvbuf, strerror(errno)); + goto error_free_pid; + } + + /* Bind local address as our selected pid. */ + memset(&local, 0, sizeof local); + local.nl_family = AF_NETLINK; + local.nl_pid = sock->pid; + if (multicast_group > 0 && multicast_group <= 32) { + /* This method of joining multicast groups is supported by old kernels, + * but it only allows 32 multicast groups per protocol. */ + local.nl_groups |= 1ul << (multicast_group - 1); + } + if (bind(sock->fd, (struct sockaddr *) &local, sizeof local) < 0) { + VLOG_ERR("bind(%"PRIu32"): %s", sock->pid, strerror(errno)); + goto error_free_pid; + } + + /* Bind remote address as the kernel (pid 0). */ + memset(&remote, 0, sizeof remote); + remote.nl_family = AF_NETLINK; + remote.nl_pid = 0; + if (connect(sock->fd, (struct sockaddr *) &remote, sizeof remote) < 0) { + VLOG_ERR("connect(0): %s", strerror(errno)); + goto error_free_pid; + } + + /* This method of joining multicast groups is only supported by newish + * kernels, but it allows for an arbitrary number of multicast groups. 
*/ + if (multicast_group > 32 + && setsockopt(sock->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, + &multicast_group, sizeof multicast_group) < 0) { + VLOG_ERR("setsockopt(NETLINK_ADD_MEMBERSHIP,%d): %s", + multicast_group, strerror(errno)); + goto error_free_pid; + } + + *sockp = sock; + return 0; + +error_free_pid: + free_pid(sock->pid); +error: + if (retval == 0) { + retval = errno; + if (retval == 0) { + retval = EINVAL; + } + } + if (sock->fd >= 0) { + close(sock->fd); + } + free(sock); + return retval; +} + +/* Destroys netlink socket 'sock'. */ +void +nl_sock_destroy(struct nl_sock *sock) +{ + if (sock) { + close(sock->fd); + free_pid(sock->pid); + free(sock); + } +} + +/* Tries to send 'msg', which must contain a Netlink message, to the kernel on + * 'sock'. nlmsg_len in 'msg' will be finalized to match msg->size before the + * message is sent. + * + * Returns 0 if successful, otherwise a positive errno value. If + * 'wait' is true, then the send will wait until buffer space is ready; + * otherwise, returns EAGAIN if the 'sock' send buffer is full. */ +int +nl_sock_send(struct nl_sock *sock, const struct buffer *msg, bool wait) +{ + int retval; + + nl_msg_nlmsghdr(msg)->nlmsg_len = msg->size; + do { + retval = send(sock->fd, msg->data, msg->size, wait ? 0 : MSG_DONTWAIT); + } while (retval < 0 && errno == EINTR); + return retval < 0 ? errno : 0; +} + +/* Tries to send the 'n_iov' chunks of data in 'iov' to the kernel on 'sock' as + * a single Netlink message. (The message must be fully formed and not require + * finalization of its nlmsg_len field.) + * + * Returns 0 if successful, otherwise a positive errno value. If 'wait' is + * true, then the send will wait until buffer space is ready; otherwise, + * returns EAGAIN if the 'sock' send buffer is full. 
*/ +int +nl_sock_sendv(struct nl_sock *sock, const struct iovec iov[], size_t n_iov, + bool wait) +{ + struct msghdr msg; + int retval; + + memset(&msg, 0, sizeof msg); + msg.msg_iov = (struct iovec *) iov; + msg.msg_iovlen = n_iov; + do { + retval = sendmsg(sock->fd, &msg, MSG_DONTWAIT); + } while (retval < 0 && errno == EINTR); + return retval < 0 ? errno : 0; +} + +/* Tries to receive a netlink message from the kernel on 'sock'. If + * successful, stores the received message into '*bufp' and returns 0. The + * caller is responsible for destroying the message with buffer_delete(). On + * failure, returns a positive errno value and stores a null pointer into + * '*bufp'. + * + * If 'wait' is true, nl_sock_recv waits for a message to be ready; otherwise, + * returns EAGAIN if the 'sock' receive buffer is empty. */ +int +nl_sock_recv(struct nl_sock *sock, struct buffer **bufp, bool wait) +{ + uint8_t tmp; + ssize_t bufsize = 2048; + ssize_t nbytes, nbytes2; + struct buffer *buf; + struct nlmsghdr *nlmsghdr; + struct iovec iov; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0 + }; + + buf = buffer_new(bufsize); + *bufp = NULL; + +try_again: + /* Attempt to read the message. We don't know the size of the data + * yet, so we take a guess at 2048. If we're wrong, we keep trying + * and doubling the buffer size each time. + */ + nlmsghdr = buffer_put_uninit(buf, bufsize); + iov.iov_base = nlmsghdr; + iov.iov_len = bufsize; + do { + nbytes = recvmsg(sock->fd, &msg, (wait ? 
0 : MSG_DONTWAIT) | MSG_PEEK); + } while (nbytes < 0 && errno == EINTR); + if (nbytes < 0) { + buffer_delete(buf); + return errno; + } + if (msg.msg_flags & MSG_TRUNC) { + bufsize *= 2; + buffer_reinit(buf, bufsize); + goto try_again; + } + buf->size = nbytes; + + /* We successfully read the message, so recv again to clear the queue */ + iov.iov_base = &tmp; + iov.iov_len = 1; + do { + nbytes2 = recvmsg(sock->fd, &msg, MSG_DONTWAIT); + if (nbytes2 < 0) { + VLOG_ERR("failed to remove nlmsg from socket: %d\n", errno); + } + } while (nbytes2 < 0 && errno == EINTR); + + if (!NLMSG_OK(nlmsghdr, nbytes)) { + VLOG_ERR("received invalid nlmsg (%zd bytes < %d)", + bufsize, NLMSG_HDRLEN); + buffer_delete(buf); + return EPROTO; + } + *bufp = buf; + return 0; +} + +/* Sends 'request' to the kernel via 'sock' and waits for a response. If + * successful, stores the reply into '*replyp' and returns 0. The caller is + * responsible for destroying the reply with buffer_delete(). On failure, + * returns a positive errno value and stores a null pointer into '*replyp'. + * + * Bare Netlink is an unreliable transport protocol. This function layers + * reliable delivery and reply semantics on top of bare Netlink. + * + * In Netlink, sending a request to the kernel is reliable enough, because the + * kernel will tell us if the message cannot be queued (and we will in that + * case put it on the transmit queue and wait until it can be delivered). + * + * Receiving the reply is the real problem: if the socket buffer is full when + * the kernel tries to send the reply, the reply will be dropped. However, the + * kernel sets a flag that a reply has been dropped. The next call to recv + * then returns ENOBUFS. We can then re-send the request. + * + * Caveats: + * + * 1. Netlink depends on sequence numbers to match up requests and + * replies. The sender of a request supplies a sequence number, and + * the reply echos back that sequence number. 
+ * + * This is fine, but (1) some kernel netlink implementations are + * broken, in that they fail to echo sequence numbers and (2) this + * function will drop packets with non-matching sequence numbers, so + * that only a single request can be usefully transacted at a time. + * + * 2. Resending the request causes it to be re-executed, so the request + * needs to be idempotent. + */ +int +nl_sock_transact(struct nl_sock *sock, + const struct buffer *request, struct buffer **replyp) +{ + uint32_t seq = nl_msg_nlmsghdr(request)->nlmsg_seq; + struct nlmsghdr *nlmsghdr; + struct buffer *reply; + int retval; + + *replyp = NULL; + + /* Ensure that we get a reply even if this message doesn't ordinarily call + * for one. */ + nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_ACK; + +send: + retval = nl_sock_send(sock, request, true); + if (retval) { + return retval; + } + +recv: + retval = nl_sock_recv(sock, &reply, true); + if (retval) { + if (retval == ENOBUFS) { + VLOG_DBG("receive buffer overflow, resending request"); + goto send; + } else { + return retval; + } + } + nlmsghdr = nl_msg_nlmsghdr(reply); + if (seq != nlmsghdr->nlmsg_seq) { + VLOG_DBG("ignoring seq %"PRIu32" != expected %"PRIu32, + nl_msg_nlmsghdr(reply)->nlmsg_seq, seq); + buffer_delete(reply); + goto recv; + } + if (nl_msg_nlmsgerr(reply, &retval)) { + if (retval) { + VLOG_DBG("received NAK error=%d (%s)", retval, strerror(retval)); + } + return retval != EAGAIN ? retval : EPROTO; + } + + *replyp = reply; + return 0; +} + +/* Returns 'sock''s underlying file descriptor. */ +int +nl_sock_fd(const struct nl_sock *sock) +{ + return sock->fd; +} + +/* Netlink messages. */ + +/* Returns the nlmsghdr at the head of 'msg'. + * + * 'msg' must be at least as large as a nlmsghdr. */ +struct nlmsghdr * +nl_msg_nlmsghdr(const struct buffer *msg) +{ + return buffer_at_assert(msg, 0, NLMSG_HDRLEN); +} + +/* Returns the genlmsghdr just past 'msg''s nlmsghdr. 
+ * + * Returns a null pointer if 'msg' is not large enough to contain an nlmsghdr + * and a genlmsghdr. */ +struct genlmsghdr * +nl_msg_genlmsghdr(const struct buffer *msg) +{ + return buffer_at(msg, NLMSG_HDRLEN, GENL_HDRLEN); +} + +/* If 'buffer' is a NLMSG_ERROR message, stores 0 in '*errorp' if it is an ACK + * message, otherwise a positive errno value, and returns true. If 'buffer' is + * not an NLMSG_ERROR message, returns false. + * + * 'msg' must be at least as large as a nlmsghdr. */ +bool +nl_msg_nlmsgerr(const struct buffer *msg, int *errorp) +{ + if (nl_msg_nlmsghdr(msg)->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err = buffer_at(msg, NLMSG_HDRLEN, sizeof *err); + int code = EPROTO; + if (!err) { + VLOG_ERR("received invalid nlmsgerr (%zd bytes < %zd)", + msg->size, NLMSG_HDRLEN + sizeof *err); + } else if (err->error <= 0 && err->error > INT_MIN) { + code = -err->error; + } + if (errorp) { + *errorp = code; + } + return true; + } else { + return false; + } +} + +/* Ensures that 'b' has room for at least 'size' bytes plus netlink pading at + * its tail end, reallocating and copying its data if necessary. */ +void +nl_msg_reserve(struct buffer *msg, size_t size) +{ + buffer_reserve_tailroom(msg, NLMSG_ALIGN(size)); +} + +/* Puts a nlmsghdr at the beginning of 'msg', which must be initially empty. + * Uses the given 'type' and 'flags'. 'sock' is used to obtain a PID and + * sequence number for proper routing of replies. 'expected_payload' should be + * an estimate of the number of payload bytes to be supplied; if the size of + * the payload is unknown a value of 0 is acceptable. + * + * 'type' is ordinarily an enumerated value specific to the Netlink protocol + * (e.g. RTM_NEWLINK, for NETLINK_ROUTE protocol). For Generic Netlink, 'type' + * is the family number obtained via nl_lookup_genl_family(). + * + * 'flags' is a bit-mask that indicates what kind of request is being made. 
It + * is often NLM_F_REQUEST indicating that a request is being made, commonly + * or'd with NLM_F_ACK to request an acknowledgement. + * + * nl_msg_put_genlmsghdr is more convenient for composing a Generic Netlink + * message. */ +void +nl_msg_put_nlmsghdr(struct buffer *msg, struct nl_sock *sock, + size_t expected_payload, uint32_t type, uint32_t flags) +{ + struct nlmsghdr *nlmsghdr; + + assert(msg->size == 0); + + nl_msg_reserve(msg, NLMSG_HDRLEN + expected_payload); + nlmsghdr = nl_msg_put_uninit(msg, NLMSG_HDRLEN); + nlmsghdr->nlmsg_len = 0; + nlmsghdr->nlmsg_type = type; + nlmsghdr->nlmsg_flags = flags; + nlmsghdr->nlmsg_seq = ++next_seq; + nlmsghdr->nlmsg_pid = sock->pid; +} + +/* Puts a nlmsghdr and genlmsghdr at the beginning of 'msg', which must be + * initially empty. 'sock' is used to obtain a PID and sequence number for + * proper routing of replies. 'expected_payload' should be an estimate of the + * number of payload bytes to be supplied; if the size of the payload is + * unknown a value of 0 is acceptable. + * + * 'family' is the family number obtained via nl_lookup_genl_family(). + * + * 'flags' is a bit-mask that indicates what kind of request is being made. It + * is often NLM_F_REQUEST indicating that a request is being made, commonly + * or'd with NLM_F_ACK to request an acknowledgement. + * + * 'cmd' is an enumerated value specific to the Generic Netlink family + * (e.g. CTRL_CMD_NEWFAMILY for the GENL_ID_CTRL family). + * + * 'version' is a version number specific to the family and command (often 1). + * + * nl_msg_put_nlmsghdr should be used to compose Netlink messages that are not + * Generic Netlink messages. 
/* Grows 'msg' at its tail by 'size' bytes rounded up to Netlink alignment and
 * returns a pointer to the start of the new space.  The first 'size' bytes
 * are left uninitialized for the caller to fill in; any alignment padding
 * after them is zeroed. */
void *
nl_msg_put_uninit(struct buffer *msg, size_t size)
{
    const size_t padding = NLMSG_ALIGN(size) - size;
    char *start;

    start = buffer_put_uninit(msg, size + padding);
    if (padding != 0) {
        memset(&start[size], 0, padding);
    }
    return start;
}
*/ +void * +nl_msg_put_unspec_uninit(struct buffer *msg, uint16_t type, size_t size) +{ + size_t total_size = NLA_HDRLEN + size; + struct nlattr* nla = nl_msg_put_uninit(msg, total_size); + assert(NLA_ALIGN(total_size) <= UINT16_MAX); + nla->nla_len = total_size; + nla->nla_type = type; + return nla + 1; +} + +/* Appends a Netlink attribute of the given 'type' and the 'size' bytes of + * 'data' as its payload, to the tail end of 'msg', reallocating and copying + * its data if necessary. Returns a pointer to the first byte of data in the + * attribute, which is left uninitialized. */ +void +nl_msg_put_unspec(struct buffer *msg, uint16_t type, + const void *data, size_t size) +{ + memcpy(nl_msg_put_unspec_uninit(msg, type, size), data, size); +} + +/* Appends a Netlink attribute of the given 'type' and no payload to 'msg'. + * (Some Netlink protocols use the presence or absence of an attribute as a + * Boolean flag.) */ +void +nl_msg_put_flag(struct buffer *msg, uint16_t type) +{ + nl_msg_put_unspec(msg, type, NULL, 0); +} + +/* Appends a Netlink attribute of the given 'type' and the given 8-bit 'value' + * to 'msg'. */ +void +nl_msg_put_u8(struct buffer *msg, uint16_t type, uint8_t value) +{ + nl_msg_put_unspec(msg, type, &value, sizeof value); +} + +/* Appends a Netlink attribute of the given 'type' and the given 16-bit 'value' + * to 'msg'. */ +void +nl_msg_put_u16(struct buffer *msg, uint16_t type, uint16_t value) +{ + nl_msg_put_unspec(msg, type, &value, sizeof value); +} + +/* Appends a Netlink attribute of the given 'type' and the given 32-bit 'value' + * to 'msg'. */ +void +nl_msg_put_u32(struct buffer *msg, uint16_t type, uint32_t value) +{ + nl_msg_put_unspec(msg, type, &value, sizeof value); +} + +/* Appends a Netlink attribute of the given 'type' and the given 64-bit 'value' + * to 'msg'. 
*/ +void +nl_msg_put_u64(struct buffer *msg, uint16_t type, uint64_t value) +{ + nl_msg_put_unspec(msg, type, &value, sizeof value); +} + +/* Appends a Netlink attribute of the given 'type' and the given + * null-terminated string 'value' to 'msg'. */ +void +nl_msg_put_string(struct buffer *msg, uint16_t type, const char *value) +{ + nl_msg_put_unspec(msg, type, value, strlen(value) + 1); +} + +/* Appends a Netlink attribute of the given 'type' and the given buffered + * netlink message in 'nested_msg' to 'msg'. The nlmsg_len field in + * 'nested_msg' is finalized to match 'nested_msg->size'. */ +void +nl_msg_put_nested(struct buffer *msg, + uint16_t type, struct buffer *nested_msg) +{ + nl_msg_nlmsghdr(nested_msg)->nlmsg_len = nested_msg->size; + nl_msg_put_unspec(msg, type, nested_msg->data, nested_msg->size); +} + +/* Returns the first byte in the payload of attribute 'nla'. */ +const void * +nl_attr_get(const struct nlattr *nla) +{ + assert(nla->nla_len >= NLA_HDRLEN); + return nla + 1; +} + +/* Returns the number of bytes in the payload of attribute 'nla'. */ +size_t +nl_attr_get_size(const struct nlattr *nla) +{ + assert(nla->nla_len >= NLA_HDRLEN); + return nla->nla_len - NLA_HDRLEN; +} + +/* Asserts that 'nla''s payload is at least 'size' bytes long, and returns the + * first byte of the payload. */ +const void * +nl_attr_get_unspec(const struct nlattr *nla, size_t size) +{ + assert(nla->nla_len >= NLA_HDRLEN + size); + return nla + 1; +} + +/* Returns true if 'nla' is nonnull. (Some Netlink protocols use the presence + * or absence of an attribute as a Boolean flag.) */ +bool +nl_attr_get_flag(const struct nlattr *nla) +{ + return nla != NULL; +} + +#define NL_ATTR_GET_AS(NLA, TYPE) \ + (*(TYPE*) nl_attr_get_unspec(nla, sizeof(TYPE))) + +/* Returns the 8-bit value in 'nla''s payload. + * + * Asserts that 'nla''s payload is at least 1 byte long. 
*/ +uint8_t +nl_attr_get_u8(const struct nlattr *nla) +{ + return NL_ATTR_GET_AS(nla, uint8_t); +} + +/* Returns the 16-bit value in 'nla''s payload. + * + * Asserts that 'nla''s payload is at least 2 bytes long. */ +uint16_t +nl_attr_get_u16(const struct nlattr *nla) +{ + return NL_ATTR_GET_AS(nla, uint16_t); +} + +/* Returns the 32-bit value in 'nla''s payload. + * + * Asserts that 'nla''s payload is at least 4 bytes long. */ +uint32_t +nl_attr_get_u32(const struct nlattr *nla) +{ + return NL_ATTR_GET_AS(nla, uint32_t); +} + +/* Returns the 64-bit value in 'nla''s payload. + * + * Asserts that 'nla''s payload is at least 8 bytes long. */ +uint64_t +nl_attr_get_u64(const struct nlattr *nla) +{ + return NL_ATTR_GET_AS(nla, uint64_t); +} + +/* Returns the null-terminated string value in 'nla''s payload. + * + * Asserts that 'nla''s payload contains a null-terminated string. */ +const char * +nl_attr_get_string(const struct nlattr *nla) +{ + assert(nla->nla_len > NLA_HDRLEN); + assert(memchr(nl_attr_get(nla), '\0', nla->nla_len - NLA_HDRLEN) != NULL); + return nl_attr_get(nla); +} + +/* Default minimum and maximum payload sizes for each type of attribute. */ +static const size_t attr_len_range[][2] = { + [0 ... N_NL_ATTR_TYPES - 1] = { 0, SIZE_MAX }, + [NL_A_U8] = { 1, 1 }, + [NL_A_U16] = { 2, 2 }, + [NL_A_U32] = { 4, 4 }, + [NL_A_U64] = { 8, 8 }, + [NL_A_STRING] = { 1, SIZE_MAX }, + [NL_A_FLAG] = { 0, SIZE_MAX }, + [NL_A_NESTED] = { NLMSG_HDRLEN, SIZE_MAX }, +}; + +/* Parses the Generic Netlink payload of 'msg' as a sequence of Netlink + * attributes. 'policy[i]', for 0 <= i < n_attrs, specifies how the attribute + * with nla_type == i is parsed; a pointer to attribute i is stored in + * attrs[i]. Returns true if successful, false on failure. 
*/ +bool +nl_policy_parse(const struct buffer *msg, const struct nl_policy policy[], + struct nlattr *attrs[], size_t n_attrs) +{ + void *p, *tail; + size_t n_required; + size_t i; + + n_required = 0; + for (i = 0; i < n_attrs; i++) { + attrs[i] = NULL; + + assert(policy[i].type < N_NL_ATTR_TYPES); + if (policy[i].type != NL_A_NO_ATTR + && policy[i].type != NL_A_FLAG + && !policy[i].optional) { + n_required++; + } + } + + p = buffer_at(msg, NLMSG_HDRLEN + GENL_HDRLEN, 0); + if (p == NULL) { + VLOG_DBG("missing headers in nl_policy_parse"); + return false; + } + tail = buffer_tail(msg); + + while (p < tail) { + size_t offset = p - msg->data; + struct nlattr *nla = p; + size_t len, aligned_len; + uint16_t type; + + /* Make sure its claimed length is plausible. */ + if (nla->nla_len < NLA_HDRLEN) { + VLOG_DBG("%zu: attr shorter than NLA_HDRLEN (%"PRIu16")", + offset, nla->nla_len); + return false; + } + len = nla->nla_len - NLA_HDRLEN; + aligned_len = NLA_ALIGN(len); + if (aligned_len > tail - p) { + VLOG_DBG("%zu: attr %"PRIu16" aligned data len (%zu) " + "> bytes left (%tu)", + offset, nla->nla_type, aligned_len, tail - p); + return false; + } + + type = nla->nla_type; + if (type < n_attrs && policy[type].type != NL_A_NO_ATTR) { + const struct nl_policy *p = &policy[type]; + size_t min_len, max_len; + + /* Validate length and content. */ + min_len = p->min_len ? p->min_len : attr_len_range[p->type][0]; + max_len = p->max_len ? 
p->max_len : attr_len_range[p->type][1]; + if (len < min_len || len > max_len) { + VLOG_DBG("%zu: attr %"PRIu16" length %zu not in allowed range " + "%zu...%zu", offset, type, len, min_len, max_len); + return false; + } + if (p->type == NL_A_STRING) { + if (((char *) nla)[nla->nla_len - 1]) { + VLOG_DBG("%zu: attr %"PRIu16" lacks null terminator", + offset, type); + return false; + } + if (memchr(nla + 1, '\0', len - 1) != NULL) { + VLOG_DBG("%zu: attr %"PRIu16" lies about string length", + offset, type); + return false; + } + } + if (!p->optional && attrs[type] == NULL) { + assert(n_required > 0); + --n_required; + } + attrs[type] = nla; + } else { + /* Skip attribute type that we don't care about. */ + } + p += NLA_ALIGN(nla->nla_len); + } + if (n_required) { + VLOG_DBG("%zu required attrs missing", n_required); + return false; + } + return true; +} + +/* Miscellaneous. */ + +static const struct nl_policy family_policy[CTRL_ATTR_MAX + 1] = { + [CTRL_ATTR_FAMILY_ID] = {.type = NL_A_U16}, +}; + +static int do_lookup_genl_family(const char *name) +{ + struct nl_sock *sock; + struct buffer request, *reply; + struct nlattr *attrs[ARRAY_SIZE(family_policy)]; + int retval; + + retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock); + if (retval) { + return -retval; + } + + buffer_init(&request, 0); + nl_msg_put_genlmsghdr(&request, sock, 0, GENL_ID_CTRL, NLM_F_REQUEST, + CTRL_CMD_GETFAMILY, 1); + nl_msg_put_string(&request, CTRL_ATTR_FAMILY_NAME, name); + retval = nl_sock_transact(sock, &request, &reply); + buffer_uninit(&request); + if (retval) { + nl_sock_destroy(sock); + return -retval; + } + + if (!nl_policy_parse(reply, family_policy, attrs, + ARRAY_SIZE(family_policy))) { + nl_sock_destroy(sock); + buffer_delete(reply); + return -EPROTO; + } + + retval = nl_attr_get_u16(attrs[CTRL_ATTR_FAMILY_ID]); + if (retval == 0) { + retval = -EPROTO; + } + nl_sock_destroy(sock); + buffer_delete(reply); + return retval; +} + +/* If '*number' is 0, translates the given 
Generic Netlink family 'name' to a + * number and stores it in '*number'. If successful, returns 0 and the caller + * may use '*number' as the family number. On failure, returns a positive + * errno value and '*number' caches the errno value. */ +int +nl_lookup_genl_family(const char *name, int *number) +{ + if (*number == 0) { + *number = do_lookup_genl_family(name); + assert(*number != 0); + } + return *number > 0 ? 0 : -*number; +} + +/* Netlink PID. + * + * Every Netlink socket must be bound to a unique 32-bit PID. By convention, + * programs that have a single Netlink socket use their Unix process ID as PID, + * and programs with multiple Netlink sockets add a unique per-socket + * identifier in the bits above the Unix process ID. + * + * The kernel has Netlink PID 0. + */ + +/* Parameters for how many bits in the PID should come from the Unix process ID + * and how many unique per-socket. */ +#define SOCKET_BITS 10 +#define MAX_SOCKETS (1u << SOCKET_BITS) + +#define PROCESS_BITS (32 - SOCKET_BITS) +#define MAX_PROCESSES (1u << PROCESS_BITS) +#define PROCESS_MASK ((uint32_t) (MAX_PROCESSES - 1)) + +/* Bit vector of unused socket identifiers. */ +static uint32_t avail_sockets[ROUND_UP(MAX_SOCKETS, 32)]; + +/* Allocates and returns a new Netlink PID. */ +static int +alloc_pid(uint32_t *pid) +{ + int i; + + for (i = 0; i < MAX_SOCKETS; i++) { + if ((avail_sockets[i / 32] & (1u << (i % 32))) == 0) { + avail_sockets[i / 32] |= 1u << (i % 32); + *pid = (getpid() & PROCESS_MASK) | (i << PROCESS_BITS); + return 0; + } + } + VLOG_ERR("netlink pid space exhausted"); + return ENOBUFS; +} + +/* Makes the specified 'pid' available for reuse. 
*/ +static void +free_pid(uint32_t pid) +{ + int sock = pid >> PROCESS_BITS; + assert(avail_sockets[sock / 32] & (1u << (sock % 32))); + avail_sockets[sock / 32] &= ~(1u << (sock % 32)); +} diff --git a/lib/ofp-print.c b/lib/ofp-print.c new file mode 100644 index 00000000..f8bbc5fb --- /dev/null +++ b/lib/ofp-print.c @@ -0,0 +1,471 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "ofp-print.h" +#include "xtoxll.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "ip.h" +#include "mac.h" +#include "compiler.h" +#include "util.h" +#include "openflow.h" + +/* Dumps the contents of the Ethernet frame in the 'len' bytes starting at + * 'data' to 'stream' using tcpdump. 'total_len' specifies the full length of + * the Ethernet frame (of which 'len' bytes were captured). 
+ * + * This starts and kills a tcpdump subprocess so it's quite expensive. */ +void ofp_print_packet(FILE *stream, const void *data, size_t len, + size_t total_len) +{ + struct pcap_hdr { + uint32_t magic_number; /* magic number */ + uint16_t version_major; /* major version number */ + uint16_t version_minor; /* minor version number */ + int32_t thiszone; /* GMT to local correction */ + uint32_t sigfigs; /* accuracy of timestamps */ + uint32_t snaplen; /* max length of captured packets */ + uint32_t network; /* data link type */ + } PACKED; + + struct pcaprec_hdr { + uint32_t ts_sec; /* timestamp seconds */ + uint32_t ts_usec; /* timestamp microseconds */ + uint32_t incl_len; /* number of octets of packet saved in file */ + uint32_t orig_len; /* actual length of packet */ + } PACKED; + + struct pcap_hdr ph; + struct pcaprec_hdr prh; + + char command[128]; + FILE *tcpdump; + int status; + + fflush(stream); + snprintf(command, sizeof command, "tcpdump -n -r - %d>&1 2>/dev/null", + fileno(stream)); + tcpdump = popen(command, "w"); + if (!tcpdump) { + error(errno, "exec(\"%s\")", command); + return; + } + + /* The pcap reader is responsible for figuring out endianness based on the + * magic number, so the lack of htonX calls here is intentional. 
*/ + ph.magic_number = 0xa1b2c3d4; + ph.version_major = 2; + ph.version_minor = 4; + ph.thiszone = 0; + ph.sigfigs = 0; + ph.snaplen = 1518; + ph.network = 1; /* Ethernet */ + + prh.ts_sec = 0; + prh.ts_usec = 0; + prh.incl_len = len; + prh.orig_len = total_len; + + fwrite(&ph, 1, sizeof ph, tcpdump); + fwrite(&prh, 1, sizeof prh, tcpdump); + fwrite(data, 1, len, tcpdump); + + fflush(tcpdump); + if (ferror(tcpdump)) + error(errno, "error writing \"%s\" subprocess", command); + + status = pclose(tcpdump); + if (WIFEXITED(status)) { + if (WEXITSTATUS(status)) + error(0, "tcpdump exited with status %d", WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + error(0, "tcpdump exited with signal %d", WTERMSIG(status)); + } +} + +/* Pretty-print the OFPT_PACKET_IN packet of 'len' bytes at 'oh' to 'stream' + * at the given 'verbosity' level. */ +static void ofp_packet_in(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_packet_in *op = oh; + size_t data_len; + + fprintf(stream, " total_len=%"PRIu16" in_port=%"PRIu8, + ntohs(op->total_len), ntohs(op->in_port)); + + if (op->reason == OFPR_ACTION) + fputs(" (via action)", stream); + else if (op->reason != OFPR_NO_MATCH) + fprintf(stream, " (***reason %"PRIu8"***)", op->reason); + + data_len = len - offsetof(struct ofp_packet_in, data); + fprintf(stream, " data_len=%zu", data_len); + if (htonl(op->buffer_id) == UINT32_MAX) { + fprintf(stream, " (unbuffered)"); + if (ntohs(op->total_len) != data_len) + fprintf(stream, " (***total_len != data_len***)"); + } else { + fprintf(stream, " buffer=%08"PRIx32, ntohl(op->buffer_id)); + if (ntohs(op->total_len) < data_len) + fprintf(stream, " (***total_len < data_len***)"); + } + putc('\n', stream); + + if (verbosity > 0) + ofp_print_packet(stream, op->data, data_len, ntohs(op->total_len)); +} + +static void ofp_print_port_name(FILE *stream, uint16_t port) +{ + if (port == UINT16_MAX) { + fputs("none", stream); + } else if (port == OFPP_FLOOD) { + 
fputs("flood", stream); + } else if (port == OFPP_CONTROLLER) { + fputs("controller", stream); + } else { + fprintf(stream, "%"PRIu16, port); + } +} + +static void ofp_print_action(FILE *stream, const struct ofp_action *a) +{ + switch (ntohs(a->type)) { + case OFPAT_OUTPUT: + fputs("output(", stream); + ofp_print_port_name(stream, ntohs(a->arg.output.port)); + if (a->arg.output.port == htons(OFPP_CONTROLLER)) { + fprintf(stream, ", max %"PRIu16" bytes", ntohs(a->arg.output.max_len)); + } + fputs(")", stream); + break; + + default: + fprintf(stream, "(decoder %"PRIu16" not implemented)", ntohs(a->type)); + break; + } +} + +static void ofp_print_actions(FILE *stream, + const struct ofp_action actions[], + size_t n_bytes) +{ + size_t i; + + fputs(" actions[", stream); + for (i = 0; i < n_bytes / sizeof *actions; i++) { + if (i) { + fputs("; ", stream); + } + ofp_print_action(stream, &actions[i]); + } + if (n_bytes % sizeof *actions) { + if (i) { + fputs("; ", stream); + } + fputs("; ***trailing garbage***", stream); + } + fputs("]", stream); +} + +/* Pretty-print the OFPT_PACKET_OUT packet of 'len' bytes at 'oh' to 'stream' + * at the given 'verbosity' level. */ +static void ofp_packet_out(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_packet_out *opo = oh; + + fputs(" in_port=", stream); + ofp_print_port_name(stream, ntohs(opo->in_port)); + + if (ntohl(opo->buffer_id) == UINT32_MAX) { + fputs(" out_port=", stream); + ofp_print_port_name(stream, ntohs(opo->out_port)); + if (verbosity > 0 && len > sizeof *opo) { + ofp_print_packet(stream, opo->u.data, len - sizeof *opo, + len - sizeof *opo); + } + } else { + fprintf(stream, " buffer=%08"PRIx32, ntohl(opo->buffer_id)); + ofp_print_actions(stream, opo->u.actions, len - sizeof *opo); + } + putc('\n', stream); +} + +/* qsort comparison function. 
*/ +static int +compare_ports(const void *a_, const void *b_) +{ + const struct ofp_phy_port *a = a_; + const struct ofp_phy_port *b = b_; + uint16_t ap = ntohs(a->port_no); + uint16_t bp = ntohs(b->port_no); + + return ap < bp ? -1 : ap > bp; +} + +static +void ofp_print_phy_port(FILE *stream, const struct ofp_phy_port *port) +{ + uint8_t name[OFP_MAX_PORT_NAME_LEN]; + int j; + + memcpy(name, port->name, sizeof name); + for (j = 0; j < sizeof name - 1; j++) { + if (!isprint(name[j])) { + break; + } + } + name[j] = '\0'; + + fprintf(stream, " %2d(%s): addr:"MAC_FMT", speed:%d, flags:%#x, " + "feat:%#x\n", ntohs(port->port_no), name, + MAC_ARGS(port->hw_addr), ntohl(port->speed), ntohl(port->flags), + ntohl(port->features)); +} + +/* Pretty-print the OFPT_DATA_HELLO packet of 'len' bytes at 'oh' to 'stream' + * at the given 'verbosity' level. */ +void ofp_print_data_hello(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_data_hello *odh = oh; + struct ofp_phy_port port_list[OFPP_MAX]; + int n_ports; + int i; + + + fprintf(stream, "dp id:%"PRIx64"\n", ntohll(odh->datapath_id)); + fprintf(stream, "tables: exact:%d, mac:%d, compressed:%d, general:%d\n", + ntohl(odh->n_exact), ntohl(odh->n_mac_only), + ntohl(odh->n_compression), ntohl(odh->n_general)); + fprintf(stream, "buffers: size:%d, number:%d, miss_len:%d\n", + ntohl(odh->buffer_mb), ntohl(odh->n_buffers), + ntohs(odh->miss_send_len)); + fprintf(stream, "features: capabilities:%#x, actions:%#x\n", + ntohl(odh->capabilities), ntohl(odh->actions)); + + if (ntohs(odh->header.length) >= sizeof *odh) { + len = MIN(len, ntohs(odh->header.length)); + } + n_ports = (len - sizeof *odh) / sizeof *odh->ports; + + memcpy(port_list, odh->ports, (len - sizeof *odh)); + qsort(port_list, n_ports, sizeof port_list[0], compare_ports); + for (i = 0; i < n_ports; i++) { + ofp_print_phy_port(stream, &port_list[i]); + } +} + +static void print_wild(FILE *stream, const char *leader, int is_wild, + const 
char *format, ...) __attribute__((format(printf, 4, 5))); + +static void print_wild(FILE *stream, const char *leader, int is_wild, + const char *format, ...) +{ + fputs(leader, stream); + if (!is_wild) { + va_list args; + + va_start(args, format); + vfprintf(stream, format, args); + va_end(args); + } else { + putc('?', stream); + } +} + +/* Pretty-print the ofp_match structure */ +static void ofp_print_match(FILE *f, const struct ofp_match *om) +{ + uint16_t w = ntohs(om->wildcards); + + print_wild(f, "inport", w & OFPFW_IN_PORT, "%04x", ntohs(om->in_port)); + print_wild(f, ":vlan", w & OFPFW_DL_VLAN, "%04x", ntohs(om->dl_vlan)); + print_wild(f, " mac[", w & OFPFW_DL_SRC, MAC_FMT, MAC_ARGS(om->dl_src)); + print_wild(f, "->", w & OFPFW_DL_DST, MAC_FMT, MAC_ARGS(om->dl_dst)); + print_wild(f, "] type", w & OFPFW_DL_TYPE, "%04x", ntohs(om->dl_type)); + print_wild(f, " ip[", w & OFPFW_NW_SRC, IP_FMT, IP_ARGS(&om->nw_src)); + print_wild(f, "->", w & OFPFW_NW_DST, IP_FMT, IP_ARGS(&om->nw_dst)); + print_wild(f, "] proto", w & OFPFW_NW_PROTO, "%u", om->nw_proto); + print_wild(f, " tport[", w & OFPFW_TP_SRC, "%d", ntohs(om->tp_src)); + print_wild(f, "->", w & OFPFW_TP_DST, "%d", ntohs(om->tp_dst)); + fputs("]\n", f); +} + +/* Pretty-print the OFPT_FLOW_MOD packet of 'len' bytes at 'oh' to 'stream' + * at the given 'verbosity' level. */ +void ofp_print_flow_mod(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_flow_mod *ofm = oh; + + ofp_print_match(stream, &ofm->match); + fprintf(stream, " cmd:%d idle:%d buf:%#x grp:%d\n", ntohs(ofm->command), + ntohs(ofm->max_idle), ntohl(ofm->buffer_id), ntohl(ofm->group_id)); +} + +/* Pretty-print the OFPT_FLOW_EXPIRED packet of 'len' bytes at 'oh' to 'stream' + * at the given 'verbosity' level. 
*/ +void ofp_print_flow_expired(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_flow_expired *ofe = oh; + + ofp_print_match(stream, &ofe->match); + fprintf(stream, + " secs%d pkts%lld bytes%lld\n", ntohl(ofe->duration), + ntohll(ofe->packet_count), ntohll(ofe->byte_count)); +} + +/* Pretty-print the OFPT_PORT_STATUS packet of 'len' bytes at 'oh' to 'stream' + * at the given 'verbosity' level. */ +void ofp_print_port_status(FILE *stream, const void *oh, size_t len, + int verbosity) +{ + const struct ofp_port_status *ops = oh; + + if (ops->reason == OFPPR_ADD) { + fprintf(stream, "add:"); + } else if (ops->reason == OFPPR_DELETE) { + fprintf(stream, "del:"); + } else if (ops->reason == OFPPR_MOD) { + fprintf(stream, "mod:"); + } else { + fprintf(stream, "err:"); + } + + ofp_print_phy_port(stream, &ops->desc); +} + +struct openflow_packet { + const char *name; + size_t min_size; + void (*printer)(FILE *, const void *, size_t len, int verbosity); +}; + +static const struct openflow_packet packets[] = { + [OFPT_CONTROL_HELLO] = { + "ofp_control_hello", + sizeof (struct ofp_control_hello), + NULL, + }, + [OFPT_DATA_HELLO] = { + "ofp_data_hello", + sizeof (struct ofp_data_hello), + ofp_print_data_hello, + }, + [OFPT_PACKET_IN] = { + "ofp_packet_in", + offsetof(struct ofp_packet_in, data), + ofp_packet_in, + }, + [OFPT_PACKET_OUT] = { + "ofp_packet_out", + sizeof (struct ofp_packet_out), + ofp_packet_out, + }, + [OFPT_FLOW_MOD] = { + "ofp_flow_mod", + sizeof (struct ofp_flow_mod), + ofp_print_flow_mod, + }, + [OFPT_FLOW_EXPIRED] = { + "ofp_flow_expired", + sizeof (struct ofp_flow_expired), + ofp_print_flow_expired, + }, + [OFPT_PORT_MOD] = { + "ofp_port_mod", + sizeof (struct ofp_port_mod), + NULL, + }, + [OFPT_PORT_STATUS] = { + "ofp_port_status", + sizeof (struct ofp_port_status), + ofp_print_port_status + }, +}; + +/* Pretty-print the OpenFlow packet of 'len' bytes at 'oh' to 'stream' at the + * given 'verbosity' level. 
0 is a minimal amount of verbosity and higher + * numbers increase verbosity. */ +void ofp_print(FILE *stream, const void *oh_, size_t len, int verbosity) +{ + const struct ofp_header *oh = oh_; + const struct openflow_packet *pkt; + + if (len < sizeof(struct ofp_header)) { + fprintf(stream, "OpenFlow packet too short:\n"); + hex_dump(stream, oh, len, 0, true); + return; + } else if (oh->version != 1) { + fprintf(stream, "Bad OpenFlow version %"PRIu8":\n", oh->version); + hex_dump(stream, oh, len, 0, true); + return; + } else if (oh->type >= ARRAY_SIZE(packets) || !packets[oh->type].name) { + fprintf(stream, "Unknown OpenFlow packet type %"PRIu8":\n", + oh->type); + hex_dump(stream, oh, len, 0, true); + return; + } + + pkt = &packets[oh->type]; + fprintf(stream, "%s (xid=%"PRIx32"):", pkt->name, oh->xid); + + if (ntohs(oh->length) > len) + fprintf(stream, " (***truncated to %zu bytes from %"PRIu16"***)", + len, ntohs(oh->length)); + else if (ntohs(oh->length) < len) { + fprintf(stream, " (***only uses %"PRIu16" bytes out of %zu***)\n", + ntohs(oh->length), len); + len = ntohs(oh->length); + } + + if (len < pkt->min_size) { + fprintf(stream, " (***length=%zu < min_size=%zu***)\n", + len, pkt->min_size); + } else if (!pkt->printer) { + fprintf(stream, " length=%zu (decoder not implemented)\n", + ntohs(oh->length)); + } else { + pkt->printer(stream, oh, len, verbosity); + } + if (verbosity >= 3) + hex_dump(stream, oh, len, 0, true); +} + +/* Pretty print a openflow table */ +void ofp_print_table(FILE *stream, const struct ofp_table* ot) +{ + fprintf(stream, "id: %d name: %-8s n_flows: %6d max_flows: %6d", + ntohs(ot->table_id), ot->name, ntohl(ot->n_flows), + ntohl(ot->max_flows)); +} diff --git a/lib/socket-util.c b/lib/socket-util.c new file mode 100644 index 00000000..3397fdb3 --- /dev/null +++ b/lib/socket-util.c @@ -0,0 +1,65 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "socket-util.h" +#include +#include +#include +#include +#include + +#include "vlog.h" +#define THIS_MODULE VLM_socket_util + +/* Sets 'fd' to non-blocking mode. Returns 0 if successful, otherwise a + * positive errno value. */ +int +set_nonblocking(int fd) +{ + int flags = fcntl(fd, F_GETFL, 0); + if (flags != -1) { + return fcntl(fd, F_SETFL, flags | O_NONBLOCK) != -1 ? 0 : errno; + } else { + return errno; + } +} + +/* Translates 'host_name', which may be a DNS name or an IP address, into a + * numeric IP address in '*addr'. Returns 0 if successful, otherwise a + * positive errno value. */ +int +lookup_ip(const char *host_name, struct in_addr *addr) +{ + if (!inet_aton(host_name, addr)) { + struct hostent *he = gethostbyname(host_name); + if (he == NULL) { + VLOG_ERR("gethostbyname(%s): %s", host_name, + (h_errno == HOST_NOT_FOUND ? 
"host not found" + : h_errno == TRY_AGAIN ? "try again" + : h_errno == NO_RECOVERY ? "non-recoverable error" + : h_errno == NO_ADDRESS ? "no address" + : "unknown error")); + return ENOENT; + } + addr->s_addr = *(uint32_t *) he->h_addr; + } + return 0; +} diff --git a/lib/util.c b/lib/util.c new file mode 100644 index 00000000..8f038e5f --- /dev/null +++ b/lib/util.c @@ -0,0 +1,195 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util.h" +#include +#include +#include +#include + +const char *program_name; + +static void +out_of_memory(void) +{ + fatal(0, "virtual memory exhausted"); +} + +void * +xcalloc(size_t count, size_t size) +{ + void *p = count && size ? calloc(count, size) : malloc(1); + if (p == NULL) { + out_of_memory(); + } + return p; +} + +void * +xmalloc(size_t size) +{ + void *p = malloc(size ? 
size : 1); + if (p == NULL) { + out_of_memory(); + } + return p; +} + +void * +xrealloc(void *p, size_t size) +{ + p = realloc(p, size ? size : 1); + if (p == NULL) { + out_of_memory(); + } + return p; +} + +char * +xstrdup(const char *s_) +{ + size_t size = strlen(s_) + 1; + char *s = xmalloc(size); + memcpy(s, s_, size); + return s; +} + +char * +xasprintf(const char *format, ...) +{ + va_list args; + size_t needed; + char *s; + + va_start(args, format); + needed = vsnprintf(NULL, 0, format, args); + va_end(args); + + s = xmalloc(needed + 1); + + va_start(args, format); + vsnprintf(s, needed + 1, format, args); + va_end(args); + + return s; +} + +void fatal(int err_no, const char *format, ...) +{ + va_list args; + + fprintf(stderr, "%s: ", program_name); + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + if (err_no != 0) + fprintf(stderr, " (%s)", strerror(err_no)); + putc('\n', stderr); + + exit(EXIT_FAILURE); +} + +void error(int err_no, const char *format, ...) +{ + va_list args; + + fprintf(stderr, "%s: ", program_name); + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + if (err_no != 0) + fprintf(stderr, " (%s)", strerror(err_no)); + putc('\n', stderr); +} + +void debug(int err_no, const char *format, ...) +{ + va_list args; + + fprintf(stderr, "%s: ", program_name); + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + if (err_no != 0) + fprintf(stderr, " (%s)", strerror(err_no)); + putc('\n', stderr); +} + +/* Sets program_name based on 'argv0'. Should be called at the beginning of + * main(), as "set_program_name(argv[0]);". */ +void set_program_name(const char *argv0) +{ + const char *slash = strrchr(argv0, '/'); + program_name = slash ? slash + 1 : argv0; +} + +/* Writes the 'size' bytes in 'buf' to 'stream' as hex bytes arranged 16 per + * line. Numeric offsets are also included, starting at 'ofs' for the first + * byte in 'buf'. 
If 'ascii' is true then the corresponding ASCII characters + * are also rendered alongside. */ +void +hex_dump(FILE *stream, const void *buf_, size_t size, + uintptr_t ofs, bool ascii) +{ + const uint8_t *buf = buf_; + const size_t per_line = 16; /* Maximum bytes per line. */ + + while (size > 0) + { + size_t start, end, n; + size_t i; + + /* Number of bytes on this line. */ + start = ofs % per_line; + end = per_line; + if (end - start > size) + end = start + size; + n = end - start; + + /* Print line. */ + fprintf(stream, "%08jx ", (uintmax_t) ROUND_DOWN(ofs, per_line)); + for (i = 0; i < start; i++) + fprintf(stream, " "); + for (; i < end; i++) + fprintf(stream, "%02hhx%c", + buf[i - start], i == per_line / 2 - 1? '-' : ' '); + if (ascii) + { + for (; i < per_line; i++) + fprintf(stream, " "); + fprintf(stream, "|"); + for (i = 0; i < start; i++) + fprintf(stream, " "); + for (; i < end; i++) { + int c = buf[i - start]; + putc(c >= 32 && c < 127 ? c : '.', stream); + } + for (; i < per_line; i++) + fprintf(stream, " "); + fprintf(stream, "|"); + } + fprintf(stream, "\n"); + + ofs += n; + buf += n; + size -= n; + } +} diff --git a/lib/vconn-netlink.c b/lib/vconn-netlink.c new file mode 100644 index 00000000..7ab54705 --- /dev/null +++ b/lib/vconn-netlink.c @@ -0,0 +1,126 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vconn.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "openflow-netlink.h" +#include "buffer.h" +#include "dpif.h" +#include "netlink.h" +#include "socket-util.h" +#include "util.h" +#include "openflow.h" + +#include "vlog.h" +#define THIS_MODULE VLM_VCONN_NETLINK +

/* A vconn whose messages are exchanged through the dpif in 'dp'. */
struct netlink_vconn
{
    struct vconn vconn;
    struct dpif dp;
};

/* Converts 'vconn', which must belong to netlink_vconn_class, to the
 * netlink_vconn that contains it. */
static struct netlink_vconn *
netlink_vconn_cast(struct vconn *vconn)
{
    assert(vconn->class == &netlink_vconn_class);
    return CONTAINER_OF(vconn, struct netlink_vconn, vconn);
}

/* Opens a netlink vconn.  'suffix' must begin with a decimal integer, which
 * is passed to dpif_open(); otherwise the program exits via fatal().
 *
 * Returns 0 and stores the new vconn in '*vconnp' on success; otherwise
 * returns dpif_open()'s error and stores a null pointer. */
static int
netlink_open(const char *name, char *suffix, struct vconn **vconnp)
{
    struct netlink_vconn *nl;
    int dp_idx;
    int error;

    if (sscanf(suffix, "%d", &dp_idx) != 1) {
        fatal(0, "%s: bad peer name format", name);
    }

    nl = xmalloc(sizeof *nl);
    nl->vconn.class = &netlink_vconn_class;
    error = dpif_open(dp_idx, true, &nl->dp);
    if (!error) {
        *vconnp = &nl->vconn;
        return 0;
    }

    free(nl);
    *vconnp = NULL;
    return error;
}

/* Closes the dpif owned by 'vconn' and frees 'vconn'. */
static void
netlink_close(struct vconn *vconn)
{
    struct netlink_vconn *nl = netlink_vconn_cast(vconn);

    dpif_close(&nl->dp);
    free(nl);
}

/* Points 'pfd' at the dpif's netlink socket and adds POLLIN and/or POLLOUT
 * to 'pfd->events' according to the WANT_RECV and WANT_SEND bits in 'want'. */
static void
netlink_prepoll(struct vconn *vconn, int want, struct pollfd *pfd)
{
    struct netlink_vconn *nl = netlink_vconn_cast(vconn);

    pfd->fd = nl_sock_fd(nl->dp.sock);
    if (want & WANT_RECV) {
        pfd->events |= POLLIN;
    }
    if (want & WANT_SEND) {
        pfd->events |= POLLOUT;
    }
}

/* Receives one message through dpif_recv_openflow() and returns its result. */
static int
netlink_recv(struct vconn *vconn, struct buffer **bufferp)
{
    struct netlink_vconn *nl = netlink_vconn_cast(vconn);

    return dpif_recv_openflow(&nl->dp, bufferp, false);
}

/* Sends 'buffer' through dpif_send_openflow().  On success (0) 'buffer' is
 * deleted here; on failure the error is returned and 'buffer' is left
 * untouched. */
static int
netlink_send(struct vconn *vconn, struct buffer *buffer)
{
    struct netlink_vconn *nl = netlink_vconn_cast(vconn);
    int error = dpif_send_openflow(&nl->dp, buffer, false);

    if (error) {
        return error;
    }
    buffer_delete(buffer);
    return 0;
}

/* vconn class for "nl:" connection names. */
struct vconn_class netlink_vconn_class = {
    .name = "nl",
    .open = netlink_open,
    .close = netlink_close,
    .prepoll = netlink_prepoll,
    .recv = netlink_recv,
    .send = netlink_send,
};
diff --git a/lib/vconn-tcp.c b/lib/vconn-tcp.c new file mode 100644 index 00000000..1878d2db --- /dev/null +++ b/lib/vconn-tcp.c @@ -0,0 +1,370 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vconn.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "buffer.h" +#include "socket-util.h" +#include "util.h" +#include "openflow.h" +#include "ofp-print.h" + +#include "vlog.h" +#define THIS_MODULE VLM_vconn_tcp + +/* Active TCP. */ +

/* An active TCP connection.
 *
 * 'rxbuf' accumulates a partially received message between tcp_recv() calls;
 * 'txbuf' holds a message that tcp_send() could not write completely.  Either
 * may be null. */
struct tcp_vconn
{
    struct vconn vconn;
    int fd;
    struct buffer *rxbuf;
    struct buffer *txbuf;
};

/* Wraps the connected socket 'fd' in a new tcp_vconn, after making it
 * non-blocking and enabling TCP_NODELAY.  'name' is used only in log
 * messages.
 *
 * Returns 0 and stores the vconn in '*vconnp' on success.  On failure, closes
 * 'fd' and returns a positive errno value. */
static int
new_tcp_vconn(const char *name, int fd, struct vconn **vconnp)
{
    struct tcp_vconn *tcp;
    int on = 1;
    int retval;

    retval = set_nonblocking(fd);
    if (retval) {
        VLOG_ERR("%s: set_nonblocking: %s", name, strerror(retval));
        close(fd);
        return retval;
    }

    retval = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on);
    if (retval) {
        /* Save errno first: logging and close() may overwrite it. */
        int error = errno;
        VLOG_ERR("%s: setsockopt(TCP_NODELAY): %s", name, strerror(error));
        close(fd);
        return error;
    }

    tcp = xmalloc(sizeof *tcp);
    tcp->vconn.class = &tcp_vconn_class;
    tcp->fd = fd;
    tcp->txbuf = NULL;
    tcp->rxbuf = NULL;
    *vconnp = &tcp->vconn;
    return 0;
}

/* Converts 'vconn', which must belong to tcp_vconn_class, to the tcp_vconn
 * that contains it. */
static struct tcp_vconn *
tcp_vconn_cast(struct vconn *vconn)
{
    assert(vconn->class == &tcp_vconn_class);
    return CONTAINER_OF(vconn, struct tcp_vconn, vconn);
}

/* Connects to the peer named by 'suffix', which has the form "HOST[:PORT]"
 * and is modified in place.  PORT defaults to OFP_TCP_PORT.
 *
 * Returns 0 and stores the new vconn in '*vconnp' on success, ENOENT if
 * lookup_ip() fails for HOST, otherwise a positive errno value.  Exits via
 * fatal() if 'suffix' contains no host name. */
static int
tcp_open(const char *name, char *suffix, struct vconn **vconnp)
{
    char *save_ptr;
    const char *host_name;
    const char *port_string;
    struct sockaddr_in sin;
    int retval;
    int fd;

    /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that
     * can cause segfaults here:
     * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614.
     * Using "::" instead of the obvious ":" works around it. */
    host_name = strtok_r(suffix, "::", &save_ptr);
    port_string = strtok_r(NULL, "::", &save_ptr);
    if (!host_name) {
        fatal(0, "%s: bad peer name format", name);
    }

    memset(&sin, 0, sizeof sin);
    sin.sin_family = AF_INET;
    if (lookup_ip(host_name, &sin.sin_addr)) {
        return ENOENT;
    }
    sin.sin_port = htons(port_string ? atoi(port_string) : OFP_TCP_PORT);

    fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0) {
        /* Save errno first: logging may overwrite it. */
        int error = errno;
        VLOG_ERR("%s: socket: %s", name, strerror(error));
        return error;
    }

    retval = connect(fd, (struct sockaddr *) &sin, sizeof sin);
    if (retval < 0) {
        int error = errno;
        VLOG_ERR("%s: connect: %s", name, strerror(error));
        close(fd);
        return error;
    }

    return new_tcp_vconn(name, fd, vconnp);
}

/* Closes the socket and frees 'vconn', including any partially received or
 * not-yet-sent message it still owns (previously these buffers leaked). */
static void
tcp_close(struct vconn *vconn)
{
    struct tcp_vconn *tcp = tcp_vconn_cast(vconn);

    close(tcp->fd);
    if (tcp->rxbuf) {
        buffer_delete(tcp->rxbuf);
    }
    if (tcp->txbuf) {
        buffer_delete(tcp->txbuf);
    }
    free(tcp);
}

/* Points 'pfd' at the socket.  Requests POLLIN for WANT_RECV, and POLLOUT
 * for WANT_SEND or whenever a queued message still has to be flushed. */
static void
tcp_prepoll(struct vconn *vconn, int want, struct pollfd *pfd)
{
    struct tcp_vconn *tcp = tcp_vconn_cast(vconn);

    pfd->fd = tcp->fd;
    if (want & WANT_RECV) {
        pfd->events |= POLLIN;
    }
    if (want & WANT_SEND || tcp->txbuf) {
        pfd->events |= POLLOUT;
    }
}

/* If the socket is writable and a message is queued in 'txbuf', writes as
 * much of it as possible.  A write error other than EAGAIN sets POLLERR in
 * '*revents'.  POLLOUT is cleared from '*revents' while 'txbuf' is still
 * non-null, because tcp_send() would then return EAGAIN. */
static void
tcp_postpoll(struct vconn *vconn, short int *revents)
{
    struct tcp_vconn *tcp = tcp_vconn_cast(vconn);

    if (*revents & POLLOUT && tcp->txbuf) {
        ssize_t n = write(tcp->fd, tcp->txbuf->data, tcp->txbuf->size);
        if (n < 0) {
            if (errno != EAGAIN) {
                VLOG_ERR("send: %s", strerror(errno));
                *revents |= POLLERR;
            }
        } else if (n > 0) {
            buffer_pull(tcp->txbuf, n);
            if (tcp->txbuf->size == 0) {
                buffer_delete(tcp->txbuf);
                tcp->txbuf = NULL;
            }
        }
        if (tcp->txbuf) {
            *revents &= ~POLLOUT;
        }
    }
}

/* Tries to receive one complete OpenFlow message without blocking.
 *
 * The ofp_header is read first; its 'length' field then determines how many
 * more bytes belong to the message.  Partial data is kept in 'rxbuf' across
 * calls.
 *
 * Returns 0 and transfers the message to '*bufferp' when a whole message is
 * available, EAGAIN when more data is needed, EOF when the peer closed the
 * connection between messages, EPROTO when the peer closed mid-message or
 * sent a length smaller than the header, otherwise a positive errno value.
 *
 * A message that consists of a header only is delivered as soon as the header
 * is complete.  Previously that case issued a zero-length read(), whose 0
 * result was misreported as EPROTO. */
static int
tcp_recv(struct vconn *vconn, struct buffer **bufferp)
{
    struct tcp_vconn *tcp = tcp_vconn_cast(vconn);
    struct buffer *rx;
    size_t want_bytes;
    ssize_t retval;

    if (tcp->rxbuf == NULL) {
        tcp->rxbuf = buffer_new(1564);
    }
    rx = tcp->rxbuf;

again:
    if (sizeof(struct ofp_header) > rx->size) {
        want_bytes = sizeof(struct ofp_header) - rx->size;
    } else {
        struct ofp_header *oh = rx->data;
        size_t length = ntohs(oh->length);
        if (length < sizeof(struct ofp_header)) {
            VLOG_ERR("received too-short ofp_header (%zu bytes)", length);
            return EPROTO;
        }
        want_bytes = length - rx->size;
        if (want_bytes == 0) {
            /* The whole message has arrived. */
            *bufferp = rx;
            tcp->rxbuf = NULL;
            return 0;
        }
    }
    buffer_reserve_tailroom(rx, want_bytes);

    retval = read(tcp->fd, buffer_tail(rx), want_bytes);
    if (retval > 0) {
        rx->size += retval;
        if ((size_t) retval == want_bytes) {
            /* Header or body just completed: re-evaluate what is needed. */
            goto again;
        }
        return EAGAIN;
    } else if (retval == 0) {
        return rx->size ? EPROTO : EOF;
    } else {
        return errno;
    }
}

/* Tries to send 'buffer' without blocking.
 *
 * Returns EAGAIN, leaving 'buffer' with the caller, if an earlier message is
 * still queued.  Returns 0 if 'buffer' was written completely (it is then
 * deleted) or partially (the remainder is queued in 'txbuf' and flushed by
 * tcp_postpoll()).  Otherwise returns a positive errno value and the caller
 * keeps 'buffer'. */
static int
tcp_send(struct vconn *vconn, struct buffer *buffer)
{
    struct tcp_vconn *tcp = tcp_vconn_cast(vconn);
    ssize_t retval;

    if (tcp->txbuf) {
        return EAGAIN;
    }

    retval = write(tcp->fd, buffer->data, buffer->size);
    if (retval == buffer->size) {
        buffer_delete(buffer);
        return 0;
    } else if (retval >= 0 || errno == EAGAIN) {
        tcp->txbuf = buffer;
        if (retval > 0) {
            buffer_pull(buffer, retval);
        }
        return 0;
    } else {
        return errno;
    }
}

/* vconn class for "tcp:" connection names. */
struct vconn_class tcp_vconn_class = {
    .name = "tcp",
    .open = tcp_open,
    .close = tcp_close,
    .prepoll = tcp_prepoll,
    .postpoll = tcp_postpoll,
    .recv = tcp_recv,
    .send = tcp_send,
};

/* Passive TCP.
*/ + +struct ptcp_vconn +{ + struct vconn vconn; + int fd; +}; + +static struct ptcp_vconn * +ptcp_vconn_cast(struct vconn *vconn) +{ + assert(vconn->class == &ptcp_vconn_class); + return CONTAINER_OF(vconn, struct ptcp_vconn, vconn); +} + +static int +ptcp_open(const char *name, char *suffix, struct vconn **vconnp) +{ + struct sockaddr_in sin; + struct ptcp_vconn *ptcp; + int retval; + int fd; + unsigned int yes = 1; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + VLOG_ERR("%s: socket: %s", name, strerror(errno)); + return errno; + } + + if ( setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,&yes,sizeof(yes)) < 0) { + VLOG_ERR("%s: setsockopt::SO_REUSEADDR: %s", name, strerror(errno)); + return errno; + } + + + memset(&sin, 0, sizeof sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(atoi(suffix) ? atoi(suffix) : OFP_TCP_PORT); + retval = bind(fd, (struct sockaddr *) &sin, sizeof sin); + if (retval < 0) { + int error = errno; + VLOG_ERR("%s: bind: %s", name, strerror(error)); + close(fd); + return error; + } + + retval = listen(fd, 10); + if (retval < 0) { + int error = errno; + VLOG_ERR("%s: listen: %s", name, strerror(error)); + close(fd); + return error; + } + + retval = set_nonblocking(fd); + if (retval) { + VLOG_ERR("%s: set_nonblocking: %s", name, strerror(retval)); + close(fd); + return retval; + } + + ptcp = xmalloc(sizeof *ptcp); + ptcp->vconn.class = &ptcp_vconn_class; + ptcp->fd = fd; + *vconnp = &ptcp->vconn; + return 0; +} + +static void +ptcp_close(struct vconn *vconn) +{ + struct ptcp_vconn *ptcp = ptcp_vconn_cast(vconn); + close(ptcp->fd); + free(ptcp); +} + +static void +ptcp_prepoll(struct vconn *vconn, int want, struct pollfd *pfd) +{ + struct ptcp_vconn *ptcp = ptcp_vconn_cast(vconn); + pfd->fd = ptcp->fd; + if (want & WANT_ACCEPT) { + pfd->events |= POLLIN; + } +} + +static int +ptcp_accept(struct vconn *vconn, struct vconn **new_vconnp) +{ + struct ptcp_vconn *ptcp = ptcp_vconn_cast(vconn); + 
int new_fd; + + new_fd = accept(ptcp->fd, NULL, NULL); + if (new_fd < 0) { + return errno; + } + + return new_tcp_vconn("tcp" /* FIXME */, new_fd, new_vconnp); +} + +struct vconn_class ptcp_vconn_class = { + .name = "ptcp", + .open = ptcp_open, + .close = ptcp_close, + .prepoll = ptcp_prepoll, + .accept = ptcp_accept, +}; + diff --git a/lib/vconn.c b/lib/vconn.c new file mode 100644 index 00000000..2fedc2eb --- /dev/null +++ b/lib/vconn.c @@ -0,0 +1,289 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vconn.h" +#include +#include +#include +#include +#include +#include +#include "buffer.h" +#include "flow.h" +#include "openflow.h" +#include "util.h" + +static struct vconn_class *vconn_classes[] = { + &tcp_vconn_class, + &ptcp_vconn_class, +#ifdef HAVE_NETLINK + &netlink_vconn_class, +#endif +}; + +/* Check the validity of the vconn class structures. */ +static void +check_vconn_classes(void) +{ +#ifndef NDEBUG + size_t i; + + for (i = 0; i < ARRAY_SIZE(vconn_classes); i++) { + struct vconn_class *class = vconn_classes[i]; + assert(class->name != NULL); + assert(class->open != NULL); + assert(class->close != NULL); + assert(class->prepoll != NULL); + assert(class->accept + ? !class->recv && !class->send + : class->recv && class->send); + } +#endif +} + +/* Attempts to connect to an OpenFlow device. 'name' is a connection name in + * the form "TYPE:ARGS", where TYPE is the vconn class's name and ARGS are + * vconn class-specific. + * + * Returns 0 if successful, otherwise a positive errno value. If successful, + * stores a pointer to the new connection in '*vconnp', otherwise a null + * pointer. */ +int +vconn_open(const char *name, struct vconn **vconnp) +{ + size_t prefix_len; + size_t i; + + check_vconn_classes(); + + prefix_len = strcspn(name, ":"); + if (prefix_len == strlen(name)) { + fatal(0, "`%s' not correct format for peer name", name); + } + for (i = 0; i < ARRAY_SIZE(vconn_classes); i++) { + struct vconn_class *class = vconn_classes[i]; + if (strlen(class->name) == prefix_len + && !memcmp(class->name, name, prefix_len)) { + char *suffix_copy = xstrdup(name + prefix_len + 1); + int retval = class->open(name, suffix_copy, vconnp); + free(suffix_copy); + if (retval) { + *vconnp = NULL; + } + return retval; + } + } + fatal(0, "unknown peer type `%.*s'", (int) prefix_len, name); + abort(); +} + +/* Closes 'vconn'. 
*/ +void +vconn_close(struct vconn *vconn) +{ + if (vconn != NULL) { + (vconn->class->close)(vconn); + } +} + +/* Returns true if 'vconn' is a passive vconn, that is, its purpose is to + * wait for connections to arrive, not to transfer data. Returns false if + * 'vconn' is an active vconn, that is, its purpose is to transfer data, not + * to wait for new connections to arrive. */ +bool +vconn_is_passive(const struct vconn *vconn) +{ + return vconn->class->accept != NULL; +} + +/* Initializes 'pfd->fd' and 'pfd->events' appropriately so that poll() will + * wake up when the connection becomes available for the operations specified + * in 'want', or for performing the vconn's needed internal processing. */ +void +vconn_prepoll(struct vconn *vconn, int want, struct pollfd *pollfd) +{ + (vconn->class->prepoll)(vconn, want, pollfd); +} + +/* Perform any internal processing needed by the connections. The vconn file + * descriptor's status, as reported by poll(), must be provided in '*revents'. + * + * The postpoll function adjusts '*revents' to reflect the status of the + * connection from the caller's point of view. That is, upon return '*revents + * & POLLIN' indicates that a packet is (potentially) ready to be read (for an + * active vconn) or a new connection is ready to be accepted (for a passive + * vconn) and '*revents & POLLOUT' indicates that a packet is (potentially) + * ready to be written. */ +void +vconn_postpoll(struct vconn *vconn, short int *revents) +{ + if (vconn->class->postpoll) { + (vconn->class->postpoll)(vconn, revents); + } +} + +/* Tries to accept a new connection on 'vconn', which must be a passive vconn. + * If successful, stores the new connection in '*new_vconn' and returns 0. + * Otherwise, returns a positive errno value. + * + * vconn_accept will not block waiting for a connection. If no connection is + * ready to be accepted, it returns EAGAIN immediately. 
*/ +int +vconn_accept(struct vconn *vconn, struct vconn **new_vconn) +{ + int retval = (vconn->class->accept)(vconn, new_vconn); + if (retval) { + *new_vconn = NULL; + } + return retval; +} + +/* Tries to receive an OpenFlow message from 'vconn', which must be an active + * vconn. If successful, stores the received message into '*msgp' and returns + * 0. The caller is responsible for destroying the message with + * buffer_delete(). On failure, returns a positive errno value and stores a + * null pointer into '*msgp'. On normal connection close, returns EOF. + * + * vconn_recv will not block waiting for a packet to arrive. If no packets + * have been received, it returns EAGAIN immediately. */ +int +vconn_recv(struct vconn *vconn, struct buffer **msgp) +{ + int retval = (vconn->class->recv)(vconn, msgp); + if (retval) { + *msgp = NULL; + } + return retval; +} + +/* Tries to queue 'msg' for transmission on 'vconn', which must be an active + * vconn. If successful, returns 0, in which case ownership of 'msg' is + * transferred to the vconn. Success does not guarantee that 'msg' has been or + * ever will be delivered to the peer, only that it has been queued for + * transmission. + * + * Returns a positive errno value on failure, in which case the caller + * retains ownership of 'msg'. + * + * vconn_send will not block. If 'msg' cannot be immediately accepted for + * transmission, it returns EAGAIN immediately. */ +int +vconn_send(struct vconn *vconn, struct buffer *msg) +{ + return (vconn->class->send)(vconn, msg); +} + +/* Same as vconn_send, except that it waits until 'msg' can be transmitted. 
*/ +int +vconn_send_wait(struct vconn *vconn, struct buffer *msg) +{ + int retval; + while ((retval = vconn_send(vconn, msg)) == EAGAIN) { + struct pollfd pfd; + + pfd.fd = -1; + pfd.events = 0; + vconn_prepoll(vconn, WANT_SEND, &pfd); + do { + retval = poll(&pfd, 1, -1); + } while (retval < 0 && errno == EINTR); + if (retval < 0) { + return errno; + } + assert(retval == 1); + vconn_postpoll(vconn, &pfd.revents); + } + return retval; +} + +struct buffer * +make_add_simple_flow(const struct flow *flow, + uint32_t buffer_id, uint16_t out_port) +{ + struct ofp_flow_mod *ofm; + size_t size = sizeof *ofm + sizeof ofm->actions[0]; + struct buffer *out = buffer_new(size); + ofm = buffer_put_uninit(out, size); + memset(ofm, 0, size); + ofm->header.version = OFP_VERSION; + ofm->header.type = OFPT_FLOW_MOD; + ofm->header.length = htons(size); + ofm->match.wildcards = htons(0); + ofm->match.in_port = flow->in_port; + memcpy(ofm->match.dl_src, flow->dl_src, sizeof ofm->match.dl_src); + memcpy(ofm->match.dl_dst, flow->dl_dst, sizeof ofm->match.dl_dst); + ofm->match.dl_vlan = flow->dl_vlan; + ofm->match.dl_type = flow->dl_type; + ofm->match.nw_src = flow->nw_src; + ofm->match.nw_dst = flow->nw_dst; + ofm->match.nw_proto = flow->nw_proto; + ofm->match.tp_src = flow->tp_src; + ofm->match.tp_dst = flow->tp_dst; + ofm->command = htons(OFPFC_ADD); + ofm->max_idle = htons(60); + ofm->buffer_id = htonl(buffer_id); + ofm->group_id = htonl(0); + ofm->actions[0].type = htons(OFPAT_OUTPUT); + ofm->actions[0].arg.output.max_len = htons(0); + ofm->actions[0].arg.output.port = htons(out_port); + return out; +} + +struct buffer * +make_unbuffered_packet_out(const struct buffer *packet, + uint16_t in_port, uint16_t out_port) +{ + struct ofp_packet_out *opo; + size_t size = sizeof *opo + packet->size; + struct buffer *out = buffer_new(size); + opo = buffer_put_uninit(out, size); + memset(opo, 0, sizeof *opo); + opo->header.version = OFP_VERSION; + opo->header.type = OFPT_PACKET_OUT; + 
opo->header.length = htons(size); + opo->buffer_id = htonl(UINT32_MAX); + opo->in_port = htons(in_port); + opo->out_port = htons(out_port); + memcpy(opo->u.data, packet->data, packet->size); + return out; +} + +struct buffer * +make_buffered_packet_out(uint32_t buffer_id, + uint16_t in_port, uint16_t out_port) +{ + struct ofp_packet_out *opo; + size_t size = sizeof *opo + sizeof opo->u.actions[0]; + struct buffer *out = buffer_new(size); + opo = buffer_put_uninit(out, size); + memset(opo, 0, size); + opo->header.version = OFP_VERSION; + opo->header.type = OFPT_PACKET_OUT; + opo->header.length = htons(size); + opo->buffer_id = htonl(buffer_id); + opo->in_port = htons(in_port); + opo->out_port = htons(out_port); + opo->u.actions[0].type = htons(OFPAT_OUTPUT); + opo->u.actions[0].arg.output.max_len = htons(0); + opo->u.actions[0].arg.output.port = htons(out_port); + return out; +} + diff --git a/lib/vlog-socket.c b/lib/vlog-socket.c new file mode 100644 index 00000000..b3c2a285 --- /dev/null +++ b/lib/vlog-socket.c @@ -0,0 +1,504 @@ +/* Copyright (C) 2008 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vlog-socket.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fatal-signal.h" +#include "util.h" +#include "vlog.h" + +#ifndef SCM_CREDENTIALS +#include +#endif +

static int make_unix_socket(bool nonblock, bool passcred,
                            const char *bind_path, const char *connect_path);

/* Server for Vlog control connection. */
struct vlog_server {
    char *path;                 /* Path of the bound Unix socket (owned). */
    int fd;                     /* Datagram socket bound to 'path'. */
};

/* Start listening for connections from clients and processing their
 * requests.  'path' may be:
 *
 *      - NULL, in which case the default socket path is used.  (Only one
 *        vlog_server per process can use the default path.)
 *
 *      - A name that does not start with '/', in which case it is appended to
 *        the default socket path.
 *
 *      - An absolute path (starting with '/') that gives the exact name of
 *        the Unix domain socket to listen on.
 *
 * The default socket path is "/tmp/vlogs.PID".
 *
 * Returns 0 if successful, otherwise a positive errno value.  If successful,
 * sets '*serverp' to the new vlog_server, otherwise to NULL. */
int
vlog_server_listen(const char *path, struct vlog_server **serverp)
{
    struct vlog_server *server = xmalloc(sizeof *server);

    if (path && path[0] == '/') {
        server->path = xstrdup(path);
    } else {
        server->path = xasprintf("/tmp/vlogs.%ld%s",
                                 (long int) getpid(), path ? path : "");
    }

    server->fd = make_unix_socket(true, true, server->path, NULL);
    if (server->fd < 0) {
        /* make_unix_socket() returns a negative errno value.  Convert it to
         * the positive value this function documents, and do so before
         * 'server' is freed: the old code read server->fd after free() and
         * returned the negative value. */
        int error = -server->fd;

        fprintf(stderr, "Could not initialize vlog configuration socket: %s\n",
                strerror(error));
        free(server->path);
        free(server);
        *serverp = NULL;
        return error;
    }
    *serverp = server;
    return 0;
}

/* Destroys 'server' and stops listening for connections.  Also removes the
 * socket file.  A null 'server' is a no-op. */
void
vlog_server_close(struct vlog_server *server)
{
    if (server) {
        close(server->fd);
        unlink(server->path);
        fatal_signal_remove_file_to_unlink(server->path);
        free(server->path);
        free(server);
    }
}

/* Returns the fd used by 'server'.  The caller can poll this fd (POLLIN) to
 * determine when to call vlog_server_poll(). */
int
vlog_server_get_fd(const struct vlog_server *server)
{
    return server->fd;
}

/* Receives one datagram from 'server' into 'cmd_buf' (at most
 * 'cmd_buf_size' - 1 bytes, always null-terminated) and checks that the
 * sender is root or the user running this process.  The sender's address is
 * stored in '*un' and its length in '*un_len'.
 *
 * Returns 0 if a message from an acceptable sender was received, a positive
 * errno value if receiving failed, or -1 if a message was received but must
 * be ignored.
 *
 * With SCM_CREDENTIALS the check uses the credentials control message.
 * Without it the check falls back to stat() on the sender's socket file,
 * which must be a socket, less than 30 seconds old, and owned by an
 * acceptable uid. */
static int
recv_with_creds(const struct vlog_server *server,
                char *cmd_buf, size_t cmd_buf_size,
                struct sockaddr_un *un, socklen_t *un_len)
{
#ifdef SCM_CREDENTIALS
    /* Read a message and control messages from 'fd'. */
    char cred_buf[CMSG_SPACE(sizeof(struct ucred))];
    ssize_t n;
    struct iovec iov;
    struct msghdr msg;
    struct ucred *cred;
    struct cmsghdr *cmsg;

    iov.iov_base = cmd_buf;
    iov.iov_len = cmd_buf_size - 1;

    memset(&msg, 0, sizeof msg);
    msg.msg_name = un;
    msg.msg_namelen = sizeof *un;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = cred_buf;
    msg.msg_controllen = sizeof cred_buf;

    n = recvmsg(server->fd, &msg, 0);
    *un_len = msg.msg_namelen;
    if (n < 0) {
        return errno;
    }
    cmd_buf[n] = '\0';

    /* Ensure that the message has credentials ensuring that it was sent
     * from the same user who started us, or by root. */
    cred = NULL;
    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
         cmsg = CMSG_NXTHDR(&msg, cmsg)) {
        if (cmsg->cmsg_level == SOL_SOCKET
            && cmsg->cmsg_type == SCM_CREDENTIALS) {
            cred = (struct ucred *) CMSG_DATA(cmsg);
        } else if (cmsg->cmsg_level == SOL_SOCKET
                   && cmsg->cmsg_type == SCM_RIGHTS) {
            /* Anyone can send us fds.  If we don't close them, then that's
             * a DoS: the sender can overflow our fd table. */
            int *fds = (int *) CMSG_DATA(cmsg);
            size_t n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof *fds;
            size_t i;
            for (i = 0; i < n_fds; i++) {
                close(fds[i]);
            }
        }
    }
    if (!cred) {
        fprintf(stderr, "vlog: config message lacks credentials\n");
        return -1;
    } else if (cred->uid && cred->uid != getuid()) {
        fprintf(stderr, "vlog: config message uid=%ld is not 0 or %ld\n",
                (long int) cred->uid, (long int) getuid());
        return -1;
    }

    return 0;
#else /* !SCM_CREDENTIALS */
    socklen_t len;
    size_t path_len;
    ssize_t n;
    struct stat s;
    time_t recent;

    /* Receive a message. */
    len = sizeof *un;
    n = recvfrom(server->fd, cmd_buf, cmd_buf_size - 1, 0,
                 (struct sockaddr *) un, &len);
    *un_len = len;
    if (n < 0) {
        return errno;
    }
    cmd_buf[n] = '\0';

    /* Null-terminate the sender's path.  The reported address length can be
     * shorter than the sun_path offset (sender not bound to a path) or cover
     * all of sun_path, so clamp the index instead of trusting it. */
    if (len < offsetof(struct sockaddr_un, sun_path)) {
        path_len = 0;
    } else {
        path_len = len - offsetof(struct sockaddr_un, sun_path);
        if (path_len > sizeof un->sun_path - 1) {
            path_len = sizeof un->sun_path - 1;
        }
    }
    un->sun_path[path_len] = '\0';
    if (stat(un->sun_path, &s) < 0) {
        fprintf(stderr, "vlog: config message from inaccessible socket: %s\n",
                strerror(errno));
        return -1;
    }
    if (!S_ISSOCK(s.st_mode)) {
        fprintf(stderr, "vlog: config message not from a socket\n");
        return -1;
    }
    recent = time(0) - 30;
    if (s.st_atime < recent || s.st_ctime < recent || s.st_mtime < recent) {
        fprintf(stderr, "vlog: config socket too old\n");
        return -1;
    }
    if (s.st_uid && s.st_uid != getuid()) {
        fprintf(stderr, "vlog: config message uid=%ld is not 0 or %ld\n",
                (long int) s.st_uid, (long int) getuid());
        return -1;
    }
    return 0;
#endif /* !SCM_CREDENTIALS */
}

/* Processes incoming requests for 'server'.
*/ +void +vlog_server_poll(struct vlog_server *server) +{ + for (;;) { + char cmd_buf[512]; + struct sockaddr_un un; + socklen_t un_len; + char *reply; + int error; + + error = recv_with_creds(server, cmd_buf, sizeof cmd_buf, &un, &un_len); + if (error > 0) { + if (error != EAGAIN && error != EWOULDBLOCK) { + fprintf(stderr, "vlog: reading configuration socket: %s", + strerror(errno)); + } + return; + } else if (error < 0) { + continue; + } + + /* Process message and send reply. */ + if (!strncmp(cmd_buf, "set ", 4)) { + char *msg = vlog_set_levels_from_string(cmd_buf + 4); + reply = msg ? msg : xstrdup("ack"); + } else if (!strcmp(cmd_buf, "list")) { + reply = vlog_get_levels(); + } else { + reply = xstrdup("nak"); + } + sendto(server->fd, reply, strlen(reply), 0, + (struct sockaddr*) &un, un_len); + free(reply); + } +} + +/* Client for Vlog control connection. */ + +struct vlog_client { + char *connect_path; + char *bind_path; + int fd; +}; + +/* Connects to a Vlog server socket. If 'path' does not start with '/', then + * it start with a PID as a string. If a non-null, non-absolute name was + * passed to Vlog_server_socket::listen(), then it must follow the PID in + * 'path'. If 'path' starts with '/', then it must be an absolute path that + * gives the exact name of the Unix domain socket to connect to. + * + * Returns 0 if successful, otherwise a positive errno value. If successful, + * sets '*clientp' to the new vlog_client, otherwise to NULL. */ +int +vlog_client_connect(const char *path, struct vlog_client **clientp) +{ + struct vlog_client *client; + int fd; + + client = xmalloc(sizeof *client); + client->connect_path = (path[0] == '/' + ? 
xstrdup(path)
+                            : xasprintf("/tmp/vlogs.%s", path));
+
+    client->bind_path = xasprintf("/tmp/vlog.%ld", (long int) getpid());
+    fd = make_unix_socket(false, false,
+                          client->bind_path, client->connect_path);
+
+    if (fd >= 0) {
+        client->fd = fd;
+        *clientp = client;
+        return 0;
+    } else {
+        /* NOTE(review): make_unix_socket() reports failure as a negative
+         * errno value in 'fd'; 'errno' itself may have been overwritten by
+         * the time it is read below.  Consider returning -fd instead. */
+        free(client->connect_path);
+        free(client->bind_path);
+        free(client);
+        *clientp = NULL;
+        return errno;
+    }
+}
+
+/* Destroys 'client': unlinks its bound socket file, closes its socket, and
+ * frees its memory.  Does nothing if 'client' is null. */
+void
+vlog_client_close(struct vlog_client *client)
+{
+    if (client) {
+        unlink(client->bind_path);
+        fatal_signal_remove_file_to_unlink(client->bind_path);
+        free(client->bind_path);
+        free(client->connect_path);
+        close(client->fd);
+        free(client);
+    }
+}
+
+/* Sends 'request' to the server socket that 'client' is connected to.  Returns
+ * 0 if successful, otherwise a positive errno value.  Returns ENOBUFS if the
+ * socket accepted fewer bytes than the length of 'request'. */
+int
+vlog_client_send(struct vlog_client *client, const char *request)
+{
+    const size_t request_len = strlen(request);
+#ifdef SCM_CREDENTIALS
+    struct ucred cred;
+    struct iovec iov;
+    char buf[CMSG_SPACE(sizeof cred)];
+    struct msghdr msg;
+    struct cmsghdr *cmsg;
+    ssize_t nbytes;
+
+    /* Our own pid/uid/gid travel with the request as an SCM_CREDENTIALS
+     * control message. */
+    cred.pid = getpid();
+    cred.uid = getuid();
+    cred.gid = getgid();
+
+    iov.iov_base = (void *) request;
+    iov.iov_len = request_len;
+
+    memset(&msg, 0, sizeof msg);
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+    msg.msg_control = buf;
+    msg.msg_controllen = sizeof buf;
+
+    cmsg = CMSG_FIRSTHDR(&msg);
+    cmsg->cmsg_level = SOL_SOCKET;
+    cmsg->cmsg_type = SCM_CREDENTIALS;
+    cmsg->cmsg_len = CMSG_LEN(sizeof cred);
+    memcpy(CMSG_DATA(cmsg), &cred, sizeof cred);
+    msg.msg_controllen = cmsg->cmsg_len;
+
+    nbytes = sendmsg(client->fd, &msg, 0);
+#else /* !SCM_CREDENTIALS */
+    ssize_t nbytes = send(client->fd, request, request_len, 0);
+#endif /* !SCM_CREDENTIALS */
+    /* Only a negative return is a failure.  A return of 0 is a successful
+     * send of an empty request and must not be reported through a stale
+     * 'errno'. */
+    if (nbytes < 0) {
+        return errno;
+    }
+    return (size_t) nbytes == request_len ? 0 : ENOBUFS;
+}
+
+/* Attempts to receive a response from the server socket that 'client' is
+ * connected to.  Waits up to 1000 ms for the socket to become readable.
+ * Returns 0 if successful, otherwise a positive errno value (ETIMEDOUT if
+ * nothing arrived in time).  If successful, sets '*reply' to the reply, which
+ * the caller must free, otherwise to NULL. */
+int
+vlog_client_recv(struct vlog_client *client, char **reply)
+{
+    struct pollfd pfd;
+    int nfds;
+    char buffer[65536];
+    ssize_t nbytes;
+
+    *reply = NULL;
+
+    pfd.fd = client->fd;
+    pfd.events = POLLIN;
+    nfds = poll(&pfd, 1, 1000);
+    if (nfds == 0) {
+        return ETIMEDOUT;
+    } else if (nfds < 0) {
+        return errno;
+    }
+
+    /* One byte of 'buffer' is reserved for the terminating null. */
+    nbytes = read(client->fd, buffer, sizeof buffer - 1);
+    if (nbytes < 0) {
+        return errno;
+    } else {
+        buffer[nbytes] = '\0';
+        *reply = xstrdup(buffer);
+        return 0;
+    }
+}
+
+/* Sends 'request' to the server socket and waits for a reply.  Returns 0 if
+ * successful, otherwise a positive errno value.  If successful, sets
+ * '*reply' to the reply, which the caller must free, otherwise to NULL. */
+int
+vlog_client_transact(struct vlog_client *client,
+                     const char *request, char **reply)
+{
+    int i;
+
+    /* Retry up to 3 times.  Only a receive timeout triggers a retry; any
+     * other outcome, success included, is returned immediately. */
+    for (i = 0; i < 3; ++i) {
+        int error = vlog_client_send(client, request);
+        if (error) {
+            *reply = NULL;
+            return error;
+        }
+        error = vlog_client_recv(client, reply);
+        if (error != ETIMEDOUT) {
+            return error;
+        }
+    }
+    *reply = NULL;
+    return ETIMEDOUT;
+}
+
+/* Returns the path of the server socket to which 'client' is connected.  The
+ * caller must not modify or free the returned string. */
+const char *
+vlog_client_target(const struct vlog_client *client)
+{
+    return client->connect_path;
+}
+
+/* Helper functions. */
+
+/* Stores in '*un' a sockaddr_un that refers to file 'name'.  Stores in
+ * '*un_len' the size of the sockaddr_un.  A 'name' too long for 'sun_path' is
+ * silently truncated. */
+static void
+make_sockaddr_un(const char *name, struct sockaddr_un *un, socklen_t *un_len)
+{
+    un->sun_family = AF_UNIX;
+    strncpy(un->sun_path, name, sizeof un->sun_path);
+    /* strncpy() does not null-terminate on truncation, so do it here. */
+    un->sun_path[sizeof un->sun_path - 1] = '\0';
+    *un_len = (offsetof(struct sockaddr_un, sun_path)
+               + strlen (un->sun_path) + 1);
+}
+
+/* Creates a Unix domain datagram socket that is bound to '*bind_path' (if
+ * 'bind_path' is non-null) and connected to '*connect_path' (if 'connect_path'
+ * is non-null).  If 'nonblock' is true, the socket is made non-blocking.  If
+ * 'passcred' is true, the socket is configured to receive SCM_CREDENTIALS
+ * control messages.
+ *
+ * Returns the socket's fd if successful, otherwise a negative errno value. */
+static int
+make_unix_socket(bool nonblock, bool passcred UNUSED,
+                 const char *bind_path, const char *connect_path)
+{
+    int error;
+    int fd;
+
+    fd = socket(PF_UNIX, SOCK_DGRAM, 0);
+    if (fd < 0) {
+        return -errno;
+    }
+
+    if (nonblock) {
+        int flags = fcntl(fd, F_GETFL, 0);
+        if (flags == -1) {
+            goto error;
+        }
+        if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
+            goto error;
+        }
+    }
+
+    if (bind_path) {
+        struct sockaddr_un un;
+        socklen_t un_len;
+        make_sockaddr_un(bind_path, &un, &un_len);
+        /* Remove any stale socket file; failure here is only reported. */
+        if (unlink(un.sun_path) && errno != ENOENT) {
+            fprintf(stderr, "unlinking \"%s\": %s\n",
+                    un.sun_path, strerror(errno));
+        }
+        fatal_signal_add_file_to_unlink(bind_path);
+        if (bind(fd, (struct sockaddr *) &un, un_len)
+            || fchmod(fd, S_IRWXU)) {
+            goto error;
+        }
+    }
+
+    if (connect_path) {
+        struct sockaddr_un un;
+        socklen_t un_len;
+        make_sockaddr_un(connect_path, &un, &un_len);
+        if (connect(fd, (struct sockaddr *) &un, un_len)) {
+            goto error;
+        }
+    }
+
+#ifdef SCM_CREDENTIALS
+    if (passcred) {
+        int enable = 1;
+        if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &enable, sizeof(enable))) {
+            goto error;
+        }
+    }
+#endif
+
+    return fd;
+
+error:
+    /* Capture errno before any cleanup call gets a chance to overwrite it. */
+    error = errno;
+    if (bind_path) {
+        fatal_signal_remove_file_to_unlink(bind_path);
+    }
+    close(fd);
+
return -error; +} diff --git a/lib/vlog.c b/lib/vlog.c new file mode 100644 index 00000000..66d57101 --- /dev/null +++ b/lib/vlog.c @@ -0,0 +1,309 @@ +/* Copyright (C) 2007, 2008 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vlog.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "dynamic-string.h" +#include "util.h" + +/* Name for each logging level. */ +static const char *level_names[VLL_N_LEVELS] = { + [VLL_EMER] = "EMER", + [VLL_ERR] = "ERR", + [VLL_WARN] = "WARN", + [VLL_DBG] = "DBG", +}; + +/* Name for each logging facility. 
*/
+static const char *facility_names[VLF_N_FACILITIES] = {
+    [VLF_CONSOLE] = "console",
+    [VLF_SYSLOG] = "syslog",
+};
+
+/* Name for each logging module, produced by stringizing every entry of the
+ * VLOG_MODULES list. */
+static const char *module_names[VLM_N_MODULES] = {
+#define VLOG_MODULE(NAME) #NAME,
+    VLOG_MODULES
+#undef VLOG_MODULE
+};
+
+/* Current logging level for each (module, facility) pair. */
+static int levels[VLM_N_MODULES][VLF_N_FACILITIES];
+
+/* Searches the 'n_names' in 'names'.  Returns the index of a match for
+ * 'target', or 'n_names' if no name matches.  The comparison ignores case. */
+static size_t
+search_name_array(const char *target, const char **names, size_t n_names)
+{
+    size_t i;
+
+    for (i = 0; i < n_names; i++) {
+        assert(names[i]);
+        if (!strcasecmp(names[i], target)) {
+            break;
+        }
+    }
+    return i;
+}
+
+/* Returns the name for logging level 'level'. */
+const char *
+vlog_get_level_name(enum vlog_level level)
+{
+    assert(level < VLL_N_LEVELS);
+    return level_names[level];
+}
+
+/* Returns the logging level with the given 'name', or VLL_N_LEVELS if 'name'
+ * is not the name of a logging level. */
+enum vlog_level
+vlog_get_level_val(const char *name)
+{
+    return search_name_array(name, level_names, ARRAY_SIZE(level_names));
+}
+
+/* Returns the name for logging facility 'facility'. */
+const char *
+vlog_get_facility_name(enum vlog_facility facility)
+{
+    assert(facility < VLF_N_FACILITIES);
+    return facility_names[facility];
+}
+
+/* Returns the logging facility named 'name', or VLF_N_FACILITIES if 'name' is
+ * not the name of a logging facility. */
+enum vlog_facility
+vlog_get_facility_val(const char *name)
+{
+    return search_name_array(name, facility_names, ARRAY_SIZE(facility_names));
+}
+
+/* Returns the name for logging module 'module'. */
+const char *vlog_get_module_name(enum vlog_module module)
+{
+    assert(module < VLM_N_MODULES);
+    return module_names[module];
+}
+
+/* Returns the logging module named 'name', or VLM_N_MODULES if 'name' is not
+ * the name of a logging module. */
+enum vlog_module
+vlog_get_module_val(const char *name)
+{
+    return search_name_array(name, module_names, ARRAY_SIZE(module_names));
+}
+
+/* Returns the current logging level for the given 'module' and 'facility'. */
+enum vlog_level
+vlog_get_level(enum vlog_module module, enum vlog_facility facility)
+{
+    assert(module < VLM_N_MODULES);
+    assert(facility < VLF_N_FACILITIES);
+    return levels[module][facility];
+}
+
+/* Sets the level of 'facility' to 'level' for 'module', or for every module
+ * if 'module' is VLM_ANY_MODULE. */
+static void
+set_facility_level(enum vlog_facility facility, enum vlog_module module,
+                   enum vlog_level level)
+{
+    assert(facility >= 0 && facility < VLF_N_FACILITIES);
+    assert(level < VLL_N_LEVELS);
+
+    if (module == VLM_ANY_MODULE) {
+        for (module = 0; module < VLM_N_MODULES; module++) {
+            levels[module][facility] = level;
+        }
+    } else {
+        /* A specific module indexes 'levels' directly, so it must be valid. */
+        assert(module < VLM_N_MODULES);
+        levels[module][facility] = level;
+    }
+}
+
+/* Sets the logging level for the given 'module' and 'facility' to 'level'.
+ * 'module' may be VLM_ANY_MODULE and 'facility' may be VLF_ANY_FACILITY to
+ * apply the level to all modules or all facilities, respectively. */
+void
+vlog_set_levels(enum vlog_module module, enum vlog_facility facility,
+                enum vlog_level level)
+{
+    assert(facility < VLF_N_FACILITIES || facility == VLF_ANY_FACILITY);
+    if (facility == VLF_ANY_FACILITY) {
+        for (facility = 0; facility < VLF_N_FACILITIES; facility++) {
+            set_facility_level(facility, module, level);
+        }
+    } else {
+        set_facility_level(facility, module, level);
+    }
+}
+
+/* Set debugging levels:
+ *
+ *  mod:facility:level mod2:facility:level ...
+ *
+ * "ANY" may be given as the module or the facility.  Return null if
+ * successful, otherwise an error message that the caller must free().
+ */
+char *
+vlog_set_levels_from_string(const char *s_)
+{
+    char *save_ptr;
+    char *s = xstrdup(s_);
+    char *module, *level, *facility;
+
+    for (module = strtok_r(s, ": \t", &save_ptr); module != NULL;
+         module = strtok_r(NULL, ": \t", &save_ptr)) {
+        enum vlog_module e_module;
+        enum vlog_level e_level;
+        enum vlog_facility e_facility;
+
+        facility = strtok_r(NULL, ":", &save_ptr);
+        /* The level is the last field of a spec, so it ends at whitespace as
+         * well as at ':'.  Otherwise it would swallow the next spec's module
+         * name. */
+        level = strtok_r(NULL, ": \t", &save_ptr);
+        if (level == NULL || facility == NULL) {
+            free(s);
+            return xstrdup("syntax error in level string");
+        }
+
+        if (!strcmp(module, "ANY")) {
+            e_module = VLM_ANY_MODULE;
+        } else {
+            e_module = vlog_get_module_val(module);
+            if (e_module >= VLM_N_MODULES) {
+                char *msg = xasprintf("unknown module \"%s\"", module);
+                free(s);
+                return msg;
+            }
+        }
+
+        if (!strcmp(facility, "ANY")) {
+            e_facility = VLF_ANY_FACILITY;
+        } else {
+            e_facility = vlog_get_facility_val(facility);
+            if (e_facility >= VLF_N_FACILITIES) {
+                char *msg = xasprintf("unknown facility \"%s\"", facility);
+                free(s);
+                return msg;
+            }
+        }
+
+        e_level = vlog_get_level_val(level);
+        if (e_level >= VLL_N_LEVELS) {
+            char *msg = xasprintf("unknown level \"%s\"", level);
+            free(s);
+            return msg;
+        }
+
+        vlog_set_levels(e_module, e_facility, e_level);
+    }
+    free(s);
+    return NULL;
+}
+
+/* If 'arg' is null, configure maximum verbosity.  Otherwise, sets
+ * configuration according to 'arg' (see vlog_set_levels_from_string()).  If
+ * parsing fails, default to maximum verbosity. */
+void
+vlog_set_verbosity(const char *arg)
+{
+    /* vlog_set_levels_from_string() returns null on success and an allocated
+     * error message on failure. */
+    char *error = arg ? vlog_set_levels_from_string(arg) : NULL;
+
+    if (!arg || error) {
+        vlog_set_levels(VLM_ANY_MODULE, VLF_CONSOLE, VLL_DBG);
+    }
+    free(error);
+}
+
+/* Initializes the logging subsystem. */
+void
+vlog_init(void)
+{
+    openlog(program_name, LOG_NDELAY, LOG_DAEMON);
+    vlog_set_levels(VLM_ANY_MODULE, VLF_CONSOLE, VLL_WARN);
+}
+
+/* Closes the logging subsystem. */
+void
+vlog_exit(void)
+{
+    closelog();
+}
+
+/* Returns a string that lists the current console and syslog logging level
+ * for each module.  The string comes from a dynamic string; presumably the
+ * caller must free() it -- TODO confirm against ds_cstr(). */
+char *
+vlog_get_levels(void)
+{
+    struct ds s = DS_EMPTY_INITIALIZER;
+    enum vlog_module module;
+
+    /* NOTE(review): the column padding inside the three format strings below
+     * looks collapsed; verify the alignment against upstream. */
+    ds_put_format(&s, " console syslog\n");
+    ds_put_format(&s, " ------- ------\n");
+
+    for (module = 0; module < VLM_N_MODULES; module++) {
+        ds_put_format(&s, "%-16s %4s %4s\n",
+           vlog_get_module_name(module),
+           vlog_get_level_name(vlog_get_level(module, VLF_CONSOLE)),
+           vlog_get_level_name(vlog_get_level(module, VLF_SYSLOG)));
+    }
+
+    return ds_cstr(&s);
+}
+
+/* Writes 'message' to the log at the given 'level' and as coming from the
+ * given 'module'.  'message' is a printf()-style format string.  The message
+ * goes to each facility whose configured level for 'module' is at least
+ * 'level'; output longer than the internal buffer is truncated. */
+void
+vlog(enum vlog_module module, enum vlog_level level, const char *message, ...)
+{
+    bool log_console;
+    bool log_syslog;
+
+    /* Validate before indexing 'levels' and the name tables. */
+    assert(module < VLM_N_MODULES);
+    assert(level < VLL_N_LEVELS);
+
+    log_console = levels[module][VLF_CONSOLE] >= level;
+    log_syslog = levels[module][VLF_SYSLOG] >= level;
+    if (log_console || log_syslog) {
+        static int msg_num;
+        const char *module_name = vlog_get_module_name(module);
+        const char *level_name = vlog_get_level_name(level);
+        va_list args;
+        char s[1024];
+        size_t len;
+        int n;
+
+        /* Prefix: message number, module, and level.  snprintf() returns the
+         * length it wanted to write, so clamp it to what fits in 's'. */
+        n = snprintf(s, sizeof s, "%05d|%s|%s:",
+                     ++msg_num, module_name, level_name);
+        if (n < 0) {
+            s[0] = '\0';
+            len = 0;
+        } else if ((size_t) n >= sizeof s) {
+            len = sizeof s - 1;
+        } else {
+            len = n;
+        }
+
+        va_start(args, message);
+        n = vsnprintf(s + len, sizeof s - len, message, args);
+        va_end(args);
+        if (n > 0) {
+            len += n;
+            if (len >= sizeof s) {
+                len = sizeof s - 1;
+            }
+        }
+
+        /* Drop one trailing new-line; each facility adds its own. */
+        if (len > 0 && s[len - 1] == '\n') {
+            s[len - 1] = '\0';
+        }
+
+        if (log_console) {
+            fprintf(stderr, "%s\n", s);
+        }
+
+        if (log_syslog) {
+            static const int syslog_levels[VLL_N_LEVELS] = {
+                [VLL_EMER] = LOG_EMERG,
+                [VLL_ERR] = LOG_ERR,
+                [VLL_WARN] = LOG_WARNING,
+                [VLL_DBG] = LOG_DEBUG,
+            };
+
+            syslog(syslog_levels[level], "%s", s);
+        }
+    }
+}
diff --git a/man/.gitignore b/man/.gitignore
new file mode 100644
index 00000000..b336cc7c
--- /dev/null
+++ b/man/.gitignore
@@ -0,0 +1,2 @@
+/Makefile
+/Makefile.in
diff --git a/man/Makefile.am b/man/Makefile.am
new file mode 100644
index 00000000..9bc48012
--- /dev/null
+++ b/man/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = man8
diff --git a/man/man1/.gitignore b/man/man1/.gitignore
new file mode 100644
index 00000000..b336cc7c
--- /dev/null
+++ b/man/man1/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/man/man8/.gitignore b/man/man8/.gitignore new file mode 100644 index 00000000..b336cc7c --- /dev/null +++ b/man/man8/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am new file mode 100644 index 00000000..0f28df8c --- /dev/null +++ b/man/man8/Makefile.am @@ -0,0 +1 @@ +dist_man_MANS = controller.8 dpctl.8 secchan.8 vlogconf.8 diff --git a/man/man8/controller.8 b/man/man8/controller.8 new file mode 100644 index 00000000..4af54807 --- /dev/null +++ b/man/man8/controller.8 @@ -0,0 +1,69 @@ +.TH controller 8 "December 2007" "OpenFlow" "OpenFlow Manual" + +.SH NAME +controller \- OpenFlow controller reference implementation + +.SH SYNOPSIS +.B controller +[OPTIONS] ptcp:[\fIPORT\fR] | nl:\fIDP_IDX\fR + +.SH DESCRIPTION +A sample OpenFlow controller which functions as an L2 MAC-learning +switch or hub. \fBcontroller\fR can manage a remote datapath through +a secure channel (see \fBsecchan(8)\fR). It can also connect directly +to a local datapath via Netlink. + +To connect to local datapath number \fIDP_IDX\fR (Linux only), specify +nl:\fIDP_IDX\fR on the command line. To listen for TCP connections +from remote datapaths on port \fIPORT\fR, specify ptcp:[\fIPORT\fR]. +(\fIPORT\fR defaults to 975 if omitted.) + +\fBcontroller\fR can control multiple datapaths. Multiple ptcp: or +nl: arguments may be given. Multiple TCP clients may connect to a +single TCP server port. + +.SH OPTIONS +.TP +.BR \-H ", " \-\^\-hub +By default, the controller acts as an L2 MAC-learning switch. This +option changes its behavior to that of a hub that floods packets on +all ports. + +.TP +.BR \-h ", " \-\^\-help +Prints a brief help message to the console. + +.TP +.BR \-n ", " \-\^\-noflow +This is similar to the \fB\-\^\-hub\fR option, but does not add a +flow entry in the switch. 
This causes all traffic seen by the switch +to be passed to the controller before being sent out all ports. + +.TP +.BR \-v ", " \-\^\-verbose +Prints debug messages to the console. + +.TP +.BR \-V ", " \-\^\-version +Prints version information to the console. + +.SH EXAMPLES + +.TP +To connect directly to local datapath 0 over Netlink (Linux only): + +.B % controller nl:0 + +.TP +To bind locally to port 975 (the default) and wait for incoming connections from secure channels: + +.B % controller ptcp: + +.SH "SEE ALSO" + +.BR dpctl (8), +.BR secchan (8), +.BR vlogconf (8) + +.SH BUGS +Currently \fBcontroller\fR does not support SSL. diff --git a/man/man8/dpctl.8 b/man/man8/dpctl.8 new file mode 100644 index 00000000..e030c6d4 --- /dev/null +++ b/man/man8/dpctl.8 @@ -0,0 +1,141 @@ +.TH dpctl 8 "December 2007" "OpenFlow" "OpenFlow Manual" + +.SH NAME +dpctl \- command line tool to administer OpenFlow datapaths + +.SH SYNOPSIS +.B dpctl +[OPTIONS] COMMAND [ARGS...] + +.SH DESCRIPTION +The +.B dpctl +program is a command line tool through which OpenFlow datapaths on the +local host can be created, deleted, modified, and monitored. A single +machine may host up to 32 datapaths (numbered 0 to 31). In most +situations, a machine hosts only one datapath. + +A newly created datapath is not associated with any of the +host's network interfaces and thus does not process any incoming +traffic. To intercept and process traffic on a given interface, the +interface must be explicitly added to a datapath through the +\fBaddif\fR command. + +.SH OPTIONS +.TP +.BR \-h ", " \-\^\-help +Prints a brief help message to the console. + +.TP +.BR \-v ", " \-\^\-verbose +Prints debug messages to the console. + +.TP +.BR \-V ", " \-\^\-version +Prints version information to the console. + +.SH COMMANDS +.TP +.BI adddp " DP_IDX" +Creates datapath numbered \fIDP_IDX\fR on the local host.
Will fail +if \fIDP_IDX\fR is not in the range 0 to 31, or if the datapath with +that number already exists on the host. + +.TP +.BI deldp " DP_IDX" +Deletes datapath \fIDP_IDX\fR on the local host. \fIDP_IDX\fR must be +an existing datapath. All of a datapath's interfaces must be +explicitly removed before the datapath can be deleted (see \fBdelif\fR +command). + +.TP +.BI show " DP_IDX" +Prints to the console information on datapath \fIDP_IDX\fR including +information on its flow tables and ports. + +.TP +.BI addif " DP_IDX INTERFACE" +Adds \fIINTERFACE\fR to the list of network interfaces datapath +\fIDP_IDX\fR monitors, where \fIDP_IDX\fR is the ID of an existing +datapath, and \fIINTERFACE\fR is the name of one of the host's +interfaces, e.g. \fBeth0\fR. Once an interface has been added +to a datapath, the datapath has complete ownership of the interface's +traffic and the interface appears silent to the rest of the system. + +.TP +.BI delif " DP_IDX INTERFACE" +Removes \fIINTERFACE\fR from the list of network interfaces datapath +\fIDP_IDX\fR monitors. + +.TP +.BI monitor " DP_IDX" +Prints to the console all OpenFlow packets sent by datapath +\fIDP_IDX\fR to its controller, where \fIDP_IDX\fR is the ID of an +existing datapath. + +.TP +.BI dump-tables " DP_IDX" +Prints to the console statistics for each of the flow tables used by +datapath \fIDP_IDX\fR, where \fIDP_IDX\fR is the ID of an existing +datapath. + +.TP +.BI dump-flows " DP_IDX TABLE_ID" +Prints to the console all flow entries in datapath \fIDP_IDX\fR's table +\fITABLE_ID\fR, where \fIDP_IDX\fR is the ID of an existing datapath, +and \fITABLE_ID\fR is the integer ID of one of the datapath's tables +as displayed in the output produced by \fBdump-tables\fR. 
+ +.SH EXAMPLES + +A typical dpctl command sequence: +.nf +.TP +Create datapath numbered 0: + +.B % dpctl adddp 0 + +.TP +Add two interfaces to the new datapath: + +.B % dpctl addif 0 eth0 +.B % dpctl addif 0 eth1 + +.TP +Monitor traffic received by the datapath (exit with control-C): + +.B % dpctl monitor 0 + + +.TP +View the datapath's table stats after some traffic has passed through: + +.B % dpctl dump-tables 0 + +.TP +View the flow entries in one of the datapath's tables (the command shown is for table 1). This assumes that there is a running controller adding flows to the flow tables. + +.B % dpctl dump-flows 0 1 + +.TP +Remove interfaces from the datapath when finished: + +.B % dpctl delif 0 eth0 +.B % dpctl delif 0 eth1 + +.TP +Delete the datapath: + +.B % dpctl deldp 0 +.fi +.SH "SEE ALSO" + +.BR secchan (8), +.BR controller (8), +.BR vlogconf (8) + +.SH BUGS + +dump-flows currently only prints the first action of each flow. This is +a shortcoming in the module's Netlink flow query functionality and will +be addressed in future releases. diff --git a/man/man8/secchan.8 b/man/man8/secchan.8 new file mode 100644 index 00000000..3f783319 --- /dev/null +++ b/man/man8/secchan.8 @@ -0,0 +1,47 @@ +.TH secchan 8 "December 2007" "OpenFlow" "OpenFlow Manual" + +.SH NAME +secchan \- secure channel connecting an OpenFlow datapath to a controller + +.SH SYNOPSIS +.B secchan +[OPTIONS] nl:\fIDP_IDX\fR tcp:\fICONTROLLER_IP\fR[:\fICONTROLLER_TCP_PORT\fR] + +.SH DESCRIPTION +The \fBsecchan\fR program sets up a secure channel between a local +OpenFlow datapath and a remote controller. \fBsecchan\fR connects to +the datapath over netlink and to the controller over TCP, and then +proceeds to forward packets from one endpoint to the other.
+ +\fIDP_IDX\fR \- the ID of the local datapath to connect to + +\fICONTROLLER_IP\fR \- the controller's IP address + +\fICONTROLLER_TCP_PORT\fR \- the controller's TCP port to connect to \- defaults to 975 + +.SH OPTIONS +.TP +.BR \-h ", " \-\^\-help +Prints a brief help message to the console. + +.TP +.BR \-u ", " \-\^\-unreliable +Do not attempt to reconnect the channel if a connection drops. By +default, \fBsecchan\fR attempts to reconnect. + +.TP +.BR \-v ", " \-\^\-verbose +Prints debug messages to the console. + +.TP +.BR \-V ", " \-\^\-version +Prints version information to the console. + +.SH "SEE ALSO" + +.BR dpctl (8), +.BR controller (8), +.BR vlogconf (8) + +.SH BUGS +Currently \fBsecchan\fR does not support SSL. diff --git a/man/man8/vlogconf.8 b/man/man8/vlogconf.8 new file mode 100644 index 00000000..720a68cd --- /dev/null +++ b/man/man8/vlogconf.8 @@ -0,0 +1,45 @@ +.TH vlogconf 8 "December 2007" "OpenFlow" "OpenFlow Manual" + +.SH NAME +vlogconf \- configuration utility for OpenFlow logging in userspace + +.SH SYNOPSIS +\fBvlogconf\fR [OPTIONS] +.br +\fBvlogconf\fR \fIMODULE\fR \fIFACILITY\fR \fILEVEL\fR + +.SH DESCRIPTION +The \fBvlogconf\fR program configures the logging system used by the +OpenFlow userspace programs. The logging configuration may be modified +while OpenFlow programs are running. + +\fIMODULE\fR \- The module for which the logging level is being +modified. To see a list of supported modules, rerun \fBvlogconf\fR with +the \fB\-\^\-print\fR option. + +\fIFACILITY\fR \- The method of logging. Valid values are \fBSYSLOG\fR and +\fBCONSOLE\fR. + +\fILEVEL\fR \- The level with which the module should be logged. Valid +values are \fBDBG\fR (debug), \fBWARN\fR (warning), \fBERR\fR (error), +and \fBEMER\fR (emergency). + +.SH OPTIONS +.TP +\fB\-p\fR, \fB\-\^\-print\fR +Prints the current logging configuration. + +.TP +\fB\-a\fR, \fB\-\^\-all\fR \fIlevel\fR +Sets all modules and facilities to the specified level.
+ +.TP +\fB\-h\fR, \fB\-\^\-help\fR +Prints a brief help message to the console. + + +.SH "SEE ALSO" + +.BR dpctl (8), +.BR secchan (8), +.BR controller (8) diff --git a/secchan/.gitignore b/secchan/.gitignore new file mode 100644 index 00000000..b3cdd994 --- /dev/null +++ b/secchan/.gitignore @@ -0,0 +1,6 @@ +/Makefile +/Makefile.in +/controller-lite +/ctlpath-lite +/dpctl-lite +/secchan diff --git a/secchan/Makefile.am b/secchan/Makefile.am new file mode 100644 index 00000000..bfb0d262 --- /dev/null +++ b/secchan/Makefile.am @@ -0,0 +1,6 @@ +include ../Make.vars + +bin_PROGRAMS = secchan + +secchan_SOURCES = secchan.c +secchan_LDADD = ../lib/libopenflow.la diff --git a/secchan/secchan.c b/secchan/secchan.c new file mode 100644 index 00000000..b8a4c4c3 --- /dev/null +++ b/secchan/secchan.c @@ -0,0 +1,256 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+/* NOTE(review): the header names of the six bare #include directives below
+ * are missing from this copy of the patch; restore them from upstream. */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "buffer.h"
+#include "command-line.h"
+#include "compiler.h"
+#include "fault.h"
+#include "util.h"
+#include "vconn.h"
+#include "vlog-socket.h"
+#include "openflow.h"
+
+#include "vlog.h"
+#define THIS_MODULE VLM_secchan
+
+static void parse_options(int argc, char *argv[]);
+static void usage(void) NO_RETURN;
+
+/* Cleared by --unreliable: when false, a dropped or failed connection is
+ * fatal instead of being retried. */
+static bool reliable = true;
+
+/* One endpoint of the channel. */
+struct half {
+    const char *name;           /* Connection name from the command line. */
+    struct vconn *vconn;        /* Open connection, or null before connect. */
+    struct pollfd *pollfd;      /* This endpoint's slot in main()'s poll set. */
+    struct buffer *rxbuf;       /* Received message not yet sent to the peer. */
+};
+
+static void reconnect(struct half *);
+
+/* Relays messages between the two peers named on the command line, and
+ * services the vlog control socket, until a fatal error occurs. */
+int
+main(int argc, char *argv[])
+{
+    struct half halves[2];
+    struct pollfd pollfds[2 + 1];
+    struct vlog_server *vlog_server;
+    int retval;
+    int i;
+
+    set_program_name(argv[0]);
+    register_fault_handlers();
+    vlog_init();
+    parse_options(argc, argv);
+
+    if (argc - optind != 2) {
+        fatal(0, "exactly two peer arguments required; use --help for usage");
+    }
+
+    retval = vlog_server_listen(NULL, &vlog_server);
+    if (retval) {
+        fatal(retval, "Could not listen for vlog connections");
+    }
+
+    for (i = 0; i < 2; i++) {
+        halves[i].name = argv[optind + i];
+        halves[i].vconn = NULL;
+        halves[i].pollfd = &pollfds[i];
+        halves[i].rxbuf = NULL;
+        reconnect(&halves[i]);
+    }
+    for (;;) {
+        /* Wait until there's something to do. */
+        for (i = 0; i < 2; i++) {
+            struct half *this = &halves[i];
+            struct half *peer = &halves[!i];
+            int want = 0;
+            if (peer->rxbuf) {
+                want |= WANT_SEND;
+            }
+            if (!this->rxbuf) {
+                want |= WANT_RECV;
+            }
+            this->pollfd->fd = -1;
+            this->pollfd->events = 0;
+            vconn_prepoll(this->vconn, want, this->pollfd);
+        }
+        if (vlog_server) {
+            pollfds[2].fd = vlog_server_get_fd(vlog_server);
+            pollfds[2].events = POLLIN;
+        }
+        do {
+            retval = poll(pollfds, 2 + (vlog_server != NULL), -1);
+        } while (retval < 0 && errno == EINTR);
+        if (retval <= 0) {
+            fatal(retval < 0 ? errno : 0, "poll");
+        }
+
+        /* Let each connection deal with any pending operations. */
+        for (i = 0; i < 2; i++) {
+            struct half *this = &halves[i];
+            vconn_postpoll(this->vconn, &this->pollfd->revents);
+            if (this->pollfd->revents & POLLERR) {
+                this->pollfd->revents |= POLLIN | POLLOUT;
+            }
+        }
+        if (vlog_server && pollfds[2].revents) {
+            vlog_server_poll(vlog_server);
+        }
+
+        /* Do as much work as we can without waiting. */
+        for (i = 0; i < 2; i++) {
+            struct half *this = &halves[i];
+            struct half *peer = &halves[!i];
+
+            if (this->pollfd->revents & POLLIN && !this->rxbuf) {
+                retval = vconn_recv(this->vconn, &this->rxbuf);
+                if (retval && retval != EAGAIN) {
+                    VLOG_DBG("%s: recv: closing connection: %s",
+                             this->name, strerror(retval));
+                    reconnect(this);
+                    break;
+                }
+            }
+
+            if (peer->pollfd->revents & POLLOUT && this->rxbuf) {
+                retval = vconn_send(peer->vconn, this->rxbuf);
+                if (!retval) {
+                    this->rxbuf = NULL;
+                } else if (retval != EAGAIN) {
+                    VLOG_DBG("%s: send: closing connection: %s",
+                             peer->name, strerror(retval));
+                    reconnect(peer);
+                    break;
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* Opens, or re-opens after a drop, the connection for 'this'.  Retries with a
+ * sleep that doubles from 1 up to 60 seconds until the connection opens.  In
+ * unreliable mode a dropped or failed connection is fatal instead. */
+static void
+reconnect(struct half *this)
+{
+    int backoff;
+
+    if (this->vconn != NULL) {
+        if (!reliable) {
+            fatal(0, "%s: connection dropped", this->name);
+        }
+
+        VLOG_WARN("%s: connection dropped, reconnecting", this->name);
+        vconn_close(this->vconn);
+        this->vconn = NULL;
+        buffer_delete(this->rxbuf);
+        this->rxbuf = NULL;
+    }
+    this->pollfd->revents = POLLIN | POLLOUT;
+
+    for (backoff = 1; ; backoff = MIN(backoff * 2, 60)) {
+        int retval = vconn_open(this->name, &this->vconn);
+        if (!retval) {
+            VLOG_WARN("%s: connected", this->name);
+            if (vconn_is_passive(this->vconn)) {
+                fatal(0, "%s: passive vconn not supported in control path",
+                      this->name);
+            }
+            return;
+        }
+
+        if (!reliable) {
+            fatal(0, "%s: connection failed", this->name);
+        }
+        /* vconn_open() reports its error through its return value, so log
+         * that rather than whatever 'errno' happens to hold. */
+        VLOG_WARN("%s: connection failed (%s), reconnecting",
+                  this->name, strerror(retval));
+        sleep(backoff);
+    }
+}
+
+/* Parses the command-line options in 'argv', leaving 'optind' at the first
+ * non-option argument. */
+static void
+parse_options(int argc, char *argv[])
+{
+    static struct option long_options[] = {
+        {"unreliable", no_argument, 0, 'u'},
+        {"verbose", optional_argument, 0, 'v'},
+        {"help", no_argument, 0, 'h'},
+        {"version", no_argument, 0, 'V'},
+        {0, 0, 0, 0},
+    };
+    char *short_options = long_options_to_short_options(long_options);
+
+    for (;;) {
+        int indexptr;
+        int c;
+
+        c = getopt_long(argc, argv, short_options, long_options, &indexptr);
+        if (c == -1) {
+            break;
+        }
+
+        switch (c) {
+        case 'u':
+            reliable = false;
+            break;
+
+        case 'h':
+            /* usage() is declared NO_RETURN, so there is no fall-through. */
+            usage();
+
+        case 'V':
+            printf("%s "VERSION" compiled "__DATE__" "__TIME__"\n", argv[0]);
+            exit(EXIT_SUCCESS);
+
+        case 'v':
+            vlog_set_verbosity(optarg);
+            break;
+
+        case '?':
+            exit(EXIT_FAILURE);
+
+        default:
+            abort();
+        }
+    }
+    free(short_options);
+}
+
+/* Prints a help message to stdout and exits successfully. */
+static void
+usage(void)
+{
+    /* NOTE(review): the column padding inside the option lines below looks
+     * collapsed; verify the alignment against upstream. */
+    printf("%s: Secure Channel\n"
+           "usage: %s [OPTIONS] nl:DP_ID tcp:HOST[:PORT]\n"
+           "\nConnects to local datapath DP_ID via Netlink and \n"
+           "controller on HOST via TCP to PORT (default: %d).\n"
+           "\nNetworking options:\n"
+           " -u, --unreliable do not reconnect after connections drop\n"
+           "\nOther options:\n"
+           " -v, --verbose set maximum verbosity level\n"
+           " -h, --help display this help message\n"
+           " -V, --version display version information\n",
+           program_name, program_name, OFP_TCP_PORT);
+    exit(EXIT_SUCCESS);
+}
diff --git a/third-party/.gitignore b/third-party/.gitignore
new file mode 100644
index 00000000..b336cc7c
--- /dev/null
+++ b/third-party/.gitignore
@@ -0,0 +1,2 @@
+/Makefile
+/Makefile.in
diff --git a/third-party/Makefile.am b/third-party/Makefile.am
new file mode 100644
index 00000000..587a7e8c
--- /dev/null
+++ b/third-party/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST = README ofp-tcpdump.patch
diff --git a/third-party/README b/third-party/README
new file mode 100644
index 00000000..2621cdc3
--- /dev/null
+++ b/third-party/README
@@ -0,0 +1,35 @@
+This directory contains third-party software that may be useful for
+debugging.
+ +tcpdump +------- +The "ofp-tcpdump.patch" patch adds the ability to parse OpenFlow +messages to tcpdump. These instructions assume that tcpdump 3.9.8 +is going to be used, but it should work with other versions that are not +substantially different. To begin, download tcpdump and apply the +patch: + + wget http://www.tcpdump.org/release/tcpdump-3.9.8.tar.gz + tar xzf tcpdump-3.9.8.tar.gz + ln -s tcpdump-3.9.8 tcpdump + patch -p0 < ofp-tcpdump.patch + +Then build the new version of tcpdump: + + cd tcpdump + ./configure + make + +Clearly, tcpdump can only parse unencrypted packets, so you will need to +connect the controller and datapath using plain TCP. To look at the +traffic, tcpdump will be started in a manner similar to the following: + + sudo ./tcpdump -s0 -i eth0 port 975 + +The "-s0" flag indicates that tcpdump should capture the entire packet. +If the OpenFlow message is not received in its entirety, "[|openflow]" will +be printed instead of the OpenFlow message contents. + +The verbosity of the output may be increased by adding additional "-v" +flags. If "-vvv" is used, the raw OpenFlow data is also printed in +hex and ASCII. diff --git a/third-party/ofp-tcpdump.patch b/third-party/ofp-tcpdump.patch new file mode 100644 index 00000000..9ee241cf --- /dev/null +++ b/third-party/ofp-tcpdump.patch @@ -0,0 +1,119 @@ +diff -rNu tcpdump/interface.h tcpdump/interface.h +--- tcpdump/interface.h 2007-06-13 18:03:20.000000000 -0700 ++++ tcpdump/interface.h 2008-02-06 15:06:30.000000000 -0800 +@@ -148,7 +148,8 @@ + + extern const char *dnaddr_string(u_short); + +-extern void error(const char *, ...) ++#define error(fmt, args...) tcpdump_error(fmt, ## args) ++extern void tcpdump_error(const char *, ...) + __attribute__((noreturn, format (printf, 1, 2))); + extern void warning(const char *, ...) 
__attribute__ ((format (printf, 1, 2))); + +@@ -176,6 +177,7 @@ + extern void hex_print_with_offset(const char *, const u_char *, u_int, u_int); + extern void hex_print(const char *, const u_char *, u_int); + extern void telnet_print(const u_char *, u_int); ++extern void openflow_print(const u_char *, u_int); + extern int ether_encap_print(u_short, const u_char *, u_int, u_int, u_short *); + extern int llc_print(const u_char *, u_int, u_int, const u_char *, + const u_char *, u_short *); +diff -rNu tcpdump/Makefile.in tcpdump/Makefile.in +--- tcpdump/Makefile.in 2007-09-25 18:59:52.000000000 -0700 ++++ tcpdump/Makefile.in 2008-02-07 11:46:03.000000000 -0800 +@@ -49,10 +49,10 @@ + CFLAGS = $(CCOPT) $(DEFS) $(INCLS) + + # Standard LDFLAGS +-LDFLAGS = @LDFLAGS@ ++LDFLAGS = @LDFLAGS@ -L../../lib/.libs + + # Standard LIBS +-LIBS = @LIBS@ ++LIBS = @LIBS@ -lopenflow + + INSTALL = @INSTALL@ + INSTALL_PROGRAM = @INSTALL_PROGRAM@ +@@ -87,7 +87,8 @@ + print-slow.c print-snmp.c print-stp.c print-sunatm.c print-sunrpc.c \ + print-symantec.c print-syslog.c print-tcp.c print-telnet.c print-tftp.c \ + print-timed.c print-token.c print-udp.c print-vjc.c print-vrrp.c \ +- print-wb.c print-zephyr.c setsignal.c tcpdump.c util.c ++ print-wb.c print-zephyr.c setsignal.c tcpdump.c util.c \ ++ print-openflow.c + + LOCALSRC = @LOCALSRC@ + GENSRC = version.c +diff -rNu tcpdump/print-openflow.c tcpdump/print-openflow.c +--- tcpdump/print-openflow.c 1969-12-31 16:00:00.000000000 -0800 ++++ tcpdump/print-openflow.c 2008-02-07 11:29:01.000000000 -0800 +@@ -0,0 +1,46 @@ ++/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ */ ++ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include ++ ++#include "interface.h" ++#include "../../include/openflow.h" ++#include "../../include/ofp-print.h" ++ ++/* Prints the TCP payload at 'sp' with ofp_print(), or "[|openflow]" if the capture is truncated. The fixed header is bounds-checked before its length field is read. */ ++void ++openflow_print(const u_char *sp, u_int length) ++{ ++ const struct ofp_header *ofp = (struct ofp_header *)sp; ++ ++ if (!TTEST2(*sp, sizeof *ofp) || !TTEST2(*sp, ntohs(ofp->length))) ++ goto trunc; ++ ++ ofp_print(stdout, sp, length, vflag); ++ return; ++ ++trunc: ++ printf("[|openflow]"); ++} +diff -rNu tcpdump/print-tcp.c tcpdump/print-tcp.c +--- tcpdump/print-tcp.c 2006-09-19 12:07:57.000000000 -0700 ++++ tcpdump/print-tcp.c 2008-02-07 13:07:58.000000000 -0800 +@@ -52,6 +52,8 @@ + + #include "nameser.h" + ++#include "../../include/openflow.h" ++ + #ifdef HAVE_LIBCRYPTO + #include + +@@ -680,7 +682,8 @@ + } + else if (length > 0 && (sport == LDP_PORT || dport == LDP_PORT)) { + ldp_print(bp, length); +- } ++ } else if (sport == OFP_TCP_PORT || dport == OFP_TCP_PORT) ++ openflow_print(bp, length); + } + return; + bad: diff --git a/utilities/.gitignore b/utilities/.gitignore new file mode 100644 index 00000000..b2c322f2 --- /dev/null +++ b/utilities/.gitignore @@ -0,0 +1,4 @@ +/Makefile +/Makefile.in +/dpctl +/vlogconf diff --git a/utilities/Makefile.am b/utilities/Makefile.am new file mode 100644 index 00000000..8367bac9 --- /dev/null +++ b/utilities/Makefile.am @@ -0,0 +1,12 @@ +include ../Make.vars + +bin_PROGRAMS = vlogconf +if HAVE_NETLINK +bin_PROGRAMS += dpctl +endif + +dpctl_SOURCES = dpctl.c +dpctl_LDADD = ../lib/libopenflow.la + +vlogconf_SOURCES = vlogconf.c +vlogconf_LDADD = ../lib/libopenflow.la diff --git a/utilities/dpctl.c b/utilities/dpctl.c new file mode 100644 index 00000000..1bf36996 --- /dev/null +++ b/utilities/dpctl.c @@ -0,0 +1,519 @@ +/* Copyright (C) 2007 Board of Trustees, Leland Stanford Jr. University. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "command-line.h" +#include "compiler.h" +#include "buffer.h" +#include "dpif.h" +#ifdef HAVE_NETLINK +#include "netlink.h" +#include "openflow-netlink.h" +#endif +#include "util.h" +#include "socket-util.h" +#include "openflow.h" +#include "ofp-print.h" +#include "vconn.h" + +#include "vlog.h" +#define THIS_MODULE VLM_DPCTL + +static const char* ifconfigbin = "/sbin/ifconfig"; + +struct command { + const char *name; + int min_args; + int max_args; + void (*handler)(int argc, char *argv[]); +}; + +static struct command all_commands[]; + +static void usage(void) NO_RETURN; +static void parse_options(int argc, char *argv[]); + +int main(int argc, char *argv[]) +{ + struct command *p; + + set_program_name(argv[0]); + vlog_init(); + parse_options(argc, argv); + + argc -= optind; + argv += optind; + if (argc < 1) + fatal(0, "missing command name; use --help for help"); + + for (p = all_commands; p->name != NULL; p++) { + if (!strcmp(p->name, argv[0])) { + int n_arg = argc - 1; + if (n_arg < p->min_args) + fatal(0, "'%s' command requires at least %d arguments", + p->name, p->min_args); + else if (n_arg > p->max_args) + fatal(0, "'%s' command takes at most %d arguments", + p->name, p->max_args); + else { + p->handler(argc, argv); + exit(0); + } + } + } + fatal(0, "unknown command '%s'; use --help for help", argv[0]); + + return 0; +} + +static void +parse_options(int argc, char *argv[]) +{ + static struct option long_options[] = { + {"verbose", optional_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + {0, 0, 0, 0}, + }; + char *short_options = long_options_to_short_options(long_options); + + for (;;) { + int indexptr; + int c; + + c = getopt_long(argc, argv, short_options, long_options, &indexptr); + if (c == -1) { + break; + } + + switch (c) { + case 'h': + usage(); + + case 'V': + printf("%s "VERSION" compiled "__DATE__" 
"__TIME__"\n", argv[0]); + exit(EXIT_SUCCESS); + + case 'v': + vlog_set_verbosity(optarg); + break; + + case '?': + exit(EXIT_FAILURE); + + default: + abort(); + } + } + free(short_options); +} + +static void +usage(void) +{ + printf("%s: Datapath Utility\n" + "usage: %s [OPTIONS] COMMAND [ARG...]\n" + "\nAvailable commands:\n" + " adddp DP_ID add a new datapath with ID DP_ID\n" + " deldp DP_ID delete datapath DP_ID\n" + " show DP show information about DP\n" + " addif DP_ID IFACE add IFACE as a port on DP_ID\n" + " delif DP_ID IFACE delete IFACE as a port on DP_ID\n" + " monitor DP_ID print packets received on DP_ID\n" + " dump-tables DP_ID print stats for all tables in DP_ID\n" + " dump-flows DP_ID T_ID print all flow entries in table T_ID of DP_ID\n" + " dump-flows DP_ID T_ID FLOW print matching FLOWs in table T_ID of DP_ID\n" + " add-flows DP FILE add flows from FILE to DP\n" + " benchmark-nl DP_ID N SIZE send N packets of SIZE bytes up netlink\n" + "\nOptions:\n" + " -v, --verbose set maximum verbosity level\n" + " -h, --help display this help message\n" + " -V, --version display version information\n", + program_name, program_name); + exit(EXIT_SUCCESS); +} + +static void run(int retval, const char *name) +{ + if (retval) { + fatal(retval, "%s", name); + } +} + +/* Brings network interface 'intf' up by running "ifconfig INTF up" through system() and returns system()'s result. 'intf' is pasted unquoted into a shell command, so it must not contain shell metacharacters. */ +static int if_up(const char* intf) +{ + char command[256]; + /* Use POSIX redirection: "&>" is a bash extension that plain sh parses as "run in background". 
*/ + snprintf(command, sizeof command, "%s %s up > /dev/null 2>&1", + ifconfigbin, intf); + return system(command); +} + +static void do_add_dp(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_add_dp(&dp), "add_dp"); + dpif_close(&dp); +} + +static void do_del_dp(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_del_dp(&dp), "del_dp"); + dpif_close(&dp); +} + +static void do_show(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_show(&dp), 
"show"); + dpif_close(&dp); +} + +static void do_add_port(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + if_up(argv[2]); + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_add_port(&dp, argv[2]), "add_port"); + dpif_close(&dp); +} + +static void do_del_port(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_del_port(&dp, argv[2]), "del_port"); + dpif_close(&dp); +} + +#define BENCHMARK_INCR 100 + +static void do_benchmark_nl(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + uint32_t num_packets, i, milestone; + struct timeval start, end; + + run(dpif_open(atoi(argv[1]), true, &dp), "dpif_open"); + num_packets = atoi(argv[2]); + milestone = BENCHMARK_INCR; + run(dpif_benchmark_nl(&dp, num_packets, atoi(argv[3])), "benchmark_nl"); + if (gettimeofday(&start, NULL) == -1) { + run(errno, "gettimeofday"); + } + for (i = 0; i < num_packets;i++) { + struct buffer *b; + run(dpif_recv_openflow(&dp, &b, true), "dpif_recv_openflow"); + if (i == milestone) { + gettimeofday(&end, NULL); + printf("%u packets received in %f ms\n", + BENCHMARK_INCR, + (1000*(double)(end.tv_sec - start.tv_sec)) + + (.001*(end.tv_usec - start.tv_usec))); + milestone += BENCHMARK_INCR; + start = end; + } + buffer_delete(b); + } + gettimeofday(&end, NULL); + printf("%u packets received in %f ms\n", + i - (milestone - BENCHMARK_INCR), + (1000*(double)(end.tv_sec - start.tv_sec)) + + (.001*(end.tv_usec - start.tv_usec))); + + dpif_close(&dp); +} + +static void do_monitor(int argc UNUSED, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), true, &dp), "dpif_open"); + for (;;) { + struct buffer *b; + run(dpif_recv_openflow(&dp, &b, true), "dpif_recv_openflow"); + ofp_print(stderr, b->data, b->size, 2); + buffer_delete(b); + } +} + +static void do_dump_tables(int argc, char *argv[]) +{ + struct dpif dp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + run(dpif_dump_tables(&dp), 
"dump_tables"); + dpif_close(&dp); +} + + +static uint32_t +str_to_int(const char *str) +{ + uint32_t value; + if (sscanf(str, "%"SCNu32, &value) != 1) { + fatal(0, "invalid numeric format %s", str); + } + return value; +} + +static void +str_to_mac(const char *str, uint8_t mac[6]) +{ + if (sscanf(str, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8, + &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) != 6) { + fatal(0, "invalid mac address %s", str); + } +} + +static void +str_to_ip(const char *str, uint32_t *ip) +{ + struct in_addr in_addr; + int retval; + + retval = lookup_ip(str, &in_addr); + if (retval) { + fatal(0, "%s: could not convert to IP address", str); + } + *ip = in_addr.s_addr; +} + +static void +str_to_action(const char *str, struct ofp_action *action) +{ + uint16_t port; + + if (!strcasecmp(str, "flood")) { + port = OFPP_FLOOD; + } else if (!strcasecmp(str, "controller")) { + port = OFPP_CONTROLLER; + } else { + port = str_to_int(str); + } + + memset(action, 0, sizeof *action); + action->type = OFPAT_OUTPUT; + action->arg.output.port = htons(port); +} + +static void +str_to_flow(char *string, struct ofp_match *match, struct ofp_action *action) +{ + struct field { + const char *name; + uint32_t wildcard; + enum { F_U8, F_U16, F_MAC, F_IP } type; + size_t offset; + }; + +#define F_OFS(MEMBER) offsetof(struct ofp_match, MEMBER) + static const struct field fields[] = { + { "in_port", OFPFW_IN_PORT, F_U16, F_OFS(in_port) }, + { "dl_vlan", OFPFW_DL_VLAN, F_U16, F_OFS(dl_vlan) }, + { "dl_src", OFPFW_DL_SRC, F_MAC, F_OFS(dl_src) }, + { "dl_dst", OFPFW_DL_DST, F_MAC, F_OFS(dl_dst) }, + { "dl_type", OFPFW_DL_TYPE, F_U16, F_OFS(dl_type) }, + { "nw_src", OFPFW_NW_SRC, F_IP, F_OFS(nw_src) }, + { "nw_dst", OFPFW_NW_DST, F_IP, F_OFS(nw_dst) }, + { "nw_proto", OFPFW_NW_PROTO, F_U8, F_OFS(nw_proto) }, + { "tp_src", OFPFW_TP_SRC, F_U16, F_OFS(tp_src) }, + { "tp_dst", OFPFW_TP_DST, F_U16, F_OFS(tp_dst) }, + }; + + char *name, *value; + uint32_t wildcards; + 
bool got_action = false; + + memset(match, 0, sizeof *match); + wildcards = OFPFW_ALL; + for (name = strtok(string, "="), value = strtok(NULL, " \t\n"); + name && value; + name = strtok(NULL, "="), value = strtok(NULL, " \t\n")) + { + const struct field *f; + void *data; + + if (action && !strcmp(name, "action")) { + got_action = true; + str_to_action(value, action); + continue; + } + + for (f = fields; f < &fields[ARRAY_SIZE(fields)]; f++) { + if (!strcmp(f->name, name)) { + goto found; + } + } + fprintf(stderr, "%s: unknown field %s (fields are", + program_name, name); + for (f = fields; f < &fields[ARRAY_SIZE(fields)]; f++) { + if (f != fields) { + putc(',', stderr); + } + fprintf(stderr, " %s", f->name); + } + fprintf(stderr, ")\n"); + exit(1); + + found: + data = (char *) match + f->offset; + if (!strcmp(value, "*")) { + wildcards |= f->wildcard; + } else { + wildcards &= ~f->wildcard; + if (f->type == F_U8) { + *(uint8_t *) data = str_to_int(value); + } else if (f->type == F_U16) { + *(uint16_t *) data = htons(str_to_int(value)); + } else if (f->type == F_MAC) { + str_to_mac(value, data); + } else if (f->type == F_IP) { + str_to_ip(value, data); + } else { + NOT_REACHED(); + } + } + } + if (name && !value) { + fatal(0, "field %s missing value", name); + } + if (action && !got_action) { + fatal(0, "must specify an action"); + } + match->wildcards = htons(wildcards); +} + +static void do_dump_flows(int argc, char *argv[]) +{ + struct dpif dp; + struct ofp_match match, *matchp; + run(dpif_open(atoi(argv[1]), false, &dp), "dpif_open"); + if (argc == 4) { + str_to_flow(argv[3], &match, NULL); + matchp = &match; + } else { + matchp = NULL; + } + run(dpif_dump_flows(&dp, atoi(argv[2]), matchp), "dump_flows"); + dpif_close(&dp); +} + +static void do_add_flows(int argc, char *argv[]) +{ + struct vconn *vconn; + char vconn_name[16]; + + FILE *file; + char line[1024]; + + int retval; + + file = fopen(argv[2], "r"); + if (file == NULL) { + fatal(errno, "%s: open", 
argv[2]); + } + + sprintf(vconn_name, "nl:%d", atoi(argv[1])); + retval = vconn_open(vconn_name, &vconn); + if (retval) { + fatal(retval, "opening datapath"); + } + + while (fgets(line, sizeof line, file)) { + struct buffer *buffer; + struct ofp_flow_mod *ofm; + size_t size; + + char *comment; + + /* Delete comments. */ + comment = strchr(line, '#'); + if (comment) { + *comment = '\0'; + } + + /* Drop empty lines. */ + if (line[strspn(line, " \t\n")] == '\0') { + continue; + } + + size = sizeof *ofm + sizeof ofm->actions[0]; + buffer = buffer_new(size); + ofm = buffer_put_uninit(buffer, size); + + /* Parse. */ + memset(ofm, 0, size); + ofm->header.type = OFPT_FLOW_MOD; + ofm->header.version = OFP_VERSION; + ofm->header.length = htons(size); + ofm->command = htons(OFPFC_ADD); + ofm->max_idle = htons(50); + ofm->buffer_id = htonl(UINT32_MAX); + ofm->group_id = htonl(0); + str_to_flow(line, &ofm->match, &ofm->actions[0]); + + retval = vconn_send_wait(vconn, buffer); + if (retval) { + fatal(retval, "sending to datapath"); + } + } + vconn_close(vconn); + fclose(file); +} + +static void do_help(int argc UNUSED, char *argv[] UNUSED) +{ + usage(); +} + +/* Command table: name, minimum and maximum argument counts, handler. */ +static struct command all_commands[] = { + { "add-dp", 1, 1, do_add_dp }, + { "adddp", 1, 1, do_add_dp }, + + { "del-dp", 1, 1, do_del_dp }, + { "deldp", 1, 1, do_del_dp }, + + { "show", 1, 1, do_show }, + + { "add-port", 2, 2, do_add_port }, + { "addif", 2, 2, do_add_port }, + + { "del-port", 2, 2, do_del_port }, + { "delif", 2, 2, do_del_port }, + + { "help", 0, INT_MAX, do_help }, + { "monitor", 1, 1, do_monitor }, + { "dump-tables", 1, 1, do_dump_tables }, + { "dump-flows", 2, 3, do_dump_flows }, + { "add-flows", 2, 2, do_add_flows }, + + { "benchmark-nl", 3, 3, do_benchmark_nl }, + + /* Sentinel: main() walks this table until it finds a NULL name. 
*/ + { NULL, 0, 0, NULL }, +}; diff --git a/utilities/vlogconf.c b/utilities/vlogconf.c new file mode 100644 index 00000000..6ffe99de --- /dev/null +++ b/utilities/vlogconf.c @@ -0,0 +1,185 @@ +#include "vlog.h" + +#include +#include +#include +#include +#include 
+#include +#include + +#include "command-line.h" +#include "compiler.h" +#include "util.h" +#include "vlog-socket.h" + +void +usage(char *prog_name, int exit_code) +{ + printf("Usage: %s [TARGET] [ACTION...]\n" + "Targets:\n" + " -a, --all Apply to all targets (default)\n" + " -t, --target=TARGET Specify target program, as a pid or an\n" + " absolute path to a Unix domain socket\n" + "Actions:\n" + " -l, --list List current settings\n" + " -s, --set=MODULE:FACILITY:LEVEL\n" + " Set MODULE and FACILITY log level to LEVEL\n" + " MODULE may be any valid module name or 'ANY'\n" + " FACILITY may be 'syslog' or 'console' or 'ANY'\n" + " LEVEL may be 'emer', 'err', 'warn', or 'dbg'\n" + " -h, --help Print this helpful information\n", + prog_name); + exit(exit_code); +} + +static char * +transact(struct vlog_client *client, const char *request, bool *ok) +{ + char *reply; + int error = vlog_client_transact(client, request, &reply); + if (error) { + fprintf(stderr, "%s: transaction error: %s\n", + vlog_client_target(client), strerror(error)); + *ok = false; + } + return reply ? 
reply : xstrdup(""); +} + +static void +transact_ack(struct vlog_client *client, const char* request, bool *ok) +{ + char *reply; + int error = vlog_client_transact(client, request, &reply); + if (error) { + fprintf(stderr, "%s: transaction error: %s\n", + vlog_client_target(client), strerror(error)); + *ok = false; + } else if (strcmp(reply, "ack")) { + fprintf(stderr, "Received unexpected reply from %s: %s\n", + vlog_client_target(client), reply); + *ok = false; + } + free(reply); +} + +/* Connects to target 'path' with vlog_client_connect() and appends the new client to the '*clients' array, growing it and incrementing '*n_clients'. On connection failure, reports the error and clears '*ok'. */ +static void +add_target(struct vlog_client ***clients, size_t *n_clients, + const char *path, bool *ok) +{ + struct vlog_client *client; + int error = vlog_client_connect(path, &client); + if (error) { + fprintf(stderr, "Error connecting to \"%s\": %s\n", + path, strerror(error)); + *ok = false; + } else { + /* Each array element is one 'struct vlog_client *'. */ + *clients = xrealloc(*clients, sizeof **clients * (*n_clients + 1)); + (*clients)[*n_clients] = client; + ++*n_clients; + } +} + +/* Calls add_target() for every entry in /tmp whose name starts with "vlogs.". If /tmp cannot be opened, reports the error, clears '*ok', and adds nothing. */ +static void +add_all_targets(struct vlog_client ***clients, size_t *n_clients, bool *ok) +{ + static const char prefix[] = "vlogs."; + DIR *directory; + struct dirent* de; + + directory = opendir("/tmp"); + if (!directory) { + fprintf(stderr, "/tmp: opendir: %s\n", strerror(errno)); + *ok = false; + return; + } + + while ((de = readdir(directory)) != NULL) { + /* Compare the whole prefix, including the trailing dot. 
*/ + if (!strncmp(de->d_name, prefix, sizeof prefix - 1)) { + char *path = xasprintf("/tmp/%s", de->d_name); + add_target(clients, n_clients, path, ok); + free(path); + } + } + + closedir(directory); +} + +int main(int argc, char *argv[]) +{ + static const struct option long_options[] = { + /* Target options must come first. */ + {"all", no_argument, NULL, 'a'}, + {"target", required_argument, NULL, 't'}, + {"help", no_argument, NULL, 'h'}, + + /* Action options come afterward. */ + {"list", no_argument, NULL, 'l'}, + {"set", required_argument, NULL, 's'}, + {0, 0, 0, 0}, + }; + char *short_options; + + /* Determine targets. 
*/ + bool ok = true; + int n_actions = 0; + struct vlog_client **clients = NULL; + size_t n_clients = 0; + + set_program_name(argv[0]); + + short_options = long_options_to_short_options(long_options); + for (;;) { + int option; + size_t i; + + option = getopt_long(argc, argv, short_options, long_options, NULL); + if (option == -1) { + break; + } + /* Only action options need a target and count as actions; target options, --help and getopt errors ('?') do not. */ + if (!strchr("ath?", option)) { + if (n_clients == 0) { + fatal(0, "no targets specified (use --help for help)"); + } + ++n_actions; + } + switch (option) { + case 'a': + add_all_targets(&clients, &n_clients, &ok); + break; + + case 't': + add_target(&clients, &n_clients, optarg, &ok); + break; + + case 'l': + for (i = 0; i < n_clients; i++) { + struct vlog_client *client = clients[i]; + char *reply; + + printf("%s:\n", vlog_client_target(client)); + reply = transact(client, "list", &ok); + fputs(reply, stdout); + free(reply); + } + break; + + case 's': + for (i = 0; i < n_clients; i++) { + struct vlog_client *client = clients[i]; + char *request = xasprintf("set %s", optarg); + transact_ack(client, request, &ok); + free(request); + } + break; + + case 'h': + usage(argv[0], EXIT_SUCCESS); + break; + + case '?': + /* Unknown option or missing argument: exit with failure instead of reaching NOT_REACHED(). */ + exit(EXIT_FAILURE); + + default: + NOT_REACHED(); + } + } + if (!n_actions) { + fprintf(stderr, + "warning: no actions specified (use --help for help)\n"); + } + exit(ok ? 0 : 1); +}